blob: 6739d913e5521221d0c8c66cb45865f1b9d10bf8 [file] [log] [blame]
Jacob Bramleyd77a8e42019-02-12 16:52:24 +00001// Copyright 2019, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7// * Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// * Neither the name of ARM Limited nor the names of its contributors may be
13// used to endorse or promote products derived from this software without
14// specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <sys/mman.h>
28
29#include <cfloat>
30#include <cmath>
31#include <cstdio>
32#include <cstdlib>
33#include <cstring>
34
35#include "test-runner.h"
36#include "test-utils.h"
37#include "aarch64/test-utils-aarch64.h"
38
39#include "aarch64/cpu-aarch64.h"
40#include "aarch64/disasm-aarch64.h"
41#include "aarch64/macro-assembler-aarch64.h"
42#include "aarch64/simulator-aarch64.h"
43#include "test-assembler-aarch64.h"
44
45namespace vixl {
46namespace aarch64 {
47
Jacob Bramleye8289202019-07-31 11:25:23 +010048Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
49 // We never free this memory, but we need it to live for as long as the static
50 // linked list of tests, and this is the easiest way to do it.
51 Test* test = new Test(name, fn);
52 test->set_sve_vl_in_bits(vl);
53 return test;
54}
55
56// The TEST_SVE macro works just like the usual TEST macro, but the resulting
57// function receives a `const Test& config` argument, to allow it to query the
58// vector length.
59#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
60// On the Simulator, run SVE tests with several vector lengths, including the
61// extreme values and an intermediate value that isn't a power of two.
62
63#define TEST_SVE(name) \
64 void Test##name(Test* config); \
65 Test* test_##name##_list[] = \
66 {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name), \
67 MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name), \
68 MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
69 void Test##name(Test* config)
70
71#define SVE_SETUP_WITH_FEATURES(...) \
72 SETUP_WITH_FEATURES(__VA_ARGS__); \
73 simulator.SetVectorLengthInBits(config->sve_vl_in_bits())
74
75#else
76// Otherwise, just use whatever the hardware provides.
77static const int kSVEVectorLengthInBits =
78 CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
79 ? CPU::ReadSVEVectorLengthInBits()
80 : 0;
81
82#define TEST_SVE(name) \
83 void Test##name(Test* config); \
84 Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits, \
85 "AARCH64_ASM_" #name "_vlauto", \
86 &Test##name); \
87 void Test##name(Test* config)
88
89#define SVE_SETUP_WITH_FEATURES(...) \
90 SETUP_WITH_FEATURES(__VA_ARGS__); \
91 USE(config)
92
93#endif
94
Jacob Bramley03c0b512019-02-22 16:42:06 +000095// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
96// is optimised for call-site clarity, not generated code quality, so it doesn't
97// exist in the MacroAssembler itself.
98//
99// Usage:
100//
101// int values[] = { 42, 43, 44 };
102// InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 }
103//
104// The rightmost (highest-indexed) array element maps to the lowest-numbered
105// lane.
106template <typename T, size_t N>
107void InsrHelper(MacroAssembler* masm,
108 const ZRegister& zdn,
109 const T (&values)[N]) {
110 for (size_t i = 0; i < N; i++) {
111 masm->Insr(zdn, values[i]);
112 }
113}
114
Jacob Bramley0ce75842019-07-17 18:12:50 +0100115// Conveniently initialise P registers with scalar bit patterns. The destination
116// lane size is ignored. This is optimised for call-site clarity, not generated
117// code quality.
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100118//
119// Usage:
120//
Jacob Bramley0ce75842019-07-17 18:12:50 +0100121// Initialise(&masm, p0, 0x1234); // Sets p0 = 0b'0001'0010'0011'0100
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100122void Initialise(MacroAssembler* masm,
Jacob Bramley0ce75842019-07-17 18:12:50 +0100123 const PRegister& pd,
124 uint64_t value3,
125 uint64_t value2,
126 uint64_t value1,
127 uint64_t value0) {
128 // Generate a literal pool, as in the array form.
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100129 UseScratchRegisterScope temps(masm);
130 Register temp = temps.AcquireX();
131 Label data;
132 Label done;
133
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100134 masm->Adr(temp, &data);
Jacob Bramley66e66712019-08-02 17:45:32 +0100135 masm->Ldr(pd, SVEMemOperand(temp));
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100136 masm->B(&done);
137 {
138 ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
139 masm->bind(&data);
Jacob Bramley0ce75842019-07-17 18:12:50 +0100140 masm->dc64(value0);
141 masm->dc64(value1);
142 masm->dc64(value2);
143 masm->dc64(value3);
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100144 }
145 masm->Bind(&done);
146}
Jacob Bramley0ce75842019-07-17 18:12:50 +0100147void Initialise(MacroAssembler* masm,
148 const PRegister& pd,
149 uint64_t value2,
150 uint64_t value1,
151 uint64_t value0) {
152 Initialise(masm, pd, 0, value2, value1, value0);
153}
154void Initialise(MacroAssembler* masm,
155 const PRegister& pd,
156 uint64_t value1,
157 uint64_t value0) {
158 Initialise(masm, pd, 0, 0, value1, value0);
159}
160void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
161 Initialise(masm, pd, 0, 0, 0, value0);
162}
163
164// Conveniently initialise P registers by lane. This is optimised for call-site
165// clarity, not generated code quality.
166//
167// Usage:
168//
169// int values[] = { 0x0, 0x1, 0x2 };
170// Initialise(&masm, p0.VnS(), values); // Sets p0 = 0b'0000'0001'0010
171//
172// The rightmost (highest-indexed) array element maps to the lowest-numbered
173// lane. Unspecified lanes are set to 0 (inactive).
174//
175// Each element of the `values` array is mapped onto a lane in `pd`. The
176// architecture only respects the lower bit, and writes zero the upper bits, but
177// other (encodable) values can be specified if required by the test.
178template <typename T, size_t N>
179void Initialise(MacroAssembler* masm,
180 const PRegisterWithLaneSize& pd,
181 const T (&values)[N]) {
182 // Turn the array into 64-bit chunks.
183 uint64_t chunks[4] = {0, 0, 0, 0};
184 VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);
185
186 int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
187 VIXL_ASSERT((64 % p_bits_per_lane) == 0);
188 VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);
189
190 uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);
191
192 VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
193 size_t bit = 0;
194 for (int n = static_cast<int>(N - 1); n >= 0; n--) {
195 VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
196 uint64_t value = values[n] & p_lane_mask;
197 chunks[bit / 64] |= value << (bit % 64);
198 bit += p_bits_per_lane;
199 }
200
201 Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
202}
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100203
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000204// Ensure that basic test infrastructure works.
Jacob Bramleye8289202019-07-31 11:25:23 +0100205TEST_SVE(sve_test_infrastructure_z) {
206 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000207 START();
208
Jacob Bramley03c0b512019-02-22 16:42:06 +0000209 __ Mov(x0, 0x0123456789abcdef);
210
211 // Test basic `Insr` behaviour.
212 __ Insr(z0.VnB(), 1);
213 __ Insr(z0.VnB(), 2);
214 __ Insr(z0.VnB(), x0);
215 __ Insr(z0.VnB(), -42);
216 __ Insr(z0.VnB(), 0);
217
218 // Test array inputs.
219 int z1_inputs[] = {3, 4, 5, -42, 0};
220 InsrHelper(&masm, z1.VnH(), z1_inputs);
221
222 // Test that sign-extension works as intended for various lane sizes.
223 __ Dup(z2.VnD(), 0); // Clear the register first.
224 __ Insr(z2.VnB(), -42); // 0xd6
225 __ Insr(z2.VnB(), 0xfe); // 0xfe
226 __ Insr(z2.VnH(), -42); // 0xffd6
227 __ Insr(z2.VnH(), 0xfedc); // 0xfedc
228 __ Insr(z2.VnS(), -42); // 0xffffffd6
229 __ Insr(z2.VnS(), 0xfedcba98); // 0xfedcba98
230 // Use another register for VnD(), so we can support 128-bit Z registers.
231 __ Insr(z3.VnD(), -42); // 0xffffffffffffffd6
232 __ Insr(z3.VnD(), 0xfedcba9876543210); // 0xfedcba9876543210
233
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000234 END();
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000235
Jacob Bramley119bd212019-04-16 10:13:09 +0100236 if (CAN_RUN()) {
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100237 RUN();
Jacob Bramley03c0b512019-02-22 16:42:06 +0000238
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100239 // Test that array checks work properly on a register initialised
240 // lane-by-lane.
241 int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
242 ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());
Jacob Bramley03c0b512019-02-22 16:42:06 +0000243
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100244 // Test that lane-by-lane checks work properly on a register initialised
245 // by array.
246 for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
247 // The rightmost (highest-indexed) array element maps to the
248 // lowest-numbered lane.
249 int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
250 ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
Jacob Bramley03c0b512019-02-22 16:42:06 +0000251 }
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100252
253 uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
254 ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
255 uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
256 ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
Jacob Bramley119bd212019-04-16 10:13:09 +0100257 }
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000258}
259
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100260// Ensure that basic test infrastructure works.
Jacob Bramleye8289202019-07-31 11:25:23 +0100261TEST_SVE(sve_test_infrastructure_p) {
262 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100263 START();
264
265 // Simple cases: move boolean (0 or 1) values.
266
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100267 int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100268 Initialise(&masm, p0.VnB(), p0_inputs);
269
270 int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
271 Initialise(&masm, p1.VnH(), p1_inputs);
272
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100273 int p2_inputs[] = {1, 1, 0, 1};
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100274 Initialise(&masm, p2.VnS(), p2_inputs);
275
276 int p3_inputs[] = {0, 1};
277 Initialise(&masm, p3.VnD(), p3_inputs);
278
279 // Advanced cases: move numeric value into architecturally-ignored bits.
280
281 // B-sized lanes get one bit in a P register, so there are no ignored bits.
282
283 // H-sized lanes get two bits in a P register.
284 int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
285 Initialise(&masm, p4.VnH(), p4_inputs);
286
287 // S-sized lanes get four bits in a P register.
288 int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
289 Initialise(&masm, p5.VnS(), p5_inputs);
290
291 // D-sized lanes get eight bits in a P register.
292 int p6_inputs[] = {0x81, 0xcc, 0x55};
293 Initialise(&masm, p6.VnD(), p6_inputs);
294
295 // The largest possible P register has 32 bytes.
296 int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
297 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
298 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
299 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
300 Initialise(&masm, p7.VnD(), p7_inputs);
301
302 END();
303
304 if (CAN_RUN()) {
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100305 RUN();
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100306
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100307 // Test that lane-by-lane checks work properly. The rightmost
308 // (highest-indexed) array element maps to the lowest-numbered lane.
309 for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
310 int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
311 ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100312 }
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100313 for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
314 int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
315 ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
316 }
317 for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
318 int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
319 ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
320 }
321 for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
322 int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
323 ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
324 }
325
326 // Test that array checks work properly on predicates initialised with a
327 // possibly-different lane size.
328 // 0b...11'10'01'00'01'10'11
329 int p4_expected[] = {0x39, 0x1b};
330 ASSERT_EQUAL_SVE(p4_expected, p4.VnD());
331
332 ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());
333
334 // 0b...10000001'11001100'01010101
335 int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
336 ASSERT_EQUAL_SVE(p6_expected, p6.VnH());
337
338 // 0b...10011100'10011101'10011110'10011111
339 int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
340 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
341 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100342 }
343}
344
Jacob Bramley935b15b2019-07-04 14:09:22 +0100345// Test that writes to V registers clear the high bits of the corresponding Z
346// register.
Jacob Bramleye8289202019-07-31 11:25:23 +0100347TEST_SVE(sve_v_write_clear) {
348 SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
349 CPUFeatures::kFP,
350 CPUFeatures::kSVE);
Jacob Bramley935b15b2019-07-04 14:09:22 +0100351 START();
352
353 // The Simulator has two mechansisms for writing V registers:
354 // - Write*Register, calling through to SimRegisterBase::Write.
355 // - LogicVRegister::ClearForWrite followed by one or more lane updates.
356 // Try to cover both variants.
357
358 // Prepare some known inputs.
359 uint8_t data[kQRegSizeInBytes];
360 for (size_t i = 0; i < kQRegSizeInBytes; i++) {
361 data[i] = 42 + i;
362 }
363 __ Mov(x10, reinterpret_cast<uintptr_t>(data));
364 __ Fmov(d30, 42.0);
365
Jacob Bramley199339d2019-08-05 18:49:13 +0100366 // Use Index to label the lane indices, so failures are easy to detect and
Jacob Bramley935b15b2019-07-04 14:09:22 +0100367 // diagnose.
368 __ Index(z0.VnB(), 0, 1);
369 __ Index(z1.VnB(), 0, 1);
370 __ Index(z2.VnB(), 0, 1);
371 __ Index(z3.VnB(), 0, 1);
372 __ Index(z4.VnB(), 0, 1);
373
374 __ Index(z10.VnB(), 0, -1);
375 __ Index(z11.VnB(), 0, -1);
376 __ Index(z12.VnB(), 0, -1);
377 __ Index(z13.VnB(), 0, -1);
378 __ Index(z14.VnB(), 0, -1);
379
380 // Instructions using Write*Register (and SimRegisterBase::Write).
381 __ Ldr(b0, MemOperand(x10));
382 __ Fcvt(h1, d30);
383 __ Fmov(s2, 1.5f);
384 __ Fmov(d3, d30);
385 __ Ldr(q4, MemOperand(x10));
386
387 // Instructions using LogicVRegister::ClearForWrite.
388 // These also (incidentally) test that across-lane instructions correctly
389 // ignore the high-order Z register lanes.
390 __ Sminv(b10, v10.V16B());
391 __ Addv(h11, v11.V4H());
392 __ Saddlv(s12, v12.V8H());
393 __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
394 __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());
395
396 END();
397
398 if (CAN_RUN()) {
399 RUN();
400
401 // Check the Q part first.
402 ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
403 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1); // 42.0 (f16)
404 ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2); // 1.5 (f32)
405 ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3); // 42.0 (f64)
406 ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
407 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10); // -15
408 // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
409 ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
410 // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
411 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
412 ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13); // [-8] x 8
413 // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
414 // + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
415 // -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
416 ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);
417
418 // Check that the upper lanes are all clear.
419 for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
420 ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
421 ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
422 ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
423 ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
424 ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
425 ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
426 ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
427 ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
428 ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
429 ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
430 }
431 }
432}
433
Jacob Bramleye8289202019-07-31 11:25:23 +0100434static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
435 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley22023df2019-05-14 17:55:43 +0100436 START();
437
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100438 int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
Jacob Bramley22023df2019-05-14 17:55:43 +0100439 int za_inputs[] = {-39, 1, -3, 2};
440 int zn_inputs[] = {-5, -20, 9, 8};
441 int zm_inputs[] = {9, -5, 4, 5};
442
443 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
444 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
445 ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
446 ZRegister zm = z3.WithLaneSize(lane_size_in_bits);
447
448 // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100449 InsrHelper(&masm, zd, zd_inputs);
Jacob Bramley22023df2019-05-14 17:55:43 +0100450 InsrHelper(&masm, za, za_inputs);
451 InsrHelper(&masm, zn, zn_inputs);
452 InsrHelper(&masm, zm, zm_inputs);
453
454 int p0_inputs[] = {1, 1, 0, 1};
455 int p1_inputs[] = {1, 0, 1, 1};
456 int p2_inputs[] = {0, 1, 1, 1};
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100457 int p3_inputs[] = {1, 1, 1, 0};
Jacob Bramley22023df2019-05-14 17:55:43 +0100458
459 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
460 Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
461 Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
462 Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);
463
464 // The Mla macro automatically selects between mla, mad and movprfx + mla
465 // based on what registers are aliased.
466 ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
467 ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
468 ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100469 ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);
Jacob Bramley22023df2019-05-14 17:55:43 +0100470
471 __ Mov(mla_da_result, za);
472 __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);
473
474 __ Mov(mla_dn_result, zn);
475 __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);
476
477 __ Mov(mla_dm_result, zm);
478 __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);
479
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100480 __ Mov(mla_d_result, zd);
481 __ Mla(mla_d_result, p3.Merging(), za, zn, zm);
Jacob Bramley22023df2019-05-14 17:55:43 +0100482
483 // The Mls macro automatically selects between mls, msb and movprfx + mls
484 // based on what registers are aliased.
485 ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
486 ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
487 ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100488 ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);
Jacob Bramley22023df2019-05-14 17:55:43 +0100489
490 __ Mov(mls_da_result, za);
491 __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);
492
493 __ Mov(mls_dn_result, zn);
494 __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);
495
496 __ Mov(mls_dm_result, zm);
497 __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);
498
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100499 __ Mov(mls_d_result, zd);
500 __ Mls(mls_d_result, p3.Merging(), za, zn, zm);
Jacob Bramley22023df2019-05-14 17:55:43 +0100501
502 END();
503
504 if (CAN_RUN()) {
505 RUN();
506
507 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
508 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
509 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));
510
511 int mla[] = {-84, 101, 33, 42};
512 int mls[] = {6, -99, -39, -38};
513
514 int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
515 ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);
516
517 int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
518 ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);
519
520 int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
521 ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);
522
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100523 int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
524 ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);
Jacob Bramley22023df2019-05-14 17:55:43 +0100525
526 int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
527 ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);
528
529 int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
530 ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);
531
532 int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
533 ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);
534
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100535 int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
536 ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
Jacob Bramley22023df2019-05-14 17:55:43 +0100537 }
538}
539
Jacob Bramleye8289202019-07-31 11:25:23 +0100540TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
541TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
542TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
543TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }
Jacob Bramley22023df2019-05-14 17:55:43 +0100544
Jacob Bramleye8289202019-07-31 11:25:23 +0100545TEST_SVE(sve_bitwise_unpredicate_logical) {
546 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chongcfb94212019-05-16 13:30:09 -0700547 START();
548
549 uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
550 InsrHelper(&masm, z8.VnD(), z8_inputs);
551 uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
552 InsrHelper(&masm, z15.VnD(), z15_inputs);
553
554 __ And(z1.VnD(), z8.VnD(), z15.VnD());
555 __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
556 __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
557 __ Orr(z4.VnD(), z8.VnD(), z15.VnD());
558
559 END();
560
561 if (CAN_RUN()) {
562 RUN();
563 uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
564 uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
565 uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
566 uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};
567
568 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
569 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
570 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
571 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
572 }
TatWai Chongcfb94212019-05-16 13:30:09 -0700573}
574
Jacob Bramleye8289202019-07-31 11:25:23 +0100575TEST_SVE(sve_predicate_logical) {
576 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chongf4fa8222019-06-17 12:08:14 -0700577 START();
578
579 // 0b...01011010'10110111
580 int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1}; // Pm
581 // 0b...11011001'01010010
582 int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0}; // Pn
583 // 0b...01010101'10110010
584 int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0}; // pg
585
586 Initialise(&masm, p10.VnB(), p10_inputs);
587 Initialise(&masm, p11.VnB(), p11_inputs);
588 Initialise(&masm, p12.VnB(), p12_inputs);
589
590 __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
591 __ Mrs(x0, NZCV);
592 __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
593 __ Mrs(x1, NZCV);
594 __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
595 __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
596 __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
597 __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
598 __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
599 __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());
600
601 END();
602
603 if (CAN_RUN()) {
604 RUN();
605
606 // 0b...01010000'00010010
607 int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
608 // 0b...00000001'00000000
609 int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
610 // 0b...00000001'10100000
611 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
612 // 0b...00000101'10100000
613 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
614 // 0b...00000100'00000000
615 int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
616 // 0b...01010101'00010010
617 int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
618 // 0b...01010001'10110010
619 int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
620 // 0b...01011011'00010111
621 int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};
622
623 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
624 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
625 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
626 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
627 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
628 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
629 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
630 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
631
TatWai Chong96713fe2019-06-04 16:39:37 -0700632 ASSERT_EQUAL_32(SVEFirstFlag, w0);
633 ASSERT_EQUAL_32(SVENotLastFlag, w1);
634 }
635}
TatWai Chongf4fa8222019-06-17 12:08:14 -0700636
Jacob Bramleye8289202019-07-31 11:25:23 +0100637TEST_SVE(sve_int_compare_vectors) {
638 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong96713fe2019-06-04 16:39:37 -0700639 START();
640
641 int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
642 int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
643 int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
644 InsrHelper(&masm, z10.VnB(), z10_inputs);
645 InsrHelper(&masm, z11.VnB(), z11_inputs);
646 Initialise(&masm, p0.VnB(), p0_inputs);
647
648 __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
649 __ Mrs(x6, NZCV);
650
651 uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
652 uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
653 int p1_inputs[] = {1, 1};
654 InsrHelper(&masm, z12.VnD(), z12_inputs);
655 InsrHelper(&masm, z13.VnD(), z13_inputs);
656 Initialise(&masm, p1.VnD(), p1_inputs);
657
658 __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
659 __ Mrs(x7, NZCV);
660
661 int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
662 int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};
663
664 int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
665 InsrHelper(&masm, z14.VnH(), z14_inputs);
666 InsrHelper(&masm, z15.VnH(), z15_inputs);
667 Initialise(&masm, p2.VnH(), p2_inputs);
668
669 __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
670 __ Mrs(x8, NZCV);
671
672 __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
673 __ Mrs(x9, NZCV);
674
675 int z16_inputs[] = {0, -1, 0, 0};
676 int z17_inputs[] = {0, 0, 2147483647, -2147483648};
677 int p3_inputs[] = {1, 1, 1, 1};
678 InsrHelper(&masm, z16.VnS(), z16_inputs);
679 InsrHelper(&masm, z17.VnS(), z17_inputs);
680 Initialise(&masm, p3.VnS(), p3_inputs);
681
682 __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
683 __ Mrs(x10, NZCV);
684
685 __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
686 __ Mrs(x11, NZCV);
687
688 // Architectural aliases testing.
689 __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB()); // HS
690 __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD()); // HI
691 __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH()); // GE
692 __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS()); // GT
693
694 END();
695
696 if (CAN_RUN()) {
697 RUN();
698
699 int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
700 for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
701 int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
702 ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
703 }
704
705 int p7_expected[] = {1, 0};
706 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
707
708 int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
709 ASSERT_EQUAL_SVE(p8_expected, p8.VnH());
710
711 int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
712 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
713
714 int p10_expected[] = {0, 0, 0, 1};
715 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
716
717 int p11_expected[] = {0, 1, 1, 1};
718 ASSERT_EQUAL_SVE(p11_expected, p11.VnS());
719
720 // Reuse the expected results to verify the architectural aliases.
721 ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
722 ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
723 ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
724 ASSERT_EQUAL_SVE(p10_expected, p15.VnS());
725
726 ASSERT_EQUAL_32(SVEFirstFlag, w6);
727 ASSERT_EQUAL_32(NoFlag, w7);
728 ASSERT_EQUAL_32(NoFlag, w8);
729 ASSERT_EQUAL_32(NoFlag, w9);
730 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
731 }
732}
733
Jacob Bramleye8289202019-07-31 11:25:23 +0100734TEST_SVE(sve_int_compare_vectors_wide_elements) {
735 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong96713fe2019-06-04 16:39:37 -0700736 START();
737
738 int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
739 int src2_inputs_1[] = {0, -1};
740 int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
741 InsrHelper(&masm, z13.VnB(), src1_inputs_1);
742 InsrHelper(&masm, z19.VnD(), src2_inputs_1);
743 Initialise(&masm, p0.VnB(), mask_inputs_1);
744
745 __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
746 __ Mrs(x2, NZCV);
747 __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
748 __ Mrs(x3, NZCV);
749
750 int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
751 int src2_inputs_2[] = {0, -32767};
752 int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
753 InsrHelper(&masm, z13.VnH(), src1_inputs_2);
754 InsrHelper(&masm, z19.VnD(), src2_inputs_2);
755 Initialise(&masm, p0.VnH(), mask_inputs_2);
756
757 __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
758 __ Mrs(x4, NZCV);
759 __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
760 __ Mrs(x5, NZCV);
761
762 int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
763 int src2_inputs_3[] = {0, -2147483648};
764 int mask_inputs_3[] = {1, 1, 1, 1};
765 InsrHelper(&masm, z13.VnS(), src1_inputs_3);
766 InsrHelper(&masm, z19.VnD(), src2_inputs_3);
767 Initialise(&masm, p0.VnS(), mask_inputs_3);
768
769 __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
770 __ Mrs(x6, NZCV);
771 __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
772 __ Mrs(x7, NZCV);
773
774 int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
775 int src2_inputs_4[] = {0x00, 0x7f};
776 int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
777 InsrHelper(&masm, z13.VnB(), src1_inputs_4);
778 InsrHelper(&masm, z19.VnD(), src2_inputs_4);
779 Initialise(&masm, p0.VnB(), mask_inputs_4);
780
781 __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
782 __ Mrs(x8, NZCV);
783 __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
784 __ Mrs(x9, NZCV);
785
786 int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
787 int src2_inputs_5[] = {0x8000, 0xffff};
788 int mask_inputs_5[] = {1, 1, 1, 1};
789 InsrHelper(&masm, z13.VnS(), src1_inputs_5);
790 InsrHelper(&masm, z19.VnD(), src2_inputs_5);
791 Initialise(&masm, p0.VnS(), mask_inputs_5);
792
793 __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
794 __ Mrs(x10, NZCV);
795 __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
796 __ Mrs(x11, NZCV);
797
798 END();
799
800 if (CAN_RUN()) {
801 RUN();
802 int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
803 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
804
805 int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
806 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
807
808 int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
809 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
810
811 int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
812 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
813
814 int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
815 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
816
817 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
818 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
819
820 int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
821 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
822
823 int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
824 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
825
826 int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
827 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
828
829 int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
830 ASSERT_EQUAL_SVE(p11_expected, p11.VnS());
831
832 ASSERT_EQUAL_32(NoFlag, w2);
833 ASSERT_EQUAL_32(NoFlag, w3);
834 ASSERT_EQUAL_32(NoFlag, w4);
835 ASSERT_EQUAL_32(SVENotLastFlag, w5);
836 ASSERT_EQUAL_32(SVEFirstFlag, w6);
837 ASSERT_EQUAL_32(SVENotLastFlag, w7);
838 ASSERT_EQUAL_32(SVEFirstFlag, w8);
839 ASSERT_EQUAL_32(SVEFirstFlag, w9);
840 ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
841 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
TatWai Chongf4fa8222019-06-17 12:08:14 -0700842 }
TatWai Chongf4fa8222019-06-17 12:08:14 -0700843}
844
Jacob Bramleye8289202019-07-31 11:25:23 +0100845TEST_SVE(sve_bitwise_imm) {
846 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chonga1885a52019-04-15 17:19:14 -0700847 START();
848
849 // clang-format off
850 uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
851 uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
852 uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
853 0x0123, 0x4567, 0x89ab, 0xcdef};
854 uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
855 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
856 // clang-format on
857
858 InsrHelper(&masm, z1.VnD(), z21_inputs);
859 InsrHelper(&masm, z2.VnS(), z22_inputs);
860 InsrHelper(&masm, z3.VnH(), z23_inputs);
861 InsrHelper(&masm, z4.VnB(), z24_inputs);
862
863 __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
864 __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
865 __ And(z3.VnH(), z3.VnH(), 0x0ff0);
866 __ And(z4.VnB(), z4.VnB(), 0x3f);
867
868 InsrHelper(&masm, z5.VnD(), z21_inputs);
869 InsrHelper(&masm, z6.VnS(), z22_inputs);
870 InsrHelper(&masm, z7.VnH(), z23_inputs);
871 InsrHelper(&masm, z8.VnB(), z24_inputs);
872
873 __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
874 __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
875 __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
876 __ Eor(z8.VnB(), z8.VnB(), 0x3f);
877
878 InsrHelper(&masm, z9.VnD(), z21_inputs);
879 InsrHelper(&masm, z10.VnS(), z22_inputs);
880 InsrHelper(&masm, z11.VnH(), z23_inputs);
881 InsrHelper(&masm, z12.VnB(), z24_inputs);
882
883 __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
884 __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
885 __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
886 __ Orr(z12.VnB(), z12.VnB(), 0x3f);
887
Jacob Bramley6069fd42019-06-24 10:20:45 +0100888 {
889 // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
890 // so here we test `dupm` directly.
891 ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
892 __ dupm(z13.VnD(), 0x7ffffff800000000);
893 __ dupm(z14.VnS(), 0x7ffc7ffc);
894 __ dupm(z15.VnH(), 0x3ffc);
895 __ dupm(z16.VnB(), 0xc3);
896 }
TatWai Chonga1885a52019-04-15 17:19:14 -0700897
898 END();
899
900 if (CAN_RUN()) {
901 RUN();
902
903 // clang-format off
904 uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
905 uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
906 uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
907 0x0120, 0x0560, 0x09a0, 0x0de0};
908 uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
909 0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};
910
911 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
912 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
913 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
914 ASSERT_EQUAL_SVE(z4_expected, z4.VnB());
915
916 uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
917 uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
918 uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
919 0x0ed3, 0x4a97, 0x865b, 0xc21f};
920 uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
921 0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};
922
923 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
924 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
925 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
926 ASSERT_EQUAL_SVE(z8_expected, z8.VnB());
927
928 uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
929 uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
930 uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
931 0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
932 uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
933 0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};
934
935 ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
936 ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
937 ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
938 ASSERT_EQUAL_SVE(z12_expected, z12.VnB());
939
940 uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
941 uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
942 uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
943 0x3ffc, 0x3ffc, 0x3ffc ,0x3ffc};
944 ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
945 ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
946 ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
947 // clang-format on
948 }
TatWai Chonga1885a52019-04-15 17:19:14 -0700949}
950
Jacob Bramleye8289202019-07-31 11:25:23 +0100951TEST_SVE(sve_dup_imm) {
Jacob Bramley6069fd42019-06-24 10:20:45 +0100952 // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
953 // unencodable immediates.
954
Jacob Bramleye8289202019-07-31 11:25:23 +0100955 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6069fd42019-06-24 10:20:45 +0100956 START();
957
958 // Encodable with `dup` (shift 0).
959 __ Dup(z0.VnD(), -1);
960 __ Dup(z1.VnS(), 0x7f);
961 __ Dup(z2.VnH(), -0x80);
962 __ Dup(z3.VnB(), 42);
963
964 // Encodable with `dup` (shift 8).
TatWai Chong6995bfd2019-09-26 10:48:05 +0100965 __ Dup(z4.VnD(), -42 * 256);
966 __ Dup(z5.VnS(), -0x8000);
967 __ Dup(z6.VnH(), 0x7f00);
Jacob Bramley6069fd42019-06-24 10:20:45 +0100968 // B-sized lanes cannot take a shift of 8.
969
970 // Encodable with `dupm` (but not `dup`).
971 __ Dup(z10.VnD(), 0x3fc);
972 __ Dup(z11.VnS(), -516097); // 0xfff81fff, as a signed int.
973 __ Dup(z12.VnH(), 0x0001);
974 // All values that fit B-sized lanes are encodable with `dup`.
975
976 // Cases that require immediate synthesis.
977 __ Dup(z20.VnD(), 0x1234);
978 __ Dup(z21.VnD(), -4242);
979 __ Dup(z22.VnD(), 0xfedcba9876543210);
980 __ Dup(z23.VnS(), 0x01020304);
981 __ Dup(z24.VnS(), -0x01020304);
982 __ Dup(z25.VnH(), 0x3c38);
983 // All values that fit B-sized lanes are directly encodable.
984
985 END();
986
987 if (CAN_RUN()) {
988 RUN();
989
990 ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
991 ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
992 ASSERT_EQUAL_SVE(0xff80, z2.VnH());
993 ASSERT_EQUAL_SVE(0x2a, z3.VnB());
994
TatWai Chong6995bfd2019-09-26 10:48:05 +0100995 ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
996 ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
997 ASSERT_EQUAL_SVE(0x7f00, z6.VnH());
Jacob Bramley6069fd42019-06-24 10:20:45 +0100998
999 ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
1000 ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
1001 ASSERT_EQUAL_SVE(0x0001, z12.VnH());
1002
1003 ASSERT_EQUAL_SVE(0x1234, z20.VnD());
1004 ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
1005 ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
1006 ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
1007 ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
1008 ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
1009 }
1010}
1011
Jacob Bramleye8289202019-07-31 11:25:23 +01001012TEST_SVE(sve_inc_dec_p_scalar) {
1013 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001014 START();
1015
1016 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1017 Initialise(&masm, p0.VnB(), p0_inputs);
1018
1019 int p0_b_count = 9;
1020 int p0_h_count = 5;
1021 int p0_s_count = 3;
1022 int p0_d_count = 2;
1023
1024 // 64-bit operations preserve their high bits.
1025 __ Mov(x0, 0x123456780000002a);
1026 __ Decp(x0, p0.VnB());
1027
1028 __ Mov(x1, 0x123456780000002a);
1029 __ Incp(x1, p0.VnH());
1030
1031 // Check that saturation does not occur.
1032 __ Mov(x10, 1);
1033 __ Decp(x10, p0.VnS());
1034
1035 __ Mov(x11, UINT64_MAX);
1036 __ Incp(x11, p0.VnD());
1037
1038 __ Mov(x12, INT64_MAX);
1039 __ Incp(x12, p0.VnB());
1040
1041 // With an all-true predicate, these instructions increment or decrement by
1042 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001043 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001044
1045 __ Mov(x20, 0x4000000000000000);
1046 __ Decp(x20, p15.VnB());
1047
1048 __ Mov(x21, 0x4000000000000000);
1049 __ Incp(x21, p15.VnH());
1050
1051 END();
1052 if (CAN_RUN()) {
1053 RUN();
1054
1055 ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
1056 ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);
1057
1058 ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
1059 ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
1060 ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);
1061
1062 ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
1063 ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
1064 }
1065}
1066
Jacob Bramleye8289202019-07-31 11:25:23 +01001067TEST_SVE(sve_sqinc_sqdec_p_scalar) {
1068 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001069 START();
1070
1071 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1072 Initialise(&masm, p0.VnB(), p0_inputs);
1073
1074 int p0_b_count = 9;
1075 int p0_h_count = 5;
1076 int p0_s_count = 3;
1077 int p0_d_count = 2;
1078
1079 uint64_t dummy_high = 0x1234567800000000;
1080
1081 // 64-bit operations preserve their high bits.
1082 __ Mov(x0, dummy_high + 42);
1083 __ Sqdecp(x0, p0.VnB());
1084
1085 __ Mov(x1, dummy_high + 42);
1086 __ Sqincp(x1, p0.VnH());
1087
1088 // 32-bit operations sign-extend into their high bits.
1089 __ Mov(x2, dummy_high + 42);
1090 __ Sqdecp(x2, p0.VnS(), w2);
1091
1092 __ Mov(x3, dummy_high + 42);
1093 __ Sqincp(x3, p0.VnD(), w3);
1094
1095 __ Mov(x4, dummy_high + 1);
1096 __ Sqdecp(x4, p0.VnS(), w4);
1097
1098 __ Mov(x5, dummy_high - 1);
1099 __ Sqincp(x5, p0.VnD(), w5);
1100
1101 // Check that saturation behaves correctly.
1102 __ Mov(x10, 0x8000000000000001); // INT64_MIN + 1
1103 __ Sqdecp(x10, p0.VnB(), x10);
1104
1105 __ Mov(x11, dummy_high + 0x80000001); // INT32_MIN + 1
1106 __ Sqdecp(x11, p0.VnH(), w11);
1107
1108 __ Mov(x12, 1);
1109 __ Sqdecp(x12, p0.VnS(), x12);
1110
1111 __ Mov(x13, dummy_high + 1);
1112 __ Sqdecp(x13, p0.VnD(), w13);
1113
1114 __ Mov(x14, 0x7ffffffffffffffe); // INT64_MAX - 1
1115 __ Sqincp(x14, p0.VnB(), x14);
1116
1117 __ Mov(x15, dummy_high + 0x7ffffffe); // INT32_MAX - 1
1118 __ Sqincp(x15, p0.VnH(), w15);
1119
1120 // Don't use x16 and x17 since they are scratch registers by default.
1121
1122 __ Mov(x18, 0xffffffffffffffff);
1123 __ Sqincp(x18, p0.VnS(), x18);
1124
1125 __ Mov(x19, dummy_high + 0xffffffff);
1126 __ Sqincp(x19, p0.VnD(), w19);
1127
1128 __ Mov(x20, dummy_high + 0xffffffff);
1129 __ Sqdecp(x20, p0.VnB(), w20);
1130
1131 // With an all-true predicate, these instructions increment or decrement by
1132 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001133 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001134
1135 __ Mov(x21, 0);
1136 __ Sqdecp(x21, p15.VnB(), x21);
1137
1138 __ Mov(x22, 0);
1139 __ Sqincp(x22, p15.VnH(), x22);
1140
1141 __ Mov(x23, dummy_high);
1142 __ Sqdecp(x23, p15.VnS(), w23);
1143
1144 __ Mov(x24, dummy_high);
1145 __ Sqincp(x24, p15.VnD(), w24);
1146
1147 END();
1148 if (CAN_RUN()) {
1149 RUN();
1150
1151 // 64-bit operations preserve their high bits.
1152 ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
1153 ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);
1154
1155 // 32-bit operations sign-extend into their high bits.
1156 ASSERT_EQUAL_64(42 - p0_s_count, x2);
1157 ASSERT_EQUAL_64(42 + p0_d_count, x3);
1158 ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
1159 ASSERT_EQUAL_64(p0_d_count - 1, x5);
1160
1161 // Check that saturation behaves correctly.
1162 ASSERT_EQUAL_64(INT64_MIN, x10);
1163 ASSERT_EQUAL_64(INT32_MIN, x11);
1164 ASSERT_EQUAL_64(1 - p0_s_count, x12);
1165 ASSERT_EQUAL_64(1 - p0_d_count, x13);
1166 ASSERT_EQUAL_64(INT64_MAX, x14);
1167 ASSERT_EQUAL_64(INT32_MAX, x15);
1168 ASSERT_EQUAL_64(p0_s_count - 1, x18);
1169 ASSERT_EQUAL_64(p0_d_count - 1, x19);
1170 ASSERT_EQUAL_64(-1 - p0_b_count, x20);
1171
1172 // Check all-true predicates.
1173 ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
1174 ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
1175 ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
1176 ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
1177 }
1178}
1179
Jacob Bramleye8289202019-07-31 11:25:23 +01001180TEST_SVE(sve_uqinc_uqdec_p_scalar) {
1181 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001182 START();
1183
1184 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1185 Initialise(&masm, p0.VnB(), p0_inputs);
1186
1187 int p0_b_count = 9;
1188 int p0_h_count = 5;
1189 int p0_s_count = 3;
1190 int p0_d_count = 2;
1191
1192 uint64_t dummy_high = 0x1234567800000000;
1193
1194 // 64-bit operations preserve their high bits.
1195 __ Mov(x0, dummy_high + 42);
1196 __ Uqdecp(x0, p0.VnB());
1197
1198 __ Mov(x1, dummy_high + 42);
1199 __ Uqincp(x1, p0.VnH());
1200
1201 // 32-bit operations zero-extend into their high bits.
1202 __ Mov(x2, dummy_high + 42);
1203 __ Uqdecp(x2, p0.VnS(), w2);
1204
1205 __ Mov(x3, dummy_high + 42);
1206 __ Uqincp(x3, p0.VnD(), w3);
1207
1208 __ Mov(x4, dummy_high + 0x80000001);
1209 __ Uqdecp(x4, p0.VnS(), w4);
1210
1211 __ Mov(x5, dummy_high + 0x7fffffff);
1212 __ Uqincp(x5, p0.VnD(), w5);
1213
1214 // Check that saturation behaves correctly.
1215 __ Mov(x10, 1);
1216 __ Uqdecp(x10, p0.VnB(), x10);
1217
1218 __ Mov(x11, dummy_high + 1);
1219 __ Uqdecp(x11, p0.VnH(), w11);
1220
1221 __ Mov(x12, 0x8000000000000000); // INT64_MAX + 1
1222 __ Uqdecp(x12, p0.VnS(), x12);
1223
1224 __ Mov(x13, dummy_high + 0x80000000); // INT32_MAX + 1
1225 __ Uqdecp(x13, p0.VnD(), w13);
1226
1227 __ Mov(x14, 0xfffffffffffffffe); // UINT64_MAX - 1
1228 __ Uqincp(x14, p0.VnB(), x14);
1229
1230 __ Mov(x15, dummy_high + 0xfffffffe); // UINT32_MAX - 1
1231 __ Uqincp(x15, p0.VnH(), w15);
1232
1233 // Don't use x16 and x17 since they are scratch registers by default.
1234
1235 __ Mov(x18, 0x7ffffffffffffffe); // INT64_MAX - 1
1236 __ Uqincp(x18, p0.VnS(), x18);
1237
1238 __ Mov(x19, dummy_high + 0x7ffffffe); // INT32_MAX - 1
1239 __ Uqincp(x19, p0.VnD(), w19);
1240
1241 // With an all-true predicate, these instructions increment or decrement by
1242 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001243 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001244
1245 __ Mov(x20, 0x4000000000000000);
1246 __ Uqdecp(x20, p15.VnB(), x20);
1247
1248 __ Mov(x21, 0x4000000000000000);
1249 __ Uqincp(x21, p15.VnH(), x21);
1250
1251 __ Mov(x22, dummy_high + 0x40000000);
1252 __ Uqdecp(x22, p15.VnS(), w22);
1253
1254 __ Mov(x23, dummy_high + 0x40000000);
1255 __ Uqincp(x23, p15.VnD(), w23);
1256
1257 END();
1258 if (CAN_RUN()) {
1259 RUN();
1260
1261 // 64-bit operations preserve their high bits.
1262 ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
1263 ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);
1264
1265 // 32-bit operations zero-extend into their high bits.
1266 ASSERT_EQUAL_64(42 - p0_s_count, x2);
1267 ASSERT_EQUAL_64(42 + p0_d_count, x3);
1268 ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
1269 ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);
1270
1271 // Check that saturation behaves correctly.
1272 ASSERT_EQUAL_64(0, x10);
1273 ASSERT_EQUAL_64(0, x11);
1274 ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
1275 ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
1276 ASSERT_EQUAL_64(UINT64_MAX, x14);
1277 ASSERT_EQUAL_64(UINT32_MAX, x15);
1278 ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
1279 ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);
1280
1281 // Check all-true predicates.
1282 ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
1283 ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
1284 ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
1285 ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
1286 }
1287}
1288
Jacob Bramleye8289202019-07-31 11:25:23 +01001289TEST_SVE(sve_inc_dec_p_vector) {
1290 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001291 START();
1292
1293 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1294 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1295 Initialise(&masm, p0.VnB(), p0_inputs);
1296
1297 // Check that saturation does not occur.
1298
1299 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1300 InsrHelper(&masm, z0.VnD(), z0_inputs);
1301
1302 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1303 InsrHelper(&masm, z1.VnD(), z1_inputs);
1304
1305 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1306 InsrHelper(&masm, z2.VnS(), z2_inputs);
1307
1308 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1309 InsrHelper(&masm, z3.VnH(), z3_inputs);
1310
1311 // The MacroAssembler implements non-destructive operations using movprfx.
1312 __ Decp(z10.VnD(), p0, z0.VnD());
1313 __ Decp(z11.VnD(), p0, z1.VnD());
1314 __ Decp(z12.VnS(), p0, z2.VnS());
1315 __ Decp(z13.VnH(), p0, z3.VnH());
1316
1317 __ Incp(z14.VnD(), p0, z0.VnD());
1318 __ Incp(z15.VnD(), p0, z1.VnD());
1319 __ Incp(z16.VnS(), p0, z2.VnS());
1320 __ Incp(z17.VnH(), p0, z3.VnH());
1321
1322 // Also test destructive forms.
1323 __ Mov(z4, z0);
1324 __ Mov(z5, z1);
1325 __ Mov(z6, z2);
1326 __ Mov(z7, z3);
1327
1328 __ Decp(z0.VnD(), p0);
1329 __ Decp(z1.VnD(), p0);
1330 __ Decp(z2.VnS(), p0);
1331 __ Decp(z3.VnH(), p0);
1332
1333 __ Incp(z4.VnD(), p0);
1334 __ Incp(z5.VnD(), p0);
1335 __ Incp(z6.VnS(), p0);
1336 __ Incp(z7.VnH(), p0);
1337
1338 END();
1339 if (CAN_RUN()) {
1340 RUN();
1341
1342 // z0_inputs[...] - number of active D lanes (2)
1343 int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
1344 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1345
1346 // z1_inputs[...] - number of active D lanes (2)
1347 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1348 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1349
1350 // z2_inputs[...] - number of active S lanes (3)
1351 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
1352 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1353
1354 // z3_inputs[...] - number of active H lanes (5)
1355 int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
1356 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1357
1358 // z0_inputs[...] + number of active D lanes (2)
1359 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1360 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1361
1362 // z1_inputs[...] + number of active D lanes (2)
1363 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
1364 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1365
1366 // z2_inputs[...] + number of active S lanes (3)
1367 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
1368 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1369
1370 // z3_inputs[...] + number of active H lanes (5)
1371 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
1372 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1373
1374 // Check that the non-destructive macros produced the same results.
1375 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1376 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1377 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1378 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1379 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1380 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1381 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1382 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1383 }
1384}
1385
Jacob Bramleye8289202019-07-31 11:25:23 +01001386TEST_SVE(sve_inc_dec_ptrue_vector) {
1387 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001388 START();
1389
1390 // With an all-true predicate, these instructions increment or decrement by
1391 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001392 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001393
1394 __ Dup(z0.VnD(), 0);
1395 __ Decp(z0.VnD(), p15);
1396
1397 __ Dup(z1.VnS(), 0);
1398 __ Decp(z1.VnS(), p15);
1399
1400 __ Dup(z2.VnH(), 0);
1401 __ Decp(z2.VnH(), p15);
1402
1403 __ Dup(z3.VnD(), 0);
1404 __ Incp(z3.VnD(), p15);
1405
1406 __ Dup(z4.VnS(), 0);
1407 __ Incp(z4.VnS(), p15);
1408
1409 __ Dup(z5.VnH(), 0);
1410 __ Incp(z5.VnH(), p15);
1411
1412 END();
1413 if (CAN_RUN()) {
1414 RUN();
1415
1416 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1417 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1418 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1419
1420 for (int i = 0; i < d_lane_count; i++) {
1421 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1422 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1423 }
1424
1425 for (int i = 0; i < s_lane_count; i++) {
1426 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1427 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1428 }
1429
1430 for (int i = 0; i < h_lane_count; i++) {
1431 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1432 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1433 }
1434 }
1435}
1436
Jacob Bramleye8289202019-07-31 11:25:23 +01001437TEST_SVE(sve_sqinc_sqdec_p_vector) {
1438 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001439 START();
1440
1441 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1442 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1443 Initialise(&masm, p0.VnB(), p0_inputs);
1444
1445 // Check that saturation behaves correctly.
1446
1447 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1448 InsrHelper(&masm, z0.VnD(), z0_inputs);
1449
1450 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1451 InsrHelper(&masm, z1.VnD(), z1_inputs);
1452
1453 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1454 InsrHelper(&masm, z2.VnS(), z2_inputs);
1455
1456 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1457 InsrHelper(&masm, z3.VnH(), z3_inputs);
1458
1459 // The MacroAssembler implements non-destructive operations using movprfx.
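// (For instance, `Sqdecp(z10.VnD(), p0, z0.VnD())` would be expected to expand
// to `movprfx z10, z0` followed by `sqdecp z10.d, p0`.)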
1460 __ Sqdecp(z10.VnD(), p0, z0.VnD());
1461 __ Sqdecp(z11.VnD(), p0, z1.VnD());
1462 __ Sqdecp(z12.VnS(), p0, z2.VnS());
1463 __ Sqdecp(z13.VnH(), p0, z3.VnH());
1464
1465 __ Sqincp(z14.VnD(), p0, z0.VnD());
1466 __ Sqincp(z15.VnD(), p0, z1.VnD());
1467 __ Sqincp(z16.VnS(), p0, z2.VnS());
1468 __ Sqincp(z17.VnH(), p0, z3.VnH());
1469
1470 // Also test destructive forms.
1471 __ Mov(z4, z0);
1472 __ Mov(z5, z1);
1473 __ Mov(z6, z2);
1474 __ Mov(z7, z3);
1475
1476 __ Sqdecp(z0.VnD(), p0);
1477 __ Sqdecp(z1.VnD(), p0);
1478 __ Sqdecp(z2.VnS(), p0);
1479 __ Sqdecp(z3.VnH(), p0);
1480
1481 __ Sqincp(z4.VnD(), p0);
1482 __ Sqincp(z5.VnD(), p0);
1483 __ Sqincp(z6.VnS(), p0);
1484 __ Sqincp(z7.VnH(), p0);
1485
1486 END();
1487 if (CAN_RUN()) {
1488 RUN();
1489
1490 // z0_inputs[...] - number of active D lanes (2)
1491 int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
1492 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1493
1494 // z1_inputs[...] - number of active D lanes (2)
1495 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1496 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1497
1498 // z2_inputs[...] - number of active S lanes (3)
1499 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
1500 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1501
1502 // z3_inputs[...] - number of active H lanes (5)
1503 int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
1504 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1505
1506 // z0_inputs[...] + number of active D lanes (2)
1507 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1508 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1509
1510 // z1_inputs[...] + number of active D lanes (2)
1511 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
1512 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1513
1514 // z2_inputs[...] + number of active S lanes (3)
1515 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
1516 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1517
1518 // z3_inputs[...] + number of active H lanes (5)
1519 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
1520 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1521
1522 // Check that the non-destructive macros produced the same results.
1523 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1524 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1525 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1526 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1527 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1528 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1529 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1530 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1531 }
1532}
1533
Jacob Bramleye8289202019-07-31 11:25:23 +01001534TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
1535 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001536 START();
1537
1538 // With an all-true predicate, these instructions increment or decrement by
1539 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001540 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001541
1542 __ Dup(z0.VnD(), 0);
1543 __ Sqdecp(z0.VnD(), p15);
1544
1545 __ Dup(z1.VnS(), 0);
1546 __ Sqdecp(z1.VnS(), p15);
1547
1548 __ Dup(z2.VnH(), 0);
1549 __ Sqdecp(z2.VnH(), p15);
1550
1551 __ Dup(z3.VnD(), 0);
1552 __ Sqincp(z3.VnD(), p15);
1553
1554 __ Dup(z4.VnS(), 0);
1555 __ Sqincp(z4.VnS(), p15);
1556
1557 __ Dup(z5.VnH(), 0);
1558 __ Sqincp(z5.VnH(), p15);
1559
1560 END();
1561 if (CAN_RUN()) {
1562 RUN();
1563
1564 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1565 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1566 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1567
1568 for (int i = 0; i < d_lane_count; i++) {
1569 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1570 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1571 }
1572
1573 for (int i = 0; i < s_lane_count; i++) {
1574 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1575 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1576 }
1577
1578 for (int i = 0; i < h_lane_count; i++) {
1579 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1580 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1581 }
1582 }
1583}
1584
Jacob Bramleye8289202019-07-31 11:25:23 +01001585TEST_SVE(sve_uqinc_uqdec_p_vector) {
1586 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001587 START();
1588
1589 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1590 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1591 Initialise(&masm, p0.VnB(), p0_inputs);
1592
1593 // Check that saturation behaves correctly.
1594
1595 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
1596 InsrHelper(&masm, z0.VnD(), z0_inputs);
1597
1598 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
1599 InsrHelper(&masm, z1.VnD(), z1_inputs);
1600
1601 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
1602 InsrHelper(&masm, z2.VnS(), z2_inputs);
1603
1604 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
1605 InsrHelper(&masm, z3.VnH(), z3_inputs);
1606
1607 // The MacroAssembler implements non-destructive operations using movprfx.
1608 __ Uqdecp(z10.VnD(), p0, z0.VnD());
1609 __ Uqdecp(z11.VnD(), p0, z1.VnD());
1610 __ Uqdecp(z12.VnS(), p0, z2.VnS());
1611 __ Uqdecp(z13.VnH(), p0, z3.VnH());
1612
1613 __ Uqincp(z14.VnD(), p0, z0.VnD());
1614 __ Uqincp(z15.VnD(), p0, z1.VnD());
1615 __ Uqincp(z16.VnS(), p0, z2.VnS());
1616 __ Uqincp(z17.VnH(), p0, z3.VnH());
1617
1618 // Also test destructive forms.
1619 __ Mov(z4, z0);
1620 __ Mov(z5, z1);
1621 __ Mov(z6, z2);
1622 __ Mov(z7, z3);
1623
1624 __ Uqdecp(z0.VnD(), p0);
1625 __ Uqdecp(z1.VnD(), p0);
1626 __ Uqdecp(z2.VnS(), p0);
1627 __ Uqdecp(z3.VnH(), p0);
1628
1629 __ Uqincp(z4.VnD(), p0);
1630 __ Uqincp(z5.VnD(), p0);
1631 __ Uqincp(z6.VnS(), p0);
1632 __ Uqincp(z7.VnH(), p0);
1633
1634 END();
1635 if (CAN_RUN()) {
1636 RUN();
1637
1638 // z0_inputs[...] - number of active D lanes (2)
1639 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
1640 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1641
1642 // z1_inputs[...] - number of active D lanes (2)
1643 uint64_t z1_expected[] = {0x12345678ffffff28,
1644 0,
1645 0xfffffffffffffffd,
1646 0x7ffffffffffffffd};
1647 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1648
1649 // z2_inputs[...] - number of active S lanes (3)
1650 uint32_t z2_expected[] =
1651 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
1652 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1653
1654 // z3_inputs[...] - number of active H lanes (5)
1655 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
1656 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1657
1658 // z0_inputs[...] + number of active D lanes (2)
1659 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1660 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1661
1662 // z1_inputs[...] + number of active D lanes (2)
1663 uint64_t z5_expected[] = {0x12345678ffffff2c,
1664 2,
1665 UINT64_MAX,
1666 0x8000000000000001};
1667 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1668
1669 // z2_inputs[...] + number of active S lanes (3)
1670 uint32_t z6_expected[] =
1671 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
1672 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1673
1674 // z3_inputs[...] + number of active H lanes (5)
1675 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
1676 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1677
1678 // Check that the non-destructive macros produced the same results.
1679 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1680 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1681 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1682 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1683 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1684 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1685 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1686 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1687 }
1688}
1689
Jacob Bramleye8289202019-07-31 11:25:23 +01001690TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
1691 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001692 START();
1693
1694 // With an all-true predicate, these instructions increment or decrement by
1695 // the vector length.
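// Nonzero start values are used here so that Uqdecp has room to decrement
// without saturating at zero.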
Jacob Bramley0ce75842019-07-17 18:12:50 +01001696 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001697
1698 __ Mov(x0, 0x1234567800000000);
1699 __ Mov(x1, 0x12340000);
1700 __ Mov(x2, 0x1200);
1701
1702 __ Dup(z0.VnD(), x0);
1703 __ Uqdecp(z0.VnD(), p15);
1704
1705 __ Dup(z1.VnS(), x1);
1706 __ Uqdecp(z1.VnS(), p15);
1707
1708 __ Dup(z2.VnH(), x2);
1709 __ Uqdecp(z2.VnH(), p15);
1710
1711 __ Dup(z3.VnD(), x0);
1712 __ Uqincp(z3.VnD(), p15);
1713
1714 __ Dup(z4.VnS(), x1);
1715 __ Uqincp(z4.VnS(), p15);
1716
1717 __ Dup(z5.VnH(), x2);
1718 __ Uqincp(z5.VnH(), p15);
1719
1720 END();
1721 if (CAN_RUN()) {
1722 RUN();
1723
1724 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1725 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1726 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1727
1728 for (int i = 0; i < d_lane_count; i++) {
1729 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
1730 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
1731 }
1732
1733 for (int i = 0; i < s_lane_count; i++) {
1734 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
1735 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
1736 }
1737
1738 for (int i = 0; i < h_lane_count; i++) {
1739 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
1740 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
1741 }
1742 }
1743}
1744
Jacob Bramleye8289202019-07-31 11:25:23 +01001745TEST_SVE(sve_index) {
1746 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleycd8148c2019-07-11 18:43:20 +01001747 START();
1748
1749 // Simple cases.
1750 __ Index(z0.VnB(), 0, 1);
1751 __ Index(z1.VnH(), 1, 1);
1752 __ Index(z2.VnS(), 2, 1);
1753 __ Index(z3.VnD(), 3, 1);
1754
1755 // Synthesised immediates.
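// (INDEX itself only encodes 5-bit signed immediates, i.e. values in [-16, 15];
// out-of-range values like 42 have to be synthesised by the macro.)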
1756 __ Index(z4.VnB(), 42, -1);
1757 __ Index(z5.VnH(), -1, 42);
1758 __ Index(z6.VnS(), 42, 42);
1759
1760 // Register arguments.
1761 __ Mov(x0, 42);
1762 __ Mov(x1, -3);
1763 __ Index(z10.VnD(), x0, x1);
1764 __ Index(z11.VnB(), w0, w1);
1765 // The register size should correspond to the lane size, but VIXL allows any
1766 // register at least as big as the lane size.
1767 __ Index(z12.VnB(), x0, x1);
1768 __ Index(z13.VnH(), w0, x1);
1769 __ Index(z14.VnS(), x0, w1);
1770
1771 // Integer overflow.
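// (INDEX does not saturate; the results simply wrap modulo the lane size, as
// the expected values below show.)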
1772 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
1773 __ Index(z21.VnH(), 7, -3);
1774 __ Index(z22.VnS(), INT32_MAX - 2, 1);
1775 __ Index(z23.VnD(), INT64_MIN + 6, -7);
1776
1777 END();
1778
1779 if (CAN_RUN()) {
1780 RUN();
1781
1782 int b_lane_count = core.GetSVELaneCount(kBRegSize);
1783 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1784 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1785 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1786
1787 uint64_t b_mask = GetUintMask(kBRegSize);
1788 uint64_t h_mask = GetUintMask(kHRegSize);
1789 uint64_t s_mask = GetUintMask(kSRegSize);
1790 uint64_t d_mask = GetUintMask(kDRegSize);
1791
1792 // Simple cases.
1793 for (int i = 0; i < b_lane_count; i++) {
1794 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
1795 }
1796 for (int i = 0; i < h_lane_count; i++) {
1797 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
1798 }
1799 for (int i = 0; i < s_lane_count; i++) {
1800 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
1801 }
1802 for (int i = 0; i < d_lane_count; i++) {
1803 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
1804 }
1805
1806 // Synthesised immediates.
1807 for (int i = 0; i < b_lane_count; i++) {
1808 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
1809 }
1810 for (int i = 0; i < h_lane_count; i++) {
1811 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
1812 }
1813 for (int i = 0; i < s_lane_count; i++) {
1814 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
1815 }
1816
1817 // Register arguments.
1818 for (int i = 0; i < d_lane_count; i++) {
1819 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
1820 }
1821 for (int i = 0; i < b_lane_count; i++) {
1822 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
1823 }
1824 for (int i = 0; i < b_lane_count; i++) {
1825 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
1826 }
1827 for (int i = 0; i < h_lane_count; i++) {
1828 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
1829 }
1830 for (int i = 0; i < s_lane_count; i++) {
1831 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
1832 }
1833
1834 // Integer overflow.
1835 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
1836 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
1837 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
1838 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
1839 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
1840 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
1841 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
1842 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
1843 }
1844}
1845
TatWai Chongc844bb22019-06-10 15:32:53 -07001846TEST(sve_int_compare_count_and_limit_scalars) {
1847 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1848 START();
1849
1850 __ Mov(w20, 0xfffffffd);
1851 __ Mov(w21, 0xffffffff);
1852
1853 __ Whilele(p0.VnB(), w20, w21);
1854 __ Mrs(x0, NZCV);
1855 __ Whilele(p1.VnH(), w20, w21);
1856 __ Mrs(x1, NZCV);
1857
1858 __ Mov(w20, 0xffffffff);
1859 __ Mov(w21, 0x00000000);
1860
1861 __ Whilelt(p2.VnS(), w20, w21);
1862 __ Mrs(x2, NZCV);
1863 __ Whilelt(p3.VnD(), w20, w21);
1864 __ Mrs(x3, NZCV);
1865
1866 __ Mov(w20, 0xfffffffd);
1867 __ Mov(w21, 0xffffffff);
1868
1869 __ Whilels(p4.VnB(), w20, w21);
1870 __ Mrs(x4, NZCV);
1871 __ Whilels(p5.VnH(), w20, w21);
1872 __ Mrs(x5, NZCV);
1873
1874 __ Mov(w20, 0xffffffff);
1875 __ Mov(w21, 0x00000000);
1876
1877 __ Whilelo(p6.VnS(), w20, w21);
1878 __ Mrs(x6, NZCV);
1879 __ Whilelo(p7.VnD(), w20, w21);
1880 __ Mrs(x7, NZCV);
1881
1882 __ Mov(x20, 0xfffffffffffffffd);
1883 __ Mov(x21, 0xffffffffffffffff);
1884
1885 __ Whilele(p8.VnB(), x20, x21);
1886 __ Mrs(x8, NZCV);
1887 __ Whilele(p9.VnH(), x20, x21);
1888 __ Mrs(x9, NZCV);
1889
1890 __ Mov(x20, 0xffffffffffffffff);
1891 __ Mov(x21, 0x0000000000000000);
1892
1893 __ Whilelt(p10.VnS(), x20, x21);
1894 __ Mrs(x10, NZCV);
1895 __ Whilelt(p11.VnD(), x20, x21);
1896 __ Mrs(x11, NZCV);
1897
1898 __ Mov(x20, 0xfffffffffffffffd);
1899 __ Mov(x21, 0xffffffffffffffff);
1900
1901 __ Whilels(p12.VnB(), x20, x21);
1902 __ Mrs(x12, NZCV);
1903 __ Whilels(p13.VnH(), x20, x21);
1904 __ Mrs(x13, NZCV);
1905
1906 __ Mov(x20, 0xffffffffffffffff);
1907 __ Mov(x21, 0x0000000000000000);
1908
1909 __ Whilelo(p14.VnS(), x20, x21);
1910 __ Mrs(x14, NZCV);
1911 __ Whilelo(p15.VnD(), x20, x21);
1912 __ Mrs(x15, NZCV);
1913
1914 END();
1915
1916 if (CAN_RUN()) {
1917 RUN();
1918
1919 // 0b...00000000'00000111
1920 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1921 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1922
1923 // 0b...00000000'00010101
1924 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1925 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
1926
1927 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
1928 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
1929
1930 int p3_expected[] = {0x00, 0x01};
1931 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
1932
1933 // 0b...11111111'11111111
1934 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1935 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1936
1937 // 0b...01010101'01010101
1938 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1939 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1940
1941 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
1942 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1943
1944 int p7_expected[] = {0x00, 0x00};
1945 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1946
1947 // 0b...00000000'00000111
1948 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1949 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1950
1951 // 0b...00000000'00010101
1952 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1953 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1954
1955 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
1956 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1957
1958 int p11_expected[] = {0x00, 0x01};
1959 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
1960
1961 // 0b...11111111'11111111
1962 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1963 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
1964
1965 // 0b...01010101'01010101
1966 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1967 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
1968
1969 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
1970 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
1971
1972 int p15_expected[] = {0x00, 0x00};
1973 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
1974
1975 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
1976 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
1977 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
1978 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
1979 ASSERT_EQUAL_32(SVEFirstFlag, w4);
1980 ASSERT_EQUAL_32(SVEFirstFlag, w5);
1981 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
1982 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
1983 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
1984 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
1985 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1986 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
1987 ASSERT_EQUAL_32(SVEFirstFlag, w12);
1988 ASSERT_EQUAL_32(SVEFirstFlag, w13);
1989 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
1990 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
1991 }
1992}
1993
TatWai Chong302729c2019-06-14 16:18:51 -07001994TEST(sve_int_compare_vectors_signed_imm) {
1995 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1996 START();
1997
1998 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
1999 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2000 InsrHelper(&masm, z13.VnB(), z13_inputs);
2001 Initialise(&masm, p0.VnB(), mask_inputs1);
2002
2003 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2004 __ Mrs(x2, NZCV);
2005 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2006
2007 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2008 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2009 InsrHelper(&masm, z14.VnH(), z14_inputs);
2010 Initialise(&masm, p0.VnH(), mask_inputs2);
2011
2012 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2013 __ Mrs(x4, NZCV);
2014 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2015
2016 int z15_inputs[] = {0, 1, -1, INT_MIN};
2017 int mask_inputs3[] = {0, 1, 1, 1};
2018 InsrHelper(&masm, z15.VnS(), z15_inputs);
2019 Initialise(&masm, p0.VnS(), mask_inputs3);
2020
2021 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2022 __ Mrs(x6, NZCV);
2023 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2024
2025 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2026 __ Mrs(x8, NZCV);
2027 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2028
2029 int64_t z16_inputs[] = {0, -1};
2030 int mask_inputs4[] = {1, 1};
2031 InsrHelper(&masm, z16.VnD(), z16_inputs);
2032 Initialise(&masm, p0.VnD(), mask_inputs4);
2033
2034 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2035 __ Mrs(x10, NZCV);
2036 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2037
2038 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2039 __ Mrs(x12, NZCV);
2040 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2041
2042 END();
2043
2044 if (CAN_RUN()) {
2045 RUN();
2046
2047 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2048 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2049
2050 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2051 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2052
2053 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2054 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2055
2056 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2057 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2058
2059 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2060 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2061
2062 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2063 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2064
2065 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2066 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2067
2068 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2069 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2070
2071 int p10_expected[] = {0x00, 0x01};
2072 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2073
2074 int p11_expected[] = {0x00, 0x00};
2075 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2076
2077 int p12_expected[] = {0x01, 0x00};
2078 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2079
2080 int p13_expected[] = {0x01, 0x01};
2081 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2082
2083 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2084 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2085 ASSERT_EQUAL_32(NoFlag, w6);
2086 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2087 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2088 ASSERT_EQUAL_32(NoFlag, w12);
2089 }
2090}
2091
2092TEST(sve_int_compare_vectors_unsigned_imm) {
2093 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2094 START();
2095
2096 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2097 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2098 InsrHelper(&masm, z13.VnB(), src1_inputs);
2099 Initialise(&masm, p0.VnB(), mask_inputs1);
2100
2101 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2102 __ Mrs(x2, NZCV);
2103 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2104
2105 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2106 int mask_inputs2[] = {1, 1, 1, 1, 0};
2107 InsrHelper(&masm, z13.VnH(), src2_inputs);
2108 Initialise(&masm, p0.VnH(), mask_inputs2);
2109
2110 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2111 __ Mrs(x4, NZCV);
2112 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2113
2114 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2115 int mask_inputs3[] = {1, 1, 1, 1};
2116 InsrHelper(&masm, z13.VnS(), src3_inputs);
2117 Initialise(&masm, p0.VnS(), mask_inputs3);
2118
2119 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2120 __ Mrs(x6, NZCV);
2121 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2122
2123 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2124 int mask_inputs4[] = {1, 1};
2125 InsrHelper(&masm, z13.VnD(), src4_inputs);
2126 Initialise(&masm, p0.VnD(), mask_inputs4);
2127
2128 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2129 __ Mrs(x8, NZCV);
2130 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2131
2132 END();
2133
2134 if (CAN_RUN()) {
2135 RUN();
2136
2137 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2138 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2139
2140 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2141 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2142
2143 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2144 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2145
2146 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2147 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2148
2149 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2150 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2151
2152 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2153 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2154
2155 int p8_expected[] = {0x00, 0x01};
2156 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2157
2158 int p9_expected[] = {0x00, 0x01};
2159 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2160
2161 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2162 ASSERT_EQUAL_32(NoFlag, w4);
2163 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2164 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2165 }
2166}
2167
TatWai Chongc844bb22019-06-10 15:32:53 -07002168TEST(sve_int_compare_conditionally_terminate_scalars) {
2169 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2170 START();
2171
2172 __ Mov(x0, 0xfedcba9887654321);
2173 __ Mov(x1, 0x1000100010001000);
2174
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002175 // Initialise Z and C. These are preserved by cterm*, and the V flag is set to
2176 // !C if the condition does not hold.
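// In other words: if the condition holds, N is set and V is cleared; otherwise
// N is cleared and V becomes !C. Z and C keep their initial values either way.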
2177 __ Mov(x10, NoFlag);
2178 __ Msr(NZCV, x10);
2179
TatWai Chongc844bb22019-06-10 15:32:53 -07002180 __ Ctermeq(w0, w0);
2181 __ Mrs(x2, NZCV);
2182 __ Ctermeq(x0, x1);
2183 __ Mrs(x3, NZCV);
2184 __ Ctermne(x0, x0);
2185 __ Mrs(x4, NZCV);
2186 __ Ctermne(w0, w1);
2187 __ Mrs(x5, NZCV);
2188
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002189 // As above, but with all flags initially set.
2190 __ Mov(x10, NZCVFlag);
2191 __ Msr(NZCV, x10);
2192
2193 __ Ctermeq(w0, w0);
2194 __ Mrs(x6, NZCV);
2195 __ Ctermeq(x0, x1);
2196 __ Mrs(x7, NZCV);
2197 __ Ctermne(x0, x0);
2198 __ Mrs(x8, NZCV);
2199 __ Ctermne(w0, w1);
2200 __ Mrs(x9, NZCV);
2201
TatWai Chongc844bb22019-06-10 15:32:53 -07002202 END();
2203
2204 if (CAN_RUN()) {
2205 RUN();
2206
2207 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2208 ASSERT_EQUAL_32(VFlag, w3);
2209 ASSERT_EQUAL_32(VFlag, w4);
2210 ASSERT_EQUAL_32(SVEFirstFlag, w5);
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002211
2212 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w6);
2213 ASSERT_EQUAL_32(ZCFlag, w7);
2214 ASSERT_EQUAL_32(ZCFlag, w8);
2215 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w9);
TatWai Chongc844bb22019-06-10 15:32:53 -07002216 }
2217}
2218
Jacob Bramley0ce75842019-07-17 18:12:50 +01002219// Work out what the architectural `PredTest` pseudocode should produce for the
2220// given result and governing predicate.
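// (SVEFirstFlag, SVENoneFlag and SVENotLastFlag correspond to the N, Z and C
// flags that a PTEST of the result, governed by pg, would produce.)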
2221template <typename Tg, typename Td, int N>
2222static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2223 const Tg (&pg)[N],
2224 int vl) {
2225 int first = -1;
2226 int last = -1;
2227 bool any_active = false;
2228
2229 // Only consider potentially-active lanes.
2230 int start = (N > vl) ? (N - vl) : 0;
2231 for (int i = start; i < N; i++) {
2232 if ((pg[i] & 1) == 1) {
2233 // Look for the first and last active lanes.
2234 // Note that the 'first' lane is the one with the highest index.
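// The test input arrays list the highest-numbered lane first, so the
// architecturally 'first' lane (lane 0) is the element with the highest index.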
2235 if (last < 0) last = i;
2236 first = i;
2237 // Look for any active lanes that are also active in pd.
2238 if ((pd[i] & 1) == 1) any_active = true;
2239 }
2240 }
2241
2242 uint32_t flags = 0;
2243 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2244 if (!any_active) flags |= SVENoneFlag;
2245 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2246 return static_cast<StatusFlags>(flags);
2247}
2248
2249typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2250 const PRegister& pg,
2251 const PRegisterWithLaneSize& pn);
2252template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002253static void PfirstPnextHelper(Test* config,
2254 PfirstPnextFn macro,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002255 unsigned lane_size_in_bits,
2256 const Tg& pg_inputs,
2257 const Tn& pn_inputs,
2258 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002259 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002260 START();
2261
2262 PRegister pg = p15;
2263 PRegister pn = p14;
2264 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2265 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2266
2267 // Initialise NZCV to an impossible value, to check that we actually write it.
2268 __ Mov(x10, NZCVFlag);
2269
2270 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2271 // the Assembler.
2272 __ Msr(NZCV, x10);
2273 __ Mov(p0, pn);
2274 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2275 pg,
2276 p0.WithLaneSize(lane_size_in_bits));
2277 __ Mrs(x0, NZCV);
2278
2279 // The MacroAssembler supports non-destructive use.
2280 __ Msr(NZCV, x10);
2281 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2282 pg,
2283 pn.WithLaneSize(lane_size_in_bits));
2284 __ Mrs(x1, NZCV);
2285
2286 // If pd.Aliases(pg), the macro requires a scratch register.
2287 {
2288 UseScratchRegisterScope temps(&masm);
2289 temps.Include(p13);
2290 __ Msr(NZCV, x10);
2291 __ Mov(p2, p15);
2292 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2293 p2,
2294 pn.WithLaneSize(lane_size_in_bits));
2295 __ Mrs(x2, NZCV);
2296 }
2297
2298 END();
2299
2300 if (CAN_RUN()) {
2301 RUN();
2302
2303 // Check that the inputs weren't modified.
2304 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2305 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2306
2307 // Check the primary operation.
2308 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2309 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2310 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2311
2312 // Check that the flags were properly set.
2313 StatusFlags nzcv_expected =
2314 GetPredTestFlags(pd_expected,
2315 pg_inputs,
2316 core.GetSVELaneCount(kBRegSize));
2317 ASSERT_EQUAL_64(nzcv_expected, x0);
2318 ASSERT_EQUAL_64(nzcv_expected, x1);
2319 ASSERT_EQUAL_64(nzcv_expected, x2);
2320 }
2321}
2322
2323template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002324static void PfirstHelper(Test* config,
2325 const Tg& pg_inputs,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002326 const Tn& pn_inputs,
2327 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002328 PfirstPnextHelper(config,
2329 &MacroAssembler::Pfirst,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002330 kBRegSize, // pfirst only accepts B-sized lanes.
2331 pg_inputs,
2332 pn_inputs,
2333 pd_expected);
2334}
2335
2336template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002337static void PnextHelper(Test* config,
2338 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002339 const Tg& pg_inputs,
2340 const Tn& pn_inputs,
2341 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002342 PfirstPnextHelper(config,
2343 &MacroAssembler::Pnext,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002344 lane_size_in_bits,
2345 pg_inputs,
2346 pn_inputs,
2347 pd_expected);
2348}
2349
Jacob Bramleye8289202019-07-31 11:25:23 +01002350TEST_SVE(sve_pfirst) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002351 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2352 // large VL), but few enough to make the test easy to read.
2353 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2354 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2355 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2356 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2357 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2358 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2359
2360 // Pfirst finds the first active lane in pg, and activates the corresponding
2361 // lane in pn (if it isn't already active).
2362
2363 // The first active lane in in1 is here. |
2364 // v
2365 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2366 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2367 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2368 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002369 PfirstHelper(config, in1, in0, exp10);
2370 PfirstHelper(config, in1, in2, exp12);
2371 PfirstHelper(config, in1, in3, exp13);
2372 PfirstHelper(config, in1, in4, exp14);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002373
2374 // The first active lane in in2 is here. |
2375 // v
2376 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2377 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2378 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2379 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002380 PfirstHelper(config, in2, in0, exp20);
2381 PfirstHelper(config, in2, in1, exp21);
2382 PfirstHelper(config, in2, in3, exp23);
2383 PfirstHelper(config, in2, in4, exp24);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002384
2385 // The first active lane in in3 is here. |
2386 // v
2387 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2388 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2389 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2390 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002391 PfirstHelper(config, in3, in0, exp30);
2392 PfirstHelper(config, in3, in1, exp31);
2393 PfirstHelper(config, in3, in2, exp32);
2394 PfirstHelper(config, in3, in4, exp34);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002395
2396 // | The first active lane in in4 is here.
2397 // v
2398 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2399 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2400 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2401 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002402 PfirstHelper(config, in4, in0, exp40);
2403 PfirstHelper(config, in4, in1, exp41);
2404 PfirstHelper(config, in4, in2, exp42);
2405 PfirstHelper(config, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002406
2407 // If pg is all inactive, the input is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002408 PfirstHelper(config, in0, in0, in0);
2409 PfirstHelper(config, in0, in1, in1);
2410 PfirstHelper(config, in0, in2, in2);
2411 PfirstHelper(config, in0, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002412
2413 // If the values of pg and pn match, the value is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002414 PfirstHelper(config, in0, in0, in0);
2415 PfirstHelper(config, in1, in1, in1);
2416 PfirstHelper(config, in2, in2, in2);
2417 PfirstHelper(config, in3, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002418}
2419
Jacob Bramleye8289202019-07-31 11:25:23 +01002420TEST_SVE(sve_pfirst_alias) {
2421 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002422 START();
2423
2424 // Check that the Simulator behaves correctly when all arguments are aliased.
2425 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2426 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2427 int in_s[] = {0, 1, 1, 0};
2428 int in_d[] = {1, 1};
2429
2430 Initialise(&masm, p0.VnB(), in_b);
2431 Initialise(&masm, p1.VnH(), in_h);
2432 Initialise(&masm, p2.VnS(), in_s);
2433 Initialise(&masm, p3.VnD(), in_d);
2434
2435 // Initialise NZCV to an impossible value, to check that we actually write it.
2436 __ Mov(x10, NZCVFlag);
2437
2438 __ Msr(NZCV, x10);
2439 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2440 __ Mrs(x0, NZCV);
2441
2442 __ Msr(NZCV, x10);
2443 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
2444 __ Mrs(x1, NZCV);
2445
2446 __ Msr(NZCV, x10);
2447 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
2448 __ Mrs(x2, NZCV);
2449
2450 __ Msr(NZCV, x10);
2451 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
2452 __ Mrs(x3, NZCV);
2453
2454 END();
2455
2456 if (CAN_RUN()) {
2457 RUN();
2458
2459 // The first active lane of pg is already active in pdn, so the P registers
2460 // should be unchanged.
2461 ASSERT_EQUAL_SVE(in_b, p0.VnB());
2462 ASSERT_EQUAL_SVE(in_h, p1.VnH());
2463 ASSERT_EQUAL_SVE(in_s, p2.VnS());
2464 ASSERT_EQUAL_SVE(in_d, p3.VnD());
2465
2466 ASSERT_EQUAL_64(SVEFirstFlag, x0);
2467 ASSERT_EQUAL_64(SVEFirstFlag, x1);
2468 ASSERT_EQUAL_64(SVEFirstFlag, x2);
2469 ASSERT_EQUAL_64(SVEFirstFlag, x3);
2470 }
2471}
2472
Jacob Bramleye8289202019-07-31 11:25:23 +01002473TEST_SVE(sve_pnext_b) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002474 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2475 // (to check propagation if we have a large VL), but few enough to make the
2476 // test easy to read.
2477 // For now, we just use kPRegMinSize so that the test works anywhere.
2478 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2479 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2480 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2481 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
2482 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2483
2484 // Pnext activates the next element that is true in pg, after the last-active
2485 // element in pn. If all pn elements are false (as in in0), it starts looking
2486 // at element 0.
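// For example, exp10 below has only lane 2 set: in0 has no active lanes, so the
// result is simply the lowest-numbered active lane of pg (in1).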
2487
2488 // There are no active lanes in in0, so the result is simply the first active
2489 // lane from pg.
2490 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2491 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2492 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2493 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2494 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2495
2496 // The last active lane in in1 is here. |
2497 // v
2498 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2499 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2500 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2501 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2502 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2503
2504 // | The last active lane in in2 is here.
2505 // v
2506 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2507 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2508 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2509 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2510 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2511
2512 // | The last active lane in in3 is here.
2513 // v
2514 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2515 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2516 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2517 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2518 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2519
2520 // | The last active lane in in4 is here.
2521 // v
2522 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2523 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2524 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2525 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2526 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2527
Jacob Bramleye8289202019-07-31 11:25:23 +01002528 PnextHelper(config, kBRegSize, in0, in0, exp00);
2529 PnextHelper(config, kBRegSize, in1, in0, exp10);
2530 PnextHelper(config, kBRegSize, in2, in0, exp20);
2531 PnextHelper(config, kBRegSize, in3, in0, exp30);
2532 PnextHelper(config, kBRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002533
Jacob Bramleye8289202019-07-31 11:25:23 +01002534 PnextHelper(config, kBRegSize, in0, in1, exp01);
2535 PnextHelper(config, kBRegSize, in1, in1, exp11);
2536 PnextHelper(config, kBRegSize, in2, in1, exp21);
2537 PnextHelper(config, kBRegSize, in3, in1, exp31);
2538 PnextHelper(config, kBRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002539
Jacob Bramleye8289202019-07-31 11:25:23 +01002540 PnextHelper(config, kBRegSize, in0, in2, exp02);
2541 PnextHelper(config, kBRegSize, in1, in2, exp12);
2542 PnextHelper(config, kBRegSize, in2, in2, exp22);
2543 PnextHelper(config, kBRegSize, in3, in2, exp32);
2544 PnextHelper(config, kBRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002545
Jacob Bramleye8289202019-07-31 11:25:23 +01002546 PnextHelper(config, kBRegSize, in0, in3, exp03);
2547 PnextHelper(config, kBRegSize, in1, in3, exp13);
2548 PnextHelper(config, kBRegSize, in2, in3, exp23);
2549 PnextHelper(config, kBRegSize, in3, in3, exp33);
2550 PnextHelper(config, kBRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002551
Jacob Bramleye8289202019-07-31 11:25:23 +01002552 PnextHelper(config, kBRegSize, in0, in4, exp04);
2553 PnextHelper(config, kBRegSize, in1, in4, exp14);
2554 PnextHelper(config, kBRegSize, in2, in4, exp24);
2555 PnextHelper(config, kBRegSize, in3, in4, exp34);
2556 PnextHelper(config, kBRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002557}
2558
Jacob Bramleye8289202019-07-31 11:25:23 +01002559TEST_SVE(sve_pnext_h) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002560 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2561 // (to check propagation if we have a large VL), but few enough to make the
2562 // test easy to read.
2563 // For now, we just use kPRegMinSize so that the test works anywhere.
2564 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
2565 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
2566 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
2567 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
2568 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
2569
2570 // Pnext activates the next element that is true in pg, after the last-active
2571 // element in pn. If all pn elements are false (as in in0), it starts looking
2572 // at element 0.
2573 //
2574 // As for other SVE instructions, elements are only considered to be active if
2575 // the _first_ bit in each field is one. Other bits are ignored.
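// For example, the value 2 in in1 has its first bit clear, so that H lane is
// treated as inactive, whereas the value 3 in in3 and in4 counts as active.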
2576
2577 // There are no active lanes in in0, so the result is simply the first active
2578 // lane from pg.
2579 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
2580 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
2581 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
2582 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
2583 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
2584
2585 // | The last active lane in in1 is here.
2586 // v
2587 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
2588 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
2589 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
2590 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
2591 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
2592
2593 // | The last active lane in in2 is here.
2594 // v
2595 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
2596 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
2597 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
2598 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
2599 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
2600
2601 // | The last active lane in in3 is here.
2602 // v
2603 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
2604 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
2605 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
2606 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
2607 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
2608
2609 // | The last active lane in in4 is here.
2610 // v
2611 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
2612 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
2613 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
2614 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
2615 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
2616
Jacob Bramleye8289202019-07-31 11:25:23 +01002617 PnextHelper(config, kHRegSize, in0, in0, exp00);
2618 PnextHelper(config, kHRegSize, in1, in0, exp10);
2619 PnextHelper(config, kHRegSize, in2, in0, exp20);
2620 PnextHelper(config, kHRegSize, in3, in0, exp30);
2621 PnextHelper(config, kHRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002622
Jacob Bramleye8289202019-07-31 11:25:23 +01002623 PnextHelper(config, kHRegSize, in0, in1, exp01);
2624 PnextHelper(config, kHRegSize, in1, in1, exp11);
2625 PnextHelper(config, kHRegSize, in2, in1, exp21);
2626 PnextHelper(config, kHRegSize, in3, in1, exp31);
2627 PnextHelper(config, kHRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002628
Jacob Bramleye8289202019-07-31 11:25:23 +01002629 PnextHelper(config, kHRegSize, in0, in2, exp02);
2630 PnextHelper(config, kHRegSize, in1, in2, exp12);
2631 PnextHelper(config, kHRegSize, in2, in2, exp22);
2632 PnextHelper(config, kHRegSize, in3, in2, exp32);
2633 PnextHelper(config, kHRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002634
Jacob Bramleye8289202019-07-31 11:25:23 +01002635 PnextHelper(config, kHRegSize, in0, in3, exp03);
2636 PnextHelper(config, kHRegSize, in1, in3, exp13);
2637 PnextHelper(config, kHRegSize, in2, in3, exp23);
2638 PnextHelper(config, kHRegSize, in3, in3, exp33);
2639 PnextHelper(config, kHRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002640
Jacob Bramleye8289202019-07-31 11:25:23 +01002641 PnextHelper(config, kHRegSize, in0, in4, exp04);
2642 PnextHelper(config, kHRegSize, in1, in4, exp14);
2643 PnextHelper(config, kHRegSize, in2, in4, exp24);
2644 PnextHelper(config, kHRegSize, in3, in4, exp34);
2645 PnextHelper(config, kHRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002646}
2647
Jacob Bramleye8289202019-07-31 11:25:23 +01002648TEST_SVE(sve_pnext_s) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002649 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2650 // (to check propagation if we have a large VL), but few enough to make the
2651 // test easy to read.
2652 // For now, we just use kPRegMinSize so that the test works anywhere.
2653 int in0[] = {0xe, 0xc, 0x8, 0x0};
2654 int in1[] = {0x0, 0x2, 0x0, 0x1};
2655 int in2[] = {0x0, 0x1, 0xf, 0x0};
2656 int in3[] = {0xf, 0x0, 0x0, 0x0};
2657
2658 // Pnext activates the next element that is true in pg, after the last-active
2659 // element in pn. If all pn elements are false (as in in0), it starts looking
2660 // at element 0.
2661 //
2662 // As for other SVE instructions, elements are only considered to be active if
2663 // the _first_ bit in each field is one. Other bits are ignored.
2664
2665 // There are no active lanes in in0, so the result is simply the first active
2666 // lane from pg.
2667 int exp00[] = {0, 0, 0, 0};
2668 int exp10[] = {0, 0, 0, 1};
2669 int exp20[] = {0, 0, 1, 0};
2670 int exp30[] = {1, 0, 0, 0};
2671
2672 // | The last active lane in in1 is here.
2673 // v
2674 int exp01[] = {0, 0, 0, 0};
2675 int exp11[] = {0, 0, 0, 0};
2676 int exp21[] = {0, 0, 1, 0};
2677 int exp31[] = {1, 0, 0, 0};
2678
2679 // | The last active lane in in2 is here.
2680 // v
2681 int exp02[] = {0, 0, 0, 0};
2682 int exp12[] = {0, 0, 0, 0};
2683 int exp22[] = {0, 0, 0, 0};
2684 int exp32[] = {1, 0, 0, 0};
2685
2686 // | The last active lane in in3 is here.
2687 // v
2688 int exp03[] = {0, 0, 0, 0};
2689 int exp13[] = {0, 0, 0, 0};
2690 int exp23[] = {0, 0, 0, 0};
2691 int exp33[] = {0, 0, 0, 0};
2692
Jacob Bramleye8289202019-07-31 11:25:23 +01002693 PnextHelper(config, kSRegSize, in0, in0, exp00);
2694 PnextHelper(config, kSRegSize, in1, in0, exp10);
2695 PnextHelper(config, kSRegSize, in2, in0, exp20);
2696 PnextHelper(config, kSRegSize, in3, in0, exp30);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002697
Jacob Bramleye8289202019-07-31 11:25:23 +01002698 PnextHelper(config, kSRegSize, in0, in1, exp01);
2699 PnextHelper(config, kSRegSize, in1, in1, exp11);
2700 PnextHelper(config, kSRegSize, in2, in1, exp21);
2701 PnextHelper(config, kSRegSize, in3, in1, exp31);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002702
Jacob Bramleye8289202019-07-31 11:25:23 +01002703 PnextHelper(config, kSRegSize, in0, in2, exp02);
2704 PnextHelper(config, kSRegSize, in1, in2, exp12);
2705 PnextHelper(config, kSRegSize, in2, in2, exp22);
2706 PnextHelper(config, kSRegSize, in3, in2, exp32);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002707
Jacob Bramleye8289202019-07-31 11:25:23 +01002708 PnextHelper(config, kSRegSize, in0, in3, exp03);
2709 PnextHelper(config, kSRegSize, in1, in3, exp13);
2710 PnextHelper(config, kSRegSize, in2, in3, exp23);
2711 PnextHelper(config, kSRegSize, in3, in3, exp33);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002712}
2713
Jacob Bramleye8289202019-07-31 11:25:23 +01002714TEST_SVE(sve_pnext_d) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002715 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2716 // (to check propagation if we have a large VL), but few enough to make the
2717 // test easy to read.
2718 // For now, we just use kPRegMinSize so that the test works anywhere.
2719 int in0[] = {0xfe, 0xf0};
2720 int in1[] = {0x00, 0x55};
2721 int in2[] = {0x33, 0xff};
2722
2723 // Pnext activates the next element that is true in pg, after the last-active
2724 // element in pn. If all pn elements are false (as in in0), it starts looking
2725 // at element 0.
2726 //
2727 // As for other SVE instructions, elements are only considered to be active if
2728 // the _first_ bit in each field is one. Other bits are ignored.
2729
2730 // There are no active lanes in in0, so the result is simply the first active
2731 // lane from pg.
2732 int exp00[] = {0, 0};
2733 int exp10[] = {0, 1};
2734 int exp20[] = {0, 1};
2735
2736 // | The last active lane in in1 is here.
2737 // v
2738 int exp01[] = {0, 0};
2739 int exp11[] = {0, 0};
2740 int exp21[] = {1, 0};
2741
2742 // | The last active lane in in2 is here.
2743 // v
2744 int exp02[] = {0, 0};
2745 int exp12[] = {0, 0};
2746 int exp22[] = {0, 0};
2747
Jacob Bramleye8289202019-07-31 11:25:23 +01002748 PnextHelper(config, kDRegSize, in0, in0, exp00);
2749 PnextHelper(config, kDRegSize, in1, in0, exp10);
2750 PnextHelper(config, kDRegSize, in2, in0, exp20);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002751
Jacob Bramleye8289202019-07-31 11:25:23 +01002752 PnextHelper(config, kDRegSize, in0, in1, exp01);
2753 PnextHelper(config, kDRegSize, in1, in1, exp11);
2754 PnextHelper(config, kDRegSize, in2, in1, exp21);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002755
Jacob Bramleye8289202019-07-31 11:25:23 +01002756 PnextHelper(config, kDRegSize, in0, in2, exp02);
2757 PnextHelper(config, kDRegSize, in1, in2, exp12);
2758 PnextHelper(config, kDRegSize, in2, in2, exp22);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002759}
2760
Jacob Bramleye8289202019-07-31 11:25:23 +01002761TEST_SVE(sve_pnext_alias) {
2762 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002763 START();
2764
2765 // Check that the Simulator behaves correctly when all arguments are aliased.
2766 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2767 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2768 int in_s[] = {0, 1, 1, 0};
2769 int in_d[] = {1, 1};
2770
2771 Initialise(&masm, p0.VnB(), in_b);
2772 Initialise(&masm, p1.VnH(), in_h);
2773 Initialise(&masm, p2.VnS(), in_s);
2774 Initialise(&masm, p3.VnD(), in_d);
2775
2776 // Initialise NZCV to an impossible value, to check that we actually write it.
2777 __ Mov(x10, NZCVFlag);
2778
2779 __ Msr(NZCV, x10);
2780 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
2781 __ Mrs(x0, NZCV);
2782
2783 __ Msr(NZCV, x10);
2784 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
2785 __ Mrs(x1, NZCV);
2786
2787 __ Msr(NZCV, x10);
2788 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
2789 __ Mrs(x2, NZCV);
2790
2791 __ Msr(NZCV, x10);
2792 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
2793 __ Mrs(x3, NZCV);
2794
2795 END();
2796
2797 if (CAN_RUN()) {
2798 RUN();
2799
2800 // Since pg.Is(pdn), there can be no active lanes in pg above the last
2801 // active lane in pdn, so the result should always be zero.
2802 ASSERT_EQUAL_SVE(0, p0.VnB());
2803 ASSERT_EQUAL_SVE(0, p1.VnH());
2804 ASSERT_EQUAL_SVE(0, p2.VnS());
2805 ASSERT_EQUAL_SVE(0, p3.VnD());
2806
2807 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
2808 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
2809 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
2810 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
2811 }
2812}
2813
Jacob Bramleye8289202019-07-31 11:25:23 +01002814static void PtrueHelper(Test* config,
2815 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002816 FlagsUpdate s = LeaveFlags) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002817 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002818 START();
2819
2820 PRegisterWithLaneSize p[kNumberOfPRegisters];
2821 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
2822 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
2823 }
2824
2825 // Initialise NZCV to an impossible value, to check that we actually write it.
2826 StatusFlags nzcv_unmodified = NZCVFlag;
2827 __ Mov(x20, nzcv_unmodified);
2828
2829 // We don't have enough registers to conveniently test every pattern, so take
2830 // samples from each group.
2831 __ Msr(NZCV, x20);
2832 __ Ptrue(p[0], SVE_POW2, s);
2833 __ Mrs(x0, NZCV);
2834
2835 __ Msr(NZCV, x20);
2836 __ Ptrue(p[1], SVE_VL1, s);
2837 __ Mrs(x1, NZCV);
2838
2839 __ Msr(NZCV, x20);
2840 __ Ptrue(p[2], SVE_VL2, s);
2841 __ Mrs(x2, NZCV);
2842
2843 __ Msr(NZCV, x20);
2844 __ Ptrue(p[3], SVE_VL5, s);
2845 __ Mrs(x3, NZCV);
2846
2847 __ Msr(NZCV, x20);
2848 __ Ptrue(p[4], SVE_VL6, s);
2849 __ Mrs(x4, NZCV);
2850
2851 __ Msr(NZCV, x20);
2852 __ Ptrue(p[5], SVE_VL8, s);
2853 __ Mrs(x5, NZCV);
2854
2855 __ Msr(NZCV, x20);
2856 __ Ptrue(p[6], SVE_VL16, s);
2857 __ Mrs(x6, NZCV);
2858
2859 __ Msr(NZCV, x20);
2860 __ Ptrue(p[7], SVE_VL64, s);
2861 __ Mrs(x7, NZCV);
2862
2863 __ Msr(NZCV, x20);
2864 __ Ptrue(p[8], SVE_VL256, s);
2865 __ Mrs(x8, NZCV);
2866
2867 {
2868 // We have to use the Assembler to encode values not defined by
2869 // SVEPredicateConstraint, so call `ptrue` or `ptrues` directly.
2870 typedef void (
2871 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
2872 int pattern);
2873 AssemblePtrueFn assemble =
2874 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
2875
2876 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
2877 __ msr(NZCV, x20);
2878 (masm.*assemble)(p[9], 0xe);
2879 __ mrs(x9, NZCV);
2880
2881 __ msr(NZCV, x20);
2882 (masm.*assemble)(p[10], 0x16);
2883 __ mrs(x10, NZCV);
2884
2885 __ msr(NZCV, x20);
2886 (masm.*assemble)(p[11], 0x1a);
2887 __ mrs(x11, NZCV);
2888
2889 __ msr(NZCV, x20);
2890 (masm.*assemble)(p[12], 0x1c);
2891 __ mrs(x12, NZCV);
2892 }
2893
2894 __ Msr(NZCV, x20);
2895 __ Ptrue(p[13], SVE_MUL4, s);
2896 __ Mrs(x13, NZCV);
2897
2898 __ Msr(NZCV, x20);
2899 __ Ptrue(p[14], SVE_MUL3, s);
2900 __ Mrs(x14, NZCV);
2901
2902 __ Msr(NZCV, x20);
2903 __ Ptrue(p[15], SVE_ALL, s);
2904 __ Mrs(x15, NZCV);
2905
2906 END();
2907
2908 if (CAN_RUN()) {
2909 RUN();
2910
2911 int all = core.GetSVELaneCount(lane_size_in_bits);
2912 int pow2 = 1 << HighestSetBitPosition(all);
2913 int mul4 = all - (all % 4);
2914 int mul3 = all - (all % 3);
2915
2916 // Check P register results.
2917 for (int i = 0; i < all; i++) {
2918 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
2919 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
2920 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
2921 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
2922 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
2923 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
2924 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
2925 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
2926 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
2927 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
2928 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
2929 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
2930 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
2931 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
2932 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
2933 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
2934 }
2935
2936 // Check NZCV results.
2937 if (s == LeaveFlags) {
2938 // No flags should have been updated.
2939 for (int i = 0; i <= 15; i++) {
2940 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
2941 }
2942 } else {
2943 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
2944 StatusFlags nonzero = SVEFirstFlag;
2945
2946 // POW2
2947 ASSERT_EQUAL_64(nonzero, x0);
2948 // VL*
2949 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
2950 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
2951 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
2952 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
2953 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
2954 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
2955 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
2956 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
2957 // #uimm5
2958 ASSERT_EQUAL_64(zero, x9);
2959 ASSERT_EQUAL_64(zero, x10);
2960 ASSERT_EQUAL_64(zero, x11);
2961 ASSERT_EQUAL_64(zero, x12);
2962 // MUL*
2963 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
2964 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
2965 // ALL
2966 ASSERT_EQUAL_64(nonzero, x15);
2967 }
2968 }
2969}
2970
Jacob Bramleye8289202019-07-31 11:25:23 +01002971TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
2972TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
2973TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
2974TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002975
Jacob Bramleye8289202019-07-31 11:25:23 +01002976TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
2977TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
2978TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
2979TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002980
Jacob Bramleye8289202019-07-31 11:25:23 +01002981TEST_SVE(sve_pfalse) {
2982 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002983 START();
2984
2985 // Initialise non-zero inputs.
2986 __ Ptrue(p0.VnB());
2987 __ Ptrue(p1.VnH());
2988 __ Ptrue(p2.VnS());
2989 __ Ptrue(p3.VnD());
2990
2991 // The instruction only supports B-sized lanes, but the lane size has no
2992 // logical effect, so the MacroAssembler accepts anything.
2993 __ Pfalse(p0.VnB());
2994 __ Pfalse(p1.VnH());
2995 __ Pfalse(p2.VnS());
2996 __ Pfalse(p3.VnD());
2997
2998 END();
2999
3000 if (CAN_RUN()) {
3001 RUN();
3002
3003 ASSERT_EQUAL_SVE(0, p0.VnB());
3004 ASSERT_EQUAL_SVE(0, p1.VnB());
3005 ASSERT_EQUAL_SVE(0, p2.VnB());
3006 ASSERT_EQUAL_SVE(0, p3.VnB());
3007 }
3008}
3009
Jacob Bramleye8289202019-07-31 11:25:23 +01003010TEST_SVE(sve_ptest) {
3011 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003012 START();
3013
3014 // Initialise NZCV to a known (impossible) value.
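  // (Flag-setting SVE instructions always clear V, so this value cannot be
  // produced by `ptest`.)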
3015 StatusFlags nzcv_unmodified = NZCVFlag;
3016 __ Mov(x0, nzcv_unmodified);
3017 __ Msr(NZCV, x0);
3018
3019 // Construct some test inputs.
3020 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3021 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3022 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3023 __ Pfalse(p0.VnB());
3024 __ Ptrue(p1.VnB());
3025 Initialise(&masm, p2.VnB(), in2);
3026 Initialise(&masm, p3.VnB(), in3);
3027 Initialise(&masm, p4.VnB(), in4);
3028
3029 // All-inactive pg.
3030 __ Ptest(p0, p0.VnB());
3031 __ Mrs(x0, NZCV);
3032 __ Ptest(p0, p1.VnB());
3033 __ Mrs(x1, NZCV);
3034 __ Ptest(p0, p2.VnB());
3035 __ Mrs(x2, NZCV);
3036 __ Ptest(p0, p3.VnB());
3037 __ Mrs(x3, NZCV);
3038 __ Ptest(p0, p4.VnB());
3039 __ Mrs(x4, NZCV);
3040
3041 // All-active pg.
3042 __ Ptest(p1, p0.VnB());
3043 __ Mrs(x5, NZCV);
3044 __ Ptest(p1, p1.VnB());
3045 __ Mrs(x6, NZCV);
3046 __ Ptest(p1, p2.VnB());
3047 __ Mrs(x7, NZCV);
3048 __ Ptest(p1, p3.VnB());
3049 __ Mrs(x8, NZCV);
3050 __ Ptest(p1, p4.VnB());
3051 __ Mrs(x9, NZCV);
3052
3053 // Combinations of other inputs.
3054 __ Ptest(p2, p2.VnB());
3055 __ Mrs(x20, NZCV);
3056 __ Ptest(p2, p3.VnB());
3057 __ Mrs(x21, NZCV);
3058 __ Ptest(p2, p4.VnB());
3059 __ Mrs(x22, NZCV);
3060 __ Ptest(p3, p2.VnB());
3061 __ Mrs(x23, NZCV);
3062 __ Ptest(p3, p3.VnB());
3063 __ Mrs(x24, NZCV);
3064 __ Ptest(p3, p4.VnB());
3065 __ Mrs(x25, NZCV);
3066 __ Ptest(p4, p2.VnB());
3067 __ Mrs(x26, NZCV);
3068 __ Ptest(p4, p3.VnB());
3069 __ Mrs(x27, NZCV);
3070 __ Ptest(p4, p4.VnB());
3071 __ Mrs(x28, NZCV);
3072
3073 END();
3074
3075 if (CAN_RUN()) {
3076 RUN();
3077
3078 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
3079
3080 // If pg is all inactive, the value of pn is irrelevant.
3081 ASSERT_EQUAL_64(zero, x0);
3082 ASSERT_EQUAL_64(zero, x1);
3083 ASSERT_EQUAL_64(zero, x2);
3084 ASSERT_EQUAL_64(zero, x3);
3085 ASSERT_EQUAL_64(zero, x4);
3086
3087 // All-active pg.
3088 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3089 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3090 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3091 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3092 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3093 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3094
3095 // Other inputs.
3096 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3097 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3098 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3099 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3100 x23); // pg: in3, pn: in2
3101 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3102 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3103 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3104 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3105 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3106 }
3107}
3108
Jacob Bramleye8289202019-07-31 11:25:23 +01003109TEST_SVE(sve_cntp) {
3110 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01003111 START();
3112
3113 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3114 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
3115 Initialise(&masm, p0.VnB(), p0_inputs);
3116
3117 // With an all-true predicate, these instructions measure the vector length.
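  // For example, with a 384-bit VL, `cntp` on an all-true predicate with
  // H-sized lanes is expected to return 384 / 16 = 24.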
3118 __ Ptrue(p10.VnB());
3119 __ Ptrue(p11.VnH());
3120 __ Ptrue(p12.VnS());
3121 __ Ptrue(p13.VnD());
3122
3123 // `ptrue p10.b` provides an all-active pg.
3124 __ Cntp(x10, p10, p10.VnB());
3125 __ Cntp(x11, p10, p11.VnH());
3126 __ Cntp(x12, p10, p12.VnS());
3127 __ Cntp(x13, p10, p13.VnD());
3128
3129 // Check that the predicate mask is applied properly.
3130 __ Cntp(x14, p10, p10.VnB());
3131 __ Cntp(x15, p11, p10.VnB());
3132 __ Cntp(x16, p12, p10.VnB());
3133 __ Cntp(x17, p13, p10.VnB());
3134
3135 // Check other patterns (including some ignored bits).
3136 __ Cntp(x0, p10, p0.VnB());
3137 __ Cntp(x1, p10, p0.VnH());
3138 __ Cntp(x2, p10, p0.VnS());
3139 __ Cntp(x3, p10, p0.VnD());
3140 __ Cntp(x4, p0, p10.VnB());
3141 __ Cntp(x5, p0, p10.VnH());
3142 __ Cntp(x6, p0, p10.VnS());
3143 __ Cntp(x7, p0, p10.VnD());
3144
3145 END();
3146
3147 if (CAN_RUN()) {
3148 RUN();
3149
3150 int vl_b = core.GetSVELaneCount(kBRegSize);
3151 int vl_h = core.GetSVELaneCount(kHRegSize);
3152 int vl_s = core.GetSVELaneCount(kSRegSize);
3153 int vl_d = core.GetSVELaneCount(kDRegSize);
3154
3155 // Check all-active predicates in various combinations.
3156 ASSERT_EQUAL_64(vl_b, x10);
3157 ASSERT_EQUAL_64(vl_h, x11);
3158 ASSERT_EQUAL_64(vl_s, x12);
3159 ASSERT_EQUAL_64(vl_d, x13);
3160
3161 ASSERT_EQUAL_64(vl_b, x14);
3162 ASSERT_EQUAL_64(vl_h, x15);
3163 ASSERT_EQUAL_64(vl_s, x16);
3164 ASSERT_EQUAL_64(vl_d, x17);
3165
3166 // Check that irrelevant bits are properly ignored.
3167 ASSERT_EQUAL_64(7, x0);
3168 ASSERT_EQUAL_64(5, x1);
3169 ASSERT_EQUAL_64(2, x2);
3170 ASSERT_EQUAL_64(1, x3);
3171
3172 ASSERT_EQUAL_64(7, x4);
3173 ASSERT_EQUAL_64(5, x5);
3174 ASSERT_EQUAL_64(2, x6);
3175 ASSERT_EQUAL_64(1, x7);
3176 }
3177}
3178
Martyn Capewell74f84f62019-10-30 15:30:44 +00003179typedef void (MacroAssembler::*CntFn)(const Register& dst,
3180 int pattern,
3181 int multiplier);
3182
3183static void CntHelper(Test* config,
3184 CntFn cnt,
3185 int multiplier,
3186 int lane_size_in_bits) {
3187 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3188 START();
3189
3190 (masm.*cnt)(w0, SVE_POW2, multiplier);
3191 (masm.*cnt)(x1, SVE_VL1, multiplier);
3192 (masm.*cnt)(x2, SVE_VL2, multiplier);
3193 (masm.*cnt)(x3, SVE_VL3, multiplier);
3194 (masm.*cnt)(x4, SVE_VL4, multiplier);
3195 (masm.*cnt)(x5, SVE_VL5, multiplier);
3196 (masm.*cnt)(x6, SVE_VL6, multiplier);
3197 (masm.*cnt)(x7, SVE_VL7, multiplier);
3198 (masm.*cnt)(x8, SVE_VL8, multiplier);
3199 (masm.*cnt)(x9, SVE_VL16, multiplier);
3200 (masm.*cnt)(x10, SVE_VL32, multiplier);
3201 (masm.*cnt)(x11, SVE_VL64, multiplier);
3202 (masm.*cnt)(x12, SVE_VL128, multiplier);
3203 (masm.*cnt)(x13, SVE_VL256, multiplier);
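  // Patterns 16, 23 and 28 fall in the range of encodings that the
  // architecture leaves unallocated, so they are expected to count zero
  // elements.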
3204 (masm.*cnt)(x14, 16, multiplier);
3205 (masm.*cnt)(x15, 23, multiplier);
3206 (masm.*cnt)(x18, 28, multiplier);
3207 (masm.*cnt)(x19, SVE_MUL4, multiplier);
3208 (masm.*cnt)(x20, SVE_MUL3, multiplier);
3209 (masm.*cnt)(x21, SVE_ALL, multiplier);
3210
3211 END();
3212
3213 if (CAN_RUN()) {
3214 RUN();
3215
3216 int all = core.GetSVELaneCount(lane_size_in_bits);
3217 int pow2 = 1 << HighestSetBitPosition(all);
3218 int mul4 = all - (all % 4);
3219 int mul3 = all - (all % 3);
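    // For example, for `cnth` with a 384-bit VL: all = 24, pow2 = 16,
    // mul4 = 24 and mul3 = 24.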
3220
3221 ASSERT_EQUAL_64(multiplier * pow2, x0);
3222 ASSERT_EQUAL_64(multiplier * (all >= 1 ? 1 : 0), x1);
3223 ASSERT_EQUAL_64(multiplier * (all >= 2 ? 2 : 0), x2);
3224 ASSERT_EQUAL_64(multiplier * (all >= 3 ? 3 : 0), x3);
3225 ASSERT_EQUAL_64(multiplier * (all >= 4 ? 4 : 0), x4);
3226 ASSERT_EQUAL_64(multiplier * (all >= 5 ? 5 : 0), x5);
3227 ASSERT_EQUAL_64(multiplier * (all >= 6 ? 6 : 0), x6);
3228 ASSERT_EQUAL_64(multiplier * (all >= 7 ? 7 : 0), x7);
3229 ASSERT_EQUAL_64(multiplier * (all >= 8 ? 8 : 0), x8);
3230 ASSERT_EQUAL_64(multiplier * (all >= 16 ? 16 : 0), x9);
3231 ASSERT_EQUAL_64(multiplier * (all >= 32 ? 32 : 0), x10);
3232 ASSERT_EQUAL_64(multiplier * (all >= 64 ? 64 : 0), x11);
3233 ASSERT_EQUAL_64(multiplier * (all >= 128 ? 128 : 0), x12);
3234 ASSERT_EQUAL_64(multiplier * (all >= 256 ? 256 : 0), x13);
3235 ASSERT_EQUAL_64(0, x14);
3236 ASSERT_EQUAL_64(0, x15);
3237 ASSERT_EQUAL_64(0, x18);
3238 ASSERT_EQUAL_64(multiplier * mul4, x19);
3239 ASSERT_EQUAL_64(multiplier * mul3, x20);
3240 ASSERT_EQUAL_64(multiplier * all, x21);
3241 }
3242}
3243
3244TEST_SVE(sve_cntb) {
3245 CntHelper(config, &MacroAssembler::Cntb, 1, kBRegSize);
3246 CntHelper(config, &MacroAssembler::Cntb, 2, kBRegSize);
3247 CntHelper(config, &MacroAssembler::Cntb, 15, kBRegSize);
3248 CntHelper(config, &MacroAssembler::Cntb, 16, kBRegSize);
3249}
3250
3251TEST_SVE(sve_cnth) {
3252 CntHelper(config, &MacroAssembler::Cnth, 1, kHRegSize);
3253 CntHelper(config, &MacroAssembler::Cnth, 2, kHRegSize);
3254 CntHelper(config, &MacroAssembler::Cnth, 15, kHRegSize);
3255 CntHelper(config, &MacroAssembler::Cnth, 16, kHRegSize);
3256}
3257
3258TEST_SVE(sve_cntw) {
3259 CntHelper(config, &MacroAssembler::Cntw, 1, kWRegSize);
3260 CntHelper(config, &MacroAssembler::Cntw, 2, kWRegSize);
3261 CntHelper(config, &MacroAssembler::Cntw, 15, kWRegSize);
3262 CntHelper(config, &MacroAssembler::Cntw, 16, kWRegSize);
3263}
3264
3265TEST_SVE(sve_cntd) {
3266 CntHelper(config, &MacroAssembler::Cntd, 1, kDRegSize);
3267 CntHelper(config, &MacroAssembler::Cntd, 2, kDRegSize);
3268 CntHelper(config, &MacroAssembler::Cntd, 15, kDRegSize);
3269 CntHelper(config, &MacroAssembler::Cntd, 16, kDRegSize);
3270}
3271
TatWai Chong13634762019-07-16 16:20:45 -07003272typedef void (MacroAssembler::*IntBinArithFn)(const ZRegister& zd,
3273 const PRegisterM& pg,
3274 const ZRegister& zn,
3275 const ZRegister& zm);
3276
3277template <typename Td, typename Tg, typename Tn>
3278static void IntBinArithHelper(Test* config,
3279 IntBinArithFn macro,
3280 unsigned lane_size_in_bits,
3281 const Tg& pg_inputs,
3282 const Tn& zn_inputs,
3283 const Tn& zm_inputs,
3284 const Td& zd_expected) {
3285 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3286 START();
3287
3288 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
3289 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
3290 InsrHelper(&masm, src_a, zn_inputs);
3291 InsrHelper(&masm, src_b, zm_inputs);
3292
3293 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
3294
3295 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
3296 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
3297 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
3298
3299 // `instr` zd(dst), zd(src_a), zn(src_b)
3300 __ Mov(zd_1, src_a);
3301 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
3302
3303 // `instr` zd(dst), zm(src_a), zd(src_b)
3304  // If the zd and zm registers are aliased, the macro (`Instr`) swaps the
3305  // order of the operands when the operation is commutative; otherwise it
3306  // falls back to the reversed form of the instruction, such as `subr` or `divr`.
3307 __ Mov(zd_2, src_b);
3308 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
3309
3310 // `instr` zd(dst), zm(src_a), zn(src_b)
3311  // The macro (`Instr`) automatically selects between `instr` and
3312  // `movprfx` + `instr`, based on whether the zd and zn registers are aliased.
TatWai Chongd316c5e2019-10-16 12:22:10 -07003313  // Any generated `movprfx` is predicated, using the same governing predicate
TatWai Chong13634762019-07-16 16:20:45 -07003314  // register. In order to keep the result well-defined, initialize the
3315  // destination register first.
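  // With merging predication, the inactive lanes of zd_3 then hold the
  // zn_inputs values, which is also what zd_expected encodes for those lanes.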
3316 __ Mov(zd_3, src_a);
3317 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
3318
3319 END();
3320
3321 if (CAN_RUN()) {
3322 RUN();
3323 ASSERT_EQUAL_SVE(zd_expected, zd_1);
3324
3325 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
3326 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
3327 if (!core.HasSVELane(zd_1, lane)) break;
TatWai Chongd316c5e2019-10-16 12:22:10 -07003328 if ((pg_inputs[i] & 1) != 0) {
TatWai Chong13634762019-07-16 16:20:45 -07003329 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_1, lane);
3330 } else {
3331 ASSERT_EQUAL_SVE_LANE(zn_inputs[i], zd_1, lane);
3332 }
3333 }
3334
3335 ASSERT_EQUAL_SVE(zd_expected, zd_3);
3336 }
3337}
3338
3339TEST_SVE(sve_binary_arithmetic_predicated_add) {
3340 // clang-format off
3341 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
3342
3343 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
3344
3345 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
3346
3347 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
3348
3349 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
3350 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
3351
3352 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
3353 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
3354
3355 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
3356 0x1010101010101010, 0x8181818181818181,
3357 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
3358 0x0101010101010101, 0x7f7f7f7fffffffff};
3359
3360 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
3361 0x1010101010101010, 0x0000000000000000,
3362 0x8181818181818181, 0x8080808080808080,
3363 0xffffffffffffffff, 0xffffffffffffffff};
3364
3365 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3366 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3367 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3368 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3369
3370 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
3371
3372 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
3373 0x8180, 0x8f8f, 0x0101, 0x7f7e};
3374
3375 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
3376 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
3377
3378 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
3379 0x2020202020202020, 0x8181818181818181,
3380 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
3381 0x0101010101010100, 0x7f7f7f7ffffffffe};
3382
3383 IntBinArithFn fn = &MacroAssembler::Add;
3384 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
3385 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
3386 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
3387 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
3388
3389 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
3390
3391 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
3392 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
3393
3394 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
3395 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
3396
3397 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
3398 0x0000000000000000, 0x8181818181818181,
3399 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
3400 0x0101010101010102, 0x7f7f7f8000000000};
3401
3402 fn = &MacroAssembler::Sub;
3403 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
3404 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
3405 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
3406 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
3407 // clang-format on
3408}
3409
3410TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
3411 // clang-format off
3412 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
3413
3414 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
3415
3416 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
3417 0xff00, 0xba98, 0x5555, 0x4567};
3418
3419 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
3420 0xfe00, 0xabab, 0xcdcd, 0x5678};
3421
3422 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
3423 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
3424
3425 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
3426 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
3427
3428 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
3429 0x5555555555555555, 0x0000000001234567};
3430
3431 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
3432 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3433
3434 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3435 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3436 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3437 int pg_d[] = {1, 0, 1, 1};
3438
3439 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
3440
3441 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
3442 0xff00, 0xba98, 0x5555, 0x5678};
3443
3444 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
3445 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
3446
3447 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3448 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3449
3450 IntBinArithFn fn = &MacroAssembler::Umax;
3451 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
3452 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
3453 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
3454 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
3455
3456 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
3457
3458 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
3459 0xfe00, 0xabab, 0x5555, 0x4567};
3460
3461 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
3462 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
3463
3464 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
3465 0x5555555555555555, 0x0000000001234567};
3466 fn = &MacroAssembler::Umin;
3467 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
3468 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
3469 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
3470 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
3471
3472 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
3473
3474 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
3475 0x0100, 0x0eed, 0x5555, 0x1111};
3476
3477 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
3478 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
3479
3480 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3481 0x7878787878787878, 0x0000000011111111};
3482
3483 fn = &MacroAssembler::Uabd;
3484 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
3485 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
3486 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
3487 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
3488 // clang-format on
3489}
3490
3491TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
3492 // clang-format off
3493 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
3494
3495 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
3496
3497 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
3498 INT16_MIN, INT16_MAX, INT16_MAX, 1};
3499
3500 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
3501 INT16_MAX, INT16_MAX - 1, -1, 0};
3502
3503 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
3504 INT32_MIN, INT32_MAX, INT32_MAX, 1};
3505
3506 int zm_s[] = {-1, 0, -1, -INT32_MAX,
3507 INT32_MAX, INT32_MAX - 1, -1, 0};
3508
3509 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3510 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3511
3512 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
3513 INT64_MAX, INT64_MAX - 1, -1, 0};
3514
3515 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3516 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3517 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3518 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3519
3520 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
3521
3522 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
3523 INT16_MAX, INT16_MAX, INT16_MAX, 1};
3524
3525 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
3526 INT32_MAX, INT32_MAX, INT32_MAX, 1};
3527
3528 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
3529 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3530
3531 IntBinArithFn fn = &MacroAssembler::Smax;
3532 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
3533 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
3534 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
3535 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
3536
3537 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
3538
3539 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
3540 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
3541
3542 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
3543 INT32_MIN, INT32_MAX, -1, 0};
3544
3545 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3546 INT64_MIN, INT64_MAX - 1, -1, 0};
3547
3548 fn = &MacroAssembler::Smin;
3549 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
3550 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
3551 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
3552 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
3553
3554 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
3555
3556 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
3557
3558 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
3559 0xffffffff, 0x7fffffff, 0x80000000, 1};
3560
3561 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
3562 0x8000000000000000, 1, 0x8000000000000000, 1};
3563
3564 fn = &MacroAssembler::Sabd;
3565 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
3566 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
3567 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
3568 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
3569 // clang-format on
3570}
3571
3572TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
3573 // clang-format off
3574 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3575
3576 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3577
3578 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
3579 0x8000, 0xff00, 0x5555, 0xaaaa};
3580
3581 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
3582 0x5555, 0xaaaa, 0x0001, 0x1234};
3583
3584 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3585 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
3586
3587 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3588 0x12345678, 0x22223333, 0x55556666, 0x77778888};
3589
3590 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
3591 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
3592
3593 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
3594 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
3595
3596 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3597 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3598 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3599 int pg_d[] = {1, 1, 0, 1};
3600
3601 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
3602
3603 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
3604 0x8000, 0xff00, 0x5555, 0x9e88};
3605
3606 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
3607 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
3608
3609 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
3610 0xffffffffffffffff, 0x38e38e38e38e38e4};
3611
3612 IntBinArithFn fn = &MacroAssembler::Mul;
3613 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
3614 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
3615 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
3616 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
3617
3618 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
3619
3620 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
3621 0x2aaa, 0xff00, 0x0000, 0x0c22};
3622
3623 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
3624 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
3625
3626 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
3627 0xffffffffffffffff, 0x71c71c71c71c71c6};
3628
3629 fn = &MacroAssembler::Umulh;
3630 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
3631 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
3632 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
3633 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
3634 // clang-format on
3635}
3636
3637TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
3638 // clang-format off
3639 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
3640
3641 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
3642
3643 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
3644
3645 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
3646
3647 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
3648
3649 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
3650
3651 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
3652
3653 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
3654
3655 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3656 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3657 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3658 int pg_d[] = {1, 1, 0, 1};
3659
3660 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
3661
3662 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
3663
3664 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
3665
3666 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
3667
3668 IntBinArithFn fn = &MacroAssembler::Smulh;
3669 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
3670 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
3671 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3672 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3673 // clang-format on
3674}
3675
3676TEST_SVE(sve_binary_arithmetic_predicated_logical) {
3677 // clang-format off
3678 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3679 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3680
3681 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
3682 0x8000, 0xffff, 0x5555, 0xaaaa};
3683 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
3684 0x5555, 0xaaaa, 0x0000, 0x0800};
3685
3686 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
3687 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
3688
3689 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
3690 0x0001200880ff55aa, 0x0022446688aaccee};
3691 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
3692 0x7fcd80ff55aa0008, 0x1133557799bbddff};
3693
3694 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3695 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3696 int pg_s[] = {1, 1, 1, 0};
3697 int pg_d[] = {1, 1, 0, 1};
3698
3699 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
3700
3701 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
3702 0x0000, 0xffff, 0x0000, 0x0800};
3703
3704 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
3705
3706 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
3707 0x0001200880ff55aa, 0x0022446688aaccee};
3708
3709 IntBinArithFn fn = &MacroAssembler::And;
3710 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
3711 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
3712 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
3713 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
3714
3715 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
3716
3717 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
3718 0x8000, 0xffff, 0x5555, 0xa2aa};
3719
3720 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
3721
3722 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
3723 0x0001200880ff55aa, 0x0000000000000000};
3724
3725 fn = &MacroAssembler::Bic;
3726 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
3727 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
3728 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
3729 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
3730
3731 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
3732
3733 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
3734 0xd555, 0xffff, 0x5555, 0xa2aa};
3735
3736 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
3737
3738 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
3739 0x0001200880ff55aa, 0x1111111111111111};
3740
3741 fn = &MacroAssembler::Eor;
3742 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
3743 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
3744 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
3745 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
3746
3747 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
3748
3749 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
3750 0xd555, 0xffff, 0x5555, 0xaaaa};
3751
3752 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
3753
3754 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
3755 0x0001200880ff55aa, 0x1133557799bbddff};
3756
3757 fn = &MacroAssembler::Orr;
3758 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
3759 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
3760 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
3761 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
3762 // clang-format on
3763}
3764
3765TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
3766 // clang-format off
3767 int zn_s[] = {0, 1, -1, 2468,
3768 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
3769 -11111111, 87654321, 0, 0};
3770
3771 int zm_s[] = {1, -1, 1, 1234,
3772 -1, INT32_MIN, 1, -1,
3773 22222222, 80000000, -1, 0};
3774
3775 int64_t zn_d[] = {0, 1, -1, 2468,
3776 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
3777 -11111111, 87654321, 0, 0};
3778
3779 int64_t zm_d[] = {1, -1, 1, 1234,
3780 -1, INT64_MIN, 1, -1,
3781 22222222, 80000000, -1, 0};
3782
3783 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
3784 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
3785
3786 int exp_s[] = {0, 1, -1, 2,
3787 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
3788 0, 1, 0, 0};
3789
3790 int64_t exp_d[] = {0, -1, -1, 2,
3791 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
3792 0, 1, 0, 0};
3793
3794 IntBinArithFn fn = &MacroAssembler::Sdiv;
3795 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3796 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3797 // clang-format on
3798}
3799
3800TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
3801 // clang-format off
3802 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
3803 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
3804
3805 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
3806 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
3807
3808 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
3809 0xffffffffffffffff, 0x8000000000000000,
3810 0xffffffffffffffff, 0x8000000000000000,
3811 0xffffffffffffffff, 0xf0000000f0000000};
3812
3813 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
3814 0x8000000000000000, 0x0000000000000002,
3815 0x8888888888888888, 0x0000000000000001,
3816 0x0000000080000000, 0x00000000f0000000};
3817
3818 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
3819 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
3820
3821 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
3822 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
3823
3824 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
3825 0x0000000000000001, 0x4000000000000000,
3826 0x0000000000000001, 0x8000000000000000,
3827 0xffffffffffffffff, 0x0000000100000001};
3828
3829 IntBinArithFn fn = &MacroAssembler::Udiv;
3830 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3831 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3832 // clang-format on
3833}
3834
TatWai Chongfe536042019-10-23 16:34:11 -07003835typedef void (MacroAssembler::*ArithmeticFn)(const ZRegister& zd,
3836 const ZRegister& zn,
3837 const ZRegister& zm);
TatWai Chong845246b2019-08-08 00:01:58 -07003838
3839template <typename T>
3840static void IntArithHelper(Test* config,
TatWai Chongfe536042019-10-23 16:34:11 -07003841 ArithmeticFn macro,
TatWai Chong845246b2019-08-08 00:01:58 -07003842 unsigned lane_size_in_bits,
3843 const T& zn_inputs,
3844 const T& zm_inputs,
3845 const T& zd_expected) {
3846 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3847 START();
3848
3849 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
3850 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
3851 InsrHelper(&masm, zn, zn_inputs);
3852 InsrHelper(&masm, zm, zm_inputs);
3853
3854 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
3855 (masm.*macro)(zd, zn, zm);
3856
3857 END();
3858
3859 if (CAN_RUN()) {
3860 RUN();
3861 ASSERT_EQUAL_SVE(zd_expected, zd);
3862 }
3863}
3864
3865TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
3866 // clang-format off
TatWai Chong6995bfd2019-09-26 10:48:05 +01003867 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
3868 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
3869 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
3870 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
TatWai Chong845246b2019-08-08 00:01:58 -07003871 0x1000000010001010, 0xf0000000f000f0f0};
3872
TatWai Chongfe536042019-10-23 16:34:11 -07003873 ArithmeticFn fn = &MacroAssembler::Add;
TatWai Chong845246b2019-08-08 00:01:58 -07003874
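  // As signed bytes, 0x81 + 0x81 = -254: `add` wraps this to 0x02, while
  // `sqadd` saturates to INT8_MIN (0x80). As unsigned bytes, 0x81 + 0x81 =
  // 0x102, which `uqadd` saturates to 0xff.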
3875 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
3876 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
3877 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
3878 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
3879 0x2000000020002020, 0xe0000001e001e1e0};
3880
TatWai Chong6995bfd2019-09-26 10:48:05 +01003881 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
3882 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
3883 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
3884 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003885
3886 fn = &MacroAssembler::Sqadd;
3887
3888 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
3889 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
3890 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
3891 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3892 0x2000000020002020, 0xe0000001e001e1e0};
3893
TatWai Chong6995bfd2019-09-26 10:48:05 +01003894 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
3895 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
3896 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
3897 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003898
3899 fn = &MacroAssembler::Uqadd;
3900
3901 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
3902 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
3903 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
3904 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
3905 0x2000000020002020, 0xffffffffffffffff};
3906
TatWai Chong6995bfd2019-09-26 10:48:05 +01003907 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
3908 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
3909 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
3910 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003911 // clang-format on
3912}
3913
3914TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
3915 // clang-format off
3916
3917 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
3918 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
3919
3920 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
3921 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
3922
3923 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
3924 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
3925
3926 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
3927 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
3928 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
3929 0xf0000000f000f0f0, 0x5555555555555555};
3930
TatWai Chongfe536042019-10-23 16:34:11 -07003931 ArithmeticFn fn = &MacroAssembler::Sub;
TatWai Chong845246b2019-08-08 00:01:58 -07003932
3933 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
3934 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
3935 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
3936 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
3937 0x8eeeeeed8eed8d8e, 0x5555555555555555};
3938
3939 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
3940 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
3941 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
3942 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
3943
3944 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
3945 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
3946 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
3947 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
3948 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
3949
3950 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
3951 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
3952 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
3953 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
3954
3955 fn = &MacroAssembler::Sqsub;
3956
3957 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
3958 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
3959 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
3960 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3961 0x7fffffffffffffff, 0x8000000000000000};
3962
3963 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
3964 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
3965 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
3966 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
3967
3968 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
3969 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
3970 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
3971 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
3972 0x8000000000000000, 0x7fffffffffffffff};
3973
3974 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
3975 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
3976 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
3977 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
3978
3979 fn = &MacroAssembler::Uqsub;
3980
3981 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
3982 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
3983 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
3984 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
3985 0x0000000000000000, 0x5555555555555555};
3986
3987 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
3988 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
3989 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
3990 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
3991
3992 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
3993 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
3994 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
3995 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
3996 0x7111111271127272, 0x0000000000000000};
3997
3998 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
3999 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
4000 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
4001 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
4002 // clang-format on
4003}
4004
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004005TEST_SVE(sve_rdvl) {
4006 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4007 START();
4008
4009 // Encodable multipliers.
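  // (`rdvl` takes a signed 6-bit multiplier, so [-32, 31] is encodable.)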
4010 __ Rdvl(x0, 0);
4011 __ Rdvl(x1, 1);
4012 __ Rdvl(x2, 2);
4013 __ Rdvl(x3, 31);
4014 __ Rdvl(x4, -1);
4015 __ Rdvl(x5, -2);
4016 __ Rdvl(x6, -32);
4017
4018 // For unencodable multipliers, the MacroAssembler uses a sequence of
4019 // instructions.
4020 __ Rdvl(x10, 32);
4021 __ Rdvl(x11, -33);
4022 __ Rdvl(x12, 42);
4023 __ Rdvl(x13, -42);
4024
4025 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4026 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4027 // occurs in the macro.
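  // For example, at the maximum VL of 256 bytes, 0x007fffffffffffff * 256 =
  // 0x7fffffffffffff00, which still fits in a signed 64-bit result.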
4028 __ Rdvl(x14, 0x007fffffffffffff);
4029 __ Rdvl(x15, -0x0080000000000000);
4030
4031 END();
4032
4033 if (CAN_RUN()) {
4034 RUN();
4035
4036 uint64_t vl = config->sve_vl_in_bytes();
4037
4038 ASSERT_EQUAL_64(vl * 0, x0);
4039 ASSERT_EQUAL_64(vl * 1, x1);
4040 ASSERT_EQUAL_64(vl * 2, x2);
4041 ASSERT_EQUAL_64(vl * 31, x3);
4042 ASSERT_EQUAL_64(vl * -1, x4);
4043 ASSERT_EQUAL_64(vl * -2, x5);
4044 ASSERT_EQUAL_64(vl * -32, x6);
4045
4046 ASSERT_EQUAL_64(vl * 32, x10);
4047 ASSERT_EQUAL_64(vl * -33, x11);
4048 ASSERT_EQUAL_64(vl * 42, x12);
4049 ASSERT_EQUAL_64(vl * -42, x13);
4050
4051 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
4052 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
4053 }
4054}
4055
4056TEST_SVE(sve_rdpl) {
4057 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4058 START();
4059
4060 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
4061 // Addpl(xd, xzr, ...).
4062
4063 // Encodable multipliers (as `addvl`).
4064 __ Rdpl(x0, 0);
4065 __ Rdpl(x1, 8);
4066 __ Rdpl(x2, 248);
4067 __ Rdpl(x3, -8);
4068 __ Rdpl(x4, -256);
4069
4070 // Encodable multipliers (as `movz` + `addpl`).
4071 __ Rdpl(x7, 31);
Jacob Bramley889984c2019-10-28 17:28:48 +00004072 __ Rdpl(x8, -31);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004073
4074 // For unencodable multipliers, the MacroAssembler uses a sequence of
4075 // instructions.
4076 __ Rdpl(x10, 42);
4077 __ Rdpl(x11, -42);
4078
4079 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4080 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4081 // occurs in the macro.
4082 __ Rdpl(x12, 0x007fffffffffffff);
4083 __ Rdpl(x13, -0x0080000000000000);
4084
4085 END();
4086
4087 if (CAN_RUN()) {
4088 RUN();
4089
4090 uint64_t vl = config->sve_vl_in_bytes();
4091 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4092 uint64_t pl = vl / kZRegBitsPerPRegBit;
4093
4094 ASSERT_EQUAL_64(pl * 0, x0);
4095 ASSERT_EQUAL_64(pl * 8, x1);
4096 ASSERT_EQUAL_64(pl * 248, x2);
4097 ASSERT_EQUAL_64(pl * -8, x3);
4098 ASSERT_EQUAL_64(pl * -256, x4);
4099
4100 ASSERT_EQUAL_64(pl * 31, x7);
Jacob Bramley889984c2019-10-28 17:28:48 +00004101 ASSERT_EQUAL_64(pl * -31, x8);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004102
4103 ASSERT_EQUAL_64(pl * 42, x10);
4104 ASSERT_EQUAL_64(pl * -42, x11);
4105
4106 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
4107 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
4108 }
4109}
4110
4111TEST_SVE(sve_addvl) {
4112 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4113 START();
4114
4115 uint64_t base = 0x1234567800000000;
4116 __ Mov(x30, base);
4117
4118 // Encodable multipliers.
4119 __ Addvl(x0, x30, 0);
4120 __ Addvl(x1, x30, 1);
4121 __ Addvl(x2, x30, 31);
4122 __ Addvl(x3, x30, -1);
4123 __ Addvl(x4, x30, -32);
4124
4125 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
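  // (Roughly `Rdvl(scratch, multiplier)` followed by `Add(xd, xn, scratch)`;
  // the exact expansion is an implementation detail of the macro.)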
4126 __ Addvl(x5, x30, 32);
4127 __ Addvl(x6, x30, -33);
4128
4129 // Test the limits of the multiplier supported by the `Rdvl` macro.
4130 __ Addvl(x7, x30, 0x007fffffffffffff);
4131 __ Addvl(x8, x30, -0x0080000000000000);
4132
4133 // Check that xzr behaves correctly.
4134 __ Addvl(x9, xzr, 8);
4135 __ Addvl(x10, xzr, 42);
4136
4137 // Check that sp behaves correctly with encodable and unencodable multipliers.
4138 __ Addvl(sp, sp, -5);
4139 __ Addvl(sp, sp, -37);
4140 __ Addvl(x11, sp, -2);
4141 __ Addvl(sp, x11, 2);
4142 __ Addvl(x12, sp, -42);
4143
4144 // Restore the value of sp.
4145 __ Addvl(sp, x11, 39);
4146 __ Addvl(sp, sp, 5);
4147
4148 // Adjust x11 and x12 to make the test sp-agnostic.
4149 __ Sub(x11, sp, x11);
4150 __ Sub(x12, sp, x12);
4151
4152 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4153 __ Mov(x20, x30);
4154 __ Mov(x21, x30);
4155 __ Mov(x22, x30);
4156 __ Addvl(x20, x20, 4);
4157 __ Addvl(x21, x21, 42);
4158 __ Addvl(x22, x22, -0x0080000000000000);
4159
4160 END();
4161
4162 if (CAN_RUN()) {
4163 RUN();
4164
4165 uint64_t vl = config->sve_vl_in_bytes();
4166
4167 ASSERT_EQUAL_64(base + (vl * 0), x0);
4168 ASSERT_EQUAL_64(base + (vl * 1), x1);
4169 ASSERT_EQUAL_64(base + (vl * 31), x2);
4170 ASSERT_EQUAL_64(base + (vl * -1), x3);
4171 ASSERT_EQUAL_64(base + (vl * -32), x4);
4172
4173 ASSERT_EQUAL_64(base + (vl * 32), x5);
4174 ASSERT_EQUAL_64(base + (vl * -33), x6);
4175
4176 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
4177 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
4178
4179 ASSERT_EQUAL_64(vl * 8, x9);
4180 ASSERT_EQUAL_64(vl * 42, x10);
4181
4182 ASSERT_EQUAL_64(vl * 44, x11);
4183 ASSERT_EQUAL_64(vl * 84, x12);
4184
4185 ASSERT_EQUAL_64(base + (vl * 4), x20);
4186 ASSERT_EQUAL_64(base + (vl * 42), x21);
4187 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
4188
4189 ASSERT_EQUAL_64(base, x30);
4190 }
4191}
4192
4193TEST_SVE(sve_addpl) {
4194 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4195 START();
4196
4197 uint64_t base = 0x1234567800000000;
4198 __ Mov(x30, base);
4199
4200 // Encodable multipliers.
4201 __ Addpl(x0, x30, 0);
4202 __ Addpl(x1, x30, 1);
4203 __ Addpl(x2, x30, 31);
4204 __ Addpl(x3, x30, -1);
4205 __ Addpl(x4, x30, -32);
4206
4207 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
4208 // it falls back to `Rdvl` and `Add`.
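  // (A PL offset that is a multiple of eight is equivalent to a VL offset of
  // one eighth of its value; for example, 32 PL equals 4 VL.)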
4209 __ Addpl(x5, x30, 32);
4210 __ Addpl(x6, x30, -33);
4211
4212 // Test the limits of the multiplier supported by the `Rdvl` macro.
4213 __ Addpl(x7, x30, 0x007fffffffffffff);
4214 __ Addpl(x8, x30, -0x0080000000000000);
4215
4216 // Check that xzr behaves correctly.
4217 __ Addpl(x9, xzr, 8);
4218 __ Addpl(x10, xzr, 42);
4219
4220 // Check that sp behaves correctly with encodable and unencodable multipliers.
4221 __ Addpl(sp, sp, -5);
4222 __ Addpl(sp, sp, -37);
4223 __ Addpl(x11, sp, -2);
4224 __ Addpl(sp, x11, 2);
4225 __ Addpl(x12, sp, -42);
4226
4227 // Restore the value of sp.
4228 __ Addpl(sp, x11, 39);
4229 __ Addpl(sp, sp, 5);
4230
4231 // Adjust x11 and x12 to make the test sp-agnostic.
4232 __ Sub(x11, sp, x11);
4233 __ Sub(x12, sp, x12);
4234
4235 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4236 __ Mov(x20, x30);
4237 __ Mov(x21, x30);
4238 __ Mov(x22, x30);
4239 __ Addpl(x20, x20, 4);
4240 __ Addpl(x21, x21, 42);
4241 __ Addpl(x22, x22, -0x0080000000000000);
4242
4243 END();
4244
4245 if (CAN_RUN()) {
4246 RUN();
4247
4248 uint64_t vl = config->sve_vl_in_bytes();
4249 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4250 uint64_t pl = vl / kZRegBitsPerPRegBit;
4251
4252 ASSERT_EQUAL_64(base + (pl * 0), x0);
4253 ASSERT_EQUAL_64(base + (pl * 1), x1);
4254 ASSERT_EQUAL_64(base + (pl * 31), x2);
4255 ASSERT_EQUAL_64(base + (pl * -1), x3);
4256 ASSERT_EQUAL_64(base + (pl * -32), x4);
4257
4258 ASSERT_EQUAL_64(base + (pl * 32), x5);
4259 ASSERT_EQUAL_64(base + (pl * -33), x6);
4260
4261 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
4262 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
4263
4264 ASSERT_EQUAL_64(pl * 8, x9);
4265 ASSERT_EQUAL_64(pl * 42, x10);
4266
4267 ASSERT_EQUAL_64(pl * 44, x11);
4268 ASSERT_EQUAL_64(pl * 84, x12);
4269
4270 ASSERT_EQUAL_64(base + (pl * 4), x20);
4271 ASSERT_EQUAL_64(base + (pl * 42), x21);
4272 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
4273
4274 ASSERT_EQUAL_64(base, x30);
4275 }
4276}
4277
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004278TEST_SVE(sve_calculate_sve_address) {
4279 // Shadow the `MacroAssembler` type so that the test macros work without
4280 // modification.
4281 typedef CalculateSVEAddressMacroAssembler MacroAssembler;
4282
Jacob Bramley1314c462019-08-08 10:54:16 +01004283 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004284 START(); // NOLINT(clang-diagnostic-local-type-template-args)
Jacob Bramley1314c462019-08-08 10:54:16 +01004285
4286 uint64_t base = 0x1234567800000000;
4287 __ Mov(x28, base);
4288 __ Mov(x29, 48);
4289 __ Mov(x30, -48);
4290
4291 // Simple scalar (or equivalent) cases.
4292
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004293 __ CalculateSVEAddress(x0, SVEMemOperand(x28));
4294 __ CalculateSVEAddress(x1, SVEMemOperand(x28, 0));
4295 __ CalculateSVEAddress(x2, SVEMemOperand(x28, 0, SVE_MUL_VL));
4296 __ CalculateSVEAddress(x3, SVEMemOperand(x28, 0, SVE_MUL_VL), 3);
4297 __ CalculateSVEAddress(x4, SVEMemOperand(x28, xzr));
4298 __ CalculateSVEAddress(x5, SVEMemOperand(x28, xzr, LSL, 42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004299
4300 // scalar-plus-immediate
4301
4302 // Unscaled immediates, handled with `Add`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004303 __ CalculateSVEAddress(x6, SVEMemOperand(x28, 42));
4304 __ CalculateSVEAddress(x7, SVEMemOperand(x28, -42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004305 // Scaled immediates, handled with `Addvl` or `Addpl`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004306 __ CalculateSVEAddress(x8, SVEMemOperand(x28, 31, SVE_MUL_VL), 0);
4307 __ CalculateSVEAddress(x9, SVEMemOperand(x28, -32, SVE_MUL_VL), 0);
Jacob Bramley1314c462019-08-08 10:54:16 +01004308 // Out of `addvl` or `addpl` range.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004309 __ CalculateSVEAddress(x10, SVEMemOperand(x28, 42, SVE_MUL_VL), 0);
4310 __ CalculateSVEAddress(x11, SVEMemOperand(x28, -42, SVE_MUL_VL), 0);
4311 // As above, for VL-based accesses smaller than a Z register.
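  // The trailing argument gives the log2 of the divisor applied to VL, so an
  // offset of -32 * 8 with a divisor of 8 (log2 3) still advances the address
  // by -32 * VL bytes.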
4312 VIXL_STATIC_ASSERT(kZRegBitsPerPRegBitLog2 == 3);
4313 __ CalculateSVEAddress(x12, SVEMemOperand(x28, -32 * 8, SVE_MUL_VL), 3);
4314 __ CalculateSVEAddress(x13, SVEMemOperand(x28, -42 * 8, SVE_MUL_VL), 3);
4315 __ CalculateSVEAddress(x14, SVEMemOperand(x28, -32 * 4, SVE_MUL_VL), 2);
4316 __ CalculateSVEAddress(x15, SVEMemOperand(x28, -42 * 4, SVE_MUL_VL), 2);
4317 __ CalculateSVEAddress(x18, SVEMemOperand(x28, -32 * 2, SVE_MUL_VL), 1);
4318 __ CalculateSVEAddress(x19, SVEMemOperand(x28, -42 * 2, SVE_MUL_VL), 1);
Jacob Bramley1314c462019-08-08 10:54:16 +01004319
4320 // scalar-plus-scalar
4321
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004322 __ CalculateSVEAddress(x20, SVEMemOperand(x28, x29));
4323 __ CalculateSVEAddress(x21, SVEMemOperand(x28, x30));
4324 __ CalculateSVEAddress(x22, SVEMemOperand(x28, x29, LSL, 8));
4325 __ CalculateSVEAddress(x23, SVEMemOperand(x28, x30, LSL, 8));
Jacob Bramley1314c462019-08-08 10:54:16 +01004326
4327 // In-place updates, to stress scratch register allocation.
4328
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004329 __ Mov(x24, 0xabcd000000000000);
4330 __ Mov(x25, 0xabcd101100000000);
4331 __ Mov(x26, 0xabcd202200000000);
4332 __ Mov(x27, 0xabcd303300000000);
4333 __ Mov(x28, 0xabcd404400000000);
4334 __ Mov(x29, 0xabcd505500000000);
Jacob Bramley1314c462019-08-08 10:54:16 +01004335
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004336 __ CalculateSVEAddress(x24, SVEMemOperand(x24));
4337 __ CalculateSVEAddress(x25, SVEMemOperand(x25, 0x42));
4338 __ CalculateSVEAddress(x26, SVEMemOperand(x26, 3, SVE_MUL_VL), 0);
4339 __ CalculateSVEAddress(x27, SVEMemOperand(x27, 0x42, SVE_MUL_VL), 3);
4340 __ CalculateSVEAddress(x28, SVEMemOperand(x28, x30));
4341 __ CalculateSVEAddress(x29, SVEMemOperand(x29, x30, LSL, 4));
Jacob Bramley1314c462019-08-08 10:54:16 +01004342
4343 END();
4344
4345 if (CAN_RUN()) {
4346 RUN();
4347
4348 uint64_t vl = config->sve_vl_in_bytes();
4349 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4350 uint64_t pl = vl / kZRegBitsPerPRegBit;
4351
4352 // Simple scalar (or equivalent) cases.
4353 ASSERT_EQUAL_64(base, x0);
4354 ASSERT_EQUAL_64(base, x1);
4355 ASSERT_EQUAL_64(base, x2);
4356 ASSERT_EQUAL_64(base, x3);
4357 ASSERT_EQUAL_64(base, x4);
4358 ASSERT_EQUAL_64(base, x5);
4359
4360 // scalar-plus-immediate
4361 ASSERT_EQUAL_64(base + 42, x6);
4362 ASSERT_EQUAL_64(base - 42, x7);
4363 ASSERT_EQUAL_64(base + (31 * vl), x8);
4364 ASSERT_EQUAL_64(base - (32 * vl), x9);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004365 ASSERT_EQUAL_64(base + (42 * vl), x10);
4366 ASSERT_EQUAL_64(base - (42 * vl), x11);
4367 ASSERT_EQUAL_64(base - (32 * vl), x12);
Jacob Bramley1314c462019-08-08 10:54:16 +01004368 ASSERT_EQUAL_64(base - (42 * vl), x13);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004369 ASSERT_EQUAL_64(base - (32 * vl), x14);
4370 ASSERT_EQUAL_64(base - (42 * vl), x15);
4371 ASSERT_EQUAL_64(base - (32 * vl), x18);
4372 ASSERT_EQUAL_64(base - (42 * vl), x19);
Jacob Bramley1314c462019-08-08 10:54:16 +01004373
4374 // scalar-plus-scalar
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004375 ASSERT_EQUAL_64(base + 48, x20);
4376 ASSERT_EQUAL_64(base - 48, x21);
4377 ASSERT_EQUAL_64(base + (48 << 8), x22);
4378 ASSERT_EQUAL_64(base - (48 << 8), x23);
Jacob Bramley1314c462019-08-08 10:54:16 +01004379
4380 // In-place updates.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004381 ASSERT_EQUAL_64(0xabcd000000000000, x24);
4382 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x25);
4383 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x26);
4384 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x27);
4385 ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28);
4386 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29);
Jacob Bramley1314c462019-08-08 10:54:16 +01004387 }
4388}
4389
TatWai Chong4f28df72019-08-14 17:50:30 -07004390TEST_SVE(sve_permute_vector_unpredicated) {
4391 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
4392 START();
4393
Jacob Bramleye4983d42019-10-08 10:56:15 +01004394 // Initialise registers with known values first.
4395 __ Dup(z1.VnB(), 0x11);
4396 __ Dup(z2.VnB(), 0x22);
4397 __ Dup(z3.VnB(), 0x33);
4398 __ Dup(z4.VnB(), 0x44);
4399
TatWai Chong4f28df72019-08-14 17:50:30 -07004400 __ Mov(x0, 0x0123456789abcdef);
4401 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
4402 __ Insr(z1.VnS(), w0);
4403 __ Insr(z2.VnD(), x0);
4404 __ Insr(z3.VnH(), h0);
4405 __ Insr(z4.VnD(), d0);
4406
4407 uint64_t inputs[] = {0xfedcba9876543210,
4408 0x0123456789abcdef,
4409 0x8f8e8d8c8b8a8988,
4410 0x8786858483828180};
4411
4412 // Initialise the whole register with a distinguishable value first.
4413 __ Dup(z9.VnB(), 0xff);
4414 InsrHelper(&masm, z9.VnD(), inputs);
4415
4416 __ Rev(z5.VnB(), z9.VnB());
4417 __ Rev(z6.VnH(), z9.VnH());
4418 __ Rev(z7.VnS(), z9.VnS());
4419 __ Rev(z8.VnD(), z9.VnD());
4420
4421 int index[7] = {22, 7, 7, 3, 1, 1, 63};
4422 // Broadcast an element from within the input array.
4423 __ Dup(z10.VnB(), z9.VnB(), index[0]);
4424 __ Dup(z11.VnH(), z9.VnH(), index[1]);
4425 __ Dup(z12.VnS(), z9.VnS(), index[2]);
4426 __ Dup(z13.VnD(), z9.VnD(), index[3]);
4427 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
4428 // Test dst == src
4429 __ Mov(z15, z9);
4430 __ Dup(z15.VnS(), z15.VnS(), index[5]);
4431 // Select an element beyond the input array.
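// (If the index is beyond the vector length, `dup` selects no element and
// zeroes every lane; the checks below allow for this.)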
4432 __ Dup(z16.VnB(), z9.VnB(), index[6]);
4433
4434 END();
4435
4436 if (CAN_RUN()) {
4437 RUN();
4438
4439 // Insr
Jacob Bramleye4983d42019-10-08 10:56:15 +01004440 uint64_t z1_expected[] = {0x1111111111111111, 0x1111111189abcdef};
4441 uint64_t z2_expected[] = {0x2222222222222222, 0x0123456789abcdef};
4442 uint64_t z3_expected[] = {0x3333333333333333, 0x3333333333333456};
4443 uint64_t z4_expected[] = {0x4444444444444444, 0x7ffaaaaa22223456};
TatWai Chong4f28df72019-08-14 17:50:30 -07004444 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
4445 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
4446 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
4447 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
4448
4449 // Rev
4450 int lane_count = core.GetSVELaneCount(kBRegSize);
4451 for (int i = 0; i < lane_count; i++) {
4452 uint64_t expected =
4453 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
4454 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
4455 ASSERT_EQUAL_64(expected, input);
4456 }
4457
4458 lane_count = core.GetSVELaneCount(kHRegSize);
4459 for (int i = 0; i < lane_count; i++) {
4460 uint64_t expected =
4461 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
4462 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
4463 ASSERT_EQUAL_64(expected, input);
4464 }
4465
4466 lane_count = core.GetSVELaneCount(kSRegSize);
4467 for (int i = 0; i < lane_count; i++) {
4468 uint64_t expected =
4469 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
4470 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
4471 ASSERT_EQUAL_64(expected, input);
4472 }
4473
4474 lane_count = core.GetSVELaneCount(kDRegSize);
4475 for (int i = 0; i < lane_count; i++) {
4476 uint64_t expected =
4477 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
4478 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
4479 ASSERT_EQUAL_64(expected, input);
4480 }
4481
4482 // Dup
4483 unsigned vl = config->sve_vl_in_bits();
4484 lane_count = core.GetSVELaneCount(kBRegSize);
4485 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
4486 for (int i = 0; i < lane_count; i++) {
4487 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
4488 }
4489
4490 lane_count = core.GetSVELaneCount(kHRegSize);
4491 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
4492 for (int i = 0; i < lane_count; i++) {
4493 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
4494 }
4495
4496 lane_count = core.GetSVELaneCount(kSRegSize);
4497 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
4498 for (int i = 0; i < lane_count; i++) {
4499 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
4500 }
4501
4502 lane_count = core.GetSVELaneCount(kDRegSize);
4503 uint64_t expected_z13 =
4504 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
4505 for (int i = 0; i < lane_count; i++) {
4506 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
4507 }
4508
4509 lane_count = core.GetSVELaneCount(kDRegSize);
4510 uint64_t expected_z14_lo = 0;
4511 uint64_t expected_z14_hi = 0;
4512 if (vl > (index[4] * kQRegSize)) {
4513 expected_z14_lo = 0x0123456789abcdef;
4514 expected_z14_hi = 0xfedcba9876543210;
4515 }
4516 for (int i = 0; i < lane_count; i += 2) {
4517 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
4518 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
4519 }
4520
4521 lane_count = core.GetSVELaneCount(kSRegSize);
4522 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
4523 for (int i = 0; i < lane_count; i++) {
4524 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
4525 }
4526
4527 lane_count = core.GetSVELaneCount(kBRegSize);
4528 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
4529 for (int i = 0; i < lane_count; i++) {
4530 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
4531 }
4532 }
4533}
4534
4535TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
4536 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4537 START();
4538
4539 uint64_t z9_inputs[] = {0xfedcba9876543210,
4540 0x0123456789abcdef,
4541 0x8f8e8d8c8b8a8988,
4542 0x8786858483828180};
4543 InsrHelper(&masm, z9.VnD(), z9_inputs);
4544
4545 __ Sunpkhi(z10.VnH(), z9.VnB());
4546 __ Sunpkhi(z11.VnS(), z9.VnH());
4547 __ Sunpkhi(z12.VnD(), z9.VnS());
4548
4549 __ Sunpklo(z13.VnH(), z9.VnB());
4550 __ Sunpklo(z14.VnS(), z9.VnH());
4551 __ Sunpklo(z15.VnD(), z9.VnS());
4552
4553 __ Uunpkhi(z16.VnH(), z9.VnB());
4554 __ Uunpkhi(z17.VnS(), z9.VnH());
4555 __ Uunpkhi(z18.VnD(), z9.VnS());
4556
4557 __ Uunpklo(z19.VnH(), z9.VnB());
4558 __ Uunpklo(z20.VnS(), z9.VnH());
4559 __ Uunpklo(z21.VnD(), z9.VnS());
4560
4561 END();
4562
4563 if (CAN_RUN()) {
4564 RUN();
4565
4566 // Sunpkhi
4567 int lane_count = core.GetSVELaneCount(kHRegSize);
4568 for (int i = lane_count - 1; i >= 0; i--) {
4569 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
4570 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4571 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4572 ASSERT_EQUAL_64(expected, input);
4573 }
4574
4575 lane_count = core.GetSVELaneCount(kSRegSize);
4576 for (int i = lane_count - 1; i >= 0; i--) {
4577 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
4578 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4579 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4580 ASSERT_EQUAL_64(expected, input);
4581 }
4582
4583 lane_count = core.GetSVELaneCount(kDRegSize);
4584 for (int i = lane_count - 1; i >= 0; i--) {
4585 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
4586 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4587 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4588 ASSERT_EQUAL_64(expected, input);
4589 }
4590
4591 // Sunpklo
4592 lane_count = core.GetSVELaneCount(kHRegSize);
4593 for (int i = lane_count - 1; i >= 0; i--) {
4594 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
4595 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4596 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4597 ASSERT_EQUAL_64(expected, input);
4598 }
4599
4600 lane_count = core.GetSVELaneCount(kSRegSize);
4601 for (int i = lane_count - 1; i >= 0; i--) {
4602 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
4603 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4604 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4605 ASSERT_EQUAL_64(expected, input);
4606 }
4607
4608 lane_count = core.GetSVELaneCount(kDRegSize);
4609 for (int i = lane_count - 1; i >= 0; i--) {
4610 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
4611 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4612 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4613 ASSERT_EQUAL_64(expected, input);
4614 }
4615
4616 // Uunpkhi
4617 lane_count = core.GetSVELaneCount(kHRegSize);
4618 for (int i = lane_count - 1; i >= 0; i--) {
4619 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
4620 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4621 ASSERT_EQUAL_64(expected, input);
4622 }
4623
4624 lane_count = core.GetSVELaneCount(kSRegSize);
4625 for (int i = lane_count - 1; i >= 0; i--) {
4626 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
4627 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4628 ASSERT_EQUAL_64(expected, input);
4629 }
4630
4631 lane_count = core.GetSVELaneCount(kDRegSize);
4632 for (int i = lane_count - 1; i >= 0; i--) {
4633 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
4634 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4635 ASSERT_EQUAL_64(expected, input);
4636 }
4637
4638 // Uunpklo
4639 lane_count = core.GetSVELaneCount(kHRegSize);
4640 for (int i = lane_count - 1; i >= 0; i--) {
4641 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
4642 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4643 ASSERT_EQUAL_64(expected, input);
4644 }
4645
4646 lane_count = core.GetSVELaneCount(kSRegSize);
4647 for (int i = lane_count - 1; i >= 0; i--) {
4648 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
4649 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4650 ASSERT_EQUAL_64(expected, input);
4651 }
4652
4653 lane_count = core.GetSVELaneCount(kDRegSize);
4654 for (int i = lane_count - 1; i >= 0; i--) {
4655 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
4656 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4657 ASSERT_EQUAL_64(expected, input);
4658 }
4659 }
4660}
4661
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004662TEST_SVE(sve_cnot_not) {
4663 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4664 START();
4665
4666 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
4667
4668 // For simplicity, we re-use the same pg for various lane sizes.
4669 // For D lanes: 1, 1, 0
4670 // For S lanes: 1, 1, 1, 0, 0
4671 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
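// (For lanes wider than a byte, the governing bit is the predicate bit of the
// element's lowest-numbered byte; the H/S/D patterns above are derived from
// pg_in in this way.)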
4672 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4673 Initialise(&masm, p0.VnB(), pg_in);
4674 PRegisterM pg = p0.Merging();
4675
4676 // These are merging operations, so we have to initialise the result register.
4677 // We use a mixture of constructive and destructive operations.
4678
4679 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004680 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004681 __ Mov(z30, z31);
4682
4683 // For constructive operations, use a different initial result value.
4684 __ Index(z29.VnB(), 0, -1);
4685
4686 __ Mov(z0, z31);
4687 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
4688 __ Mov(z1, z29);
4689 __ Cnot(z1.VnH(), pg, z31.VnH());
4690 __ Mov(z2, z31);
4691 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
4692 __ Mov(z3, z29);
4693 __ Cnot(z3.VnD(), pg, z31.VnD());
4694
4695 __ Mov(z4, z29);
4696 __ Not(z4.VnB(), pg, z31.VnB());
4697 __ Mov(z5, z31);
4698 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
4699 __ Mov(z6, z29);
4700 __ Not(z6.VnS(), pg, z31.VnS());
4701 __ Mov(z7, z31);
4702 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
4703
4704 END();
4705
4706 if (CAN_RUN()) {
4707 RUN();
4708
4709 // Check that constructive operations preserve their inputs.
4710 ASSERT_EQUAL_SVE(z30, z31);
4711
4712 // clang-format off
4713
4714 // Cnot (B) destructive
4715 uint64_t expected_z0[] =
4716 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4717 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
4718 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4719
4720 // Cnot (H)
4721 uint64_t expected_z1[] =
4722 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4723 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
4724 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4725
4726 // Cnot (S) destructive
4727 uint64_t expected_z2[] =
4728 // pg: 0 1 1 1 0 0
4729 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
4730 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4731
4732 // Cnot (D)
4733 uint64_t expected_z3[] =
4734 // pg: 1 1 0
4735 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
4736 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4737
4738 // Not (B)
4739 uint64_t expected_z4[] =
4740 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4741 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
4742 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4743
4744 // Not (H) destructive
4745 uint64_t expected_z5[] =
4746 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4747 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
4748 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4749
4750 // Not (S)
4751 uint64_t expected_z6[] =
4752 // pg: 0 1 1 1 0 0
4753 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
4754 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
4755
4756 // Not (D) destructive
4757 uint64_t expected_z7[] =
4758 // pg: 1 1 0
4759 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
4760 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
4761
4762 // clang-format on
4763 }
4764}
4765
4766TEST_SVE(sve_fabs_fneg) {
4767 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4768 START();
4769
4770 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
4771 // NaNs, but fabs and fneg do not.
4772 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
4773 0xfff00000ff80fc01, // Signalling NaNs.
4774 0x123456789abcdef0};
4775
4776 // For simplicity, we re-use the same pg for various lane sizes.
4777 // For D lanes: 1, 1, 0
4778 // For S lanes: 1, 1, 1, 0, 0
4779 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4780 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4781 Initialise(&masm, p0.VnB(), pg_in);
4782 PRegisterM pg = p0.Merging();
4783
4784 // These are merging operations, so we have to initialise the result register.
4785 // We use a mixture of constructive and destructive operations.
4786
4787 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004788 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004789 __ Mov(z30, z31);
4790
4791 // For constructive operations, use a different initial result value.
4792 __ Index(z29.VnB(), 0, -1);
4793
4794 __ Mov(z0, z29);
4795 __ Fabs(z0.VnH(), pg, z31.VnH());
4796 __ Mov(z1, z31);
4797 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
4798 __ Mov(z2, z29);
4799 __ Fabs(z2.VnD(), pg, z31.VnD());
4800
4801 __ Mov(z3, z31);
4802 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
4803 __ Mov(z4, z29);
4804 __ Fneg(z4.VnS(), pg, z31.VnS());
4805 __ Mov(z5, z31);
4806 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
4807
4808 END();
4809
4810 if (CAN_RUN()) {
4811 RUN();
4812
4813 // Check that constructive operations preserve their inputs.
4814 ASSERT_EQUAL_SVE(z30, z31);
4815
4816 // clang-format off
4817
4818 // Fabs (H)
4819 uint64_t expected_z0[] =
4820 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4821 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
4822 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4823
4824 // Fabs (S) destructive
4825 uint64_t expected_z1[] =
4826 // pg: 0 1 1 1 0 0
4827 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
4828 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4829
4830 // Fabs (D)
4831 uint64_t expected_z2[] =
4832 // pg: 1 1 0
4833 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
4834 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4835
4836 // Fneg (H) destructive
4837 uint64_t expected_z3[] =
4838 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4839 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
4840 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4841
4842 // Fneg (S)
4843 uint64_t expected_z4[] =
4844 // pg: 0 1 1 1 0 0
4845 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
4846 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4847
4848 // Fneg (D) destructive
4849 uint64_t expected_z5[] =
4850 // pg: 1 1 0
4851 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
4852 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4853
4854 // clang-format on
4855 }
4856}
4857
4858TEST_SVE(sve_cls_clz_cnt) {
4859 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4860 START();
4861
4862 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4863
4864 // For simplicity, we re-use the same pg for various lane sizes.
4865 // For D lanes: 1, 1, 0
4866 // For S lanes: 1, 1, 1, 0, 0
4867 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4868 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4869 Initialise(&masm, p0.VnB(), pg_in);
4870 PRegisterM pg = p0.Merging();
4871
4872 // These are merging operations, so we have to initialise the result register.
4873 // We use a mixture of constructive and destructive operations.
4874
4875 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004876 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004877 __ Mov(z30, z31);
4878
4879 // For constructive operations, use a different initial result value.
4880 __ Index(z29.VnB(), 0, -1);
4881
4882 __ Mov(z0, z29);
4883 __ Cls(z0.VnB(), pg, z31.VnB());
4884 __ Mov(z1, z31);
4885 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
4886 __ Mov(z2, z29);
4887 __ Cnt(z2.VnS(), pg, z31.VnS());
4888 __ Mov(z3, z31);
4889 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
4890
4891 END();
4892
4893 if (CAN_RUN()) {
4894 RUN();
4895 // Check that non-destructive operations preserve their inputs.
4896 ASSERT_EQUAL_SVE(z30, z31);
4897
4898 // clang-format off
4899
4900 // cls (B)
4901 uint8_t expected_z0[] =
4902 // pg: 0 0 0 0 1 0 1 1
4903 // pg: 1 0 0 1 0 1 1 1
4904 // pg: 0 0 1 0 1 1 1 0
4905 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
4906 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
4907 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
4908 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
4909
4910 // clz (H) destructive
4911 uint16_t expected_z1[] =
4912 // pg: 0 0 0 1
4913 // pg: 0 1 1 1
4914 // pg: 0 0 1 0
4915 {0x0000, 0x0000, 0x0000, 16,
4916 0xfefc, 0, 0, 0,
4917 0x1234, 0x5678, 0, 0xdef0};
4918 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
4919
4920 // cnt (S)
4921 uint32_t expected_z2[] =
4922 // pg: 0 1
4923 // pg: 1 1
4924 // pg: 0 0
4925 {0xe9eaebec, 0,
4926 22, 16,
4927 0xf9fafbfc, 0xfdfeff00};
4928 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
4929
4930 // cnt (D) destructive
4931 uint64_t expected_z3[] =
4932 // pg: 1 1 0
4933 { 0, 38, 0x123456789abcdef0};
4934 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4935
4936 // clang-format on
4937 }
4938}
4939
4940TEST_SVE(sve_sxt) {
4941 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4942 START();
4943
4944 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4945
4946 // For simplicity, we re-use the same pg for various lane sizes.
4947 // For D lanes: 1, 1, 0
4948 // For S lanes: 1, 1, 1, 0, 0
4949 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4950 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4951 Initialise(&masm, p0.VnB(), pg_in);
4952 PRegisterM pg = p0.Merging();
4953
4954 // These are merging operations, so we have to initialise the result register.
4955 // We use a mixture of constructive and destructive operations.
4956
4957 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004958 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004959 __ Mov(z30, z31);
4960
4961 // For constructive operations, use a different initial result value.
4962 __ Index(z29.VnB(), 0, -1);
4963
4964 __ Mov(z0, z31);
4965 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
4966 __ Mov(z1, z29);
4967 __ Sxtb(z1.VnS(), pg, z31.VnS());
4968 __ Mov(z2, z31);
4969 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
4970 __ Mov(z3, z29);
4971 __ Sxth(z3.VnS(), pg, z31.VnS());
4972 __ Mov(z4, z31);
4973 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
4974 __ Mov(z5, z29);
4975 __ Sxtw(z5.VnD(), pg, z31.VnD());
4976
4977 END();
4978
4979 if (CAN_RUN()) {
4980 RUN();
4981 // Check that constructive operations preserve their inputs.
4982 ASSERT_EQUAL_SVE(z30, z31);
4983
4984 // clang-format off
4985
4986 // Sxtb (H) destructive
4987 uint64_t expected_z0[] =
4988 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4989 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
4990 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4991
4992 // Sxtb (S)
4993 uint64_t expected_z1[] =
4994 // pg: 0 1 1 1 0 0
4995 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
4996 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4997
4998 // Sxtb (D) destructive
4999 uint64_t expected_z2[] =
5000 // pg: 1 1 0
5001 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
5002 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5003
5004 // Sxth (S)
5005 uint64_t expected_z3[] =
5006 // pg: 0 1 1 1 0 0
5007 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
5008 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5009
5010 // Sxth (D) destructive
5011 uint64_t expected_z4[] =
5012 // pg: 1 1 0
5013 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
5014 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5015
5016 // Sxtw (D)
5017 uint64_t expected_z5[] =
5018 // pg: 1 1 0
5019 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
5020 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5021
5022 // clang-format on
5023 }
5024}
5025
5026TEST_SVE(sve_uxt) {
5027 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5028 START();
5029
5030 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5031
5032 // For simplicity, we re-use the same pg for various lane sizes.
5033 // For D lanes: 1, 1, 0
5034 // For S lanes: 1, 1, 1, 0, 0
5035 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5036 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5037 Initialise(&masm, p0.VnB(), pg_in);
5038 PRegisterM pg = p0.Merging();
5039
5040 // These are merging operations, so we have to initialise the result register.
5041 // We use a mixture of constructive and destructive operations.
5042
5043 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005044 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005045 __ Mov(z30, z31);
5046
5047 // For constructive operations, use a different initial result value.
5048 __ Index(z29.VnB(), 0, -1);
5049
5050 __ Mov(z0, z29);
5051 __ Uxtb(z0.VnH(), pg, z31.VnH());
5052 __ Mov(z1, z31);
5053 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
5054 __ Mov(z2, z29);
5055 __ Uxtb(z2.VnD(), pg, z31.VnD());
5056 __ Mov(z3, z31);
5057 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
5058 __ Mov(z4, z29);
5059 __ Uxth(z4.VnD(), pg, z31.VnD());
5060 __ Mov(z5, z31);
5061 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
5062
5063 END();
5064
5065 if (CAN_RUN()) {
5066 RUN();
5067 // clang-format off
5068
5069 // Uxtb (H)
5070 uint64_t expected_z0[] =
5071 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5072 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
5073 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5074
5075 // Uxtb (S) destructive
5076 uint64_t expected_z1[] =
5077 // pg: 0 1 1 1 0 0
5078 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
5079 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5080
5081 // Uxtb (D)
5082 uint64_t expected_z2[] =
5083 // pg: 1 1 0
5084 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
5085 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5086
5087 // Uxth (S) destructive
5088 uint64_t expected_z3[] =
5089 // pg: 0 1 1 1 0 0
5090 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
5091 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5092
5093 // Uxth (D)
5094 uint64_t expected_z4[] =
5095 // pg: 1 1 0
5096 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
5097 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5098
5099 // Uxtw (D) destructive
5100 uint64_t expected_z5[] =
5101 // pg: 1 1 0
5102 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
5103 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5104
5105 // clang-format on
5106 }
5107}
5108
5109TEST_SVE(sve_abs_neg) {
5110 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5111 START();
5112
5113 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5114
5115 // For simplicity, we re-use the same pg for various lane sizes.
5116 // For D lanes: 1, 1, 0
5117 // For S lanes: 1, 1, 1, 0, 0
5118 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5119 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5120 Initialise(&masm, p0.VnB(), pg_in);
5121 PRegisterM pg = p0.Merging();
5122
5124
5125 // These are merging operations, so we have to initialise the result register.
5126 // We use a mixture of constructive and destructive operations.
5127
5128 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005129 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005130 __ Mov(z30, z31);
5131
5132 // For constructive operations, use a different initial result value.
5133 __ Index(z29.VnB(), 0, -1);
5134
5135 __ Mov(z0, z31);
5136 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
5137 __ Mov(z1, z29);
5138 __ Abs(z1.VnB(), pg, z31.VnB());
5139
5140 __ Mov(z2, z31);
5141 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
5142 __ Mov(z3, z29);
5143 __ Neg(z3.VnS(), pg, z31.VnS());
5144
Jacob Bramleyc0066272019-09-30 16:30:47 +01005145 // The unpredicated form of `Neg` is implemented using `subr`.
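// (`subr` is a reversed subtract: it computes `imm - zn`, so a `subr` with an
// immediate of zero yields the negation without needing a governing predicate.)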
5146 __ Mov(z4, z31);
5147 __ Neg(z4.VnB(), z4.VnB()); // destructive
5148 __ Mov(z5, z29);
5149 __ Neg(z5.VnD(), z31.VnD());
5150
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005151 END();
5152
5153 if (CAN_RUN()) {
5154 RUN();
Jacob Bramleyc0066272019-09-30 16:30:47 +01005155
5156 ASSERT_EQUAL_SVE(z30, z31);
5157
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005158 // clang-format off
5159
5160 // Abs (D) destructive
5161 uint64_t expected_z0[] =
5162 // pg: 1 1 0
5163 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
5164 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5165
5166 // Abs (B)
5167 uint64_t expected_z1[] =
5168 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5169 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
5170 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5171
5172 // Neg (H) destructive
5173 uint64_t expected_z2[] =
5174 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5175 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
5176 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5177
5178 // Neg (S)
5179 uint64_t expected_z3[] =
5180 // pg: 0 1 1 1 0 0
5181 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
5182 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5183
Jacob Bramleyc0066272019-09-30 16:30:47 +01005184 // Neg (B) destructive, unpredicated
5185 uint64_t expected_z4[] =
5186 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
5187 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5188
5189 // Neg (D) unpredicated
5190 uint64_t expected_z5[] =
5191 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
5192 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5193
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005194 // clang-format on
5195 }
5196}
5197
Jacob Bramley0093bb92019-10-04 15:54:10 +01005198TEST_SVE(sve_cpy) {
5199 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5200 START();
5201
5202 // For simplicity, we re-use the same pg for various lane sizes.
5203 // For D lanes: 0, 1, 1
5204 // For S lanes: 0, 1, 1, 0, 1
5205 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5206 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5207
5208 PRegisterM pg = p7.Merging();
5209 Initialise(&masm, pg.VnB(), pg_in);
5210
5211 // These are merging operations, so we have to initialise the result registers
5212 // for each operation.
5213 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5214 __ Index(ZRegister(i, kBRegSize), 0, -1);
5215 }
5216
5217 // Recognisable values to copy.
5218 __ Mov(x0, 0xdeadbeefdeadbe42);
5219 __ Mov(x1, 0xdeadbeefdead8421);
5220 __ Mov(x2, 0xdeadbeef80042001);
5221 __ Mov(x3, 0x8000000420000001);
5222
5223 // Use NEON moves, to avoid testing SVE `cpy` against itself.
5224 __ Dup(v28.V2D(), x0);
5225 __ Dup(v29.V2D(), x1);
5226 __ Dup(v30.V2D(), x2);
5227 __ Dup(v31.V2D(), x3);
5228
5229 // Register forms (CPY_z_p_r)
5230 __ Cpy(z0.VnB(), pg, w0);
5231 __ Cpy(z1.VnH(), pg, x1); // X registers are accepted for small lanes.
5232 __ Cpy(z2.VnS(), pg, w2);
5233 __ Cpy(z3.VnD(), pg, x3);
5234
5235 // VRegister forms (CPY_z_p_v)
5236 __ Cpy(z4.VnB(), pg, b28);
5237 __ Cpy(z5.VnH(), pg, h29);
5238 __ Cpy(z6.VnS(), pg, s30);
5239 __ Cpy(z7.VnD(), pg, d31);
5240
5241 // Check that we can copy the stack pointer.
5242 __ Mov(x10, sp);
5243 __ Mov(sp, 0xabcabcabcabcabca); // Set sp to a known value.
5244 __ Cpy(z16.VnB(), pg, sp);
5245 __ Cpy(z17.VnH(), pg, wsp);
5246 __ Cpy(z18.VnS(), pg, wsp);
5247 __ Cpy(z19.VnD(), pg, sp);
5248 __ Mov(sp, x10); // Restore sp.
5249
5250 END();
5251
5252 if (CAN_RUN()) {
5253 RUN();
5254 // clang-format off
5255
5256 uint64_t expected_b[] =
5257 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5258 {0xe9eaebec424242f0, 0x42f2f34242f64242, 0xf942fbfcfdfeff42};
5259 ASSERT_EQUAL_SVE(expected_b, z0.VnD());
5260 ASSERT_EQUAL_SVE(expected_b, z4.VnD());
5261
5262 uint64_t expected_h[] =
5263 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5264 {0xe9eaebec8421eff0, 0xf1f28421f5f68421, 0x8421fbfcfdfe8421};
5265 ASSERT_EQUAL_SVE(expected_h, z1.VnD());
5266 ASSERT_EQUAL_SVE(expected_h, z5.VnD());
5267
5268 uint64_t expected_s[] =
5269 // pg: 0 0 1 1 0 1
5270 {0xe9eaebecedeeeff0, 0x8004200180042001, 0xf9fafbfc80042001};
5271 ASSERT_EQUAL_SVE(expected_s, z2.VnD());
5272 ASSERT_EQUAL_SVE(expected_s, z6.VnD());
5273
5274 uint64_t expected_d[] =
5275 // pg: 0 1 1
5276 {0xe9eaebecedeeeff0, 0x8000000420000001, 0x8000000420000001};
5277 ASSERT_EQUAL_SVE(expected_d, z3.VnD());
5278 ASSERT_EQUAL_SVE(expected_d, z7.VnD());
5279
5280
5281 uint64_t expected_b_sp[] =
5282 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5283 {0xe9eaebeccacacaf0, 0xcaf2f3cacaf6caca, 0xf9cafbfcfdfeffca};
5284 ASSERT_EQUAL_SVE(expected_b_sp, z16.VnD());
5285
5286 uint64_t expected_h_sp[] =
5287 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5288 {0xe9eaebecabcaeff0, 0xf1f2abcaf5f6abca, 0xabcafbfcfdfeabca};
5289 ASSERT_EQUAL_SVE(expected_h_sp, z17.VnD());
5290
5291 uint64_t expected_s_sp[] =
5292 // pg: 0 0 1 1 0 1
5293 {0xe9eaebecedeeeff0, 0xcabcabcacabcabca, 0xf9fafbfccabcabca};
5294 ASSERT_EQUAL_SVE(expected_s_sp, z18.VnD());
5295
5296 uint64_t expected_d_sp[] =
5297 // pg: 0 1 1
5298 {0xe9eaebecedeeeff0, 0xabcabcabcabcabca, 0xabcabcabcabcabca};
5299 ASSERT_EQUAL_SVE(expected_d_sp, z19.VnD());
5300
5301 // clang-format on
5302 }
5303}
5304
Jacob Bramley0f62eab2019-10-23 17:07:47 +01005305TEST_SVE(sve_cpy_imm) {
5306 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5307 START();
5308
5309 // For simplicity, we re-use the same pg for various lane sizes.
5310 // For D lanes: 0, 1, 1
5311 // For S lanes: 0, 1, 1, 0, 1
5312 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5313 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5314
5315 PRegister pg = p7;
5316 Initialise(&masm, pg.VnB(), pg_in);
5317
5318 // These are (mostly) merging operations, so we have to initialise the result
5319 // registers for each operation.
5320 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5321 __ Index(ZRegister(i, kBRegSize), 0, -1);
5322 }
5323
5324 // Encodable integer forms (CPY_z_p_i)
5325 __ Cpy(z0.VnB(), pg.Merging(), 0);
5326 __ Cpy(z1.VnB(), pg.Zeroing(), 42);
5327 __ Cpy(z2.VnB(), pg.Merging(), -42);
5328 __ Cpy(z3.VnB(), pg.Zeroing(), 0xff);
5329 __ Cpy(z4.VnH(), pg.Merging(), 127);
5330 __ Cpy(z5.VnS(), pg.Zeroing(), -128);
5331 __ Cpy(z6.VnD(), pg.Merging(), -1);
5332
5333 // Forms encodable using fcpy.
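// (These bit patterns are not valid `cpy` immediates, but they are encodable
// as `fcpy`'s eight-bit FP immediate, so the macro can still avoid using a
// scratch register here.)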
5334 __ Cpy(z7.VnH(), pg.Merging(), Float16ToRawbits(Float16(-31.0)));
5335 __ Cpy(z8.VnS(), pg.Zeroing(), FloatToRawbits(2.0f));
5336 __ Cpy(z9.VnD(), pg.Merging(), DoubleToRawbits(-4.0));
5337
5338 // Other forms use a scratch register.
5339 __ Cpy(z10.VnH(), pg.Merging(), 0xff);
5340 __ Cpy(z11.VnD(), pg.Zeroing(), 0x0123456789abcdef);
5341
5342 END();
5343
5344 if (CAN_RUN()) {
5345 RUN();
5346 // clang-format off
5347
5348 uint64_t expected_z0[] =
5349 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5350 {0xe9eaebec000000f0, 0x00f2f30000f60000, 0xf900fbfcfdfeff00};
5351 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5352
5353 uint64_t expected_z1[] =
5354 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5355 {0x000000002a2a2a00, 0x2a00002a2a002a2a, 0x002a00000000002a};
5356 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5357
5358 uint64_t expected_z2[] =
5359 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5360 {0xe9eaebecd6d6d6f0, 0xd6f2f3d6d6f6d6d6, 0xf9d6fbfcfdfeffd6};
5361 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5362
5363 uint64_t expected_z3[] =
5364 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5365 {0x00000000ffffff00, 0xff0000ffff00ffff, 0x00ff0000000000ff};
5366 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5367
5368 uint64_t expected_z4[] =
5369 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5370 {0xe9eaebec007feff0, 0xf1f2007ff5f6007f, 0x007ffbfcfdfe007f};
5371 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5372
5373 uint64_t expected_z5[] =
5374 // pg: 0 0 1 1 0 1
5375 {0x0000000000000000, 0xffffff80ffffff80, 0x00000000ffffff80};
5376 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5377
5378 uint64_t expected_z6[] =
5379 // pg: 0 1 1
5380 {0xe9eaebecedeeeff0, 0xffffffffffffffff, 0xffffffffffffffff};
5381 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
5382
5383 uint64_t expected_z7[] =
5384 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5385 {0xe9eaebeccfc0eff0, 0xf1f2cfc0f5f6cfc0, 0xcfc0fbfcfdfecfc0};
5386 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
5387
5388 uint64_t expected_z8[] =
5389 // pg: 0 0 1 1 0 1
5390 {0x0000000000000000, 0x4000000040000000, 0x0000000040000000};
5391 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
5392
5393 uint64_t expected_z9[] =
5394 // pg: 0 1 1
5395 {0xe9eaebecedeeeff0, 0xc010000000000000, 0xc010000000000000};
5396 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
5397
5398 uint64_t expected_z10[] =
5399 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5400 {0xe9eaebec00ffeff0, 0xf1f200fff5f600ff, 0x00fffbfcfdfe00ff};
5401 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
5402
5403 uint64_t expected_z11[] =
5404 // pg: 0 1 1
5405 {0x0000000000000000, 0x0123456789abcdef, 0x0123456789abcdef};
5406 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
5407
5408 // clang-format on
5409 }
5410}
5411
5412TEST_SVE(sve_fcpy_imm) {
5413 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5414 START();
5415
5416 // For simplicity, we re-use the same pg for various lane sizes.
5417 // For D lanes: 0, 1, 1
5418 // For S lanes: 0, 1, 1, 0, 1
5419 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5420 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5421
5422 PRegister pg = p7;
5423 Initialise(&masm, pg.VnB(), pg_in);
5424
5425 // These are (mostly) merging operations, so we have to initialise the result
5426 // registers for each operation.
5427 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5428 __ Index(ZRegister(i, kBRegSize), 0, -1);
5429 }
5430
5431 // Encodable floating-point forms (FCPY_z_p_i)
5432 __ Fcpy(z1.VnH(), pg.Merging(), Float16(1.0));
5433 __ Fcpy(z2.VnH(), pg.Merging(), -2.0f);
5434 __ Fcpy(z3.VnH(), pg.Merging(), 3.0);
5435 __ Fcpy(z4.VnS(), pg.Merging(), Float16(-4.0));
5436 __ Fcpy(z5.VnS(), pg.Merging(), 5.0f);
5437 __ Fcpy(z6.VnS(), pg.Merging(), 6.0);
5438 __ Fcpy(z7.VnD(), pg.Merging(), Float16(7.0));
5439 __ Fcpy(z8.VnD(), pg.Merging(), 8.0f);
5440 __ Fcpy(z9.VnD(), pg.Merging(), -9.0);
5441
5442 // Unencodable immediates.
5443 __ Fcpy(z10.VnS(), pg.Merging(), 0.0);
5444 __ Fcpy(z11.VnH(), pg.Merging(), Float16(42.0));
5445 __ Fcpy(z12.VnD(), pg.Merging(), RawbitsToDouble(0x7ff0000012340000)); // NaN
5446 __ Fcpy(z13.VnH(), pg.Merging(), kFP64NegativeInfinity);
5447
5448 END();
5449
5450 if (CAN_RUN()) {
5451 RUN();
5452 // clang-format off
5453
5454 // 1.0 as FP16: 0x3c00
5455 uint64_t expected_z1[] =
5456 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5457 {0xe9eaebec3c00eff0, 0xf1f23c00f5f63c00, 0x3c00fbfcfdfe3c00};
5458 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5459
5460 // -2.0 as FP16: 0xc000
5461 uint64_t expected_z2[] =
5462 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5463 {0xe9eaebecc000eff0, 0xf1f2c000f5f6c000, 0xc000fbfcfdfec000};
5464 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5465
5466 // 3.0 as FP16: 0x4200
5467 uint64_t expected_z3[] =
5468 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5469 {0xe9eaebec4200eff0, 0xf1f24200f5f64200, 0x4200fbfcfdfe4200};
5470 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5471
5472 // -4.0 as FP32: 0xc0800000
5473 uint64_t expected_z4[] =
5474 // pg: 0 0 1 1 0 1
5475 {0xe9eaebecedeeeff0, 0xc0800000c0800000, 0xf9fafbfcc0800000};
5476 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5477
5478 // 5.0 as FP32: 0x40a00000
5479 uint64_t expected_z5[] =
5480 // pg: 0 0 1 1 0 1
5481 {0xe9eaebecedeeeff0, 0x40a0000040a00000, 0xf9fafbfc40a00000};
5482 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5483
5484 // 6.0 as FP32: 0x40c00000
5485 uint64_t expected_z6[] =
5486 // pg: 0 0 1 1 0 1
5487 {0xe9eaebecedeeeff0, 0x40c0000040c00000, 0xf9fafbfc40c00000};
5488 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
5489
5490 // 7.0 as FP64: 0x401c000000000000
5491 uint64_t expected_z7[] =
5492 // pg: 0 1 1
5493 {0xe9eaebecedeeeff0, 0x401c000000000000, 0x401c000000000000};
5494 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
5495
5496 // 8.0 as FP64: 0x4020000000000000
5497 uint64_t expected_z8[] =
5498 // pg: 0 1 1
5499 {0xe9eaebecedeeeff0, 0x4020000000000000, 0x4020000000000000};
5500 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
5501
5502 // -9.0 as FP64: 0xc022000000000000
5503 uint64_t expected_z9[] =
5504 // pg: 0 1 1
5505 {0xe9eaebecedeeeff0, 0xc022000000000000, 0xc022000000000000};
5506 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
5507
5508 // 0.0 as FP32: 0x00000000
5509 uint64_t expected_z10[] =
5510 // pg: 0 0 1 1 0 1
5511 {0xe9eaebecedeeeff0, 0x0000000000000000, 0xf9fafbfc00000000};
5512 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
5513
5514 // 42.0 as FP16: 0x5140
5515 uint64_t expected_z11[] =
5516 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5517 {0xe9eaebec5140eff0, 0xf1f25140f5f65140, 0x5140fbfcfdfe5140};
5518 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
5519
5520 // Signalling NaN (with payload): 0x7ff0000012340000
5521 uint64_t expected_z12[] =
5522 // pg: 0 1 1
5523 {0xe9eaebecedeeeff0, 0x7ff0000012340000, 0x7ff0000012340000};
5524 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
5525
5526 // -infinity as FP16: 0xfc00
5527 uint64_t expected_z13[] =
5528 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5529 {0xe9eaebecfc00eff0, 0xf1f2fc00f5f6fc00, 0xfc00fbfcfdfefc00};
5530 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
5531
5532 // clang-format on
5533 }
5534}
5535
TatWai Chong4f28df72019-08-14 17:50:30 -07005536TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
5537 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5538 START();
5539
5540 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
5541
5542 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
5543
5544 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
5545
5546 int index_s[] = {1, 3, 2, 31, -1};
5547
5548 int index_d[] = {31, 1};
5549
5550 // Initialise the register with a value that doesn't exist in the table.
5551 __ Dup(z9.VnB(), 0x1f);
5552 InsrHelper(&masm, z9.VnD(), table_inputs);
5553
5554 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
5555 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
5556 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
5557 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
5558
5559 InsrHelper(&masm, ind_b, index_b);
5560 InsrHelper(&masm, ind_h, index_h);
5561 InsrHelper(&masm, ind_s, index_s);
5562 InsrHelper(&masm, ind_d, index_d);
5563
5564 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
5565
5566 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
5567
5568 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
5569
5570 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
5571
5572 END();
5573
5574 if (CAN_RUN()) {
5575 RUN();
5576
5577 // clang-format off
5578 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
5579 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
5580
5581 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
5582 0x5544, 0x7766, 0xddcc, 0x9988};
5583
5584 unsigned z28_expected[] =
5585 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
5586
5587 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
5588 // clang-format on
5589
5590 unsigned vl = config->sve_vl_in_bits();
5591 for (size_t i = 0; i < ArrayLength(index_b); i++) {
5592 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
5593 if (!core.HasSVELane(z26.VnB(), lane)) break;
5594 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
5595 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
5596 }
5597
5598 for (size_t i = 0; i < ArrayLength(index_h); i++) {
5599 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
5600 if (!core.HasSVELane(z27.VnH(), lane)) break;
5601 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
5602 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
5603 }
5604
5605 for (size_t i = 0; i < ArrayLength(index_s); i++) {
5606 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
5607 if (!core.HasSVELane(z28.VnS(), lane)) break;
5608 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
5609 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
5610 }
5611
5612 for (size_t i = 0; i < ArrayLength(index_d); i++) {
5613 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
5614 if (!core.HasSVELane(z29.VnD(), lane)) break;
5615 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
5616 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
5617 }
5618 }
5619}
5620
Jacob Bramley199339d2019-08-05 18:49:13 +01005621TEST_SVE(ldr_str_z_bi) {
5622 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5623 START();
5624
5625 int vl = config->sve_vl_in_bytes();
5626
5627 // The immediate can address [-256, 255] times the VL, so allocate enough
5628 // space to exceed that in both directions.
5629 int data_size = vl * 1024;
5630
5631 uint8_t* data = new uint8_t[data_size];
5632 memset(data, 0, data_size);
5633
5634 // Set the base half-way through the buffer so we can use negative indices.
5635 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5636
5637 __ Index(z1.VnB(), 1, 3);
5638 __ Index(z2.VnB(), 2, 5);
5639 __ Index(z3.VnB(), 3, 7);
5640 __ Index(z4.VnB(), 4, 11);
5641 __ Index(z5.VnB(), 5, 13);
5642 __ Index(z6.VnB(), 6, 2);
5643 __ Index(z7.VnB(), 7, 3);
5644 __ Index(z8.VnB(), 8, 5);
5645 __ Index(z9.VnB(), 9, 7);
5646
5647 // Encodable cases.
5648 __ Str(z1, SVEMemOperand(x0));
5649 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
5650 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
5651 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
5652 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
5653
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005654 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01005655 __ Str(z6, SVEMemOperand(x0, 6 * vl));
5656 __ Str(z7, SVEMemOperand(x0, -7 * vl));
5657 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
5658 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
5659
5660 // Corresponding loads.
5661 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
5662 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
5663 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
5664 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
5665 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
5666
5667 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
5668 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
5669 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
5670 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
5671
5672 END();
5673
5674 if (CAN_RUN()) {
5675 RUN();
5676
5677 uint8_t* expected = new uint8_t[data_size];
5678 memset(expected, 0, data_size);
5679 uint8_t* middle = &expected[data_size / 2];
5680
5681 for (int i = 0; i < vl; i++) {
5682 middle[i] = (1 + (3 * i)) & 0xff; // z1
5683 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
5684 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
5685 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
5686 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
5687 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
5688 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
5689 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
5690 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
5691 }
5692
Jacob Bramley33c99f92019-10-08 15:24:12 +01005693 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01005694
5695 ASSERT_EQUAL_SVE(z1, z11);
5696 ASSERT_EQUAL_SVE(z2, z12);
5697 ASSERT_EQUAL_SVE(z3, z13);
5698 ASSERT_EQUAL_SVE(z4, z14);
5699 ASSERT_EQUAL_SVE(z5, z15);
5700 ASSERT_EQUAL_SVE(z6, z16);
5701 ASSERT_EQUAL_SVE(z7, z17);
5702 ASSERT_EQUAL_SVE(z8, z18);
5703 ASSERT_EQUAL_SVE(z9, z19);
5704
5705 delete[] expected;
5706 }
5707 delete[] data;
5708}
5709
5710TEST_SVE(ldr_str_p_bi) {
5711 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5712 START();
5713
5714 int vl = config->sve_vl_in_bytes();
5715 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5716 int pl = vl / kZRegBitsPerPRegBit;
5717
5718 // The immediate can address [-256, 255] times the PL, so allocate enough
5719 // space to exceed that in both directions.
5720 int data_size = pl * 1024;
5721
5722 uint8_t* data = new uint8_t[data_size];
5723 memset(data, 0, data_size);
5724
5725 // Set the base half-way through the buffer so we can use negative indices.
5726 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5727
5728 uint64_t pattern[4] = {0x1010101011101111,
5729 0x0010111011000101,
5730 0x1001101110010110,
5731 0x1010110101100011};
5732 for (int i = 8; i <= 15; i++) {
5733 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
5734 Initialise(&masm,
5735 PRegister(i),
5736 pattern[3] * i,
5737 pattern[2] * i,
5738 pattern[1] * i,
5739 pattern[0] * i);
5740 }
5741
5742 // Encodable cases.
5743 __ Str(p8, SVEMemOperand(x0));
5744 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
5745 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
5746 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
5747
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005748 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01005749 __ Str(p12, SVEMemOperand(x0, 6 * pl));
5750 __ Str(p13, SVEMemOperand(x0, -7 * pl));
5751 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
5752 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
5753
5754 // Corresponding loads.
5755 __ Ldr(p0, SVEMemOperand(x0));
5756 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
5757 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
5758 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
5759
5760 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
5761 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
5762 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
5763 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
5764
5765 END();
5766
5767 if (CAN_RUN()) {
5768 RUN();
5769
5770 uint8_t* expected = new uint8_t[data_size];
5771 memset(expected, 0, data_size);
5772 uint8_t* middle = &expected[data_size / 2];
5773
5774 for (int i = 0; i < pl; i++) {
5775 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
5776 size_t index = i / sizeof(pattern[0]);
5777 VIXL_ASSERT(index < ArrayLength(pattern));
5778 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
5779 // Each byte of `pattern` can be multiplied by 15 without carry.
5780 VIXL_ASSERT((byte * 15) <= 0xff);
5781
5782 middle[i] = byte * 8; // p8
5783 middle[(2 * pl) + i] = byte * 9; // p9
5784 middle[(-3 * pl) + i] = byte * 10; // p10
5785 middle[(255 * pl) + i] = byte * 11; // p11
5786 middle[(6 * pl) + i] = byte * 12; // p12
5787 middle[(-7 * pl) + i] = byte * 13; // p13
5788 middle[(314 * pl) + i] = byte * 14; // p14
5789 middle[(-314 * pl) + i] = byte * 15; // p15
5790 }
5791
Jacob Bramley33c99f92019-10-08 15:24:12 +01005792 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01005793
5794 ASSERT_EQUAL_SVE(p0, p8);
5795 ASSERT_EQUAL_SVE(p1, p9);
5796 ASSERT_EQUAL_SVE(p2, p10);
5797 ASSERT_EQUAL_SVE(p3, p11);
5798 ASSERT_EQUAL_SVE(p4, p12);
5799 ASSERT_EQUAL_SVE(p5, p13);
5800 ASSERT_EQUAL_SVE(p6, p14);
5801 ASSERT_EQUAL_SVE(p7, p15);
5802
5803 delete[] expected;
5804 }
5805 delete[] data;
5806}
5807
Jacob Bramleye668b202019-08-14 17:57:34 +01005808template <typename T>
5809static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
5810 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
5811}
5812
5813TEST_SVE(sve_ld1_st1_contiguous) {
5814 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5815 START();
5816
5817 int vl = config->sve_vl_in_bytes();
5818
5819 // The immediate can address [-8, 7] times the VL, so allocate enough space to
5820 // exceed that in both directions.
5821 int data_size = vl * 128;
5822
5823 uint8_t* data = new uint8_t[data_size];
5824 memset(data, 0, data_size);
5825
5826 // Set the base half-way through the buffer so we can use negative indices.
5827 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5828
Jacob Bramleye668b202019-08-14 17:57:34 +01005829 // Encodable scalar-plus-immediate cases.
5830 __ Index(z1.VnB(), 1, -3);
5831 __ Ptrue(p1.VnB());
5832 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
5833
5834 __ Index(z2.VnH(), -2, 5);
5835 __ Ptrue(p2.VnH(), SVE_MUL3);
5836 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
5837
5838 __ Index(z3.VnS(), 3, -7);
5839 __ Ptrue(p3.VnS(), SVE_POW2);
5840 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
5841
5842 // Encodable scalar-plus-scalar cases.
5843 __ Index(z4.VnD(), -4, 11);
5844 __ Ptrue(p4.VnD(), SVE_VL3);
5845 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
5846 __ Mov(x2, 17);
5847 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
5848
5849 __ Index(z5.VnD(), 6, -2);
5850 __ Ptrue(p5.VnD(), SVE_VL16);
TatWai Chong6205eb42019-09-24 10:07:20 +01005851 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
5852 __ Mov(x4, 6);
5853 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
Jacob Bramleye668b202019-08-14 17:57:34 +01005854
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005855 // Unencodable cases fall back on `CalculateSVEAddress`.
Jacob Bramleye668b202019-08-14 17:57:34 +01005856 __ Index(z6.VnS(), -7, 3);
5857 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
5858 // predicate bits when handling larger lanes.
5859 __ Ptrue(p6.VnB(), SVE_ALL);
5860 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
5861
TatWai Chong6205eb42019-09-24 10:07:20 +01005862 __ Index(z7.VnD(), 32, -11);
5863 __ Ptrue(p7.VnD(), SVE_MUL4);
5864 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
Jacob Bramleye668b202019-08-14 17:57:34 +01005865
TatWai Chong6205eb42019-09-24 10:07:20 +01005866 // Corresponding loads.
5867 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
5868 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5869 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5870 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5871 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
5872 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
5873
5874 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5875 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5876 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5877 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
5878
5879 // We can test ld1 by comparing the value loaded with the value stored. In
5880 // most cases, there are two complications:
5881 // - Loads have zeroing predication, so we have to clear the inactive
5882 // elements on our reference.
5883 // - We have to replicate any sign- or zero-extension.
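// For example, the reference for `Ld1sb(z14.VnH(), ...)` below is formed by
// zeroing z24 and then sign-extending the stored bytes from z2 with a merging
// `Sxtb` under the same predicate.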
5884
5885 // Ld1b(z8.VnB(), ...)
5886 __ Dup(z18.VnB(), 0);
5887 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
5888
5889 // Ld1b(z9.VnH(), ...)
5890 __ Dup(z19.VnH(), 0);
5891 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
5892
5893 // Ld1h(z10.VnS(), ...)
5894 __ Dup(z20.VnS(), 0);
5895 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
5896
5897 // Ld1b(z11.VnD(), ...)
5898 __ Dup(z21.VnD(), 0);
5899 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
5900
5901 // Ld1d(z12.VnD(), ...)
5902 __ Dup(z22.VnD(), 0);
5903 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
5904
5905 // Ld1w(z13.VnS(), ...)
5906 __ Dup(z23.VnS(), 0);
5907 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
5908
5909 // Ld1sb(z14.VnH(), ...)
5910 __ Dup(z24.VnH(), 0);
5911 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
5912
5913 // Ld1sh(z15.VnS(), ...)
5914 __ Dup(z25.VnS(), 0);
5915 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
5916
5917 // Ld1sb(z16.VnD(), ...)
5918 __ Dup(z26.VnD(), 0);
5919 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
5920
5921 // Ld1sw(z17.VnD(), ...)
5922 __ Dup(z27.VnD(), 0);
5923 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
Jacob Bramleye668b202019-08-14 17:57:34 +01005924
5925 END();
5926
5927 if (CAN_RUN()) {
5928 RUN();
5929
5930 uint8_t* expected = new uint8_t[data_size];
5931 memset(expected, 0, data_size);
5932 uint8_t* middle = &expected[data_size / 2];
5933
5934 int vl_b = vl / kBRegSizeInBytes;
5935 int vl_h = vl / kHRegSizeInBytes;
5936 int vl_s = vl / kSRegSizeInBytes;
5937 int vl_d = vl / kDRegSizeInBytes;
5938
5939 // Encodable cases.
5940
5941 // st1b { z1.b }, SVE_ALL
5942 for (int i = 0; i < vl_b; i++) {
5943 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
5944 }
5945
5946 // st1b { z2.h }, SVE_MUL3
5947 int vl_h_mul3 = vl_h - (vl_h % 3);
5948 for (int i = 0; i < vl_h_mul3; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005949 int64_t offset = 7 * static_cast<int>(vl / (kHRegSize / kBRegSize));
5950 MemoryWrite(middle, offset, i, static_cast<uint8_t>(-2 + (5 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01005951 }
5952
5953 // st1h { z3.s }, SVE_POW2
5954 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
5955 for (int i = 0; i < vl_s_pow2; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005956 int64_t offset = -8 * static_cast<int>(vl / (kSRegSize / kHRegSize));
5957 MemoryWrite(middle, offset, i, static_cast<uint16_t>(3 - (7 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01005958 }
5959
5960 // st1b { z4.d }, SVE_VL3
5961 if (vl_d >= 3) {
5962 for (int i = 0; i < 3; i++) {
5963 MemoryWrite(middle,
5964 (8 * vl) + 17,
5965 i,
5966 static_cast<uint8_t>(-4 + (11 * i)));
5967 }
5968 }
5969
5970 // st1d { z5.d }, SVE_VL16
5971 if (vl_d >= 16) {
5972 for (int i = 0; i < 16; i++) {
5973 MemoryWrite(middle,
5974 (10 * vl) + (6 * kDRegSizeInBytes),
5975 i,
5976 static_cast<uint64_t>(6 - (2 * i)));
5977 }
5978 }
5979
5980 // Unencodable cases.
5981
5982 // st1w { z6.s }, SVE_ALL
5983 for (int i = 0; i < vl_s; i++) {
5984 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
5985 }
5986
TatWai Chong6205eb42019-09-24 10:07:20 +01005987 // st1w { z7.d }, SVE_MUL4
5988 int vl_d_mul4 = vl_d - (vl_d % 4);
5989 for (int i = 0; i < vl_d_mul4; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005990 int64_t offset = 22 * static_cast<int>(vl / (kDRegSize / kWRegSize));
5991 MemoryWrite(middle, offset, i, static_cast<uint32_t>(32 + (-11 * i)));
TatWai Chong6205eb42019-09-24 10:07:20 +01005992 }
5993
Jacob Bramley33c99f92019-10-08 15:24:12 +01005994 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramleye668b202019-08-14 17:57:34 +01005995
TatWai Chong6205eb42019-09-24 10:07:20 +01005996 // Check that we loaded back the expected values.
5997
5998 ASSERT_EQUAL_SVE(z18, z8);
5999 ASSERT_EQUAL_SVE(z19, z9);
6000 ASSERT_EQUAL_SVE(z20, z10);
6001 ASSERT_EQUAL_SVE(z21, z11);
6002 ASSERT_EQUAL_SVE(z22, z12);
6003 ASSERT_EQUAL_SVE(z23, z13);
6004 ASSERT_EQUAL_SVE(z24, z14);
6005 ASSERT_EQUAL_SVE(z25, z15);
6006 ASSERT_EQUAL_SVE(z26, z16);
6007 ASSERT_EQUAL_SVE(z27, z17);
6008
Jacob Bramleye668b202019-08-14 17:57:34 +01006009 delete[] expected;
6010 }
6011 delete[] data;
6012}
6013
TatWai Chong6995bfd2019-09-26 10:48:05 +01006014typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
6015 const ZRegister& zn,
6016 const IntegerOperand imm);
6017
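// Helper for the unpredicated wide-immediate tests below. It applies `macro`
// both destructively (zd aliased to zn) and with distinct source and
// destination registers, so that the MacroAssembler's movprfx path is also
// exercised, then checks that both forms produce `zd_expected` and that the
// input register is preserved.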
6018template <typename F, typename Td, typename Tn>
6019static void IntWideImmHelper(Test* config,
6020 F macro,
6021 unsigned lane_size_in_bits,
6022 const Tn& zn_inputs,
6023 IntegerOperand imm,
6024 const Td& zd_expected) {
6025 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6026 START();
6027
6028 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
6029 InsrHelper(&masm, zd1, zn_inputs);
6030
6031  // Also test with a separate zn register, to exercise the movprfx case.
6032 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
6033 InsrHelper(&masm, zn, zn_inputs);
6034 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
6035 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
6036
6037 // Make a copy so we can check that constructive operations preserve zn.
6038 __ Mov(zn_copy, zn);
6039
6040 {
6041 UseScratchRegisterScope temps(&masm);
6042 // The MacroAssembler needs a P scratch register for some of these macros,
6043 // and it doesn't have one by default.
6044 temps.Include(p3);
6045
6046 (masm.*macro)(zd1, zd1, imm);
6047 (masm.*macro)(zd2, zn, imm);
6048 }
6049
6050 END();
6051
6052 if (CAN_RUN()) {
6053 RUN();
6054
6055 ASSERT_EQUAL_SVE(zd_expected, zd1);
6056
6057    // Check that the movprfx + `instr` form (zd2) gives the same result as
6058    // the destructive form (zd1).
6059 ASSERT_EQUAL_SVE(zd_expected, zd2);
6060
6061 ASSERT_EQUAL_SVE(zn_copy, zn);
6062 }
6063}
6064
6065TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
6066 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
6067 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
6068 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
6069 int64_t in_d[] = {1, 10, 10000, 1000000};
6070
6071 IntWideImmFn fn = &MacroAssembler::Smax;
6072
6073 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
6074 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
6075 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
6076 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
6077
6078 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
6079 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
6080 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
6081 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
6082
6083 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
6084 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
6085 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
6086
6087 // The immediate is in the range [-128, 127], but the macro is able to
6088 // synthesise unencodable immediates.
6089 // B-sized lanes cannot take an immediate out of the range [-128, 127].
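  // For these, the MacroAssembler presumably materialises the immediate into a
  // scratch Z register and falls back to the register form of the instruction,
  // which is why IntWideImmHelper supplies a scratch predicate for some of
  // these macros.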
6090 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
6091 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6092 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
6093}
6094
6095TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
6096 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
6097 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
6098 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
6099 int64_t in_d[] = {1, 10, 10000, 1000000};
6100
6101 IntWideImmFn fn = &MacroAssembler::Smin;
6102
6103 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
6104 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
6105 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
6106 int64_t exp_d_1[] = {1, 10, 99, 99};
6107
6108 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
6109 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
6110 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
6111 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
6112
6113 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
6114 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
6115 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
6116
6117 // The immediate is in the range [-128, 127], but the macro is able to
6118 // synthesise unencodable immediates.
6119 // B-sized lanes cannot take an immediate out of the range [-128, 127].
6120 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
6121 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6122 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
6123}
6124
6125TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
6126 int in_b[] = {0, 255, 127, 0x80, 1, 55};
6127 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
6128 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
6129 int64_t in_d[] = {1, 10, 10000, 1000000};
6130
6131 IntWideImmFn fn = &MacroAssembler::Umax;
6132
6133 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
6134 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
6135 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
6136 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
6137
6138 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
6139 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
6140 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
6141 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
6142
6143 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
6144 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
6145 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
6146
6147 // The immediate is in the range [0, 255], but the macro is able to
6148 // synthesise unencodable immediates.
6149 // B-sized lanes cannot take an immediate out of the range [0, 255].
6150 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
6151 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6152 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
6153}
6154
6155TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
6156 int in_b[] = {0, 255, 127, 0x80, 1, 55};
6157 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
6158 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
6159 int64_t in_d[] = {1, 10, 10000, 1000000};
6160
6161 IntWideImmFn fn = &MacroAssembler::Umin;
6162
6163 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
6164 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
6165 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
6166 int64_t exp_d_1[] = {1, 10, 99, 99};
6167
6168 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
6169 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
6170 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
6171 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
6172
6173 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
6174 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
6175 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
6176
6177 // The immediate is in the range [0, 255], but the macro is able to
6178 // synthesise unencodable immediates.
6179 // B-sized lanes cannot take an immediate out of the range [0, 255].
6180 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
6181 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6182 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
6183}
6184
6185TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
6186 int in_b[] = {11, -1, 7, -3};
6187 int in_h[] = {111, -1, 17, -123};
6188 int in_s[] = {11111, -1, 117, -12345};
6189 int64_t in_d[] = {0x7fffffff, 0x80000000};
6190
6191 IntWideImmFn fn = &MacroAssembler::Mul;
6192
6193 int exp_b_1[] = {66, -6, 42, -18};
6194 int exp_h_1[] = {-14208, 128, -2176, 15744};
6195 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
6196 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
6197
6198 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
6199 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
6200 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6201 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
6202
6203 int exp_h_2[] = {-28305, 255, -4335, 31365};
6204 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
6205 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
6206
6207 // The immediate is in the range [-128, 127], but the macro is able to
6208 // synthesise unencodable immediates.
6209  // B-sized lanes cannot take an immediate out of the range [-128, 127].
6210 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
6211 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6212 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
6213
6214 // Integer overflow on multiplication.
6215 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
6216
6217 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
6218}
6219
6220TEST_SVE(sve_int_wide_imm_unpredicated_add) {
6221 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6222 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6223 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6224 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6225
6226 IntWideImmFn fn = &MacroAssembler::Add;
6227
6228 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
6229 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
6230 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
6231 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
6232
6233 // Encodable with `add` (shift 0).
6234 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6235 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6236 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6237 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6238
6239 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
6240 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
6241 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
6242
6243 // Encodable with `add` (shift 8).
6244 // B-sized lanes cannot take a shift of 8.
6245 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6246 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6247 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6248
6249 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
6250
6251 // The macro is able to synthesise unencodable immediates.
6252 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
6253
6254 unsigned exp_b_4[] = {0x61, 0x5f, 0xf0, 0xdf};
6255 unsigned exp_h_4[] = {0x6181, 0x5f7f, 0xf010, 0x8aaa};
6256 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
6257 uint64_t exp_d_4[] = {0x8000000180018180, 0x7fffffff7fff7f7e};
6258
6259 // Negative immediates use `sub`.
6260 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
6261 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
6262 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
6263 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
6264}
6265
6266TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
6267 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6268 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6269 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6270 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6271
6272 IntWideImmFn fn = &MacroAssembler::Sqadd;
6273
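  // Sqadd saturates to the signed range of the lane. For B lanes with
  // immediate 129: -127 + 129 = 2, while 127 + 129, 16 + 129 and -1 + 129 all
  // exceed INT8_MAX, so they saturate to 0x7f.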
6274  unsigned exp_b_1[] = {0x02, 0x7f, 0x7f, 0x7f};
6275  unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
6276 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
6277 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
6278
6279 // Encodable with `sqadd` (shift 0).
6280  // Note that encodable immediates are unsigned, even for signed saturation.
6281  IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
6282  IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6283 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6284  IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
6285
6286 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
6287 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
6288 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
6289
6290 // Encodable with `sqadd` (shift 8).
6291 // B-sized lanes cannot take a shift of 8.
6292 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6293 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6294 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6295}
6296
6297TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
6298 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6299 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6300 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6301 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6302
6303 IntWideImmFn fn = &MacroAssembler::Uqadd;
6304
6305 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
6306 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
6307 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
6308 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
6309
6310 // Encodable with `uqadd` (shift 0).
6311 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6312 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6313 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6314 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6315
6316 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
6317 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
6318 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
6319
6320 // Encodable with `uqadd` (shift 8).
6321 // B-sized lanes cannot take a shift of 8.
6322 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6323 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6324 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6325}
6326
6327TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
6328 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6329 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6330 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6331 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6332
6333 IntWideImmFn fn = &MacroAssembler::Sub;
6334
6335 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
6336 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6337 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6338 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6339
6340 // Encodable with `sub` (shift 0).
6341 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6342 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6343 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6344 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6345
6346 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
6347 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6348 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6349
6350 // Encodable with `sub` (shift 8).
6351 // B-sized lanes cannot take a shift of 8.
6352 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6353 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6354 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6355
6356 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
6357
6358 // The macro is able to synthesise unencodable immediates.
6359 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
6360
6361 unsigned exp_b_4[] = {0xa1, 0x9f, 0x30, 0x1f};
6362 unsigned exp_h_4[] = {0xa181, 0x9f7f, 0x3010, 0xcaaa};
6363 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
6364 uint64_t exp_d_4[] = {0x8000000180018182, 0x7fffffff7fff7f80};
6365
6366 // Negative immediates use `add`.
6367 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
6368 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
6369 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
6370 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
6371}
6372
6373TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
6374 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6375 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6376 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6377 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6378
6379 IntWideImmFn fn = &MacroAssembler::Sqsub;
6380
6381  unsigned exp_b_1[] = {0x80, 0xfe, 0x8f, 0x80};
6382  unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6383 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6384 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6385
6386 // Encodable with `sqsub` (shift 0).
6387  // Note that encodable immediates are unsigned, even for signed saturation.
6388  IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
6389  IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6390 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6391  IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
6392
6393 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
6394 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6395 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6396
6397 // Encodable with `sqsub` (shift 8).
6398 // B-sized lanes cannot take a shift of 8.
6399 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6400 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6401 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6402}
6403
6404TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
6405 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6406 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6407 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6408 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6409
6410 IntWideImmFn fn = &MacroAssembler::Uqsub;
6411
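  // Uqsub saturates to the unsigned range of the lane. For B lanes with
  // immediate 0x81: 0x81 - 0x81 = 0 and 0xff - 0x81 = 0x7e, while 0x7f - 0x81
  // and 0x10 - 0x81 would underflow, so they saturate to 0.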
6412 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
6413 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6414 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6415 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6416
6417 // Encodable with `uqsub` (shift 0).
6418 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6419 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6420 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6421 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6422
6423 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
6424 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6425 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6426
6427 // Encodable with `uqsub` (shift 8).
6428 // B-sized lanes cannot take a shift of 8.
6429 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6430 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6431 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6432}
6433
6434TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
6435 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6436 START();
6437
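  // With an immediate first operand, Sub computes (imm - zn) for each lane,
  // using the reversed-operand `subr` encoding where it fits. For example,
  // with Index(z0.VnD(), 1, 1) below, the lowest lane becomes 100 - 1 = 99.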
6438 // Encodable with `subr` (shift 0).
6439 __ Index(z0.VnD(), 1, 1);
6440 __ Sub(z0.VnD(), 100, z0.VnD());
6441 __ Index(z1.VnS(), 0x7f, 1);
6442 __ Sub(z1.VnS(), 0xf7, z1.VnS());
6443 __ Index(z2.VnH(), 0xaaaa, 0x2222);
6444 __ Sub(z2.VnH(), 0x80, z2.VnH());
6445 __ Index(z3.VnB(), 133, 1);
6446 __ Sub(z3.VnB(), 255, z3.VnB());
6447
6448 // Encodable with `subr` (shift 8).
6449 __ Index(z4.VnD(), 256, -1);
6450 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
6451 __ Index(z5.VnS(), 0x7878, 1);
6452 __ Sub(z5.VnS(), 0x8000, z5.VnS());
6453 __ Index(z6.VnH(), 0x30f0, -1);
6454 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
6455 // B-sized lanes cannot take a shift of 8.
6456
6457 // Select with movprfx.
6458 __ Index(z31.VnD(), 256, 4001);
6459 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
6460
6461 // Out of immediate encodable range of `sub`.
6462 __ Index(z30.VnS(), 0x11223344, 1);
6463 __ Sub(z8.VnS(), 0x88776655, z30.VnS());
6464
6465 END();
6466
6467 if (CAN_RUN()) {
6468 RUN();
6469
6470 int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
6471 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6472
6473 int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
6474 ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
6475
6476 int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
6477 ASSERT_EQUAL_SVE(expected_z2, z2.VnH());
6478
6479 int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
6480 ASSERT_EQUAL_SVE(expected_z3, z3.VnB());
6481
6482 int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
6483 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6484
6485 int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
6486 ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
6487
6488 int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
6489 ASSERT_EQUAL_SVE(expected_z6, z6.VnH());
6490
6491 int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
6492 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6493
6494 int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
6495 ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
6496 }
6497}
6498
6499TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
6500 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6501 START();
6502
6503 // Immediates which can be encoded in the instructions.
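  // (The encodable immediates appear to use the same 8-bit format as FMOV:
  // values of the form +/-(16..31)/16 * 2^n, with n in [-3, 4]. Anything else
  // is left to the macro to synthesise.)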
6504 __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
6505 __ Fdup(z1.VnS(), Float16(2.0));
6506 __ Fdup(z2.VnD(), Float16(3.875));
6507 __ Fdup(z3.VnH(), 8.0f);
6508 __ Fdup(z4.VnS(), -4.75f);
6509 __ Fdup(z5.VnD(), 0.5f);
6510 __ Fdup(z6.VnH(), 1.0);
6511 __ Fdup(z7.VnS(), 2.125);
6512 __ Fdup(z8.VnD(), -13.0);
6513
6514 // Immediates which cannot be encoded in the instructions.
6515 __ Fdup(z10.VnH(), Float16(0.0));
6516 __ Fdup(z11.VnH(), kFP16PositiveInfinity);
6517 __ Fdup(z12.VnS(), 255.0f);
6518 __ Fdup(z13.VnS(), kFP32NegativeInfinity);
6519 __ Fdup(z14.VnD(), 12.3456);
6520 __ Fdup(z15.VnD(), kFP64PositiveInfinity);
6521
6522 END();
6523
6524 if (CAN_RUN()) {
6525 RUN();
6526
6527 ASSERT_EQUAL_SVE(0xc500, z0.VnH());
6528 ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
6529 ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
6530 ASSERT_EQUAL_SVE(0x4800, z3.VnH());
6531 ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
6532 ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
6533 ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
6534 ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
6535 ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());
6536
6537 ASSERT_EQUAL_SVE(0x0000, z10.VnH());
6538 ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
6539 ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
6540 ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
6541 ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
6542 ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
6543 }
6544}
6545
6546TEST_SVE(sve_andv_eorv_orv) {
6547 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6548 START();
6549
6550 uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
6551 InsrHelper(&masm, z31.VnD(), in);
6552
6553 // For simplicity, we re-use the same pg for various lane sizes.
6554 // For D lanes: 1, 1, 0
6555 // For S lanes: 1, 1, 1, 0, 0
6556 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6557 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6558 Initialise(&masm, p0.VnB(), pg_in);
6559
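  // Andv, Eorv and Orv are horizontal bitwise reductions: they AND/EOR/OR all
  // active lanes together and write the scalar result to the destination V
  // register, clearing the rest of the Z register (checked at the end).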
6560 // Make a copy so we can check that constructive operations preserve zn.
6561 __ Mov(z0, z31);
6562 __ Andv(b0, p0, z0.VnB()); // destructive
6563 __ Andv(h1, p0, z31.VnH());
6564 __ Mov(z2, z31);
6565 __ Andv(s2, p0, z2.VnS()); // destructive
6566 __ Andv(d3, p0, z31.VnD());
6567
6568 __ Eorv(b4, p0, z31.VnB());
6569 __ Mov(z5, z31);
6570 __ Eorv(h5, p0, z5.VnH()); // destructive
6571 __ Eorv(s6, p0, z31.VnS());
6572 __ Mov(z7, z31);
6573 __ Eorv(d7, p0, z7.VnD()); // destructive
6574
6575 __ Mov(z8, z31);
6576 __ Orv(b8, p0, z8.VnB()); // destructive
6577 __ Orv(h9, p0, z31.VnH());
6578 __ Mov(z10, z31);
6579 __ Orv(s10, p0, z10.VnS()); // destructive
6580 __ Orv(d11, p0, z31.VnD());
6581
6582 END();
6583
6584 if (CAN_RUN()) {
6585 RUN();
6586
6587 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6588 ASSERT_EQUAL_64(0x10, d0);
6589 ASSERT_EQUAL_64(0x1010, d1);
6590 ASSERT_EQUAL_64(0x33331111, d2);
6591 ASSERT_EQUAL_64(0x7777555533331111, d3);
6592 ASSERT_EQUAL_64(0xbf, d4);
6593 ASSERT_EQUAL_64(0xedcb, d5);
6594 ASSERT_EQUAL_64(0x44444444, d6);
6595 ASSERT_EQUAL_64(0x7777555533331111, d7);
6596 ASSERT_EQUAL_64(0xff, d8);
6597 ASSERT_EQUAL_64(0xffff, d9);
6598 ASSERT_EQUAL_64(0x77775555, d10);
6599 ASSERT_EQUAL_64(0x7777555533331111, d11);
6600 } else {
6601 ASSERT_EQUAL_64(0, d0);
6602 ASSERT_EQUAL_64(0x0010, d1);
6603 ASSERT_EQUAL_64(0x00110011, d2);
6604 ASSERT_EQUAL_64(0x0011001100110011, d3);
6605 ASSERT_EQUAL_64(0x62, d4);
6606 ASSERT_EQUAL_64(0x0334, d5);
6607 ASSERT_EQUAL_64(0x8899aabb, d6);
6608 ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
6609 ASSERT_EQUAL_64(0xff, d8);
6610 ASSERT_EQUAL_64(0xffff, d9);
6611 ASSERT_EQUAL_64(0xffffffff, d10);
6612 ASSERT_EQUAL_64(0xffffffffffffffff, d11);
6613 }
6614
6615 // Check the upper lanes above the top of the V register are all clear.
6616 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6617 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6618 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6619 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6620 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6621 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6622 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6623 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6624 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6625 ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
6626 ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
6627 ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
6628 ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
6629 }
6630 }
6631}
6632
6633
6634TEST_SVE(sve_saddv_uaddv) {
6635 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6636 START();
6637
6638 uint64_t in[] = {0x8899aabbccddeeff, 0x8182838485868788, 0x0807060504030201};
6639 InsrHelper(&masm, z31.VnD(), in);
6640
6641 // For simplicity, we re-use the same pg for various lane sizes.
6642 // For D lanes: 1, 1, 0
6643 // For S lanes: 1, 1, 1, 0, 0
6644 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6645 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6646 Initialise(&masm, p0.VnB(), pg_in);
6647
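  // Saddv sign-extends each active lane into a 64-bit accumulator, while Uaddv
  // zero-extends. Note that Saddv only accepts B, H and S source lanes (a
  // D-lane sum is already 64 bits wide), hence there is no Saddv(d3, ...)
  // counterpart to Uaddv(d7, ...) below.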
6648 // Make a copy so we can check that constructive operations preserve zn.
6649 __ Mov(z0, z31);
6650 __ Saddv(b0, p0, z0.VnB()); // destructive
6651 __ Saddv(h1, p0, z31.VnH());
6652 __ Mov(z2, z31);
6653 __ Saddv(s2, p0, z2.VnS()); // destructive
6654
6655 __ Uaddv(b4, p0, z31.VnB());
6656 __ Mov(z5, z31);
6657 __ Uaddv(h5, p0, z5.VnH()); // destructive
6658 __ Uaddv(s6, p0, z31.VnS());
6659 __ Mov(z7, z31);
6660 __ Uaddv(d7, p0, z7.VnD()); // destructive
6661
6662 END();
6663
6664 if (CAN_RUN()) {
6665 RUN();
6666
6667 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6668 // Saddv
6669 ASSERT_EQUAL_64(0xfffffffffffffda9, d0);
6670 ASSERT_EQUAL_64(0xfffffffffffe9495, d1);
6671 ASSERT_EQUAL_64(0xffffffff07090b0c, d2);
6672 // Uaddv
6673 ASSERT_EQUAL_64(0x00000000000002a9, d4);
6674 ASSERT_EQUAL_64(0x0000000000019495, d5);
6675 ASSERT_EQUAL_64(0x0000000107090b0c, d6);
6676 ASSERT_EQUAL_64(0x8182838485868788, d7);
6677 } else {
6678 // Saddv
6679 ASSERT_EQUAL_64(0xfffffffffffffd62, d0);
6680 ASSERT_EQUAL_64(0xfffffffffffe8394, d1);
6681 ASSERT_EQUAL_64(0xfffffffed3e6fa0b, d2);
6682 // Uaddv
6683 ASSERT_EQUAL_64(0x0000000000000562, d4);
6684 ASSERT_EQUAL_64(0x0000000000028394, d5);
6685 ASSERT_EQUAL_64(0x00000001d3e6fa0b, d6);
6686 ASSERT_EQUAL_64(0x0a1c2e4052647687, d7);
6687 }
6688
6689 // Check the upper lanes above the top of the V register are all clear.
6690 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6691 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6692 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6693 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6694 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6695 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6696 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6697 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6698 }
6699 }
6700}
6701
6702
6703TEST_SVE(sve_sminv_uminv) {
6704 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6705 START();
6706
6707 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
6708 InsrHelper(&masm, z31.VnD(), in);
6709
6710 // For simplicity, we re-use the same pg for various lane sizes.
6711 // For D lanes: 1, 0, 1
6712 // For S lanes: 1, 1, 0, 0, 1
6713 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
6714 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
6715 Initialise(&masm, p0.VnB(), pg_in);
6716
6717 // Make a copy so we can check that constructive operations preserve zn.
6718 __ Mov(z0, z31);
6719 __ Sminv(b0, p0, z0.VnB()); // destructive
6720 __ Sminv(h1, p0, z31.VnH());
6721 __ Mov(z2, z31);
6722 __ Sminv(s2, p0, z2.VnS()); // destructive
6723 __ Sminv(d3, p0, z31.VnD());
6724
6725 __ Uminv(b4, p0, z31.VnB());
6726 __ Mov(z5, z31);
6727 __ Uminv(h5, p0, z5.VnH()); // destructive
6728 __ Uminv(s6, p0, z31.VnS());
6729 __ Mov(z7, z31);
6730 __ Uminv(d7, p0, z7.VnD()); // destructive
6731
6732 END();
6733
6734 if (CAN_RUN()) {
6735 RUN();
6736
6737 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6738 // Sminv
6739 ASSERT_EQUAL_64(0xaa, d0);
6740 ASSERT_EQUAL_64(0xaabb, d1);
6741 ASSERT_EQUAL_64(0xaabbfc00, d2);
6742 ASSERT_EQUAL_64(0x00112233aabbfc00, d3); // The smaller lane is inactive.
6743 // Uminv
6744 ASSERT_EQUAL_64(0, d4);
6745 ASSERT_EQUAL_64(0x2233, d5);
6746 ASSERT_EQUAL_64(0x112233, d6);
6747 ASSERT_EQUAL_64(0x00112233aabbfc00, d7); // The smaller lane is inactive.
6748 } else {
6749 // Sminv
6750 ASSERT_EQUAL_64(0xaa, d0);
6751 ASSERT_EQUAL_64(0xaaaa, d1);
6752 ASSERT_EQUAL_64(0xaaaaaaaa, d2);
6753 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d3);
6754 // Uminv
6755 ASSERT_EQUAL_64(0, d4);
6756 ASSERT_EQUAL_64(0x2233, d5);
6757 ASSERT_EQUAL_64(0x112233, d6);
6758 ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
6759 }
6760
6761 // Check the upper lanes above the top of the V register are all clear.
6762 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6763 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6764 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6765 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6766 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6767 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6768 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6769 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6770 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6771 }
6772 }
6773}
6774
6775TEST_SVE(sve_smaxv_umaxv) {
6776 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6777 START();
6778
6779 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
6780 InsrHelper(&masm, z31.VnD(), in);
6781
6782 // For simplicity, we re-use the same pg for various lane sizes.
6783 // For D lanes: 1, 0, 1
6784 // For S lanes: 1, 1, 0, 0, 1
6785 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
6786 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
6787 Initialise(&masm, p0.VnB(), pg_in);
6788
6789 // Make a copy so we can check that constructive operations preserve zn.
6790 __ Mov(z0, z31);
6791 __ Smaxv(b0, p0, z0.VnB()); // destructive
6792 __ Smaxv(h1, p0, z31.VnH());
6793 __ Mov(z2, z31);
6794 __ Smaxv(s2, p0, z2.VnS()); // destructive
6795 __ Smaxv(d3, p0, z31.VnD());
6796
6797 __ Umaxv(b4, p0, z31.VnB());
6798 __ Mov(z5, z31);
6799 __ Umaxv(h5, p0, z5.VnH()); // destructive
6800 __ Umaxv(s6, p0, z31.VnS());
6801 __ Mov(z7, z31);
6802 __ Umaxv(d7, p0, z7.VnD()); // destructive
6803
6804 END();
6805
6806 if (CAN_RUN()) {
6807 RUN();
6808
6809 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6810 // Smaxv
6811 ASSERT_EQUAL_64(0x33, d0);
6812 ASSERT_EQUAL_64(0x44aa, d1);
6813 ASSERT_EQUAL_64(0x112233, d2);
6814 ASSERT_EQUAL_64(0x112233aabbfc00, d3);
6815 // Umaxv
6816 ASSERT_EQUAL_64(0xfe, d4);
6817 ASSERT_EQUAL_64(0xfc00, d5);
6818 ASSERT_EQUAL_64(0xaabbfc00, d6);
6819 ASSERT_EQUAL_64(0x112233aabbfc00, d7);
6820 } else {
6821 // Smaxv
6822 ASSERT_EQUAL_64(0x33, d0);
6823 ASSERT_EQUAL_64(0x44aa, d1);
6824 ASSERT_EQUAL_64(0x112233, d2);
6825 ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
6826 // Umaxv
6827 ASSERT_EQUAL_64(0xfe, d4);
6828 ASSERT_EQUAL_64(0xfc00, d5);
6829 ASSERT_EQUAL_64(0xaabbfc00, d6);
6830 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d7);
6831 }
6832
6833 // Check the upper lanes above the top of the V register are all clear.
6834 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6835 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6836 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6837 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6838 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6839 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6840 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6841 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6842 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6843 }
6844 }
6845}
6846
6847typedef void (MacroAssembler::*SdotUdotFn)(const ZRegister& zd,
6848 const ZRegister& za,
6849 const ZRegister& zn,
6850 const ZRegister& zm);
6851
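// Helper for the Sdot/Udot tests below. The same dot-product macro is applied
// with several aliasing patterns (the accumulator, zn, zm, or both zn and zm
// aliased to the destination) to check the macro's register-reuse handling,
// and the inputs are checked afterwards to make sure they are preserved.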
6852template <typename Td, typename Ts, typename Te>
6853static void SdotUdotHelper(Test* config,
6854 SdotUdotFn macro,
6855 unsigned lane_size_in_bits,
6856 const Td& zd_inputs,
6857 const Td& za_inputs,
6858 const Ts& zn_inputs,
6859 const Ts& zm_inputs,
6860 const Te& zd_expected,
6861 const Te& zdnm_expected) {
6862 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6863 START();
6864
6865 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
6866 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
6867 ZRegister zn = z2.WithLaneSize(lane_size_in_bits / 4);
6868 ZRegister zm = z3.WithLaneSize(lane_size_in_bits / 4);
6869
6870 InsrHelper(&masm, zd, zd_inputs);
6871 InsrHelper(&masm, za, za_inputs);
6872 InsrHelper(&masm, zn, zn_inputs);
6873 InsrHelper(&masm, zm, zm_inputs);
6874
6875 // The Dot macro handles arbitrarily-aliased registers in the argument list.
6876 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
6877 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
6878 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
6879 ZRegister dnm_result = z13.WithLaneSize(lane_size_in_bits);
6880 ZRegister d_result = z14.WithLaneSize(lane_size_in_bits);
6881
6882 __ Mov(da_result, za);
6883 // zda = zda + (zn . zm)
6884 (masm.*macro)(da_result, da_result, zn, zm);
6885
6886 __ Mov(dn_result, zn);
6887 // zdn = za + (zdn . zm)
6888  (masm.*macro)(dn_result, za, dn_result.WithSameLaneSizeAs(zn), zm);
6889
6890 __ Mov(dm_result, zm);
6891 // zdm = za + (zn . zdm)
6892  (masm.*macro)(dm_result, za, zn, dm_result.WithSameLaneSizeAs(zm));
6893
6894 __ Mov(d_result, zd);
6895 // zd = za + (zn . zm)
6896 (masm.*macro)(d_result, za, zn, zm);
6897
6898 __ Mov(dnm_result, zn);
6899  // zdnm = za + (zdnm . zdnm)
6900  (masm.*macro)(dnm_result,
6901 za,
6902 dnm_result.WithSameLaneSizeAs(zn),
6903 dnm_result.WithSameLaneSizeAs(zm));
6904
6905 END();
6906
6907 if (CAN_RUN()) {
6908 RUN();
6909
6910 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
6911 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits / 4));
6912 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits / 4));
6913
6914 ASSERT_EQUAL_SVE(zd_expected, da_result);
6915 ASSERT_EQUAL_SVE(zd_expected, dn_result);
6916 ASSERT_EQUAL_SVE(zd_expected, dm_result);
6917 ASSERT_EQUAL_SVE(zd_expected, d_result);
6918
6919 ASSERT_EQUAL_SVE(zdnm_expected, dnm_result);
6920 }
6921}
6922
6923TEST_SVE(sve_sdot) {
6924 int zd_inputs[] = {0x33, 0xee, 0xff};
6925 int za_inputs[] = {INT32_MAX, -3, 2};
6926 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
6927 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
6928
6929 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
6930 int32_t zd_expected_s[] = {-2147418113, -183, 133}; // 0x8000ffff
6931 int64_t zd_expected_d[] = {2147549183, -183, 133}; // 0x8000ffff
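  // For example, the lowest S lane accumulates four B-lane products:
  // 2 + (-5 * 9) + (-20 * -5) + (9 * 4) + (8 * 5) = 133.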
6932
6933 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
6934 int32_t zdnm_expected_s[] = {-2147418113, 980, 572};
6935 int64_t zdnm_expected_d[] = {2147549183, 980, 572};
6936
6937 SdotUdotHelper(config,
6938 &MacroAssembler::Sdot,
6939 kSRegSize,
6940 zd_inputs,
6941 za_inputs,
6942 zn_inputs,
6943 zm_inputs,
6944 zd_expected_s,
6945 zdnm_expected_s);
6946 SdotUdotHelper(config,
6947 &MacroAssembler::Sdot,
6948 kDRegSize,
6949 zd_inputs,
6950 za_inputs,
6951 zn_inputs,
6952 zm_inputs,
6953 zd_expected_d,
6954 zdnm_expected_d);
6955}
6956
6957TEST_SVE(sve_udot) {
6958 int zd_inputs[] = {0x33, 0xee, 0xff};
6959 int za_inputs[] = {INT32_MAX, -3, 2};
6960 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
6961 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
6962
6963 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
6964 uint32_t zd_expected_s[] = {0x8000ffff, 0x00001749, 0x0000f085};
6965 uint64_t zd_expected_d[] = {0x000000047c00ffff,
6966 0x000000000017ff49,
6967 0x00000000fff00085};
6968
6969 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
6970 uint32_t zdnm_expected_s[] = {0x8000ffff, 0x000101d4, 0x0001d03c};
6971 uint64_t zdnm_expected_d[] = {0x000000047c00ffff,
6972 0x00000000fffe03d4,
6973 0x00000001ffce023c};
6974
6975 SdotUdotHelper(config,
6976 &MacroAssembler::Udot,
6977 kSRegSize,
6978 zd_inputs,
6979 za_inputs,
6980 zn_inputs,
6981 zm_inputs,
6982 zd_expected_s,
6983 zdnm_expected_s);
6984 SdotUdotHelper(config,
6985 &MacroAssembler::Udot,
6986 kDRegSize,
6987 zd_inputs,
6988 za_inputs,
6989 zn_inputs,
6990 zm_inputs,
6991 zd_expected_d,
6992 zdnm_expected_d);
6993}
6994
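// Convert double-precision inputs to the raw bit pattern of the requested lane
// size, so that the same test inputs can be loaded into H, S or D lanes.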
6995template <size_t N>
6996static void FPToRawbits(const double (&inputs)[N],
6997 uint64_t* outputs,
6998 unsigned lane_size_in_bits) {
6999 for (size_t i = 0; i < N; i++) {
7000 switch (lane_size_in_bits) {
7001 case kHRegSize:
7002 outputs[i] = Float16ToRawbits(
7003 FPToFloat16(inputs[i], FPTieEven, kIgnoreDefaultNaN));
7004 break;
7005 case kSRegSize:
7006 outputs[i] =
7007 FloatToRawbits(FPToFloat(inputs[i], FPTieEven, kIgnoreDefaultNaN));
7008 break;
7009 case kDRegSize:
7010 outputs[i] = DoubleToRawbits(inputs[i]);
7011 break;
7012 default:
7013 VIXL_UNIMPLEMENTED();
7014 break;
7015 }
7016 }
7017}
7018
7019template <typename Td, size_t N>
7020static void FPArithmeticFnHelper(Test* config,
7021 ArithmeticFn macro,
7022 unsigned lane_size_in_bits,
7023 const double (&zn_inputs)[N],
7024 const double (&zm_inputs)[N],
7025 const Td& zd_expected) {
7026 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7027 START();
7028
7029 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
7030 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
7031 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
7032
7033 uint64_t zn_rawbits[N];
7034 uint64_t zm_rawbits[N];
7035
7036 FPToRawbits(zn_inputs, zn_rawbits, lane_size_in_bits);
7037 FPToRawbits(zm_inputs, zm_rawbits, lane_size_in_bits);
7038
7039 InsrHelper(&masm, zn, zn_rawbits);
7040 InsrHelper(&masm, zm, zm_rawbits);
7041
7042 (masm.*macro)(zd, zn, zm);
7043
7044 END();
7045
7046 if (CAN_RUN()) {
7047 RUN();
7048
7049 ASSERT_EQUAL_SVE(zd_expected, zd);
7050 }
7051}
7052
7053TEST_SVE(sve_fp_arithmetic_unpredicated_fadd) {
7054 double zn_inputs[] = {24.0,
7055 5.5,
7056 0.0,
7057 3.875,
7058 2.125,
7059 kFP64PositiveInfinity,
7060 kFP64NegativeInfinity};
7061
7062 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
7063
7064 ArithmeticFn fn = &MacroAssembler::Fadd;
7065
7066 uint16_t expected_h[] = {Float16ToRawbits(Float16(1048.0)),
7067 Float16ToRawbits(Float16(2053.5)),
7068 Float16ToRawbits(Float16(0.1)),
7069 Float16ToRawbits(Float16(-0.875)),
7070 Float16ToRawbits(Float16(14.465)),
7071 Float16ToRawbits(kFP16PositiveInfinity),
7072 Float16ToRawbits(kFP16NegativeInfinity)};
7073
7074 FPArithmeticFnHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
7075
7076 uint32_t expected_s[] = {FloatToRawbits(1048.0f),
7077 FloatToRawbits(2053.5f),
7078 FloatToRawbits(0.1f),
7079 FloatToRawbits(-0.875f),
7080 FloatToRawbits(14.465f),
7081 FloatToRawbits(kFP32PositiveInfinity),
7082 FloatToRawbits(kFP32NegativeInfinity)};
7083
7084 FPArithmeticFnHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
7085
7086 uint64_t expected_d[] = {DoubleToRawbits(1048.0),
7087 DoubleToRawbits(2053.5),
7088 DoubleToRawbits(0.1),
7089 DoubleToRawbits(-0.875),
7090 DoubleToRawbits(14.465),
7091 DoubleToRawbits(kFP64PositiveInfinity),
7092 DoubleToRawbits(kFP64NegativeInfinity)};
7093
7094 FPArithmeticFnHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
7095}
7096
7097TEST_SVE(sve_fp_arithmetic_unpredicated_fsub) {
7098 double zn_inputs[] = {24.0,
7099 5.5,
7100 0.0,
7101 3.875,
7102 2.125,
7103 kFP64PositiveInfinity,
7104 kFP64NegativeInfinity};
7105
7106 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
7107
7108 ArithmeticFn fn = &MacroAssembler::Fsub;
7109
7110 uint16_t expected_h[] = {Float16ToRawbits(Float16(-1000.0)),
7111 Float16ToRawbits(Float16(-2042.5)),
7112 Float16ToRawbits(Float16(-0.1)),
7113 Float16ToRawbits(Float16(8.625)),
7114 Float16ToRawbits(Float16(-10.215)),
7115 Float16ToRawbits(kFP16PositiveInfinity),
7116 Float16ToRawbits(kFP16NegativeInfinity)};
7117
7118 FPArithmeticFnHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
7119
7120 uint32_t expected_s[] = {FloatToRawbits(-1000.0),
7121 FloatToRawbits(-2042.5),
7122 FloatToRawbits(-0.1),
7123 FloatToRawbits(8.625),
7124 FloatToRawbits(-10.215),
7125 FloatToRawbits(kFP32PositiveInfinity),
7126 FloatToRawbits(kFP32NegativeInfinity)};
7127
7128 FPArithmeticFnHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
7129
7130 uint64_t expected_d[] = {DoubleToRawbits(-1000.0),
7131 DoubleToRawbits(-2042.5),
7132 DoubleToRawbits(-0.1),
7133 DoubleToRawbits(8.625),
7134 DoubleToRawbits(-10.215),
7135 DoubleToRawbits(kFP64PositiveInfinity),
7136 DoubleToRawbits(kFP64NegativeInfinity)};
7137
7138 FPArithmeticFnHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
7139}
7140
7141TEST_SVE(sve_fp_arithmetic_unpredicated_fmul) {
7142 double zn_inputs[] = {24.0,
7143 5.5,
7144 0.0,
7145 3.875,
7146 2.125,
7147 kFP64PositiveInfinity,
7148 kFP64NegativeInfinity};
7149
7150 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
7151
7152 ArithmeticFn fn = &MacroAssembler::Fmul;
7153
7154 uint16_t expected_h[] = {Float16ToRawbits(Float16(24576.0)),
7155 Float16ToRawbits(Float16(11264.0)),
7156 Float16ToRawbits(Float16(0.0)),
7157 Float16ToRawbits(Float16(-18.4)),
7158 Float16ToRawbits(Float16(26.23)),
7159 Float16ToRawbits(kFP16PositiveInfinity),
7160 Float16ToRawbits(kFP16PositiveInfinity)};
7161
7162 FPArithmeticFnHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
7163
7164 uint32_t expected_s[] = {FloatToRawbits(24576.0),
7165 FloatToRawbits(11264.0),
7166 FloatToRawbits(0.0),
7167 FloatToRawbits(-18.40625),
7168 FloatToRawbits(26.2225),
7169 FloatToRawbits(kFP32PositiveInfinity),
7170 FloatToRawbits(kFP32PositiveInfinity)};
7171
7172 FPArithmeticFnHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
7173
7174 uint64_t expected_d[] = {DoubleToRawbits(24576.0),
7175 DoubleToRawbits(11264.0),
7176 DoubleToRawbits(0.0),
7177 DoubleToRawbits(-18.40625),
7178 DoubleToRawbits(26.2225),
7179 DoubleToRawbits(kFP64PositiveInfinity),
7180 DoubleToRawbits(kFP64PositiveInfinity)};
7181
7182 FPArithmeticFnHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
7183}
7184
7185template <typename Td, size_t N>
7186static void FPBinArithHelper(Test* config,
7187 IntBinArithFn macro,
7188 unsigned lane_size_in_bits,
7189 const double (&zd_inputs)[N],
7190 const int (&pg_inputs)[N],
7191 const double (&zn_inputs)[N],
7192 const double (&zm_inputs)[N],
7193 const Td& zd_expected) {
7194 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7195 START();
7196
7197 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
7198 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
7199 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
7200
7201 uint64_t zd_rawbits[N];
7202 uint64_t zn_rawbits[N];
7203 uint64_t zm_rawbits[N];
7204 FPToRawbits(zd_inputs, zd_rawbits, lane_size_in_bits);
7205 FPToRawbits(zn_inputs, zn_rawbits, lane_size_in_bits);
7206 FPToRawbits(zm_inputs, zm_rawbits, lane_size_in_bits);
7207
7208 InsrHelper(&masm, zd, zd_rawbits);
7209 InsrHelper(&masm, zn, zn_rawbits);
7210 InsrHelper(&masm, zm, zm_rawbits);
7211
7212 PRegisterWithLaneSize pg = p0.WithLaneSize(lane_size_in_bits);
7213 Initialise(&masm, pg, pg_inputs);
7214
7215 // `instr` zdn, pg, zdn, zm
7216 ZRegister dn_result = z0.WithLaneSize(lane_size_in_bits);
7217 __ Mov(dn_result, zn);
7218 (masm.*macro)(dn_result, pg.Merging(), dn_result, zm);
7219
7220 // Based on whether zd and zm registers are aliased, the macro of instructions
7221 // (`Instr`) swaps the order of operands if it has the commutative property,
7222 // otherwise, transfer to the reversed `Instr`, such as fdivr.
7223 // `instr` zdm, pg, zn, zdm
7224 ZRegister dm_result = z1.WithLaneSize(lane_size_in_bits);
7225 __ Mov(dm_result, zm);
7226 (masm.*macro)(dm_result, pg.Merging(), zn, dm_result);
7227
7228  // The instruction macro (`Instr`) automatically selects between `instr` and
7229  // movprfx + `instr`, based on whether the zd and zn registers are aliased.
7230  // A generated movprfx is predicated, using the same governing predicate
7231  // register, so inactive lanes of zd are left untouched; initialise the
7232  // destination register first to keep the expected result well-defined.
7233 // `instr` zd, pg, zn, zm
7234 ZRegister d_result = z2.WithLaneSize(lane_size_in_bits);
7235 __ Mov(d_result, zd);
7236 (masm.*macro)(d_result, pg.Merging(), zn, zm);
7237
7238 END();
7239
7240 if (CAN_RUN()) {
7241 RUN();
7242
7243 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
7244 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
7245 if (!core.HasSVELane(dn_result, lane)) break;
7246 if ((pg_inputs[i] & 1) != 0) {
7247 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dn_result, lane);
7248 } else {
7249 ASSERT_EQUAL_SVE_LANE(zn_rawbits[i], dn_result, lane);
7250 }
7251 }
7252
7253 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
7254 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
7255 if (!core.HasSVELane(dm_result, lane)) break;
7256 if ((pg_inputs[i] & 1) != 0) {
7257 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dm_result, lane);
7258 } else {
7259 ASSERT_EQUAL_SVE_LANE(zm_rawbits[i], dm_result, lane);
7260 }
7261 }
7262
7263 ASSERT_EQUAL_SVE(zd_expected, d_result);
7264 }
7265}
7266
7267TEST_SVE(sve_binary_arithmetic_predicated_fdiv) {
7268 double zd_in[] = {0.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
7269
7270 double zn_in[] = {24.0,
7271 24.0,
7272 -2.0,
7273 -2.0,
7274 5.5,
7275 5.5,
7276 kFP64PositiveInfinity,
7277 kFP64PositiveInfinity,
7278 kFP64NegativeInfinity,
7279 kFP64NegativeInfinity};
7280
7281 double zm_in[] = {-2.0, -2.0, 24.0, 24.0, 0.5, 0.5, 0.65, 0.65, 24.0, 24.0};
7282
7283
7284 IntBinArithFn fn = &MacroAssembler::Fdiv;
7285
7286 int pg_in[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
7287
7288 uint32_t exp_h[] = {Float16ToRawbits(Float16(0.1)),
7289 Float16ToRawbits(Float16(-12.0)),
7290 Float16ToRawbits(Float16(2.2)),
7291 Float16ToRawbits(Float16(-0.0833)),
7292 Float16ToRawbits(Float16(4.4)),
7293 Float16ToRawbits(Float16(11.0)),
7294 Float16ToRawbits(Float16(6.6)),
7295 Float16ToRawbits(kFP16PositiveInfinity),
7296 Float16ToRawbits(Float16(8.8)),
7297 Float16ToRawbits(kFP16NegativeInfinity)};
7298
7299 FPBinArithHelper(config, fn, kHRegSize, zd_in, pg_in, zn_in, zm_in, exp_h);
7300
7301 uint32_t exp_s[] = {FloatToRawbits(0.1),
7302 FloatToRawbits(-12.0),
7303 FloatToRawbits(2.2),
7304 0xbdaaaaab,
7305 FloatToRawbits(4.4),
7306 FloatToRawbits(11.0),
7307 FloatToRawbits(6.6),
7308 FloatToRawbits(kFP32PositiveInfinity),
7309 FloatToRawbits(8.8),
7310 FloatToRawbits(kFP32NegativeInfinity)};
7311
7312 FPBinArithHelper(config, fn, kSRegSize, zd_in, pg_in, zn_in, zm_in, exp_s);
7313
7314 uint64_t exp_d[] = {DoubleToRawbits(0.1),
7315 DoubleToRawbits(-12.0),
7316 DoubleToRawbits(2.2),
7317 0xbfb5555555555555,
7318 DoubleToRawbits(4.4),
7319 DoubleToRawbits(11.0),
7320 DoubleToRawbits(6.6),
7321 DoubleToRawbits(kFP64PositiveInfinity),
7322 DoubleToRawbits(8.8),
7323 DoubleToRawbits(kFP64NegativeInfinity)};
7324
7325 FPBinArithHelper(config, fn, kDRegSize, zd_in, pg_in, zn_in, zm_in, exp_d);
7326}
7327
7328TEST_SVE(sve_select) {
7329 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7330 START();
7331
7332 uint64_t in0[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
7333 uint64_t in1[] = {0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa};
7334
7335 // For simplicity, we re-use the same pg for various lane sizes.
7336 // For D lanes: 1, 1, 0
7337 // For S lanes: 1, 1, 1, 0, 0
7338 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
7339 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
7340 Initialise(&masm, p0.VnB(), pg_in);
7341 PRegisterM pg = p0.Merging();
7342
7343 InsrHelper(&masm, z30.VnD(), in0);
7344 InsrHelper(&masm, z31.VnD(), in1);
7345
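  // Sel copies lanes of the first source (z30) where the corresponding
  // predicate element is true, and lanes of the second source (z31) elsewhere.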
7346 __ Sel(z0.VnB(), pg, z30.VnB(), z31.VnB());
7347 __ Sel(z1.VnH(), pg, z30.VnH(), z31.VnH());
7348 __ Sel(z2.VnS(), pg, z30.VnS(), z31.VnS());
7349 __ Sel(z3.VnD(), pg, z30.VnD(), z31.VnD());
7350
7351 END();
7352
7353 if (CAN_RUN()) {
7354 RUN();
7355
7356 uint64_t expected_z0[] = {0xaaaaaaaa05aa07f8,
7357 0xfeaaaaf0aac3870f,
7358 0xaaaa56aa9abcdeaa};
7359 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
7360
7361 uint64_t expected_z1[] = {0xaaaaaaaaaaaa07f8,
7362 0xaaaaf8f0e1c3870f,
7363 0xaaaaaaaa9abcaaaa};
7364 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
7365
7366 uint64_t expected_z2[] = {0xaaaaaaaa05f607f8,
7367 0xfefcf8f0e1c3870f,
7368 0xaaaaaaaaaaaaaaaa};
7369 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
7370
7371 uint64_t expected_z3[] = {0x01f203f405f607f8,
7372 0xfefcf8f0e1c3870f,
7373 0xaaaaaaaaaaaaaaaa};
7374 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
7375 }
7376}
7377
7378} // namespace aarch64
7379} // namespace vixl