// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
  // We never free this memory, but we need it to live for as long as the static
  // linked list of tests, and this is the easiest way to do it.
  Test* test = new Test(name, fn);
  test->set_sve_vl_in_bits(vl);
  return test;
}

// The TEST_SVE macro works just like the usual TEST macro, but the resulting
// function receives a `Test* config` argument, to allow it to query the vector
// length.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// On the Simulator, run SVE tests with several vector lengths, including the
// extreme values and an intermediate value that isn't a power of two.

#define TEST_SVE(name)                                                  \
  void Test##name(Test* config);                                        \
  Test* test_##name##_list[] =                                          \
      {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name),    \
       MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name),    \
       MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  simulator.SetVectorLengthInBits(config->sve_vl_in_bits())

#else
// Otherwise, just use whatever the hardware provides.
static const int kSVEVectorLengthInBits =
    CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
        ? CPU::ReadSVEVectorLengthInBits()
        : 0;

#define TEST_SVE(name)                                                      \
  void Test##name(Test* config);                                            \
  Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits,          \
                                           "AARCH64_ASM_" #name "_vlauto",  \
                                           &Test##name);                    \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  USE(config)

#endif

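// A minimal usage sketch of these macros (the test name below is hypothetical;
// START, END, CAN_RUN, RUN and ASSERT_EQUAL_SVE are the test framework
// facilities used throughout this file):
//
//     TEST_SVE(sve_example) {
//       SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
//       START();
//       __ Dup(z0.VnB(), 42);  // Emit the code under test.
//       END();
//       if (CAN_RUN()) {
//         RUN();
//         ASSERT_EQUAL_SVE(0x2a, z0.VnB());  // Check the simulated state.
//       }
//     }
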
// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
// is optimised for call-site clarity, not generated code quality, so it doesn't
// exist in the MacroAssembler itself.
//
// Usage:
//
//     int values[] = { 42, 43, 44 };
//     InsrHelper(&masm, z0.VnS(), values);  // Sets z0.S = { ..., 42, 43, 44 }
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane.
template <typename T, size_t N>
void InsrHelper(MacroAssembler* masm,
                const ZRegister& zdn,
                const T (&values)[N]) {
  for (size_t i = 0; i < N; i++) {
    masm->Insr(zdn, values[i]);
  }
}

// Conveniently initialise P registers with scalar bit patterns. The destination
// lane size is ignored. This is optimised for call-site clarity, not generated
// code quality.
//
// Usage:
//
//     Initialise(&masm, p0, 0x1234);  // Sets p0 = 0b'0001'0010'0011'0100
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value3,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  // Generate a literal pool, as in the array form.
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  masm->Adr(temp, &data);
  masm->Ldr(pd, SVEMemOperand(temp));
  masm->B(&done);
  {
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    masm->dc64(value0);
    masm->dc64(value1);
    masm->dc64(value2);
    masm->dc64(value3);
  }
  masm->Bind(&done);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, value2, value1, value0);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, 0, value1, value0);
}
void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
  Initialise(masm, pd, 0, 0, 0, value0);
}

// Conveniently initialise P registers by lane. This is optimised for call-site
// clarity, not generated code quality.
//
// Usage:
//
//     int values[] = { 0x0, 0x1, 0x2 };
//     Initialise(&masm, p0.VnS(), values);  // Sets p0 = 0b'0000'0001'0010
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane. Unspecified lanes are set to 0 (inactive).
//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lowest bit of each lane, and writes the upper
// bits as zero, but other (encodable) values can be specified if required by
// the test.
template <typename T, size_t N>
void Initialise(MacroAssembler* masm,
                const PRegisterWithLaneSize& pd,
                const T (&values)[N]) {
  // Turn the array into 64-bit chunks.
  uint64_t chunks[4] = {0, 0, 0, 0};
  VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);

  int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT((64 % p_bits_per_lane) == 0);
  VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);

  uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);

  VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
  size_t bit = 0;
  for (int n = static_cast<int>(N - 1); n >= 0; n--) {
    VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
    uint64_t value = values[n] & p_lane_mask;
    chunks[bit / 64] |= value << (bit % 64);
    bit += p_bits_per_lane;
  }

  Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
}

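// A worked example of the packing above (values chosen here for illustration):
// with S-sized lanes there are four predicate bits per lane, of which the
// architecture reads only the lowest, so an input of {0xc, 0x7, 0x9, 0x6}
// packs into chunks[0] = 0xc796. The rightmost element (0x6) lands in the
// lowest four bits, and each preceding element occupies the next four bits up.
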
// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  __ Mov(x0, 0x0123456789abcdef);

  // Test basic `Insr` behaviour.
  __ Insr(z0.VnB(), 1);
  __ Insr(z0.VnB(), 2);
  __ Insr(z0.VnB(), x0);
  __ Insr(z0.VnB(), -42);
  __ Insr(z0.VnB(), 0);

  // Test array inputs.
  int z1_inputs[] = {3, 4, 5, -42, 0};
  InsrHelper(&masm, z1.VnH(), z1_inputs);

  // Test that sign-extension works as intended for various lane sizes.
  __ Dup(z2.VnD(), 0);                    // Clear the register first.
  __ Insr(z2.VnB(), -42);                 // 0xd6
  __ Insr(z2.VnB(), 0xfe);                // 0xfe
  __ Insr(z2.VnH(), -42);                 // 0xffd6
  __ Insr(z2.VnH(), 0xfedc);              // 0xfedc
  __ Insr(z2.VnS(), -42);                 // 0xffffffd6
  __ Insr(z2.VnS(), 0xfedcba98);          // 0xfedcba98
  // Use another register for VnD(), so we can support 128-bit Z registers.
  __ Insr(z3.VnD(), -42);                 // 0xffffffffffffffd6
  __ Insr(z3.VnD(), 0xfedcba9876543210);  // 0xfedcba9876543210

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that array checks work properly on a register initialised
    // lane-by-lane.
    int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
    ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());

    // Test that lane-by-lane checks work properly on a register initialised
    // by array.
    for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
      // The rightmost (highest-indexed) array element maps to the
      // lowest-numbered lane.
      int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
    }

    uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
    ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
    uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
    ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
  }
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_p) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Simple cases: move boolean (0 or 1) values.

  int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
  Initialise(&masm, p1.VnH(), p1_inputs);

  int p2_inputs[] = {1, 1, 0, 1};
  Initialise(&masm, p2.VnS(), p2_inputs);

  int p3_inputs[] = {0, 1};
  Initialise(&masm, p3.VnD(), p3_inputs);

  // Advanced cases: move numeric value into architecturally-ignored bits.

  // B-sized lanes get one bit in a P register, so there are no ignored bits.

  // H-sized lanes get two bits in a P register.
  int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
  Initialise(&masm, p4.VnH(), p4_inputs);

  // S-sized lanes get four bits in a P register.
  int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
  Initialise(&masm, p5.VnS(), p5_inputs);

  // D-sized lanes get eight bits in a P register.
  int p6_inputs[] = {0x81, 0xcc, 0x55};
  Initialise(&masm, p6.VnD(), p6_inputs);

  // The largest possible P register has 32 bytes.
  int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
                     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                     0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
  Initialise(&masm, p7.VnD(), p7_inputs);

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that lane-by-lane checks work properly. The rightmost
    // (highest-indexed) array element maps to the lowest-numbered lane.
    for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
    }

    // Test that array checks work properly on predicates initialised with a
    // possibly-different lane size.
    // 0b...11'10'01'00'01'10'11
    int p4_expected[] = {0x39, 0x1b};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnD());

    ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());

    // 0b...10000001'11001100'01010101
    int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnH());

    // 0b...10011100'10011101'10011110'10011111
    int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
                         1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
  }
}

// Test that writes to V registers clear the high bits of the corresponding Z
// register.
TEST_SVE(sve_v_write_clear) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                          CPUFeatures::kFP,
                          CPUFeatures::kSVE);
  START();

  // The Simulator has two mechanisms for writing V registers:
  //  - Write*Register, calling through to SimRegisterBase::Write.
  //  - LogicVRegister::ClearForWrite followed by one or more lane updates.
  // Try to cover both variants.

  // Prepare some known inputs.
  uint8_t data[kQRegSizeInBytes];
  for (size_t i = 0; i < kQRegSizeInBytes; i++) {
    data[i] = 42 + i;
  }
  __ Mov(x10, reinterpret_cast<uintptr_t>(data));
  __ Fmov(d30, 42.0);

  // Use Index to label the lane indices, so failures are easy to detect and
  // diagnose.
  __ Index(z0.VnB(), 0, 1);
  __ Index(z1.VnB(), 0, 1);
  __ Index(z2.VnB(), 0, 1);
  __ Index(z3.VnB(), 0, 1);
  __ Index(z4.VnB(), 0, 1);

  __ Index(z10.VnB(), 0, -1);
  __ Index(z11.VnB(), 0, -1);
  __ Index(z12.VnB(), 0, -1);
  __ Index(z13.VnB(), 0, -1);
  __ Index(z14.VnB(), 0, -1);

  // Instructions using Write*Register (and SimRegisterBase::Write).
  __ Ldr(b0, MemOperand(x10));
  __ Fcvt(h1, d30);
  __ Fmov(s2, 1.5f);
  __ Fmov(d3, d30);
  __ Ldr(q4, MemOperand(x10));

  // Instructions using LogicVRegister::ClearForWrite.
  // These also (incidentally) test that across-lane instructions correctly
  // ignore the high-order Z register lanes.
  __ Sminv(b10, v10.V16B());
  __ Addv(h11, v11.V4H());
  __ Saddlv(s12, v12.V8H());
  __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
  __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Check the Q part first.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1);  // 42.0 (f16)
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2);  // 1.5 (f32)
    ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3);  // 42.0 (f64)
    ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10);  // -15
    // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
    // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13);  // [-8] x 8
    // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
    ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);

    // Check that the upper lanes are all clear.
    for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
      ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
    }
  }
}

static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
  int za_inputs[] = {-39, 1, -3, 2};
  int zn_inputs[] = {-5, -20, 9, 8};
  int zm_inputs[] = {9, -5, 4, 5};

  ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
  ZRegister za = z1.WithLaneSize(lane_size_in_bits);
  ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
  ZRegister zm = z3.WithLaneSize(lane_size_in_bits);

  // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
  InsrHelper(&masm, zd, zd_inputs);
  InsrHelper(&masm, za, za_inputs);
  InsrHelper(&masm, zn, zn_inputs);
  InsrHelper(&masm, zm, zm_inputs);

  int p0_inputs[] = {1, 1, 0, 1};
  int p1_inputs[] = {1, 0, 1, 1};
  int p2_inputs[] = {0, 1, 1, 1};
  int p3_inputs[] = {1, 1, 1, 0};

  Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
  Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
  Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
  Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);

  // The Mla macro automatically selects between mla, mad and movprfx + mla
  // based on what registers are aliased.
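  // Roughly (a sketch of the intended selection, not asserted by this test):
  //  - if the destination aliases the addend, a single mla suffices;
  //  - if it aliases a multiplicand, mad computes the same result with the
  //    operands reordered;
  //  - otherwise movprfx copies the addend into the destination first.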
  ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
  ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);

  __ Mov(mla_da_result, za);
  __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);

  __ Mov(mla_dn_result, zn);
  __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);

  __ Mov(mla_dm_result, zm);
  __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);

  __ Mov(mla_d_result, zd);
  __ Mla(mla_d_result, p3.Merging(), za, zn, zm);

  // The Mls macro automatically selects between mls, msb and movprfx + mls
  // based on what registers are aliased.
  ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
  ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);

  __ Mov(mls_da_result, za);
  __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);

  __ Mov(mls_dn_result, zn);
  __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);

  __ Mov(mls_dm_result, zm);
  __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);

  __ Mov(mls_d_result, zd);
  __ Mls(mls_d_result, p3.Merging(), za, zn, zm);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));

    int mla[] = {-84, 101, 33, 42};
    int mls[] = {6, -99, -39, -38};

    int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);

    int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);

    int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);

    int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);

    int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);

    int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);

    int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);

    int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
  }
}

TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }

TEST_SVE(sve_bitwise_unpredicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  InsrHelper(&masm, z8.VnD(), z8_inputs);
  uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
  InsrHelper(&masm, z15.VnD(), z15_inputs);

  __ And(z1.VnD(), z8.VnD(), z15.VnD());
  __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
  __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
  __ Orr(z4.VnD(), z8.VnD(), z15.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
    uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
    uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
    uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
    uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
  }
}

TEST_SVE(sve_predicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // 0b...01011010'10110111
  int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1};  // Pm
  // 0b...11011001'01010010
  int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0};  // Pn
  // 0b...01010101'10110010
  int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};  // pg

  Initialise(&masm, p10.VnB(), p10_inputs);
  Initialise(&masm, p11.VnB(), p11_inputs);
  Initialise(&masm, p12.VnB(), p12_inputs);

  __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x0, NZCV);
  __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x1, NZCV);
  __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0b...01010000'00010010
    int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...00000001'00000000
    int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...00000001'10100000
    int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000101'10100000
    int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000100'00000000
    int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...01010101'00010010
    int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...01010001'10110010
    int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
    // 0b...01011011'00010111
    int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};

    ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
    ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
    ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
    ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
    ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());

    ASSERT_EQUAL_32(SVEFirstFlag, w0);
    ASSERT_EQUAL_32(SVENotLastFlag, w1);
  }
}

TEST_SVE(sve_int_compare_vectors) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
  int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
  int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z10.VnB(), z10_inputs);
  InsrHelper(&masm, z11.VnB(), z11_inputs);
  Initialise(&masm, p0.VnB(), p0_inputs);

  __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
  __ Mrs(x6, NZCV);

  uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
  uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
  int p1_inputs[] = {1, 1};
  InsrHelper(&masm, z12.VnD(), z12_inputs);
  InsrHelper(&masm, z13.VnD(), z13_inputs);
  Initialise(&masm, p1.VnD(), p1_inputs);

  __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
  __ Mrs(x7, NZCV);

  int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
  int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};

  int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z14.VnH(), z14_inputs);
  InsrHelper(&masm, z15.VnH(), z15_inputs);
  Initialise(&masm, p2.VnH(), p2_inputs);

  __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x8, NZCV);

  __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x9, NZCV);

  int z16_inputs[] = {0, -1, 0, 0};
  int z17_inputs[] = {0, 0, 2147483647, -2147483648};
  int p3_inputs[] = {1, 1, 1, 1};
  InsrHelper(&masm, z16.VnS(), z16_inputs);
  InsrHelper(&masm, z17.VnS(), z17_inputs);
  Initialise(&masm, p3.VnS(), p3_inputs);

  __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x10, NZCV);

  __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x11, NZCV);

  // Architectural aliases testing.
  __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB());  // HS
  __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD());  // HI
  __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH());  // GE
  __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS());  // GT

  END();

  if (CAN_RUN()) {
    RUN();

    int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
    for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
      int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
    }

    int p7_expected[] = {1, 0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnD());

    int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnH());

    int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnH());

    int p10_expected[] = {0, 0, 0, 1};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0, 1, 1, 1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    // Reuse the expected results to verify the architectural aliases.
    ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
    ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
    ASSERT_EQUAL_SVE(p10_expected, p15.VnS());

    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(NoFlag, w7);
    ASSERT_EQUAL_32(NoFlag, w8);
    ASSERT_EQUAL_32(NoFlag, w9);
    ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
  }
}

TEST_SVE(sve_int_compare_vectors_wide_elements) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
  int src2_inputs_1[] = {0, -1};
  int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_1);
  InsrHelper(&masm, z19.VnD(), src2_inputs_1);
  Initialise(&masm, p0.VnB(), mask_inputs_1);

  __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x2, NZCV);
  __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x3, NZCV);

  int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
  int src2_inputs_2[] = {0, -32767};
  int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z13.VnH(), src1_inputs_2);
  InsrHelper(&masm, z19.VnD(), src2_inputs_2);
  Initialise(&masm, p0.VnH(), mask_inputs_2);

  __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x4, NZCV);
  __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x5, NZCV);

  int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
  int src2_inputs_3[] = {0, -2147483648};
  int mask_inputs_3[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_3);
  InsrHelper(&masm, z19.VnD(), src2_inputs_3);
  Initialise(&masm, p0.VnS(), mask_inputs_3);

  __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x6, NZCV);
  __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x7, NZCV);

  int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
  int src2_inputs_4[] = {0x00, 0x7f};
  int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_4);
  InsrHelper(&masm, z19.VnD(), src2_inputs_4);
  Initialise(&masm, p0.VnB(), mask_inputs_4);

  __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x8, NZCV);
  __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x9, NZCV);

  int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
  int src2_inputs_5[] = {0x8000, 0xffff};
  int mask_inputs_5[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_5);
  InsrHelper(&masm, z19.VnD(), src2_inputs_5);
  Initialise(&masm, p0.VnS(), mask_inputs_5);

  __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x10, NZCV);
  __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x11, NZCV);

  END();

  if (CAN_RUN()) {
    RUN();
    int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());

    int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());

    int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnH());

    int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p5_expected, p5.VnH());

    int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnS());

    int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnS());

    int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnB());

    int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnB());

    int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    ASSERT_EQUAL_32(NoFlag, w2);
    ASSERT_EQUAL_32(NoFlag, w3);
    ASSERT_EQUAL_32(NoFlag, w4);
    ASSERT_EQUAL_32(SVENotLastFlag, w5);
    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(SVENotLastFlag, w7);
    ASSERT_EQUAL_32(SVEFirstFlag, w8);
    ASSERT_EQUAL_32(SVEFirstFlag, w9);
    ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
    ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
  }
}

TEST_SVE(sve_bitwise_imm) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // clang-format off
  uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
  uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
                           0x0123, 0x4567, 0x89ab, 0xcdef};
  uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
                          0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
  // clang-format on

  InsrHelper(&masm, z1.VnD(), z21_inputs);
  InsrHelper(&masm, z2.VnS(), z22_inputs);
  InsrHelper(&masm, z3.VnH(), z23_inputs);
  InsrHelper(&masm, z4.VnB(), z24_inputs);

  __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
  __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
  __ And(z3.VnH(), z3.VnH(), 0x0ff0);
  __ And(z4.VnB(), z4.VnB(), 0x3f);

  InsrHelper(&masm, z5.VnD(), z21_inputs);
  InsrHelper(&masm, z6.VnS(), z22_inputs);
  InsrHelper(&masm, z7.VnH(), z23_inputs);
  InsrHelper(&masm, z8.VnB(), z24_inputs);

  __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
  __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
  __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
  __ Eor(z8.VnB(), z8.VnB(), 0x3f);

  InsrHelper(&masm, z9.VnD(), z21_inputs);
  InsrHelper(&masm, z10.VnS(), z22_inputs);
  InsrHelper(&masm, z11.VnH(), z23_inputs);
  InsrHelper(&masm, z12.VnB(), z24_inputs);

  __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
  __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
  __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
  __ Orr(z12.VnB(), z12.VnB(), 0x3f);

  {
    // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
    // so here we test `dupm` directly.
    ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
    __ dupm(z13.VnD(), 0x7ffffff800000000);
    __ dupm(z14.VnS(), 0x7ffc7ffc);
    __ dupm(z15.VnH(), 0x3ffc);
    __ dupm(z16.VnB(), 0xc3);
  }

  END();

  if (CAN_RUN()) {
    RUN();

    // clang-format off
    uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
    uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
    uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
                              0x0120, 0x0560, 0x09a0, 0x0de0};
    uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
                             0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnB());

    uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
    uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
    uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
                              0x0ed3, 0x4a97, 0x865b, 0xc21f};
    uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
                             0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};

    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
    ASSERT_EQUAL_SVE(z8_expected, z8.VnB());

    uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
    uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
    uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
                               0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
    uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
                              0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};

    ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
    ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
    ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
    ASSERT_EQUAL_SVE(z12_expected, z12.VnB());

    uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
    uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
    uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
                               0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc};
    ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
    ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
    ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
    // clang-format on
  }
}

TEST_SVE(sve_dup_imm) {
  // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
  // unencodable immediates.

  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Encodable with `dup` (shift 0).
  __ Dup(z0.VnD(), -1);
  __ Dup(z1.VnS(), 0x7f);
  __ Dup(z2.VnH(), -0x80);
  __ Dup(z3.VnB(), 42);

  // Encodable with `dup` (shift 8).
  __ Dup(z4.VnD(), -42 * 256);
  __ Dup(z5.VnS(), -0x8000);
  __ Dup(z6.VnH(), 0x7f00);
  // B-sized lanes cannot take a shift of 8.

  // Encodable with `dupm` (but not `dup`).
  __ Dup(z10.VnD(), 0x3fc);
  __ Dup(z11.VnS(), -516097);  // 0xfff81fff, as a signed int.
  __ Dup(z12.VnH(), 0x0001);
  // All values that fit B-sized lanes are encodable with `dup`.

  // Cases that require immediate synthesis.
  __ Dup(z20.VnD(), 0x1234);
  __ Dup(z21.VnD(), -4242);
  __ Dup(z22.VnD(), 0xfedcba9876543210);
  __ Dup(z23.VnS(), 0x01020304);
  __ Dup(z24.VnS(), -0x01020304);
  __ Dup(z25.VnH(), 0x3c38);
  // All values that fit B-sized lanes are directly encodable.

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
    ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
    ASSERT_EQUAL_SVE(0xff80, z2.VnH());
    ASSERT_EQUAL_SVE(0x2a, z3.VnB());

    ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
    ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
    ASSERT_EQUAL_SVE(0x7f00, z6.VnH());

    ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
    ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
    ASSERT_EQUAL_SVE(0x0001, z12.VnH());

    ASSERT_EQUAL_SVE(0x1234, z20.VnD());
    ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
    ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
    ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
    ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
    ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
  }
}

TEST_SVE(sve_inc_dec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

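  // Interpreted at {B, H, S, D} lane sizes, p0 predicates a lane with the
  // lowest bit of each {1, 2, 4, 8}-bit field, so the counts below are the
  // number of 1s at every {1st, 2nd, 4th, 8th} position (counting from the
  // right) of p0_inputs.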
  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, 0x123456780000002a);
  __ Decp(x0, p0.VnB());

  __ Mov(x1, 0x123456780000002a);
  __ Incp(x1, p0.VnH());

  // Check that saturation does not occur.
  __ Mov(x10, 1);
  __ Decp(x10, p0.VnS());

  __ Mov(x11, UINT64_MAX);
  __ Incp(x11, p0.VnD());

  __ Mov(x12, INT64_MAX);
  __ Incp(x12, p0.VnB());

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Decp(x20, p15.VnB());

  __ Mov(x21, 0x4000000000000000);
  __ Incp(x21, p15.VnH());

  END();
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
    ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);

    ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
    ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
    ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);

    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
  }
}

TEST_SVE(sve_sqinc_sqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Sqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Sqincp(x1, p0.VnH());

  // 32-bit operations sign-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Sqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Sqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 1);
  __ Sqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high - 1);
  __ Sqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 0x8000000000000001);  // INT64_MIN + 1
  __ Sqdecp(x10, p0.VnB());

  __ Mov(x11, dummy_high + 0x80000001);  // INT32_MIN + 1
  __ Sqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 1);
  __ Sqdecp(x12, p0.VnS());

  __ Mov(x13, dummy_high + 1);
  __ Sqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Sqincp(x14, p0.VnB());

  __ Mov(x15, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Sqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0xffffffffffffffff);
  __ Sqincp(x18, p0.VnS());

  __ Mov(x19, dummy_high + 0xffffffff);
  __ Sqincp(x19, p0.VnD(), w19);

  __ Mov(x20, dummy_high + 0xffffffff);
  __ Sqdecp(x20, p0.VnB(), w20);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x21, 0);
  __ Sqdecp(x21, p15.VnB());

  __ Mov(x22, 0);
  __ Sqincp(x22, p15.VnH());

  __ Mov(x23, dummy_high);
  __ Sqdecp(x23, p15.VnS(), w23);

  __ Mov(x24, dummy_high);
  __ Sqincp(x24, p15.VnD(), w24);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations sign-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
    ASSERT_EQUAL_64(p0_d_count - 1, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(INT64_MIN, x10);
    ASSERT_EQUAL_64(INT32_MIN, x11);
    ASSERT_EQUAL_64(1 - p0_s_count, x12);
    ASSERT_EQUAL_64(1 - p0_d_count, x13);
    ASSERT_EQUAL_64(INT64_MAX, x14);
    ASSERT_EQUAL_64(INT32_MAX, x15);
    ASSERT_EQUAL_64(p0_s_count - 1, x18);
    ASSERT_EQUAL_64(p0_d_count - 1, x19);
    ASSERT_EQUAL_64(-1 - p0_b_count, x20);

    // Check all-true predicates.
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
  }
}

TEST_SVE(sve_uqinc_uqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Uqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Uqincp(x1, p0.VnH());

  // 32-bit operations zero-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Uqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Uqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 0x80000001);
  __ Uqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high + 0x7fffffff);
  __ Uqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 1);
  __ Uqdecp(x10, p0.VnB(), x10);

  __ Mov(x11, dummy_high + 1);
  __ Uqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 0x8000000000000000);  // INT64_MAX + 1
  __ Uqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 0x80000000);  // INT32_MAX + 1
  __ Uqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0xfffffffffffffffe);  // UINT64_MAX - 1
  __ Uqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0xfffffffe);  // UINT32_MAX - 1
  __ Uqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Uqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Uqincp(x19, p0.VnD(), w19);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Uqdecp(x20, p15.VnB(), x20);

  __ Mov(x21, 0x4000000000000000);
  __ Uqincp(x21, p15.VnH(), x21);

  __ Mov(x22, dummy_high + 0x40000000);
  __ Uqdecp(x22, p15.VnS(), w22);

  __ Mov(x23, dummy_high + 0x40000000);
  __ Uqincp(x23, p15.VnD(), w23);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations zero-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
    ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(0, x10);
    ASSERT_EQUAL_64(0, x11);
    ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
    ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
    ASSERT_EQUAL_64(UINT64_MAX, x14);
    ASSERT_EQUAL_64(UINT32_MAX, x15);
    ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
    ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);

    // Check all-true predicates.
    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
    ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
    ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
  }
}

TEST_SVE(sve_inc_dec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  // Check that saturation does not occur.

  int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
  InsrHelper(&masm, z0.VnD(), z0_inputs);

  int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
  InsrHelper(&masm, z1.VnD(), z1_inputs);

  int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
  InsrHelper(&masm, z2.VnS(), z2_inputs);

  int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
  InsrHelper(&masm, z3.VnH(), z3_inputs);

  // The MacroAssembler implements non-destructive operations using movprfx.
  __ Decp(z10.VnD(), p0, z0.VnD());
  __ Decp(z11.VnD(), p0, z1.VnD());
  __ Decp(z12.VnS(), p0, z2.VnS());
  __ Decp(z13.VnH(), p0, z3.VnH());

  __ Incp(z14.VnD(), p0, z0.VnD());
  __ Incp(z15.VnD(), p0, z1.VnD());
  __ Incp(z16.VnS(), p0, z2.VnS());
  __ Incp(z17.VnH(), p0, z3.VnH());

  // Also test destructive forms.
  __ Mov(z4, z0);
  __ Mov(z5, z1);
  __ Mov(z6, z2);
  __ Mov(z7, z3);

  __ Decp(z0.VnD(), p0);
  __ Decp(z1.VnD(), p0);
  __ Decp(z2.VnS(), p0);
  __ Decp(z3.VnH(), p0);

  __ Incp(z4.VnD(), p0);
  __ Incp(z5.VnD(), p0);
  __ Incp(z6.VnS(), p0);
  __ Incp(z7.VnH(), p0);

  END();
  if (CAN_RUN()) {
    RUN();

    // z0_inputs[...] - number of active D lanes (2)
    int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());

    // z1_inputs[...] - number of active D lanes (2)
    int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());

    // z2_inputs[...] - number of active S lanes (3)
    int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());

    // z3_inputs[...] - number of active H lanes (5)
    int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());

    // z0_inputs[...] + number of active D lanes (2)
    uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());

    // z1_inputs[...] + number of active D lanes (2)
    uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());

    // z2_inputs[...] + number of active S lanes (3)
    uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());

    // z3_inputs[...] + number of active H lanes (5)
    uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());

    // Check that the non-destructive macros produced the same results.
    ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
  }
}

TEST_SVE(sve_inc_dec_ptrue_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Dup(z0.VnD(), 0);
  __ Decp(z0.VnD(), p15);

  __ Dup(z1.VnS(), 0);
  __ Decp(z1.VnS(), p15);

  __ Dup(z2.VnH(), 0);
  __ Decp(z2.VnH(), p15);

  __ Dup(z3.VnD(), 0);
  __ Incp(z3.VnD(), p15);

  __ Dup(z4.VnS(), 0);
  __ Incp(z4.VnS(), p15);

  __ Dup(z5.VnH(), 0);
  __ Incp(z5.VnH(), p15);

  END();
  if (CAN_RUN()) {
    RUN();

    int d_lane_count = core.GetSVELaneCount(kDRegSize);
    int s_lane_count = core.GetSVELaneCount(kSRegSize);
    int h_lane_count = core.GetSVELaneCount(kHRegSize);

    for (int i = 0; i < d_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
    }

    for (int i = 0; i < s_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
      ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
    }

    for (int i = 0; i < h_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
      ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
    }
  }
}
1436
Jacob Bramleye8289202019-07-31 11:25:23 +01001437TEST_SVE(sve_sqinc_sqdec_p_vector) {
1438 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001439 START();
1440
1441 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1442 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1443 Initialise(&masm, p0.VnB(), p0_inputs);
1444
1445 // Check that saturation behaves correctly.
1446
1447 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1448 InsrHelper(&masm, z0.VnD(), z0_inputs);
1449
1450 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1451 InsrHelper(&masm, z1.VnD(), z1_inputs);
1452
1453 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1454 InsrHelper(&masm, z2.VnS(), z2_inputs);
1455
1456 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1457 InsrHelper(&masm, z3.VnH(), z3_inputs);
1458
1459 // The MacroAssembler implements non-destructive operations using movprfx.
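// (For example, `Sqdecp(z10.VnD(), p0, z0.VnD())` is expected to expand to a
// `movprfx` of z0 into z10 followed by a destructive `sqdecp`.)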
1460 __ Sqdecp(z10.VnD(), p0, z0.VnD());
1461 __ Sqdecp(z11.VnD(), p0, z1.VnD());
1462 __ Sqdecp(z12.VnS(), p0, z2.VnS());
1463 __ Sqdecp(z13.VnH(), p0, z3.VnH());
1464
1465 __ Sqincp(z14.VnD(), p0, z0.VnD());
1466 __ Sqincp(z15.VnD(), p0, z1.VnD());
1467 __ Sqincp(z16.VnS(), p0, z2.VnS());
1468 __ Sqincp(z17.VnH(), p0, z3.VnH());
1469
1470 // Also test destructive forms.
1471 __ Mov(z4, z0);
1472 __ Mov(z5, z1);
1473 __ Mov(z6, z2);
1474 __ Mov(z7, z3);
1475
1476 __ Sqdecp(z0.VnD(), p0);
1477 __ Sqdecp(z1.VnD(), p0);
1478 __ Sqdecp(z2.VnS(), p0);
1479 __ Sqdecp(z3.VnH(), p0);
1480
1481 __ Sqincp(z4.VnD(), p0);
1482 __ Sqincp(z5.VnD(), p0);
1483 __ Sqincp(z6.VnS(), p0);
1484 __ Sqincp(z7.VnH(), p0);
1485
1486 END();
1487 if (CAN_RUN()) {
1488 RUN();
1489
1490 // z0_inputs[...] - number of active D lanes (2)
1491 int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
1492 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1493
1494 // z1_inputs[...] - number of active D lanes (2)
1495 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1496 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1497
1498 // z2_inputs[...] - number of active S lanes (3)
1499 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
1500 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1501
1502 // z3_inputs[...] - number of active H lanes (5)
1503 int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
1504 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1505
1506 // z0_inputs[...] + number of active D lanes (2)
1507 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1508 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1509
1510 // z1_inputs[...] + number of active D lanes (2)
1511 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
1512 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1513
1514 // z2_inputs[...] + number of active S lanes (3)
1515 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
1516 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1517
1518 // z3_inputs[...] + number of active H lanes (5)
1519 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
1520 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1521
1522 // Check that the non-destructive macros produced the same results.
1523 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1524 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1525 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1526 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1527 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1528 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1529 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1530 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1531 }
1532}
1533
Jacob Bramleye8289202019-07-31 11:25:23 +01001534TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
1535 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001536 START();
1537
1538 // With an all-true predicate, these instructions increment or decrement by
1539 // the vector length.
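// (Starting from zero, these cannot saturate: even the maximum 2048-bit VL gives
// only 128 H lanes, well within the range of an int16_t.)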
Jacob Bramley0ce75842019-07-17 18:12:50 +01001540 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001541
1542 __ Dup(z0.VnD(), 0);
1543 __ Sqdecp(z0.VnD(), p15);
1544
1545 __ Dup(z1.VnS(), 0);
1546 __ Sqdecp(z1.VnS(), p15);
1547
1548 __ Dup(z2.VnH(), 0);
1549 __ Sqdecp(z2.VnH(), p15);
1550
1551 __ Dup(z3.VnD(), 0);
1552 __ Sqincp(z3.VnD(), p15);
1553
1554 __ Dup(z4.VnS(), 0);
1555 __ Sqincp(z4.VnS(), p15);
1556
1557 __ Dup(z5.VnH(), 0);
1558 __ Sqincp(z5.VnH(), p15);
1559
1560 END();
1561 if (CAN_RUN()) {
1562 RUN();
1563
1564 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1565 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1566 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1567
1568 for (int i = 0; i < d_lane_count; i++) {
1569 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1570 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1571 }
1572
1573 for (int i = 0; i < s_lane_count; i++) {
1574 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1575 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1576 }
1577
1578 for (int i = 0; i < h_lane_count; i++) {
1579 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1580 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1581 }
1582 }
1583}
1584
Jacob Bramleye8289202019-07-31 11:25:23 +01001585TEST_SVE(sve_uqinc_uqdec_p_vector) {
1586 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001587 START();
1588
1589 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1590 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1591 Initialise(&masm, p0.VnB(), p0_inputs);
1592
1593 // Check that saturation behaves correctly.
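// Uqdecp saturates at zero and Uqincp at the unsigned maximum for the lane size;
// for example, 0 minus the two active D lanes stays 0, and UINT64_MAX plus two
// stays UINT64_MAX.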
1594
1595 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
1596 InsrHelper(&masm, z0.VnD(), z0_inputs);
1597
1598 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
1599 InsrHelper(&masm, z1.VnD(), z1_inputs);
1600
1601 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
1602 InsrHelper(&masm, z2.VnS(), z2_inputs);
1603
1604 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
1605 InsrHelper(&masm, z3.VnH(), z3_inputs);
1606
1607 // The MacroAssembler implements non-destructive operations using movprfx.
1608 __ Uqdecp(z10.VnD(), p0, z0.VnD());
1609 __ Uqdecp(z11.VnD(), p0, z1.VnD());
1610 __ Uqdecp(z12.VnS(), p0, z2.VnS());
1611 __ Uqdecp(z13.VnH(), p0, z3.VnH());
1612
1613 __ Uqincp(z14.VnD(), p0, z0.VnD());
1614 __ Uqincp(z15.VnD(), p0, z1.VnD());
1615 __ Uqincp(z16.VnS(), p0, z2.VnS());
1616 __ Uqincp(z17.VnH(), p0, z3.VnH());
1617
1618 // Also test destructive forms.
1619 __ Mov(z4, z0);
1620 __ Mov(z5, z1);
1621 __ Mov(z6, z2);
1622 __ Mov(z7, z3);
1623
1624 __ Uqdecp(z0.VnD(), p0);
1625 __ Uqdecp(z1.VnD(), p0);
1626 __ Uqdecp(z2.VnS(), p0);
1627 __ Uqdecp(z3.VnH(), p0);
1628
1629 __ Uqincp(z4.VnD(), p0);
1630 __ Uqincp(z5.VnD(), p0);
1631 __ Uqincp(z6.VnS(), p0);
1632 __ Uqincp(z7.VnH(), p0);
1633
1634 END();
1635 if (CAN_RUN()) {
1636 RUN();
1637
1638 // z0_inputs[...] - number of active D lanes (2)
1639 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
1640 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1641
1642 // z1_inputs[...] - number of active D lanes (2)
1643 uint64_t z1_expected[] = {0x12345678ffffff28,
1644 0,
1645 0xfffffffffffffffd,
1646 0x7ffffffffffffffd};
1647 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1648
1649 // z2_inputs[...] - number of active S lanes (3)
1650 uint32_t z2_expected[] =
1651 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
1652 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1653
1654 // z3_inputs[...] - number of active H lanes (5)
1655 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
1656 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1657
1658 // z0_inputs[...] + number of active D lanes (2)
1659 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1660 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1661
1662 // z1_inputs[...] + number of active D lanes (2)
1663 uint64_t z5_expected[] = {0x12345678ffffff2c,
1664 2,
1665 UINT64_MAX,
1666 0x8000000000000001};
1667 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1668
1669 // z2_inputs[...] + number of active S lanes (3)
1670 uint32_t z6_expected[] =
1671 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
1672 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1673
1674 // z3_inputs[...] + number of active H lanes (5)
1675 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
1676 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1677
1678 // Check that the non-destructive macros produced the same results.
1679 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1680 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1681 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1682 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1683 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1684 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1685 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1686 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1687 }
1688}
1689
Jacob Bramleye8289202019-07-31 11:25:23 +01001690TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
1691 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001692 START();
1693
1694 // With an all-true predicate, these instructions increment or decrement by
1695 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001696 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001697
1698 __ Mov(x0, 0x1234567800000000);
1699 __ Mov(x1, 0x12340000);
1700 __ Mov(x2, 0x1200);
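// For example, with a 2048-bit VL there are 128 H lanes, so z2 below is expected to
// become 0x1200 - 0x80 = 0x1180, and z5 to become 0x1280.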
1701
1702 __ Dup(z0.VnD(), x0);
1703 __ Uqdecp(z0.VnD(), p15);
1704
1705 __ Dup(z1.VnS(), x1);
1706 __ Uqdecp(z1.VnS(), p15);
1707
1708 __ Dup(z2.VnH(), x2);
1709 __ Uqdecp(z2.VnH(), p15);
1710
1711 __ Dup(z3.VnD(), x0);
1712 __ Uqincp(z3.VnD(), p15);
1713
1714 __ Dup(z4.VnS(), x1);
1715 __ Uqincp(z4.VnS(), p15);
1716
1717 __ Dup(z5.VnH(), x2);
1718 __ Uqincp(z5.VnH(), p15);
1719
1720 END();
1721 if (CAN_RUN()) {
1722 RUN();
1723
1724 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1725 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1726 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1727
1728 for (int i = 0; i < d_lane_count; i++) {
1729 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
1730 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
1731 }
1732
1733 for (int i = 0; i < s_lane_count; i++) {
1734 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
1735 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
1736 }
1737
1738 for (int i = 0; i < h_lane_count; i++) {
1739 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
1740 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
1741 }
1742 }
1743}
1744
Jacob Bramleye8289202019-07-31 11:25:23 +01001745TEST_SVE(sve_index) {
1746 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleycd8148c2019-07-11 18:43:20 +01001747 START();
1748
1749 // Simple cases.
1750 __ Index(z0.VnB(), 0, 1);
1751 __ Index(z1.VnH(), 1, 1);
1752 __ Index(z2.VnS(), 2, 1);
1753 __ Index(z3.VnD(), 3, 1);
1754
1755 // Synthesised immediates.
1756 __ Index(z4.VnB(), 42, -1);
1757 __ Index(z5.VnH(), -1, 42);
1758 __ Index(z6.VnS(), 42, 42);
1759
1760 // Register arguments.
1761 __ Mov(x0, 42);
1762 __ Mov(x1, -3);
1763 __ Index(z10.VnD(), x0, x1);
1764 __ Index(z11.VnB(), w0, w1);
1765 // The register size should correspond to the lane size, but VIXL allows any
1766 // register at least as big as the lane size.
1767 __ Index(z12.VnB(), x0, x1);
1768 __ Index(z13.VnH(), w0, x1);
1769 __ Index(z14.VnS(), x0, w1);
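// (These are expected to produce the same values as the W-register forms; only the
// low bits relevant to each lane size are read.)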
1770
1771 // Integer overflow.
1772 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
1773 __ Index(z21.VnH(), 7, -3);
1774 __ Index(z22.VnS(), INT32_MAX - 2, 1);
1775 __ Index(z23.VnD(), INT64_MIN + 6, -7);
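// For example, z20 starts at 0xfd and steps by 2, wrapping modulo 256:
// 0xfd, 0xff, 0x01, 0x03, ...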
1776
1777 END();
1778
1779 if (CAN_RUN()) {
1780 RUN();
1781
1782 int b_lane_count = core.GetSVELaneCount(kBRegSize);
1783 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1784 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1785 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1786
1787 uint64_t b_mask = GetUintMask(kBRegSize);
1788 uint64_t h_mask = GetUintMask(kHRegSize);
1789 uint64_t s_mask = GetUintMask(kSRegSize);
1790 uint64_t d_mask = GetUintMask(kDRegSize);
1791
1792 // Simple cases.
1793 for (int i = 0; i < b_lane_count; i++) {
1794 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
1795 }
1796 for (int i = 0; i < h_lane_count; i++) {
1797 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
1798 }
1799 for (int i = 0; i < s_lane_count; i++) {
1800 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
1801 }
1802 for (int i = 0; i < d_lane_count; i++) {
1803 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
1804 }
1805
1806 // Synthesised immediates.
1807 for (int i = 0; i < b_lane_count; i++) {
1808 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
1809 }
1810 for (int i = 0; i < h_lane_count; i++) {
1811 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
1812 }
1813 for (int i = 0; i < s_lane_count; i++) {
1814 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
1815 }
1816
1817 // Register arguments.
1818 for (int i = 0; i < d_lane_count; i++) {
1819 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
1820 }
1821 for (int i = 0; i < b_lane_count; i++) {
1822 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
1823 }
1824 for (int i = 0; i < b_lane_count; i++) {
1825 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
1826 }
1827 for (int i = 0; i < h_lane_count; i++) {
1828 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
1829 }
1830 for (int i = 0; i < s_lane_count; i++) {
1831 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
1832 }
1833
1834 // Integer overflow.
1835 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
1836 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
1837 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
1838 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
1839 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
1840 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
1841 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
1842 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
1843 }
1844}
1845
TatWai Chongc844bb22019-06-10 15:32:53 -07001846TEST(sve_int_compare_count_and_limit_scalars) {
1847 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1848 START();
1849
1850 __ Mov(w20, 0xfffffffd);
1851 __ Mov(w21, 0xffffffff);
1852
1853 __ Whilele(p0.VnB(), w20, w21);
1854 __ Mrs(x0, NZCV);
1855 __ Whilele(p1.VnH(), w20, w21);
1856 __ Mrs(x1, NZCV);
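// As signed values w20 and w21 are -3 and -1, so each `whilele` above is expected to
// produce three active elements (for -3, -2 and -1), starting at lane 0.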
1857
1858 __ Mov(w20, 0xffffffff);
1859 __ Mov(w21, 0x00000000);
1860
1861 __ Whilelt(p2.VnS(), w20, w21);
1862 __ Mrs(x2, NZCV);
1863 __ Whilelt(p3.VnD(), w20, w21);
1864 __ Mrs(x3, NZCV);
1865
1866 __ Mov(w20, 0xfffffffd);
1867 __ Mov(w21, 0xffffffff);
1868
1869 __ Whilels(p4.VnB(), w20, w21);
1870 __ Mrs(x4, NZCV);
1871 __ Whilels(p5.VnH(), w20, w21);
1872 __ Mrs(x5, NZCV);
1873
1874 __ Mov(w20, 0xffffffff);
1875 __ Mov(w21, 0x00000000);
1876
1877 __ Whilelo(p6.VnS(), w20, w21);
1878 __ Mrs(x6, NZCV);
1879 __ Whilelo(p7.VnD(), w20, w21);
1880 __ Mrs(x7, NZCV);
1881
1882 __ Mov(x20, 0xfffffffffffffffd);
1883 __ Mov(x21, 0xffffffffffffffff);
1884
1885 __ Whilele(p8.VnB(), x20, x21);
1886 __ Mrs(x8, NZCV);
1887 __ Whilele(p9.VnH(), x20, x21);
1888 __ Mrs(x9, NZCV);
1889
1890 __ Mov(x20, 0xffffffffffffffff);
1891 __ Mov(x21, 0x0000000000000000);
1892
1893 __ Whilelt(p10.VnS(), x20, x21);
1894 __ Mrs(x10, NZCV);
1895 __ Whilelt(p11.VnD(), x20, x21);
1896 __ Mrs(x11, NZCV);
1897
1898 __ Mov(x20, 0xfffffffffffffffd);
1899 __ Mov(x21, 0xffffffffffffffff);
1900
1901 __ Whilels(p12.VnB(), x20, x21);
1902 __ Mrs(x12, NZCV);
1903 __ Whilels(p13.VnH(), x20, x21);
1904 __ Mrs(x13, NZCV);
1905
1906 __ Mov(x20, 0xffffffffffffffff);
1907 __ Mov(x21, 0x0000000000000000);
1908
1909 __ Whilelo(p14.VnS(), x20, x21);
1910 __ Mrs(x14, NZCV);
1911 __ Whilelo(p15.VnD(), x20, x21);
1912 __ Mrs(x15, NZCV);
1913
1914 END();
1915
1916 if (CAN_RUN()) {
1917 RUN();
1918
1919 // 0b...00000000'00000111
1920 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1921 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1922
1923 // 0b...00000000'00010101
1924 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1925 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
1926
1927 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
1928 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
1929
1930 int p3_expected[] = {0x00, 0x01};
1931 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
1932
1933 // 0b...11111111'11111111
1934 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1935 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1936
1937 // 0b...01010101'01010101
1938 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1939 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1940
1941 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
1942 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1943
1944 int p7_expected[] = {0x00, 0x00};
1945 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1946
1947 // 0b...00000000'00000111
1948 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1949 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1950
1951 // 0b...00000000'00010101
1952 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1953 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1954
1955 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
1956 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1957
1958 int p11_expected[] = {0x00, 0x01};
1959 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
1960
1961 // 0b...11111111'11111111
1962 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1963 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
1964
1965 // 0b...01010101'01010101
1966 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1967 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
1968
1969 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
1970 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
1971
1972 int p15_expected[] = {0x00, 0x00};
1973 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
1974
1975 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
1976 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
1977 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
1978 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
1979 ASSERT_EQUAL_32(SVEFirstFlag, w4);
1980 ASSERT_EQUAL_32(SVEFirstFlag, w5);
1981 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
1982 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
1983 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
1984 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
1985 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1986 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
1987 ASSERT_EQUAL_32(SVEFirstFlag, w12);
1988 ASSERT_EQUAL_32(SVEFirstFlag, w13);
1989 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
1990 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
1991 }
1992}
1993
TatWai Chong302729c2019-06-14 16:18:51 -07001994TEST(sve_int_compare_vectors_signed_imm) {
1995 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1996 START();
1997
1998 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
1999 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2000 InsrHelper(&masm, z13.VnB(), z13_inputs);
2001 Initialise(&masm, p0.VnB(), mask_inputs1);
2002
2003 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2004 __ Mrs(x2, NZCV);
2005 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
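// Lane 4 also holds -15, but its governing bit is clear and zeroing predication is
// used, so only lane 0 should become active in p2.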
2006
2007 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2008 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2009 InsrHelper(&masm, z14.VnH(), z14_inputs);
2010 Initialise(&masm, p0.VnH(), mask_inputs2);
2011
2012 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2013 __ Mrs(x4, NZCV);
2014 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2015
2016 int z15_inputs[] = {0, 1, -1, INT_MIN};
2017 int mask_inputs3[] = {0, 1, 1, 1};
2018 InsrHelper(&masm, z15.VnS(), z15_inputs);
2019 Initialise(&masm, p0.VnS(), mask_inputs3);
2020
2021 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2022 __ Mrs(x6, NZCV);
2023 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2024
2025 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2026 __ Mrs(x8, NZCV);
2027 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2028
2029 int64_t z16_inputs[] = {0, -1};
2030 int mask_inputs4[] = {1, 1};
2031 InsrHelper(&masm, z16.VnD(), z16_inputs);
2032 Initialise(&masm, p0.VnD(), mask_inputs4);
2033
2034 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2035 __ Mrs(x10, NZCV);
2036 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2037
2038 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2039 __ Mrs(x12, NZCV);
2040 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2041
2042 END();
2043
2044 if (CAN_RUN()) {
2045 RUN();
2046
2047 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2048 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2049
2050 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2051 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2052
2053 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2054 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2055
2056 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2057 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2058
2059 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2060 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2061
2062 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2063 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2064
2065 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2066 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2067
2068 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2069 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2070
2071 int p10_expected[] = {0x00, 0x01};
2072 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2073
2074 int p11_expected[] = {0x00, 0x00};
2075 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2076
2077 int p12_expected[] = {0x01, 0x00};
2078 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2079
2080 int p13_expected[] = {0x01, 0x01};
2081 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2082
2083 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2084 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2085 ASSERT_EQUAL_32(NoFlag, w6);
2086 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2087 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2088 ASSERT_EQUAL_32(NoFlag, w12);
2089 }
2090}
2091
2092TEST(sve_int_compare_vectors_unsigned_imm) {
2093 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2094 START();
2095
2096 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2097 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2098 InsrHelper(&masm, z13.VnB(), src1_inputs);
2099 Initialise(&masm, p0.VnB(), mask_inputs1);
2100
2101 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2102 __ Mrs(x2, NZCV);
2103 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
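// `Cmphi` is an unsigned comparison, so 0xf7 and 0x8f compare higher than 0x0f even
// though they are negative when viewed as int8_t.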
2104
2105 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2106 int mask_inputs2[] = {1, 1, 1, 1, 0};
2107 InsrHelper(&masm, z13.VnH(), src2_inputs);
2108 Initialise(&masm, p0.VnH(), mask_inputs2);
2109
2110 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2111 __ Mrs(x4, NZCV);
2112 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2113
2114 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2115 int mask_inputs3[] = {1, 1, 1, 1};
2116 InsrHelper(&masm, z13.VnS(), src3_inputs);
2117 Initialise(&masm, p0.VnS(), mask_inputs3);
2118
2119 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2120 __ Mrs(x6, NZCV);
2121 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2122
2123 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2124 int mask_inputs4[] = {1, 1};
2125 InsrHelper(&masm, z13.VnD(), src4_inputs);
2126 Initialise(&masm, p0.VnD(), mask_inputs4);
2127
2128 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2129 __ Mrs(x8, NZCV);
2130 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2131
2132 END();
2133
2134 if (CAN_RUN()) {
2135 RUN();
2136
2137 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2138 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2139
2140 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2141 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2142
2143 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2144 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2145
2146 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2147 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2148
2149 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2150 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2151
2152 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2153 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2154
2155 int p8_expected[] = {0x00, 0x01};
2156 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2157
2158 int p9_expected[] = {0x00, 0x01};
2159 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2160
2161 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2162 ASSERT_EQUAL_32(NoFlag, w4);
2163 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2164 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2165 }
2166}
2167
TatWai Chongc844bb22019-06-10 15:32:53 -07002168TEST(sve_int_compare_conditionally_terminate_scalars) {
2169 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2170 START();
2171
2172 __ Mov(x0, 0xfedcba9887654321);
2173 __ Mov(x1, 0x1000100010001000);
2174
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002175 // Initialise Z and C. These are preserved by cterm*, and the V flag is set to
2176 // !C if the condition does not hold.
2177 __ Mov(x10, NoFlag);
2178 __ Msr(NZCV, x10);
2179
TatWai Chongc844bb22019-06-10 15:32:53 -07002180 __ Ctermeq(w0, w0);
2181 __ Mrs(x2, NZCV);
2182 __ Ctermeq(x0, x1);
2183 __ Mrs(x3, NZCV);
2184 __ Ctermne(x0, x0);
2185 __ Mrs(x4, NZCV);
2186 __ Ctermne(w0, w1);
2187 __ Mrs(x5, NZCV);
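// w0 == w0, so the first `Ctermeq` holds and should report SVEFirstFlag; x0 != x1,
// so the second does not hold and should set V (C is clear here).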
2188
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002189 // As above, but with all flags initially set.
2190 __ Mov(x10, NZCVFlag);
2191 __ Msr(NZCV, x10);
2192
2193 __ Ctermeq(w0, w0);
2194 __ Mrs(x6, NZCV);
2195 __ Ctermeq(x0, x1);
2196 __ Mrs(x7, NZCV);
2197 __ Ctermne(x0, x0);
2198 __ Mrs(x8, NZCV);
2199 __ Ctermne(w0, w1);
2200 __ Mrs(x9, NZCV);
2201
TatWai Chongc844bb22019-06-10 15:32:53 -07002202 END();
2203
2204 if (CAN_RUN()) {
2205 RUN();
2206
2207 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2208 ASSERT_EQUAL_32(VFlag, w3);
2209 ASSERT_EQUAL_32(VFlag, w4);
2210 ASSERT_EQUAL_32(SVEFirstFlag, w5);
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002211
2212 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w6);
2213 ASSERT_EQUAL_32(ZCFlag, w7);
2214 ASSERT_EQUAL_32(ZCFlag, w8);
2215 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w9);
TatWai Chongc844bb22019-06-10 15:32:53 -07002216 }
2217}
2218
Jacob Bramley0ce75842019-07-17 18:12:50 +01002219// Work out what the architectural `PredTest` pseudocode should produce for the
2220// given result and governing predicate.
2221template <typename Tg, typename Td, int N>
2222static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2223 const Tg (&pg)[N],
2224 int vl) {
2225 int first = -1;
2226 int last = -1;
2227 bool any_active = false;
2228
2229 // Only consider potentially-active lanes.
2230 int start = (N > vl) ? (N - vl) : 0;
2231 for (int i = start; i < N; i++) {
2232 if ((pg[i] & 1) == 1) {
2233 // Look for the first and last active lanes.
2234 // Note that the 'first' lane is the one with the highest index.
2235 if (last < 0) last = i;
2236 first = i;
2237 // Look for any active lanes that are also active in pd.
2238 if ((pd[i] & 1) == 1) any_active = true;
2239 }
2240 }
2241
2242 uint32_t flags = 0;
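// PredTest reports FIRST in N, NONE in Z and !LAST in C, with V cleared; the
// SVE*Flag constants below name those bits.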
2243 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2244 if (!any_active) flags |= SVENoneFlag;
2245 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2246 return static_cast<StatusFlags>(flags);
2247}
2248
2249typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2250 const PRegister& pg,
2251 const PRegisterWithLaneSize& pn);
2252template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002253static void PfirstPnextHelper(Test* config,
2254 PfirstPnextFn macro,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002255 unsigned lane_size_in_bits,
2256 const Tg& pg_inputs,
2257 const Tn& pn_inputs,
2258 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002259 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002260 START();
2261
2262 PRegister pg = p15;
2263 PRegister pn = p14;
2264 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2265 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2266
2267 // Initialise NZCV to an impossible value, to check that we actually write it.
2268 __ Mov(x10, NZCVFlag);
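// (Pfirst and Pnext never set the V flag, so reading back NZCVFlag afterwards would
// mean the flags were not written.)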
2269
2270 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2271 // the Assembler.
2272 __ Msr(NZCV, x10);
2273 __ Mov(p0, pn);
2274 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2275 pg,
2276 p0.WithLaneSize(lane_size_in_bits));
2277 __ Mrs(x0, NZCV);
2278
2279 // The MacroAssembler supports non-destructive use.
2280 __ Msr(NZCV, x10);
2281 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2282 pg,
2283 pn.WithLaneSize(lane_size_in_bits));
2284 __ Mrs(x1, NZCV);
2285
2286 // If pd.Aliases(pg), the macro requires a scratch register.
2287 {
2288 UseScratchRegisterScope temps(&masm);
2289 temps.Include(p13);
2290 __ Msr(NZCV, x10);
2291 __ Mov(p2, p15);
2292 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2293 p2,
2294 pn.WithLaneSize(lane_size_in_bits));
2295 __ Mrs(x2, NZCV);
2296 }
2297
2298 END();
2299
2300 if (CAN_RUN()) {
2301 RUN();
2302
2303 // Check that the inputs weren't modified.
2304 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2305 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2306
2307 // Check the primary operation.
2308 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2309 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2310 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2311
2312 // Check that the flags were properly set.
2313 StatusFlags nzcv_expected =
2314 GetPredTestFlags(pd_expected,
2315 pg_inputs,
2316 core.GetSVELaneCount(kBRegSize));
2317 ASSERT_EQUAL_64(nzcv_expected, x0);
2318 ASSERT_EQUAL_64(nzcv_expected, x1);
2319 ASSERT_EQUAL_64(nzcv_expected, x2);
2320 }
2321}
2322
2323template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002324static void PfirstHelper(Test* config,
2325 const Tg& pg_inputs,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002326 const Tn& pn_inputs,
2327 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002328 PfirstPnextHelper(config,
2329 &MacroAssembler::Pfirst,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002330 kBRegSize, // pfirst only accepts B-sized lanes.
2331 pg_inputs,
2332 pn_inputs,
2333 pd_expected);
2334}
2335
2336template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002337static void PnextHelper(Test* config,
2338 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002339 const Tg& pg_inputs,
2340 const Tn& pn_inputs,
2341 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002342 PfirstPnextHelper(config,
2343 &MacroAssembler::Pnext,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002344 lane_size_in_bits,
2345 pg_inputs,
2346 pn_inputs,
2347 pd_expected);
2348}
2349
Jacob Bramleye8289202019-07-31 11:25:23 +01002350TEST_SVE(sve_pfirst) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002351 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2352 // large VL), but few enough to make the test easy to read.
2353 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2354 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2355 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2356 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2357 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2358 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2359
2360 // Pfirst finds the first active lane in pg, and activates the corresponding
2361 // lane in pn (if it isn't already active).
2362
2363 // The first active lane in in1 is here. |
2364 // v
2365 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2366 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2367 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2368 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002369 PfirstHelper(config, in1, in0, exp10);
2370 PfirstHelper(config, in1, in2, exp12);
2371 PfirstHelper(config, in1, in3, exp13);
2372 PfirstHelper(config, in1, in4, exp14);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002373
2374 // The first active lane in in2 is here. |
2375 // v
2376 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2377 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2378 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2379 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002380 PfirstHelper(config, in2, in0, exp20);
2381 PfirstHelper(config, in2, in1, exp21);
2382 PfirstHelper(config, in2, in3, exp23);
2383 PfirstHelper(config, in2, in4, exp24);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002384
2385 // The first active lane in in3 is here. |
2386 // v
2387 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2388 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2389 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2390 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002391 PfirstHelper(config, in3, in0, exp30);
2392 PfirstHelper(config, in3, in1, exp31);
2393 PfirstHelper(config, in3, in2, exp32);
2394 PfirstHelper(config, in3, in4, exp34);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002395
2396 // | The first active lane in in4 is here.
2397 // v
2398 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2399 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2400 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2401 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002402 PfirstHelper(config, in4, in0, exp40);
2403 PfirstHelper(config, in4, in1, exp41);
2404 PfirstHelper(config, in4, in2, exp42);
2405 PfirstHelper(config, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002406
2407 // If pg is all inactive, the input is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002408 PfirstHelper(config, in0, in0, in0);
2409 PfirstHelper(config, in0, in1, in1);
2410 PfirstHelper(config, in0, in2, in2);
2411 PfirstHelper(config, in0, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002412
2413 // If the values of pg and pn match, the value is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002414 PfirstHelper(config, in0, in0, in0);
2415 PfirstHelper(config, in1, in1, in1);
2416 PfirstHelper(config, in2, in2, in2);
2417 PfirstHelper(config, in3, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002418}
2419
Jacob Bramleye8289202019-07-31 11:25:23 +01002420TEST_SVE(sve_pfirst_alias) {
2421 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002422 START();
2423
2424 // Check that the Simulator behaves correctly when all arguments are aliased.
2425 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2426 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2427 int in_s[] = {0, 1, 1, 0};
2428 int in_d[] = {1, 1};
2429
2430 Initialise(&masm, p0.VnB(), in_b);
2431 Initialise(&masm, p1.VnH(), in_h);
2432 Initialise(&masm, p2.VnS(), in_s);
2433 Initialise(&masm, p3.VnD(), in_d);
2434
2435 // Initialise NZCV to an impossible value, to check that we actually write it.
2436 __ Mov(x10, NZCVFlag);
2437
2438 __ Msr(NZCV, x10);
2439 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2440 __ Mrs(x0, NZCV);
2441
2442 __ Msr(NZCV, x10);
2443 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
2444 __ Mrs(x1, NZCV);
2445
2446 __ Msr(NZCV, x10);
2447 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
2448 __ Mrs(x2, NZCV);
2449
2450 __ Msr(NZCV, x10);
2451 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
2452 __ Mrs(x3, NZCV);
2453
2454 END();
2455
2456 if (CAN_RUN()) {
2457 RUN();
2458
2459 // The first active lane of pg is already active in pdn, so the P register should
2460 // be unchanged.
2461 ASSERT_EQUAL_SVE(in_b, p0.VnB());
2462 ASSERT_EQUAL_SVE(in_h, p1.VnH());
2463 ASSERT_EQUAL_SVE(in_s, p2.VnS());
2464 ASSERT_EQUAL_SVE(in_d, p3.VnD());
2465
2466 ASSERT_EQUAL_64(SVEFirstFlag, x0);
2467 ASSERT_EQUAL_64(SVEFirstFlag, x1);
2468 ASSERT_EQUAL_64(SVEFirstFlag, x2);
2469 ASSERT_EQUAL_64(SVEFirstFlag, x3);
2470 }
2471}
2472
Jacob Bramleye8289202019-07-31 11:25:23 +01002473TEST_SVE(sve_pnext_b) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002474 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2475 // (to check propagation if we have a large VL), but few enough to make the
2476 // test easy to read.
2477 // For now, we just use kPRegMinSize so that the test works anywhere.
2478 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2479 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2480 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2481 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
2482 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2483
2484 // Pnext activates the next element that is true in pg, after the last-active
2485 // element in pn. If all pn elements are false (as in in0), it starts looking
2486 // at element 0.
2487
2488 // There are no active lanes in in0, so the result is simply the first active
2489 // lane from pg.
2490 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2491 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2492 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2493 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2494 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2495
2496 // The last active lane in in1 is here. |
2497 // v
2498 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2499 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2500 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2501 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2502 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2503
2504 // | The last active lane in in2 is here.
2505 // v
2506 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2507 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2508 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2509 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2510 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2511
2512 // | The last active lane in in3 is here.
2513 // v
2514 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2515 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2516 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2517 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2518 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2519
2520 // | The last active lane in in4 is here.
2521 // v
2522 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2523 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2524 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2525 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2526 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2527
Jacob Bramleye8289202019-07-31 11:25:23 +01002528 PnextHelper(config, kBRegSize, in0, in0, exp00);
2529 PnextHelper(config, kBRegSize, in1, in0, exp10);
2530 PnextHelper(config, kBRegSize, in2, in0, exp20);
2531 PnextHelper(config, kBRegSize, in3, in0, exp30);
2532 PnextHelper(config, kBRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002533
Jacob Bramleye8289202019-07-31 11:25:23 +01002534 PnextHelper(config, kBRegSize, in0, in1, exp01);
2535 PnextHelper(config, kBRegSize, in1, in1, exp11);
2536 PnextHelper(config, kBRegSize, in2, in1, exp21);
2537 PnextHelper(config, kBRegSize, in3, in1, exp31);
2538 PnextHelper(config, kBRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002539
Jacob Bramleye8289202019-07-31 11:25:23 +01002540 PnextHelper(config, kBRegSize, in0, in2, exp02);
2541 PnextHelper(config, kBRegSize, in1, in2, exp12);
2542 PnextHelper(config, kBRegSize, in2, in2, exp22);
2543 PnextHelper(config, kBRegSize, in3, in2, exp32);
2544 PnextHelper(config, kBRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002545
Jacob Bramleye8289202019-07-31 11:25:23 +01002546 PnextHelper(config, kBRegSize, in0, in3, exp03);
2547 PnextHelper(config, kBRegSize, in1, in3, exp13);
2548 PnextHelper(config, kBRegSize, in2, in3, exp23);
2549 PnextHelper(config, kBRegSize, in3, in3, exp33);
2550 PnextHelper(config, kBRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002551
Jacob Bramleye8289202019-07-31 11:25:23 +01002552 PnextHelper(config, kBRegSize, in0, in4, exp04);
2553 PnextHelper(config, kBRegSize, in1, in4, exp14);
2554 PnextHelper(config, kBRegSize, in2, in4, exp24);
2555 PnextHelper(config, kBRegSize, in3, in4, exp34);
2556 PnextHelper(config, kBRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002557}
2558
Jacob Bramleye8289202019-07-31 11:25:23 +01002559TEST_SVE(sve_pnext_h) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002560 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2561 // (to check propagation if we have a large VL), but few enough to make the
2562 // test easy to read.
2563 // For now, we just use kPRegMinSize so that the test works anywhere.
2564 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
2565 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
2566 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
2567 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
2568 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
2569
2570 // Pnext activates the next element that is true in pg, after the last-active
2571 // element in pn. If all pn elements are false (as in in0), it starts looking
2572 // at element 0.
2573 //
2574 // As for other SVE instructions, elements are only considered to be active if
2575 // the _first_ bit in each field is one. Other bits are ignored.
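// For example, the value 2 (0b10) in in1 and in2 has a clear low bit, so those
// elements are inactive.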
2576
2577 // There are no active lanes in in0, so the result is simply the first active
2578 // lane from pg.
2579 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
2580 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
2581 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
2582 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
2583 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
2584
2585 // | The last active lane in in1 is here.
2586 // v
2587 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
2588 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
2589 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
2590 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
2591 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
2592
2593 // | The last active lane in in2 is here.
2594 // v
2595 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
2596 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
2597 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
2598 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
2599 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
2600
2601 // | The last active lane in in3 is here.
2602 // v
2603 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
2604 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
2605 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
2606 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
2607 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
2608
2609 // | The last active lane in in4 is here.
2610 // v
2611 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
2612 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
2613 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
2614 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
2615 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
2616
Jacob Bramleye8289202019-07-31 11:25:23 +01002617 PnextHelper(config, kHRegSize, in0, in0, exp00);
2618 PnextHelper(config, kHRegSize, in1, in0, exp10);
2619 PnextHelper(config, kHRegSize, in2, in0, exp20);
2620 PnextHelper(config, kHRegSize, in3, in0, exp30);
2621 PnextHelper(config, kHRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002622
Jacob Bramleye8289202019-07-31 11:25:23 +01002623 PnextHelper(config, kHRegSize, in0, in1, exp01);
2624 PnextHelper(config, kHRegSize, in1, in1, exp11);
2625 PnextHelper(config, kHRegSize, in2, in1, exp21);
2626 PnextHelper(config, kHRegSize, in3, in1, exp31);
2627 PnextHelper(config, kHRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002628
Jacob Bramleye8289202019-07-31 11:25:23 +01002629 PnextHelper(config, kHRegSize, in0, in2, exp02);
2630 PnextHelper(config, kHRegSize, in1, in2, exp12);
2631 PnextHelper(config, kHRegSize, in2, in2, exp22);
2632 PnextHelper(config, kHRegSize, in3, in2, exp32);
2633 PnextHelper(config, kHRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002634
Jacob Bramleye8289202019-07-31 11:25:23 +01002635 PnextHelper(config, kHRegSize, in0, in3, exp03);
2636 PnextHelper(config, kHRegSize, in1, in3, exp13);
2637 PnextHelper(config, kHRegSize, in2, in3, exp23);
2638 PnextHelper(config, kHRegSize, in3, in3, exp33);
2639 PnextHelper(config, kHRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002640
Jacob Bramleye8289202019-07-31 11:25:23 +01002641 PnextHelper(config, kHRegSize, in0, in4, exp04);
2642 PnextHelper(config, kHRegSize, in1, in4, exp14);
2643 PnextHelper(config, kHRegSize, in2, in4, exp24);
2644 PnextHelper(config, kHRegSize, in3, in4, exp34);
2645 PnextHelper(config, kHRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002646}
2647
Jacob Bramleye8289202019-07-31 11:25:23 +01002648TEST_SVE(sve_pnext_s) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002649 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2650 // (to check propagation if we have a large VL), but few enough to make the
2651 // test easy to read.
2652 // For now, we just use kPRegMinSize so that the test works anywhere.
2653 int in0[] = {0xe, 0xc, 0x8, 0x0};
2654 int in1[] = {0x0, 0x2, 0x0, 0x1};
2655 int in2[] = {0x0, 0x1, 0xf, 0x0};
2656 int in3[] = {0xf, 0x0, 0x0, 0x0};
2657
2658 // Pnext activates the next element that is true in pg, after the last-active
2659 // element in pn. If all pn elements are false (as in in0), it starts looking
2660 // at element 0.
2661 //
2662 // As for other SVE instructions, elements are only considered to be active if
2663 // the _first_ bit in each field is one. Other bits are ignored.
2664
2665 // There are no active lanes in in0, so the result is simply the first active
2666 // lane from pg.
2667 int exp00[] = {0, 0, 0, 0};
2668 int exp10[] = {0, 0, 0, 1};
2669 int exp20[] = {0, 0, 1, 0};
2670 int exp30[] = {1, 0, 0, 0};
2671
2672 // | The last active lane in in1 is here.
2673 // v
2674 int exp01[] = {0, 0, 0, 0};
2675 int exp11[] = {0, 0, 0, 0};
2676 int exp21[] = {0, 0, 1, 0};
2677 int exp31[] = {1, 0, 0, 0};
2678
2679 // | The last active lane in in2 is here.
2680 // v
2681 int exp02[] = {0, 0, 0, 0};
2682 int exp12[] = {0, 0, 0, 0};
2683 int exp22[] = {0, 0, 0, 0};
2684 int exp32[] = {1, 0, 0, 0};
2685
2686 // | The last active lane in in3 is here.
2687 // v
2688 int exp03[] = {0, 0, 0, 0};
2689 int exp13[] = {0, 0, 0, 0};
2690 int exp23[] = {0, 0, 0, 0};
2691 int exp33[] = {0, 0, 0, 0};
2692
Jacob Bramleye8289202019-07-31 11:25:23 +01002693 PnextHelper(config, kSRegSize, in0, in0, exp00);
2694 PnextHelper(config, kSRegSize, in1, in0, exp10);
2695 PnextHelper(config, kSRegSize, in2, in0, exp20);
2696 PnextHelper(config, kSRegSize, in3, in0, exp30);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002697
Jacob Bramleye8289202019-07-31 11:25:23 +01002698 PnextHelper(config, kSRegSize, in0, in1, exp01);
2699 PnextHelper(config, kSRegSize, in1, in1, exp11);
2700 PnextHelper(config, kSRegSize, in2, in1, exp21);
2701 PnextHelper(config, kSRegSize, in3, in1, exp31);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002702
Jacob Bramleye8289202019-07-31 11:25:23 +01002703 PnextHelper(config, kSRegSize, in0, in2, exp02);
2704 PnextHelper(config, kSRegSize, in1, in2, exp12);
2705 PnextHelper(config, kSRegSize, in2, in2, exp22);
2706 PnextHelper(config, kSRegSize, in3, in2, exp32);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002707
Jacob Bramleye8289202019-07-31 11:25:23 +01002708 PnextHelper(config, kSRegSize, in0, in3, exp03);
2709 PnextHelper(config, kSRegSize, in1, in3, exp13);
2710 PnextHelper(config, kSRegSize, in2, in3, exp23);
2711 PnextHelper(config, kSRegSize, in3, in3, exp33);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002712}
2713
Jacob Bramleye8289202019-07-31 11:25:23 +01002714TEST_SVE(sve_pnext_d) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002715 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2716 // (to check propagation if we have a large VL), but few enough to make the
2717 // test easy to read.
2718 // For now, we just use kPRegMinSize so that the test works anywhere.
2719 int in0[] = {0xfe, 0xf0};
2720 int in1[] = {0x00, 0x55};
2721 int in2[] = {0x33, 0xff};
2722
2723 // Pnext activates the next element that is true in pg, after the last-active
2724 // element in pn. If all pn elements are false (as in in0), it starts looking
2725 // at element 0.
2726 //
2727 // As for other SVE instructions, elements are only considered to be active if
2728 // the _first_ bit in each field is one. Other bits are ignored.
2729
2730 // There are no active lanes in in0, so the result is simply the first active
2731 // lane from pg.
2732 int exp00[] = {0, 0};
2733 int exp10[] = {0, 1};
2734 int exp20[] = {0, 1};
2735
2736 // | The last active lane in in1 is here.
2737 // v
2738 int exp01[] = {0, 0};
2739 int exp11[] = {0, 0};
2740 int exp21[] = {1, 0};
2741
2742 // | The last active lane in in2 is here.
2743 // v
2744 int exp02[] = {0, 0};
2745 int exp12[] = {0, 0};
2746 int exp22[] = {0, 0};
2747
Jacob Bramleye8289202019-07-31 11:25:23 +01002748 PnextHelper(config, kDRegSize, in0, in0, exp00);
2749 PnextHelper(config, kDRegSize, in1, in0, exp10);
2750 PnextHelper(config, kDRegSize, in2, in0, exp20);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002751
Jacob Bramleye8289202019-07-31 11:25:23 +01002752 PnextHelper(config, kDRegSize, in0, in1, exp01);
2753 PnextHelper(config, kDRegSize, in1, in1, exp11);
2754 PnextHelper(config, kDRegSize, in2, in1, exp21);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002755
Jacob Bramleye8289202019-07-31 11:25:23 +01002756 PnextHelper(config, kDRegSize, in0, in2, exp02);
2757 PnextHelper(config, kDRegSize, in1, in2, exp12);
2758 PnextHelper(config, kDRegSize, in2, in2, exp22);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002759}
2760
Jacob Bramleye8289202019-07-31 11:25:23 +01002761TEST_SVE(sve_pnext_alias) {
2762 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002763 START();
2764
2765 // Check that the Simulator behaves correctly when all arguments are aliased.
2766 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2767 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2768 int in_s[] = {0, 1, 1, 0};
2769 int in_d[] = {1, 1};
2770
2771 Initialise(&masm, p0.VnB(), in_b);
2772 Initialise(&masm, p1.VnH(), in_h);
2773 Initialise(&masm, p2.VnS(), in_s);
2774 Initialise(&masm, p3.VnD(), in_d);
2775
2776 // Initialise NZCV to an impossible value, to check that we actually write it.
2777 __ Mov(x10, NZCVFlag);
2778
2779 __ Msr(NZCV, x10);
2780 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
2781 __ Mrs(x0, NZCV);
2782
2783 __ Msr(NZCV, x10);
2784 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
2785 __ Mrs(x1, NZCV);
2786
2787 __ Msr(NZCV, x10);
2788 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
2789 __ Mrs(x2, NZCV);
2790
2791 __ Msr(NZCV, x10);
2792 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
2793 __ Mrs(x3, NZCV);
2794
2795 END();
2796
2797 if (CAN_RUN()) {
2798 RUN();
2799
2800 // Since pg.Is(pdn), there can be no active lanes in pg above the last
2801 // active lane in pdn, so the result should always be zero.
2802 ASSERT_EQUAL_SVE(0, p0.VnB());
2803 ASSERT_EQUAL_SVE(0, p1.VnH());
2804 ASSERT_EQUAL_SVE(0, p2.VnS());
2805 ASSERT_EQUAL_SVE(0, p3.VnD());
2806
2807 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
2808 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
2809 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
2810 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
2811 }
2812}
2813
Jacob Bramleye8289202019-07-31 11:25:23 +01002814static void PtrueHelper(Test* config,
2815 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002816 FlagsUpdate s = LeaveFlags) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002817 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002818 START();
2819
2820 PRegisterWithLaneSize p[kNumberOfPRegisters];
2821 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
2822 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
2823 }
2824
2825 // Initialise NZCV to an impossible value, to check that we actually write it.
2826 StatusFlags nzcv_unmodified = NZCVFlag;
2827 __ Mov(x20, nzcv_unmodified);
2828
2829 // We don't have enough registers to conveniently test every pattern, so take
2830 // samples from each group.
2831 __ Msr(NZCV, x20);
2832 __ Ptrue(p[0], SVE_POW2, s);
2833 __ Mrs(x0, NZCV);
2834
2835 __ Msr(NZCV, x20);
2836 __ Ptrue(p[1], SVE_VL1, s);
2837 __ Mrs(x1, NZCV);
2838
2839 __ Msr(NZCV, x20);
2840 __ Ptrue(p[2], SVE_VL2, s);
2841 __ Mrs(x2, NZCV);
2842
2843 __ Msr(NZCV, x20);
2844 __ Ptrue(p[3], SVE_VL5, s);
2845 __ Mrs(x3, NZCV);
2846
2847 __ Msr(NZCV, x20);
2848 __ Ptrue(p[4], SVE_VL6, s);
2849 __ Mrs(x4, NZCV);
2850
2851 __ Msr(NZCV, x20);
2852 __ Ptrue(p[5], SVE_VL8, s);
2853 __ Mrs(x5, NZCV);
2854
2855 __ Msr(NZCV, x20);
2856 __ Ptrue(p[6], SVE_VL16, s);
2857 __ Mrs(x6, NZCV);
2858
2859 __ Msr(NZCV, x20);
2860 __ Ptrue(p[7], SVE_VL64, s);
2861 __ Mrs(x7, NZCV);
2862
2863 __ Msr(NZCV, x20);
2864 __ Ptrue(p[8], SVE_VL256, s);
2865 __ Mrs(x8, NZCV);
2866
2867 {
2868 // We have to use the Assembler to encode pattern values that are not defined
2869 // by SVEPredicateConstraint, so call `ptrue` or `ptrues` directly.
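// (Unallocated pattern encodings are expected to behave like an empty pattern,
// setting no lanes.)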
2870 typedef void (
2871 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
2872 int pattern);
2873 AssemblePtrueFn assemble =
2874 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
2875
2876 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
2877 __ msr(NZCV, x20);
2878 (masm.*assemble)(p[9], 0xe);
2879 __ mrs(x9, NZCV);
2880
2881 __ msr(NZCV, x20);
2882 (masm.*assemble)(p[10], 0x16);
2883 __ mrs(x10, NZCV);
2884
2885 __ msr(NZCV, x20);
2886 (masm.*assemble)(p[11], 0x1a);
2887 __ mrs(x11, NZCV);
2888
2889 __ msr(NZCV, x20);
2890 (masm.*assemble)(p[12], 0x1c);
2891 __ mrs(x12, NZCV);
2892 }
2893
2894 __ Msr(NZCV, x20);
2895 __ Ptrue(p[13], SVE_MUL4, s);
2896 __ Mrs(x13, NZCV);
2897
2898 __ Msr(NZCV, x20);
2899 __ Ptrue(p[14], SVE_MUL3, s);
2900 __ Mrs(x14, NZCV);
2901
2902 __ Msr(NZCV, x20);
2903 __ Ptrue(p[15], SVE_ALL, s);
2904 __ Mrs(x15, NZCV);
2905
2906 END();
2907
2908 if (CAN_RUN()) {
2909 RUN();
2910
2911 int all = core.GetSVELaneCount(lane_size_in_bits);
2912 int pow2 = 1 << HighestSetBitPosition(all);
2913 int mul4 = all - (all % 4);
2914 int mul3 = all - (all % 3);
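// For example, for D lanes with a 384-bit VL, all = 6, so pow2 = 4, mul4 = 4 and
// mul3 = 6.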
2915
2916 // Check P register results.
2917 for (int i = 0; i < all; i++) {
2918 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
2919 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
2920 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
2921 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
2922 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
2923 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
2924 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
2925 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
2926 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
2927 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
2928 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
2929 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
2930 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
2931 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
2932 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
2933 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
2934 }
2935
2936 // Check NZCV results.
2937 if (s == LeaveFlags) {
2938 // No flags should have been updated.
2939 for (int i = 0; i <= 15; i++) {
2940 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
2941 }
2942 } else {
2943 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
2944 StatusFlags nonzero = SVEFirstFlag;
2945
2946 // POW2
2947 ASSERT_EQUAL_64(nonzero, x0);
2948 // VL*
2949 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
2950 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
2951 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
2952 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
2953 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
2954 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
2955 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
2956 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
2957 // #uimm5
2958 ASSERT_EQUAL_64(zero, x9);
2959 ASSERT_EQUAL_64(zero, x10);
2960 ASSERT_EQUAL_64(zero, x11);
2961 ASSERT_EQUAL_64(zero, x12);
2962 // MUL*
2963 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
2964 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
2965 // ALL
2966 ASSERT_EQUAL_64(nonzero, x15);
2967 }
2968 }
2969}
2970
2971TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
2972TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
2973TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
2974TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
2975
2976TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
2977TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
2978TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
2979TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
2980
2981TEST_SVE(sve_pfalse) {
2982 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2983 START();
2984
2985 // Initialise non-zero inputs.
2986 __ Ptrue(p0.VnB());
2987 __ Ptrue(p1.VnH());
2988 __ Ptrue(p2.VnS());
2989 __ Ptrue(p3.VnD());
2990
2991 // The instruction only supports B-sized lanes, but the lane size has no
2992 // logical effect, so the MacroAssembler accepts anything.
2993 __ Pfalse(p0.VnB());
2994 __ Pfalse(p1.VnH());
2995 __ Pfalse(p2.VnS());
2996 __ Pfalse(p3.VnD());
2997
2998 END();
2999
3000 if (CAN_RUN()) {
3001 RUN();
3002
3003 ASSERT_EQUAL_SVE(0, p0.VnB());
3004 ASSERT_EQUAL_SVE(0, p1.VnB());
3005 ASSERT_EQUAL_SVE(0, p2.VnB());
3006 ASSERT_EQUAL_SVE(0, p3.VnB());
3007 }
3008}
3009
3010TEST_SVE(sve_ptest) {
3011 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3012 START();
3013
3014 // Initialise NZCV to a known (impossible) value.
3015 StatusFlags nzcv_unmodified = NZCVFlag;
3016 __ Mov(x0, nzcv_unmodified);
3017 __ Msr(NZCV, x0);
3018
3019 // Construct some test inputs.
3020 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3021 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3022 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3023 __ Pfalse(p0.VnB());
3024 __ Ptrue(p1.VnB());
3025 Initialise(&masm, p2.VnB(), in2);
3026 Initialise(&masm, p3.VnB(), in3);
3027 Initialise(&masm, p4.VnB(), in4);
3028
3029 // All-inactive pg.
3030 __ Ptest(p0, p0.VnB());
3031 __ Mrs(x0, NZCV);
3032 __ Ptest(p0, p1.VnB());
3033 __ Mrs(x1, NZCV);
3034 __ Ptest(p0, p2.VnB());
3035 __ Mrs(x2, NZCV);
3036 __ Ptest(p0, p3.VnB());
3037 __ Mrs(x3, NZCV);
3038 __ Ptest(p0, p4.VnB());
3039 __ Mrs(x4, NZCV);
3040
3041 // All-active pg.
3042 __ Ptest(p1, p0.VnB());
3043 __ Mrs(x5, NZCV);
3044 __ Ptest(p1, p1.VnB());
3045 __ Mrs(x6, NZCV);
3046 __ Ptest(p1, p2.VnB());
3047 __ Mrs(x7, NZCV);
3048 __ Ptest(p1, p3.VnB());
3049 __ Mrs(x8, NZCV);
3050 __ Ptest(p1, p4.VnB());
3051 __ Mrs(x9, NZCV);
3052
3053 // Combinations of other inputs.
3054 __ Ptest(p2, p2.VnB());
3055 __ Mrs(x20, NZCV);
3056 __ Ptest(p2, p3.VnB());
3057 __ Mrs(x21, NZCV);
3058 __ Ptest(p2, p4.VnB());
3059 __ Mrs(x22, NZCV);
3060 __ Ptest(p3, p2.VnB());
3061 __ Mrs(x23, NZCV);
3062 __ Ptest(p3, p3.VnB());
3063 __ Mrs(x24, NZCV);
3064 __ Ptest(p3, p4.VnB());
3065 __ Mrs(x25, NZCV);
3066 __ Ptest(p4, p2.VnB());
3067 __ Mrs(x26, NZCV);
3068 __ Ptest(p4, p3.VnB());
3069 __ Mrs(x27, NZCV);
3070 __ Ptest(p4, p4.VnB());
3071 __ Mrs(x28, NZCV);
3072
3073 END();
3074
3075 if (CAN_RUN()) {
3076 RUN();
3077
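// PTEST sets N (SVEFirstFlag) if the first active lane of pn is true, Z
// (SVENoneFlag) if no active lane of pn is true, and C (SVENotLastFlag) if
// the last active lane of pn is not true; V is always cleared.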
3078 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
3079
3080 // If pg is all inactive, the value of pn is irrelevant.
3081 ASSERT_EQUAL_64(zero, x0);
3082 ASSERT_EQUAL_64(zero, x1);
3083 ASSERT_EQUAL_64(zero, x2);
3084 ASSERT_EQUAL_64(zero, x3);
3085 ASSERT_EQUAL_64(zero, x4);
3086
3087 // All-active pg.
3088 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3089 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3090 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3091 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3092 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3093 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3094
3095 // Other inputs.
3096 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3097 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3098 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3099 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3100 x23); // pg: in3, pn: in2
3101 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3102 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3103 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3104 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3105 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3106 }
3107}
3108
3109TEST_SVE(sve_cntp) {
3110 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3111 START();
3112
3113 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3114 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
3115 Initialise(&masm, p0.VnB(), p0_inputs);
3116
3117 // With an all-true predicate, these instructions measure the vector length.
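// For example, on a 384-bit vector this gives 48 for B lanes, 24 for H,
// 12 for S and 6 for D.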
3118 __ Ptrue(p10.VnB());
3119 __ Ptrue(p11.VnH());
3120 __ Ptrue(p12.VnS());
3121 __ Ptrue(p13.VnD());
3122
3123 // `ptrue p10.b` provides an all-active pg.
3124 __ Cntp(x10, p10, p10.VnB());
3125 __ Cntp(x11, p10, p11.VnH());
3126 __ Cntp(x12, p10, p12.VnS());
3127 __ Cntp(x13, p10, p13.VnD());
3128
3129 // Check that the predicate mask is applied properly.
3130 __ Cntp(x14, p10, p10.VnB());
3131 __ Cntp(x15, p11, p10.VnB());
3132 __ Cntp(x16, p12, p10.VnB());
3133 __ Cntp(x17, p13, p10.VnB());
3134
3135 // Check other patterns (including some ignored bits).
3136 __ Cntp(x0, p10, p0.VnB());
3137 __ Cntp(x1, p10, p0.VnH());
3138 __ Cntp(x2, p10, p0.VnS());
3139 __ Cntp(x3, p10, p0.VnD());
3140 __ Cntp(x4, p0, p10.VnB());
3141 __ Cntp(x5, p0, p10.VnH());
3142 __ Cntp(x6, p0, p10.VnS());
3143 __ Cntp(x7, p0, p10.VnD());
3144
3145 END();
3146
3147 if (CAN_RUN()) {
3148 RUN();
3149
3150 int vl_b = core.GetSVELaneCount(kBRegSize);
3151 int vl_h = core.GetSVELaneCount(kHRegSize);
3152 int vl_s = core.GetSVELaneCount(kSRegSize);
3153 int vl_d = core.GetSVELaneCount(kDRegSize);
3154
3155 // Check all-active predicates in various combinations.
3156 ASSERT_EQUAL_64(vl_b, x10);
3157 ASSERT_EQUAL_64(vl_h, x11);
3158 ASSERT_EQUAL_64(vl_s, x12);
3159 ASSERT_EQUAL_64(vl_d, x13);
3160
3161 ASSERT_EQUAL_64(vl_b, x14);
3162 ASSERT_EQUAL_64(vl_h, x15);
3163 ASSERT_EQUAL_64(vl_s, x16);
3164 ASSERT_EQUAL_64(vl_d, x17);
3165
3166 // Check that irrelevant bits are properly ignored.
3167 ASSERT_EQUAL_64(7, x0);
3168 ASSERT_EQUAL_64(5, x1);
3169 ASSERT_EQUAL_64(2, x2);
3170 ASSERT_EQUAL_64(1, x3);
3171
3172 ASSERT_EQUAL_64(7, x4);
3173 ASSERT_EQUAL_64(5, x5);
3174 ASSERT_EQUAL_64(2, x6);
3175 ASSERT_EQUAL_64(1, x7);
3176 }
3177}
3178
3179typedef void (MacroAssembler::*CntFn)(const Register& dst,
3180 int pattern,
3181 int multiplier);
3182
3183template <typename T>
3184void GenerateCntSequence(MacroAssembler* masm,
3185 CntFn cnt,
3186 T acc_value,
3187 int multiplier) {
3188 // Initialise accumulators.
3189 masm->Mov(x0, acc_value);
3190 masm->Mov(x1, acc_value);
3191 masm->Mov(x2, acc_value);
3192 masm->Mov(x3, acc_value);
3193 masm->Mov(x4, acc_value);
3194 masm->Mov(x5, acc_value);
3195 masm->Mov(x6, acc_value);
3196 masm->Mov(x7, acc_value);
3197 masm->Mov(x8, acc_value);
3198 masm->Mov(x9, acc_value);
3199 masm->Mov(x10, acc_value);
3200 masm->Mov(x11, acc_value);
3201 masm->Mov(x12, acc_value);
3202 masm->Mov(x13, acc_value);
3203 masm->Mov(x14, acc_value);
3204 masm->Mov(x15, acc_value);
3205 masm->Mov(x18, acc_value);
3206 masm->Mov(x19, acc_value);
3207 masm->Mov(x20, acc_value);
3208 masm->Mov(x21, acc_value);
3209
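// Register(code, size) selects Wn when sizeof(T) is 32 bits and Xn when it
// is 64 bits, so the instruction form under test matches the accumulator
// width. x16 and x17 are skipped because the MacroAssembler may use them as
// scratch registers.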
3210 (masm->*cnt)(Register(0, sizeof(T) * kBitsPerByte), SVE_POW2, multiplier);
3211 (masm->*cnt)(Register(1, sizeof(T) * kBitsPerByte), SVE_VL1, multiplier);
3212 (masm->*cnt)(Register(2, sizeof(T) * kBitsPerByte), SVE_VL2, multiplier);
3213 (masm->*cnt)(Register(3, sizeof(T) * kBitsPerByte), SVE_VL3, multiplier);
3214 (masm->*cnt)(Register(4, sizeof(T) * kBitsPerByte), SVE_VL4, multiplier);
3215 (masm->*cnt)(Register(5, sizeof(T) * kBitsPerByte), SVE_VL5, multiplier);
3216 (masm->*cnt)(Register(6, sizeof(T) * kBitsPerByte), SVE_VL6, multiplier);
3217 (masm->*cnt)(Register(7, sizeof(T) * kBitsPerByte), SVE_VL7, multiplier);
3218 (masm->*cnt)(Register(8, sizeof(T) * kBitsPerByte), SVE_VL8, multiplier);
3219 (masm->*cnt)(Register(9, sizeof(T) * kBitsPerByte), SVE_VL16, multiplier);
3220 (masm->*cnt)(Register(10, sizeof(T) * kBitsPerByte), SVE_VL32, multiplier);
3221 (masm->*cnt)(Register(11, sizeof(T) * kBitsPerByte), SVE_VL64, multiplier);
3222 (masm->*cnt)(Register(12, sizeof(T) * kBitsPerByte), SVE_VL128, multiplier);
3223 (masm->*cnt)(Register(13, sizeof(T) * kBitsPerByte), SVE_VL256, multiplier);
3224 (masm->*cnt)(Register(14, sizeof(T) * kBitsPerByte), 16, multiplier);
3225 (masm->*cnt)(Register(15, sizeof(T) * kBitsPerByte), 23, multiplier);
3226 (masm->*cnt)(Register(18, sizeof(T) * kBitsPerByte), 28, multiplier);
3227 (masm->*cnt)(Register(19, sizeof(T) * kBitsPerByte), SVE_MUL4, multiplier);
3228 (masm->*cnt)(Register(20, sizeof(T) * kBitsPerByte), SVE_MUL3, multiplier);
3229 (masm->*cnt)(Register(21, sizeof(T) * kBitsPerByte), SVE_ALL, multiplier);
3230}
3231
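// Returns `fixed` if the vector holds at least that many lanes, and zero
// otherwise, mirroring how the SVE_VL<n> patterns behave.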
3232int FixedVL(int fixed, int length) {
3233 VIXL_ASSERT(((fixed >= 1) && (fixed <= 8)) || (fixed == 16) ||
3234 (fixed == 32) || (fixed == 64) || (fixed == 128) ||
3235 (fixed == 256));
3236 return (length >= fixed) ? fixed : 0;
3237}
3238
3239static void CntHelper(Test* config,
3240 CntFn cnt,
3241 int multiplier,
3242 int lane_size_in_bits,
3243 int64_t acc_value = 0,
3244 bool is_increment = true) {
3245 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3246 START();
3247 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
3248 END();
3249
3250 if (CAN_RUN()) {
3251 RUN();
3252
3253 int all = core.GetSVELaneCount(lane_size_in_bits);
3254 int pow2 = 1 << HighestSetBitPosition(all);
3255 int mul4 = all - (all % 4);
3256 int mul3 = all - (all % 3);
3257
3258 multiplier = is_increment ? multiplier : -multiplier;
3259
3260 ASSERT_EQUAL_64(acc_value + (multiplier * pow2), x0);
3261 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(1, all)), x1);
3262 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(2, all)), x2);
3263 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(3, all)), x3);
3264 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(4, all)), x4);
3265 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(5, all)), x5);
3266 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(6, all)), x6);
3267 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(7, all)), x7);
3268 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(8, all)), x8);
3269 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(16, all)), x9);
3270 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(32, all)), x10);
3271 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(64, all)), x11);
3272 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(128, all)), x12);
3273 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(256, all)), x13);
3274 ASSERT_EQUAL_64(acc_value, x14);
3275 ASSERT_EQUAL_64(acc_value, x15);
3276 ASSERT_EQUAL_64(acc_value, x18);
3277 ASSERT_EQUAL_64(acc_value + (multiplier * mul4), x19);
3278 ASSERT_EQUAL_64(acc_value + (multiplier * mul3), x20);
3279 ASSERT_EQUAL_64(acc_value + (multiplier * all), x21);
3280 }
3281}
3282
3283static void IncHelper(Test* config,
3284 CntFn cnt,
3285 int multiplier,
3286 int lane_size_in_bits,
3287 int64_t acc_value) {
3288 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3289}
3290
3291static void DecHelper(Test* config,
3292 CntFn cnt,
3293 int multiplier,
3294 int lane_size_in_bits,
3295 int64_t acc_value) {
3296 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3297}
3298
3299TEST_SVE(sve_cntb) {
3300 CntHelper(config, &MacroAssembler::Cntb, 1, kBRegSize);
3301 CntHelper(config, &MacroAssembler::Cntb, 2, kBRegSize);
3302 CntHelper(config, &MacroAssembler::Cntb, 15, kBRegSize);
3303 CntHelper(config, &MacroAssembler::Cntb, 16, kBRegSize);
3304}
3305
3306TEST_SVE(sve_cnth) {
3307 CntHelper(config, &MacroAssembler::Cnth, 1, kHRegSize);
3308 CntHelper(config, &MacroAssembler::Cnth, 2, kHRegSize);
3309 CntHelper(config, &MacroAssembler::Cnth, 15, kHRegSize);
3310 CntHelper(config, &MacroAssembler::Cnth, 16, kHRegSize);
3311}
3312
3313TEST_SVE(sve_cntw) {
3314 CntHelper(config, &MacroAssembler::Cntw, 1, kWRegSize);
3315 CntHelper(config, &MacroAssembler::Cntw, 2, kWRegSize);
3316 CntHelper(config, &MacroAssembler::Cntw, 15, kWRegSize);
3317 CntHelper(config, &MacroAssembler::Cntw, 16, kWRegSize);
3318}
3319
3320TEST_SVE(sve_cntd) {
3321 CntHelper(config, &MacroAssembler::Cntd, 1, kDRegSize);
3322 CntHelper(config, &MacroAssembler::Cntd, 2, kDRegSize);
3323 CntHelper(config, &MacroAssembler::Cntd, 15, kDRegSize);
3324 CntHelper(config, &MacroAssembler::Cntd, 16, kDRegSize);
3325}
3326
3327TEST_SVE(sve_decb) {
3328 DecHelper(config, &MacroAssembler::Decb, 1, kBRegSize, 42);
3329 DecHelper(config, &MacroAssembler::Decb, 2, kBRegSize, -1);
3330 DecHelper(config, &MacroAssembler::Decb, 15, kBRegSize, INT64_MIN);
3331 DecHelper(config, &MacroAssembler::Decb, 16, kBRegSize, -42);
3332}
3333
3334TEST_SVE(sve_dech) {
3335 DecHelper(config, &MacroAssembler::Dech, 1, kHRegSize, 42);
3336 DecHelper(config, &MacroAssembler::Dech, 2, kHRegSize, -1);
3337 DecHelper(config, &MacroAssembler::Dech, 15, kHRegSize, INT64_MIN);
3338 DecHelper(config, &MacroAssembler::Dech, 16, kHRegSize, -42);
3339}
3340
3341TEST_SVE(sve_decw) {
3342 DecHelper(config, &MacroAssembler::Decw, 1, kWRegSize, 42);
3343 DecHelper(config, &MacroAssembler::Decw, 2, kWRegSize, -1);
3344 DecHelper(config, &MacroAssembler::Decw, 15, kWRegSize, INT64_MIN);
3345 DecHelper(config, &MacroAssembler::Decw, 16, kWRegSize, -42);
3346}
3347
3348TEST_SVE(sve_decd) {
3349 DecHelper(config, &MacroAssembler::Decd, 1, kDRegSize, 42);
3350 DecHelper(config, &MacroAssembler::Decd, 2, kDRegSize, -1);
3351 DecHelper(config, &MacroAssembler::Decd, 15, kDRegSize, INT64_MIN);
3352 DecHelper(config, &MacroAssembler::Decd, 16, kDRegSize, -42);
3353}
3354
3355TEST_SVE(sve_incb) {
3356 IncHelper(config, &MacroAssembler::Incb, 1, kBRegSize, 42);
3357 IncHelper(config, &MacroAssembler::Incb, 2, kBRegSize, -1);
3358 IncHelper(config, &MacroAssembler::Incb, 15, kBRegSize, INT64_MAX);
3359 IncHelper(config, &MacroAssembler::Incb, 16, kBRegSize, -42);
3360}
3361
3362TEST_SVE(sve_inch) {
3363 IncHelper(config, &MacroAssembler::Inch, 1, kHRegSize, 42);
3364 IncHelper(config, &MacroAssembler::Inch, 2, kHRegSize, -1);
3365 IncHelper(config, &MacroAssembler::Inch, 15, kHRegSize, INT64_MAX);
3366 IncHelper(config, &MacroAssembler::Inch, 16, kHRegSize, -42);
3367}
3368
3369TEST_SVE(sve_incw) {
3370 IncHelper(config, &MacroAssembler::Incw, 1, kWRegSize, 42);
3371 IncHelper(config, &MacroAssembler::Incw, 2, kWRegSize, -1);
3372 IncHelper(config, &MacroAssembler::Incw, 15, kWRegSize, INT64_MAX);
3373 IncHelper(config, &MacroAssembler::Incw, 16, kWRegSize, -42);
3374}
3375
3376TEST_SVE(sve_incd) {
3377 IncHelper(config, &MacroAssembler::Incd, 1, kDRegSize, 42);
3378 IncHelper(config, &MacroAssembler::Incd, 2, kDRegSize, -1);
3379 IncHelper(config, &MacroAssembler::Incd, 15, kDRegSize, INT64_MAX);
3380 IncHelper(config, &MacroAssembler::Incd, 16, kDRegSize, -42);
3381}
3382
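// Reference saturating addition of a (signed) count to an accumulator of
// type T. For example, QAdd<uint8_t>(250, 10) saturates to 255,
// QAdd<int8_t>(-120, -20) saturates to -128, and QAdd<int8_t>(100, -20) is
// simply 80.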
3383template <typename T>
3384static T QAdd(T x, int y) {
3385 VIXL_ASSERT(y > INT_MIN);
3386 T result;
3387 T min = std::numeric_limits<T>::min();
3388 T max = std::numeric_limits<T>::max();
3389 if ((x >= 0) && (y >= 0)) {
3390 // For non-negative x and y, saturate at max.
3391 result = (max - x) < static_cast<T>(y) ? max : x + y;
3392 } else if ((y < 0) && ((x < 0) || (min == 0))) {
3393 // For negative y, where x is negative or T is unsigned, saturate at min.
3394 result = (x - min) < static_cast<T>(-y) ? min : x + y;
3395 } else {
3396 result = x + y;
3397 }
3398 return result;
3399}
3400
3401template <typename T>
3402static void QIncDecHelper(Test* config,
3403 CntFn cnt,
3404 int multiplier,
3405 int lane_size_in_bits,
3406 T acc_value,
3407 bool is_increment) {
3408 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3409 START();
3410 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
3411 END();
3412
3413 if (CAN_RUN()) {
3414 RUN();
3415
3416 int all = core.GetSVELaneCount(lane_size_in_bits);
3417 int pow2 = 1 << HighestSetBitPosition(all);
3418 int mul4 = all - (all % 4);
3419 int mul3 = all - (all % 3);
3420
3421 multiplier = is_increment ? multiplier : -multiplier;
3422
3423 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
3424 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
3425 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
3426 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
3427 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
3428 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
3429 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
3430 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
3431 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
3432 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
3433 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
3434 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
3435 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
3436 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
3437 ASSERT_EQUAL_64(acc_value, x14);
3438 ASSERT_EQUAL_64(acc_value, x15);
3439 ASSERT_EQUAL_64(acc_value, x18);
3440 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
3441 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
3442 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
3443 }
3444}
3445
3446template <typename T>
3447static void QIncHelper(Test* config,
3448 CntFn cnt,
3449 int multiplier,
3450 int lane_size_in_bits,
3451 T acc_value) {
3452 QIncDecHelper<T>(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3453}
3454
3455template <typename T>
3456static void QDecHelper(Test* config,
3457 CntFn cnt,
3458 int multiplier,
3459 int lane_size_in_bits,
3460 T acc_value) {
3461 QIncDecHelper<T>(config,
3462 cnt,
3463 multiplier,
3464 lane_size_in_bits,
3465 acc_value,
3466 false);
3467}
3468
3469TEST_SVE(sve_sqdecb) {
3470 int64_t bigneg = INT64_MIN + 42;
3471 int64_t bigpos = INT64_MAX - 42;
3472 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
3473 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 2, kBRegSize, bigneg);
3474 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
3475 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 16, kBRegSize, bigpos);
3476}
3477
3478TEST_SVE(sve_sqdech) {
3479 int64_t bigneg = INT64_MIN + 42;
3480 int64_t bigpos = INT64_MAX - 42;
3481 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
3482 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 2, kHRegSize, bigneg);
3483 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
3484 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 16, kHRegSize, bigpos);
3485}
3486
3487TEST_SVE(sve_sqdecw) {
3488 int64_t bigneg = INT64_MIN + 42;
3489 int64_t bigpos = INT64_MAX - 42;
3490 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
3491 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 2, kWRegSize, bigneg);
3492 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
3493 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 16, kWRegSize, bigpos);
3494}
3495
3496TEST_SVE(sve_sqdecd) {
3497 int64_t bigneg = INT64_MIN + 42;
3498 int64_t bigpos = INT64_MAX - 42;
3499 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
3500 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 2, kDRegSize, bigneg);
3501 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
3502 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 16, kDRegSize, bigpos);
3503}
3504
3505TEST_SVE(sve_sqincb) {
3506 int64_t bigneg = INT64_MIN + 42;
3507 int64_t bigpos = INT64_MAX - 42;
3508 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
3509 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 2, kBRegSize, bigneg);
3510 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
3511 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 16, kBRegSize, bigpos);
3512}
3513
3514TEST_SVE(sve_sqinch) {
3515 int64_t bigneg = INT64_MIN + 42;
3516 int64_t bigpos = INT64_MAX - 42;
3517 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
3518 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 2, kHRegSize, bigneg);
3519 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
3520 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 16, kHRegSize, bigpos);
3521}
3522
3523TEST_SVE(sve_sqincw) {
3524 int64_t bigneg = INT64_MIN + 42;
3525 int64_t bigpos = INT64_MAX - 42;
3526 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
3527 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 2, kWRegSize, bigneg);
3528 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
3529 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 16, kWRegSize, bigpos);
3530}
3531
3532TEST_SVE(sve_sqincd) {
3533 int64_t bigneg = INT64_MIN + 42;
3534 int64_t bigpos = INT64_MAX - 42;
3535 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
3536 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 2, kDRegSize, bigneg);
3537 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
3538 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 16, kDRegSize, bigpos);
3539}
3540
3541TEST_SVE(sve_uqdecb) {
3542 uint32_t big32 = UINT32_MAX - 42;
3543 uint64_t big64 = UINT64_MAX - 42;
3544 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
3545 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
3546 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
3547 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big32);
3548 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
3549 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
3550 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
3551 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big64);
3552}
3553
3554TEST_SVE(sve_uqdech) {
3555 uint32_t big32 = UINT32_MAX - 42;
3556 uint64_t big64 = UINT64_MAX - 42;
3557 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
3558 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
3559 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
3560 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big32);
3561 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
3562 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
3563 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
3564 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big64);
3565}
3566
3567TEST_SVE(sve_uqdecw) {
3568 uint32_t big32 = UINT32_MAX - 42;
3569 uint64_t big64 = UINT64_MAX - 42;
3570 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
3571 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
3572 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
3573 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big32);
3574 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
3575 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
3576 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
3577 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big64);
3578}
3579
3580TEST_SVE(sve_uqdecd) {
3581 uint32_t big32 = UINT32_MAX - 42;
3582 uint64_t big64 = UINT64_MAX - 42;
3583 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
3584 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
3585 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
3586 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big32);
3587 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
3588 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
3589 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
3590 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big64);
3591}
3592
3593TEST_SVE(sve_uqincb) {
3594 uint32_t big32 = UINT32_MAX - 42;
3595 uint64_t big64 = UINT64_MAX - 42;
3596 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
3597 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
3598 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
3599 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big32);
3600 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
3601 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
3602 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
3603 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big64);
3604}
3605
3606TEST_SVE(sve_uqinch) {
3607 uint32_t big32 = UINT32_MAX - 42;
3608 uint64_t big64 = UINT64_MAX - 42;
3609 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
3610 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
3611 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
3612 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big32);
3613 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
3614 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
3615 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
3616 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big64);
3617}
3618
3619TEST_SVE(sve_uqincw) {
3620 uint32_t big32 = UINT32_MAX - 42;
3621 uint64_t big64 = UINT64_MAX - 42;
3622 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
3623 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
3624 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
3625 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big32);
3626 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
3627 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
3628 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
3629 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big64);
3630}
3631
3632TEST_SVE(sve_uqincd) {
3633 uint32_t big32 = UINT32_MAX - 42;
3634 uint64_t big64 = UINT64_MAX - 42;
3635 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
3636 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
3637 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
3638 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big32);
3639 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
3640 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
3641 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
3642 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big64);
3643}
3644
3645typedef void (MacroAssembler::*QIncDecXWFn)(const Register& dst,
3646 const Register& src,
3647 int pattern,
3648 int multiplier);
3649
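// The <Xd>, <Wn> forms operate on the 32-bit source, saturate to the signed
// 32-bit range and write the sign-extended result to the 64-bit destination,
// so the expected values are computed with QAdd on int32_t.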
3650static void QIncDecXWHelper(Test* config,
3651 QIncDecXWFn cnt,
3652 int multiplier,
3653 int lane_size_in_bits,
3654 int32_t acc_value,
3655 bool is_increment) {
3656 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3657 START();
3658
3659 // Initialise accumulators.
3660 __ Mov(x0, acc_value);
3661 __ Mov(x1, acc_value);
3662 __ Mov(x2, acc_value);
3663 __ Mov(x3, acc_value);
3664 __ Mov(x4, acc_value);
3665 __ Mov(x5, acc_value);
3666 __ Mov(x6, acc_value);
3667 __ Mov(x7, acc_value);
3668 __ Mov(x8, acc_value);
3669 __ Mov(x9, acc_value);
3670 __ Mov(x10, acc_value);
3671 __ Mov(x11, acc_value);
3672 __ Mov(x12, acc_value);
3673 __ Mov(x13, acc_value);
3674 __ Mov(x14, acc_value);
3675 __ Mov(x15, acc_value);
3676 __ Mov(x18, acc_value);
3677 __ Mov(x19, acc_value);
3678 __ Mov(x20, acc_value);
3679 __ Mov(x21, acc_value);
3680
3681 (masm.*cnt)(x0, w0, SVE_POW2, multiplier);
3682 (masm.*cnt)(x1, w1, SVE_VL1, multiplier);
3683 (masm.*cnt)(x2, w2, SVE_VL2, multiplier);
3684 (masm.*cnt)(x3, w3, SVE_VL3, multiplier);
3685 (masm.*cnt)(x4, w4, SVE_VL4, multiplier);
3686 (masm.*cnt)(x5, w5, SVE_VL5, multiplier);
3687 (masm.*cnt)(x6, w6, SVE_VL6, multiplier);
3688 (masm.*cnt)(x7, w7, SVE_VL7, multiplier);
3689 (masm.*cnt)(x8, w8, SVE_VL8, multiplier);
3690 (masm.*cnt)(x9, w9, SVE_VL16, multiplier);
3691 (masm.*cnt)(x10, w10, SVE_VL32, multiplier);
3692 (masm.*cnt)(x11, w11, SVE_VL64, multiplier);
3693 (masm.*cnt)(x12, w12, SVE_VL128, multiplier);
3694 (masm.*cnt)(x13, w13, SVE_VL256, multiplier);
3695 (masm.*cnt)(x14, w14, 16, multiplier);
3696 (masm.*cnt)(x15, w15, 23, multiplier);
3697 (masm.*cnt)(x18, w18, 28, multiplier);
3698 (masm.*cnt)(x19, w19, SVE_MUL4, multiplier);
3699 (masm.*cnt)(x20, w20, SVE_MUL3, multiplier);
3700 (masm.*cnt)(x21, w21, SVE_ALL, multiplier);
3701
3702 END();
3703
3704 if (CAN_RUN()) {
3705 RUN();
3706
3707 int all = core.GetSVELaneCount(lane_size_in_bits);
3708 int pow2 = 1 << HighestSetBitPosition(all);
3709 int mul4 = all - (all % 4);
3710 int mul3 = all - (all % 3);
3711
3712 multiplier = is_increment ? multiplier : -multiplier;
3713
3714 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
3715 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
3716 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
3717 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
3718 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
3719 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
3720 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
3721 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
3722 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
3723 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
3724 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
3725 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
3726 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
3727 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
3728 ASSERT_EQUAL_64(acc_value, x14);
3729 ASSERT_EQUAL_64(acc_value, x15);
3730 ASSERT_EQUAL_64(acc_value, x18);
3731 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
3732 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
3733 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
3734 }
3735}
3736
3737static void QIncXWHelper(Test* config,
3738 QIncDecXWFn cnt,
3739 int multiplier,
3740 int lane_size_in_bits,
3741 int32_t acc_value) {
3742 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3743}
3744
3745static void QDecXWHelper(Test* config,
3746 QIncDecXWFn cnt,
3747 int multiplier,
3748 int lane_size_in_bits,
3749 int32_t acc_value) {
3750 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3751}
3752
3753TEST_SVE(sve_sqdecb_xw) {
3754 QDecXWHelper(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
3755 QDecXWHelper(config, &MacroAssembler::Sqdecb, 2, kBRegSize, INT32_MIN + 42);
3756 QDecXWHelper(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
3757 QDecXWHelper(config, &MacroAssembler::Sqdecb, 16, kBRegSize, INT32_MAX - 42);
3758}
3759
3760TEST_SVE(sve_sqdech_xw) {
3761 QDecXWHelper(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
3762 QDecXWHelper(config, &MacroAssembler::Sqdech, 2, kHRegSize, INT32_MIN + 42);
3763 QDecXWHelper(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
3764 QDecXWHelper(config, &MacroAssembler::Sqdech, 16, kHRegSize, INT32_MAX - 42);
3765}
3766
3767TEST_SVE(sve_sqdecw_xw) {
3768 QDecXWHelper(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
3769 QDecXWHelper(config, &MacroAssembler::Sqdecw, 2, kWRegSize, INT32_MIN + 42);
3770 QDecXWHelper(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
3771 QDecXWHelper(config, &MacroAssembler::Sqdecw, 16, kWRegSize, INT32_MAX - 42);
3772}
3773
3774TEST_SVE(sve_sqdecd_xw) {
3775 QDecXWHelper(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
3776 QDecXWHelper(config, &MacroAssembler::Sqdecd, 2, kDRegSize, INT32_MIN + 42);
3777 QDecXWHelper(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
3778 QDecXWHelper(config, &MacroAssembler::Sqdecd, 16, kDRegSize, INT32_MAX - 42);
3779}
3780
3781TEST_SVE(sve_sqincb_xw) {
3782 QIncXWHelper(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
3783 QIncXWHelper(config, &MacroAssembler::Sqincb, 2, kBRegSize, INT32_MIN + 42);
3784 QIncXWHelper(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
3785 QIncXWHelper(config, &MacroAssembler::Sqincb, 16, kBRegSize, INT32_MAX - 42);
3786}
3787
3788TEST_SVE(sve_sqinch_xw) {
3789 QIncXWHelper(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
3790 QIncXWHelper(config, &MacroAssembler::Sqinch, 2, kHRegSize, INT32_MIN + 42);
3791 QIncXWHelper(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
3792 QIncXWHelper(config, &MacroAssembler::Sqinch, 16, kHRegSize, INT32_MAX - 42);
3793}
3794
3795TEST_SVE(sve_sqincw_xw) {
3796 QIncXWHelper(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
3797 QIncXWHelper(config, &MacroAssembler::Sqincw, 2, kWRegSize, INT32_MIN + 42);
3798 QIncXWHelper(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
3799 QIncXWHelper(config, &MacroAssembler::Sqincw, 16, kWRegSize, INT32_MAX - 42);
3800}
3801
3802TEST_SVE(sve_sqincd_xw) {
3803 QIncXWHelper(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
3804 QIncXWHelper(config, &MacroAssembler::Sqincd, 2, kDRegSize, INT32_MIN + 42);
3805 QIncXWHelper(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
3806 QIncXWHelper(config, &MacroAssembler::Sqincd, 16, kDRegSize, INT32_MAX - 42);
3807}
3808
3809typedef void (MacroAssembler::*IncDecZFn)(const ZRegister& dst,
3810 int pattern,
3811 int multiplier);
3812typedef void (MacroAssembler::*AddSubFn)(const ZRegister& dst,
3813 const ZRegister& src1,
3814 const ZRegister& src2);
3815
3816static void IncDecZHelper(Test* config,
3817 IncDecZFn fn,
3818 CntFn cnt,
3819 AddSubFn addsub,
3820 int multiplier,
3821 int lane_size_in_bits) {
3822 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3823 START();
3824
3825 uint64_t acc_inputs[] = {0x7766554433221100,
3826 0xffffffffffffffff,
3827 0x0000000000000000,
3828 0xffffffff0000ffff,
3829 0x7fffffffffffffff,
3830 0x8000000000000000,
3831 0x7fffffff7fff7fff,
3832 0x8000000080008000};
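  // These patterns cover 0, -1 and the signed minimum/maximum at 64, 32 and
  // 16 bits, so the saturating variants are exercised at their clamping
  // boundaries.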
3833
3834 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
3835 for (int j = 0; j < 4; j++) {
3836 InsrHelper(&masm, ZRegister(i, kDRegSize), acc_inputs);
3837 }
3838 }
3839 for (unsigned i = 0; i < 15; i++) {
3840 __ Mov(XRegister(i), 0);
3841 }
3842
3843 (masm.*fn)(z16.WithLaneSize(lane_size_in_bits), SVE_POW2, multiplier);
3844 (masm.*fn)(z17.WithLaneSize(lane_size_in_bits), SVE_VL1, multiplier);
3845 (masm.*fn)(z18.WithLaneSize(lane_size_in_bits), SVE_VL2, multiplier);
3846 (masm.*fn)(z19.WithLaneSize(lane_size_in_bits), SVE_VL3, multiplier);
3847 (masm.*fn)(z20.WithLaneSize(lane_size_in_bits), SVE_VL4, multiplier);
3848 (masm.*fn)(z21.WithLaneSize(lane_size_in_bits), SVE_VL7, multiplier);
3849 (masm.*fn)(z22.WithLaneSize(lane_size_in_bits), SVE_VL8, multiplier);
3850 (masm.*fn)(z23.WithLaneSize(lane_size_in_bits), SVE_VL16, multiplier);
3851 (masm.*fn)(z24.WithLaneSize(lane_size_in_bits), SVE_VL64, multiplier);
3852 (masm.*fn)(z25.WithLaneSize(lane_size_in_bits), SVE_VL256, multiplier);
3853 (masm.*fn)(z26.WithLaneSize(lane_size_in_bits), 16, multiplier);
3854 (masm.*fn)(z27.WithLaneSize(lane_size_in_bits), 28, multiplier);
3855 (masm.*fn)(z28.WithLaneSize(lane_size_in_bits), SVE_MUL3, multiplier);
3856 (masm.*fn)(z29.WithLaneSize(lane_size_in_bits), SVE_MUL4, multiplier);
3857 (masm.*fn)(z30.WithLaneSize(lane_size_in_bits), SVE_ALL, multiplier);
3858
3859 // Perform computation using alternative instructions.
3860 (masm.*cnt)(x0, SVE_POW2, multiplier);
3861 (masm.*cnt)(x1, SVE_VL1, multiplier);
3862 (masm.*cnt)(x2, SVE_VL2, multiplier);
3863 (masm.*cnt)(x3, SVE_VL3, multiplier);
3864 (masm.*cnt)(x4, SVE_VL4, multiplier);
3865 (masm.*cnt)(x5, SVE_VL7, multiplier);
3866 (masm.*cnt)(x6, SVE_VL8, multiplier);
3867 (masm.*cnt)(x7, SVE_VL16, multiplier);
3868 (masm.*cnt)(x8, SVE_VL64, multiplier);
3869 (masm.*cnt)(x9, SVE_VL256, multiplier);
3870 (masm.*cnt)(x10, 16, multiplier);
3871 (masm.*cnt)(x11, 28, multiplier);
3872 (masm.*cnt)(x12, SVE_MUL3, multiplier);
3873 (masm.*cnt)(x13, SVE_MUL4, multiplier);
3874 (masm.*cnt)(x14, SVE_ALL, multiplier);
3875
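  // Compute the reference results: broadcast each scalar count and combine it
  // with the original accumulator values using the matching (possibly
  // saturating) vector add or subtract.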
3876 ZRegister zscratch = z15.WithLaneSize(lane_size_in_bits);
3877 for (unsigned i = 0; i < 15; i++) {
3878 ZRegister zsrcdst = ZRegister(i, lane_size_in_bits);
3879 Register x = Register(i, kXRegSize);
3880 __ Dup(zscratch, x);
3881 (masm.*addsub)(zsrcdst, zsrcdst, zscratch);
3882 }
3883
3884 END();
3885
3886 if (CAN_RUN()) {
3887 RUN();
3888
3889 ASSERT_EQUAL_SVE(z0, z16);
3890 ASSERT_EQUAL_SVE(z1, z17);
3891 ASSERT_EQUAL_SVE(z2, z18);
3892 ASSERT_EQUAL_SVE(z3, z19);
3893 ASSERT_EQUAL_SVE(z4, z20);
3894 ASSERT_EQUAL_SVE(z5, z21);
3895 ASSERT_EQUAL_SVE(z6, z22);
3896 ASSERT_EQUAL_SVE(z7, z23);
3897 ASSERT_EQUAL_SVE(z8, z24);
3898 ASSERT_EQUAL_SVE(z9, z25);
3899 ASSERT_EQUAL_SVE(z10, z26);
3900 ASSERT_EQUAL_SVE(z11, z27);
3901 ASSERT_EQUAL_SVE(z12, z28);
3902 ASSERT_EQUAL_SVE(z13, z29);
3903 ASSERT_EQUAL_SVE(z14, z30);
3904 }
3905}
3906
3907TEST_SVE(sve_inc_dec_vec) {
3908 CntFn cnth = &MacroAssembler::Cnth;
3909 CntFn cntw = &MacroAssembler::Cntw;
3910 CntFn cntd = &MacroAssembler::Cntd;
3911 AddSubFn sub = &MacroAssembler::Sub;
3912 AddSubFn add = &MacroAssembler::Add;
3913 for (int mult = 1; mult <= 16; mult += 5) {
3914 IncDecZHelper(config, &MacroAssembler::Dech, cnth, sub, mult, kHRegSize);
3915 IncDecZHelper(config, &MacroAssembler::Decw, cntw, sub, mult, kSRegSize);
3916 IncDecZHelper(config, &MacroAssembler::Decd, cntd, sub, mult, kDRegSize);
3917 IncDecZHelper(config, &MacroAssembler::Inch, cnth, add, mult, kHRegSize);
3918 IncDecZHelper(config, &MacroAssembler::Incw, cntw, add, mult, kSRegSize);
3919 IncDecZHelper(config, &MacroAssembler::Incd, cntd, add, mult, kDRegSize);
3920 }
3921}
3922
3923TEST_SVE(sve_unsigned_sat_inc_dec_vec) {
3924 CntFn cnth = &MacroAssembler::Cnth;
3925 CntFn cntw = &MacroAssembler::Cntw;
3926 CntFn cntd = &MacroAssembler::Cntd;
3927 AddSubFn sub = &MacroAssembler::Uqsub;
3928 AddSubFn add = &MacroAssembler::Uqadd;
3929 for (int mult = 1; mult <= 16; mult += 5) {
3930 IncDecZHelper(config, &MacroAssembler::Uqdech, cnth, sub, mult, kHRegSize);
3931 IncDecZHelper(config, &MacroAssembler::Uqdecw, cntw, sub, mult, kSRegSize);
3932 IncDecZHelper(config, &MacroAssembler::Uqdecd, cntd, sub, mult, kDRegSize);
3933 IncDecZHelper(config, &MacroAssembler::Uqinch, cnth, add, mult, kHRegSize);
3934 IncDecZHelper(config, &MacroAssembler::Uqincw, cntw, add, mult, kSRegSize);
3935 IncDecZHelper(config, &MacroAssembler::Uqincd, cntd, add, mult, kDRegSize);
3936 }
3937}
3938
3939TEST_SVE(sve_signed_sat_inc_dec_vec) {
3940 CntFn cnth = &MacroAssembler::Cnth;
3941 CntFn cntw = &MacroAssembler::Cntw;
3942 CntFn cntd = &MacroAssembler::Cntd;
3943 AddSubFn sub = &MacroAssembler::Sqsub;
3944 AddSubFn add = &MacroAssembler::Sqadd;
3945 for (int mult = 1; mult <= 16; mult += 5) {
3946 IncDecZHelper(config, &MacroAssembler::Sqdech, cnth, sub, mult, kHRegSize);
3947 IncDecZHelper(config, &MacroAssembler::Sqdecw, cntw, sub, mult, kSRegSize);
3948 IncDecZHelper(config, &MacroAssembler::Sqdecd, cntd, sub, mult, kDRegSize);
3949 IncDecZHelper(config, &MacroAssembler::Sqinch, cnth, add, mult, kHRegSize);
3950 IncDecZHelper(config, &MacroAssembler::Sqincw, cntw, add, mult, kSRegSize);
3951 IncDecZHelper(config, &MacroAssembler::Sqincd, cntd, add, mult, kDRegSize);
3952 }
3953}
3954
3955typedef void (MacroAssembler::*ArithPredicatedFn)(const ZRegister& zd,
3956 const PRegisterM& pg,
3957 const ZRegister& zn,
3958 const ZRegister& zm);
3959
3960template <typename Td, typename Tg, typename Tn>
3961static void IntBinArithHelper(Test* config,
3962 ArithPredicatedFn macro,
3963 unsigned lane_size_in_bits,
3964 const Tg& pg_inputs,
3965 const Tn& zn_inputs,
3966 const Tn& zm_inputs,
3967 const Td& zd_expected) {
3968 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3969 START();
3970
3971 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
3972 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
3973 InsrHelper(&masm, src_a, zn_inputs);
3974 InsrHelper(&masm, src_b, zm_inputs);
3975
3976 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
3977
3978 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
3979 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
3980 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
3981
3982 // `instr` zd(dst), zd(src_a), zn(src_b)
3983 __ Mov(zd_1, src_a);
3984 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
3985
3986 // `instr` zd(dst), zm(src_a), zd(src_b)
3987 // Based on whether the zd and zm registers are aliased, the instruction
3988 // macro (`Instr`) swaps the operand order if the operation is commutative;
3989 // otherwise it falls back to the reversed instruction, such as subr or divr.
3990 __ Mov(zd_2, src_b);
3991 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
3992
3993 // `instr` zd(dst), zm(src_a), zn(src_b)
3994 // The macro of instructions (`Instr`) automatically selects between `instr`
3995 // and movprfx + `instr` based on whether zd and zn registers are aliased.
3996 // Any generated movprfx instruction is predicated, using the same
3997 // governing predicate register. In order to keep the result predictable,
3998 // initialize the destination register first.
3999 __ Mov(zd_3, src_a);
4000 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
4001
4002 END();
4003
4004 if (CAN_RUN()) {
4005 RUN();
4006 ASSERT_EQUAL_SVE(zd_expected, zd_1);
4007
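    // zd_2 was computed with the operand order swapped and zd aliased to zm,
    // so its inactive (merged) lanes hold the zm values; check it lane by lane.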
4008 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
4009 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
4010 if (!core.HasSVELane(zd_2, lane)) break;
4011 if ((pg_inputs[i] & 1) != 0) {
4012 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_2, lane);
4013 } else {
4014 ASSERT_EQUAL_SVE_LANE(zm_inputs[i], zd_2, lane);
4015 }
4016 }
4017
4018 ASSERT_EQUAL_SVE(zd_expected, zd_3);
4019 }
4020}
4021
4022TEST_SVE(sve_binary_arithmetic_predicated_add) {
4023 // clang-format off
4024 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
4025
4026 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
4027
4028 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
4029
4030 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
4031
4032 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
4033 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
4034
4035 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
4036 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
4037
4038 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
4039 0x1010101010101010, 0x8181818181818181,
4040 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
4041 0x0101010101010101, 0x7f7f7f7fffffffff};
4042
4043 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
4044 0x1010101010101010, 0x0000000000000000,
4045 0x8181818181818181, 0x8080808080808080,
4046 0xffffffffffffffff, 0xffffffffffffffff};
4047
4048 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4049 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4050 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4051 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4052
4053 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
4054
4055 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
4056 0x8180, 0x8f8f, 0x0101, 0x7f7e};
4057
4058 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
4059 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
4060
4061 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
4062 0x2020202020202020, 0x8181818181818181,
4063 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
4064 0x0101010101010100, 0x7f7f7f7ffffffffe};
4065
4066 ArithPredicatedFn fn = &MacroAssembler::Add;
4067 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
4068 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
4069 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
4070 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
4071
4072 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
4073
4074 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
4075 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
4076
4077 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
4078 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
4079
4080 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
4081 0x0000000000000000, 0x8181818181818181,
4082 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
4083 0x0101010101010102, 0x7f7f7f8000000000};
4084
4085 fn = &MacroAssembler::Sub;
4086 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
4087 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
4088 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
4089 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
4090 // clang-format on
4091}
4092
4093TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
4094 // clang-format off
4095 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
4096
4097 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
4098
4099 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
4100 0xff00, 0xba98, 0x5555, 0x4567};
4101
4102 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
4103 0xfe00, 0xabab, 0xcdcd, 0x5678};
4104
4105 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
4106 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
4107
4108 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
4109 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
4110
4111 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
4112 0x5555555555555555, 0x0000000001234567};
4113
4114 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
4115 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4116
4117 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4118 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4119 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4120 int pg_d[] = {1, 0, 1, 1};
4121
4122 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
4123
4124 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
4125 0xff00, 0xba98, 0x5555, 0x5678};
4126
4127 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
4128 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
4129
4130 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4131 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4132
4133 ArithPredicatedFn fn = &MacroAssembler::Umax;
4134 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
4135 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
4136 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
4137 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
4138
4139 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
4140
4141 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
4142 0xfe00, 0xabab, 0x5555, 0x4567};
4143
4144 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
4145 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
4146
4147 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
4148 0x5555555555555555, 0x0000000001234567};
4149 fn = &MacroAssembler::Umin;
4150 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
4151 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
4152 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
4153 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
4154
4155 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
4156
4157 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
4158 0x0100, 0x0eed, 0x5555, 0x1111};
4159
4160 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
4161 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
4162
4163 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4164 0x7878787878787878, 0x0000000011111111};
4165
4166 fn = &MacroAssembler::Uabd;
4167 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
4168 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
4169 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
4170 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
4171 // clang-format on
4172}
4173
4174TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
4175 // clang-format off
4176 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
4177
4178 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
4179
4180 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
4181 INT16_MIN, INT16_MAX, INT16_MAX, 1};
4182
4183 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
4184 INT16_MAX, INT16_MAX - 1, -1, 0};
4185
4186 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
4187 INT32_MIN, INT32_MAX, INT32_MAX, 1};
4188
4189 int zm_s[] = {-1, 0, -1, -INT32_MAX,
4190 INT32_MAX, INT32_MAX - 1, -1, 0};
4191
4192 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4193 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4194
4195 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
4196 INT64_MAX, INT64_MAX - 1, -1, 0};
4197
4198 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4199 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4200 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4201 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4202
4203 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
4204
4205 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
4206 INT16_MAX, INT16_MAX, INT16_MAX, 1};
4207
4208 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
4209 INT32_MAX, INT32_MAX, INT32_MAX, 1};
4210
4211 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
4212 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4213
4214 ArithPredicatedFn fn = &MacroAssembler::Smax;
4215 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
4216 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
4217 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
4218 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
4219
4220 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
4221
4222 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
4223 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
4224
4225 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
4226 INT32_MIN, INT32_MAX, -1, 0};
4227
4228 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4229 INT64_MIN, INT64_MAX - 1, -1, 0};
4230
4231 fn = &MacroAssembler::Smin;
4232 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
4233 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
4234 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
4235 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
4236
4237 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
4238
4239 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
4240
4241 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
4242 0xffffffff, 0x7fffffff, 0x80000000, 1};
4243
4244 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
4245 0x8000000000000000, 1, 0x8000000000000000, 1};
4246
4247 fn = &MacroAssembler::Sabd;
4248 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
4249 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
4250 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
4251 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
4252 // clang-format on
4253}
4254
4255TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
4256 // clang-format off
4257 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4258
4259 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4260
4261 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
4262 0x8000, 0xff00, 0x5555, 0xaaaa};
4263
4264 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
4265 0x5555, 0xaaaa, 0x0001, 0x1234};
4266
4267 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4268 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
4269
4270 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4271 0x12345678, 0x22223333, 0x55556666, 0x77778888};
4272
4273 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
4274 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
4275
4276 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
4277 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
4278
4279 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4280 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4281 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4282 int pg_d[] = {1, 1, 0, 1};
4283
4284 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
4285
4286 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
4287 0x8000, 0xff00, 0x5555, 0x9e88};
4288
4289 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
4290 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
4291
4292 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
4293 0xffffffffffffffff, 0x38e38e38e38e38e4};
4294
4295 ArithPredicatedFn fn = &MacroAssembler::Mul;
4296 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
4297 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
4298 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
4299 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
4300
4301 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
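  // (For instance, 0x20 * 0x80 = 0x1000, so `umulh` keeps the high byte 0x10
  // in that lane; inactive lanes retain the zn value.)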
4302
4303 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
4304 0x2aaa, 0xff00, 0x0000, 0x0c22};
4305
4306 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
4307 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
4308
4309 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
4310 0xffffffffffffffff, 0x71c71c71c71c71c6};
4311
4312 fn = &MacroAssembler::Umulh;
4313 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
4314 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
4315 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
4316 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
4317 // clang-format on
4318}
4319
4320TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
4321 // clang-format off
4322 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
4323
4324 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
4325
4326 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
4327
4328 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
4329
4330 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
4331
4332 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
4333
4334 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
4335
4336 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
4337
4338 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4339 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4340 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4341 int pg_d[] = {1, 1, 0, 1};
4342
4343 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
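  // For example, INT8_MIN * INT8_MAX = -16256 = 0xc080 in 16 bits, so the
  // corresponding expected high byte above is -64 (0xc0).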
4344
4345 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
4346
4347 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
4348
4349 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
4350
TatWai Chong7a0d3672019-10-23 17:35:18 -07004351 ArithPredicatedFn fn = &MacroAssembler::Smulh;
TatWai Chong13634762019-07-16 16:20:45 -07004352 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
4353 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
4354 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4355 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4356 // clang-format on
4357}
4358
4359TEST_SVE(sve_binary_arithmetic_predicated_logical) {
4360 // clang-format off
4361 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4362 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4363
4364 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
4365 0x8000, 0xffff, 0x5555, 0xaaaa};
4366 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
4367 0x5555, 0xaaaa, 0x0000, 0x0800};
4368
4369 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
4370 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
4371
4372 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
4373 0x0001200880ff55aa, 0x0022446688aaccee};
4374 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
4375 0x7fcd80ff55aa0008, 0x1133557799bbddff};
4376
4377 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4378 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4379 int pg_s[] = {1, 1, 1, 0};
4380 int pg_d[] = {1, 1, 0, 1};
4381
4382 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
4383
4384 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
4385 0x0000, 0xffff, 0x0000, 0x0800};
4386
4387 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
4388
4389 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
4390 0x0001200880ff55aa, 0x0022446688aaccee};
4391
TatWai Chong7a0d3672019-10-23 17:35:18 -07004392 ArithPredicatedFn fn = &MacroAssembler::And;
TatWai Chong13634762019-07-16 16:20:45 -07004393 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
4394 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
4395 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
4396 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
4397
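  // `Bic` computes zn & ~zm in active lanes; as with `And` above, inactive
  // lanes retain the zn value.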
4398 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
4399
4400 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
4401 0x8000, 0xffff, 0x5555, 0xa2aa};
4402
4403 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
4404
4405 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
4406 0x0001200880ff55aa, 0x0000000000000000};
4407
4408 fn = &MacroAssembler::Bic;
4409 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
4410 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
4411 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
4412 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
4413
4414 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
4415
4416 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
4417 0xd555, 0xffff, 0x5555, 0xa2aa};
4418
4419 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
4420
4421 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
4422 0x0001200880ff55aa, 0x1111111111111111};
4423
4424 fn = &MacroAssembler::Eor;
4425 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
4426 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
4427 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
4428 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
4429
4430 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
4431
4432 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
4433 0xd555, 0xffff, 0x5555, 0xaaaa};
4434
4435 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
4436
4437 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
4438 0x0001200880ff55aa, 0x1133557799bbddff};
4439
4440 fn = &MacroAssembler::Orr;
4441 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
4442 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
4443 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
4444 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
4445 // clang-format on
4446}
4447
4448TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
4449 // clang-format off
4450 int zn_s[] = {0, 1, -1, 2468,
4451 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
4452 -11111111, 87654321, 0, 0};
4453
4454 int zm_s[] = {1, -1, 1, 1234,
4455 -1, INT32_MIN, 1, -1,
4456 22222222, 80000000, -1, 0};
4457
4458 int64_t zn_d[] = {0, 1, -1, 2468,
4459 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
4460 -11111111, 87654321, 0, 0};
4461
4462 int64_t zm_d[] = {1, -1, 1, 1234,
4463 -1, INT64_MIN, 1, -1,
4464 22222222, 80000000, -1, 0};
4465
4466 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
4467 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
4468
4469 int exp_s[] = {0, 1, -1, 2,
4470 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
4471 0, 1, 0, 0};
4472
4473 int64_t exp_d[] = {0, -1, -1, 2,
4474 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
4475 0, 1, 0, 0};
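  // As the expected values above reflect, `sdiv` returns zero for division by
  // zero, and INT_MIN / -1 wraps back to INT_MIN.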
4476
TatWai Chong7a0d3672019-10-23 17:35:18 -07004477 ArithPredicatedFn fn = &MacroAssembler::Sdiv;
TatWai Chong13634762019-07-16 16:20:45 -07004478 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4479 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4480 // clang-format on
4481}
4482
4483TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
4484 // clang-format off
4485 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
4486 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
4487
4488 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
4489 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
4490
4491 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
4492 0xffffffffffffffff, 0x8000000000000000,
4493 0xffffffffffffffff, 0x8000000000000000,
4494 0xffffffffffffffff, 0xf0000000f0000000};
4495
4496 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
4497 0x8000000000000000, 0x0000000000000002,
4498 0x8888888888888888, 0x0000000000000001,
4499 0x0000000080000000, 0x00000000f0000000};
4500
4501 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
4502 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
4503
4504 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
4505 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
4506
4507 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
4508 0x0000000000000001, 0x4000000000000000,
4509 0x0000000000000001, 0x8000000000000000,
4510 0xffffffffffffffff, 0x0000000100000001};
4511
TatWai Chong7a0d3672019-10-23 17:35:18 -07004512 ArithPredicatedFn fn = &MacroAssembler::Udiv;
TatWai Chong13634762019-07-16 16:20:45 -07004513 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4514 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4515 // clang-format on
4516}
4517
TatWai Chong7a0d3672019-10-23 17:35:18 -07004518typedef void (MacroAssembler::*ArithFn)(const ZRegister& zd,
4519 const ZRegister& zn,
4520 const ZRegister& zm);
TatWai Chong845246b2019-08-08 00:01:58 -07004521
4522template <typename T>
4523static void IntArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07004524 ArithFn macro,
TatWai Chong845246b2019-08-08 00:01:58 -07004525 unsigned lane_size_in_bits,
4526 const T& zn_inputs,
4527 const T& zm_inputs,
4528 const T& zd_expected) {
4529 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4530 START();
4531
4532 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
4533 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
4534 InsrHelper(&masm, zn, zn_inputs);
4535 InsrHelper(&masm, zm, zm_inputs);
4536
4537 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
4538 (masm.*macro)(zd, zn, zm);
4539
4540 END();
4541
4542 if (CAN_RUN()) {
4543 RUN();
4544 ASSERT_EQUAL_SVE(zd_expected, zd);
4545 }
4546}
4547
4548TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
4549 // clang-format off
TatWai Chong6995bfd2019-09-26 10:48:05 +01004550 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
4551 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
4552 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
4553 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
TatWai Chong845246b2019-08-08 00:01:58 -07004554 0x1000000010001010, 0xf0000000f000f0f0};
4555
TatWai Chong7a0d3672019-10-23 17:35:18 -07004556 ArithFn fn = &MacroAssembler::Add;
TatWai Chong845246b2019-08-08 00:01:58 -07004557
4558 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
4559 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
4560 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
4561 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
4562 0x2000000020002020, 0xe0000001e001e1e0};
4563
TatWai Chong6995bfd2019-09-26 10:48:05 +01004564 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
4565 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
4566 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
4567 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004568
4569 fn = &MacroAssembler::Sqadd;
4570
4571 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
4572 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
4573 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
4574 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
4575 0x2000000020002020, 0xe0000001e001e1e0};
4576
TatWai Chong6995bfd2019-09-26 10:48:05 +01004577 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
4578 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
4579 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
4580 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004581
4582 fn = &MacroAssembler::Uqadd;
4583
4584 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
4585 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
4586 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
4587 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
4588 0x2000000020002020, 0xffffffffffffffff};
4589
TatWai Chong6995bfd2019-09-26 10:48:05 +01004590 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
4591 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
4592 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
4593 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004594 // clang-format on
4595}
4596
4597TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
4598 // clang-format off
4599
4600 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
4601 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
4602
4603 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
4604 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
4605
4606 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
4607 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
4608
4609 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
4610 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
4611 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
4612 0xf0000000f000f0f0, 0x5555555555555555};
4613
TatWai Chong7a0d3672019-10-23 17:35:18 -07004614 ArithFn fn = &MacroAssembler::Sub;
TatWai Chong845246b2019-08-08 00:01:58 -07004615
4616 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
4617 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
4618 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
4619 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
4620 0x8eeeeeed8eed8d8e, 0x5555555555555555};
4621
4622 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
4623 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
4624 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
4625 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
4626
4627 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
4628 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
4629 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
4630 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
4631 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
4632
4633 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
4634 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
4635 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
4636 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
4637
4638 fn = &MacroAssembler::Sqsub;
4639
4640 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
4641 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
4642 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
4643 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
4644 0x7fffffffffffffff, 0x8000000000000000};
4645
4646 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
4647 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
4648 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
4649 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
4650
4651 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
4652 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
4653 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
4654 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
4655 0x8000000000000000, 0x7fffffffffffffff};
4656
4657 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
4658 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
4659 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
4660 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
4661
4662 fn = &MacroAssembler::Uqsub;
4663
4664 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
4665 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
4666 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
4667 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
4668 0x0000000000000000, 0x5555555555555555};
4669
4670 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
4671 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
4672 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
4673 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
4674
4675 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
4676 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
4677 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
4678 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
4679 0x7111111271127272, 0x0000000000000000};
4680
4681 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
4682 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
4683 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
4684 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
4685 // clang-format on
4686}
4687
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004688TEST_SVE(sve_rdvl) {
4689 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4690 START();
4691
4692 // Encodable multipliers.
4693 __ Rdvl(x0, 0);
4694 __ Rdvl(x1, 1);
4695 __ Rdvl(x2, 2);
4696 __ Rdvl(x3, 31);
4697 __ Rdvl(x4, -1);
4698 __ Rdvl(x5, -2);
4699 __ Rdvl(x6, -32);
4700
4701 // For unencodable multipliers, the MacroAssembler uses a sequence of
4702 // instructions.
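  // (Illustrative sketch only, not the guaranteed expansion: `Rdvl(x10, 42)`
  // is expected to behave as if by `Mov(x10, 42); Rdvl(tmp, 1);
  // Mul(x10, x10, tmp)`, since the rdvl immediate is limited to [-32, 31].)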
4703 __ Rdvl(x10, 32);
4704 __ Rdvl(x11, -33);
4705 __ Rdvl(x12, 42);
4706 __ Rdvl(x13, -42);
4707
4708 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4709 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4710 // occurs in the macro.
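  // (Concretely, INT64_MAX / 256 == 0x007fffffffffffff and
  // INT64_MIN / 256 == -0x0080000000000000, which are the limit values used
  // below.)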
4711 __ Rdvl(x14, 0x007fffffffffffff);
4712 __ Rdvl(x15, -0x0080000000000000);
4713
4714 END();
4715
4716 if (CAN_RUN()) {
4717 RUN();
4718
4719 uint64_t vl = config->sve_vl_in_bytes();
4720
4721 ASSERT_EQUAL_64(vl * 0, x0);
4722 ASSERT_EQUAL_64(vl * 1, x1);
4723 ASSERT_EQUAL_64(vl * 2, x2);
4724 ASSERT_EQUAL_64(vl * 31, x3);
4725 ASSERT_EQUAL_64(vl * -1, x4);
4726 ASSERT_EQUAL_64(vl * -2, x5);
4727 ASSERT_EQUAL_64(vl * -32, x6);
4728
4729 ASSERT_EQUAL_64(vl * 32, x10);
4730 ASSERT_EQUAL_64(vl * -33, x11);
4731 ASSERT_EQUAL_64(vl * 42, x12);
4732 ASSERT_EQUAL_64(vl * -42, x13);
4733
4734 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
4735 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
4736 }
4737}
4738
4739TEST_SVE(sve_rdpl) {
4740 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4741 START();
4742
4743 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
4744 // Addpl(xd, xzr, ...).
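  // (So, for example, `Rdpl(x1, 8)` should yield x1 = 8 * PL, where
  // PL = VL / kZRegBitsPerPRegBit is the predicate register length in bytes.)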
4745
4746 // Encodable multipliers (as `addvl`).
4747 __ Rdpl(x0, 0);
4748 __ Rdpl(x1, 8);
4749 __ Rdpl(x2, 248);
4750 __ Rdpl(x3, -8);
4751 __ Rdpl(x4, -256);
4752
4753 // Encodable multipliers (as `movz` + `addpl`).
4754 __ Rdpl(x7, 31);
Jacob Bramley889984c2019-10-28 17:28:48 +00004755 __ Rdpl(x8, -31);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004756
4757 // For unencodable multipliers, the MacroAssembler uses a sequence of
4758 // instructions.
4759 __ Rdpl(x10, 42);
4760 __ Rdpl(x11, -42);
4761
4762 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4763 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4764 // occurs in the macro.
4765 __ Rdpl(x12, 0x007fffffffffffff);
4766 __ Rdpl(x13, -0x0080000000000000);
4767
4768 END();
4769
4770 if (CAN_RUN()) {
4771 RUN();
4772
4773 uint64_t vl = config->sve_vl_in_bytes();
4774 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4775 uint64_t pl = vl / kZRegBitsPerPRegBit;
4776
4777 ASSERT_EQUAL_64(pl * 0, x0);
4778 ASSERT_EQUAL_64(pl * 8, x1);
4779 ASSERT_EQUAL_64(pl * 248, x2);
4780 ASSERT_EQUAL_64(pl * -8, x3);
4781 ASSERT_EQUAL_64(pl * -256, x4);
4782
4783 ASSERT_EQUAL_64(pl * 31, x7);
Jacob Bramley889984c2019-10-28 17:28:48 +00004784 ASSERT_EQUAL_64(pl * -31, x8);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004785
4786 ASSERT_EQUAL_64(pl * 42, x10);
4787 ASSERT_EQUAL_64(pl * -42, x11);
4788
4789 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
4790 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
4791 }
4792}
4793
4794TEST_SVE(sve_addvl) {
4795 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4796 START();
4797
4798 uint64_t base = 0x1234567800000000;
4799 __ Mov(x30, base);
4800
4801 // Encodable multipliers.
4802 __ Addvl(x0, x30, 0);
4803 __ Addvl(x1, x30, 1);
4804 __ Addvl(x2, x30, 31);
4805 __ Addvl(x3, x30, -1);
4806 __ Addvl(x4, x30, -32);
4807
4808 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
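  // (Sketch, assuming the comment above: `Addvl(x5, x30, 32)` should behave
  // like `Rdvl(tmp, 32); Add(x5, x30, tmp)` for some scratch register tmp.)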
4809 __ Addvl(x5, x30, 32);
4810 __ Addvl(x6, x30, -33);
4811
4812 // Test the limits of the multiplier supported by the `Rdvl` macro.
4813 __ Addvl(x7, x30, 0x007fffffffffffff);
4814 __ Addvl(x8, x30, -0x0080000000000000);
4815
4816 // Check that xzr behaves correctly.
4817 __ Addvl(x9, xzr, 8);
4818 __ Addvl(x10, xzr, 42);
4819
4820 // Check that sp behaves correctly with encodable and unencodable multipliers.
4821 __ Addvl(sp, sp, -5);
4822 __ Addvl(sp, sp, -37);
4823 __ Addvl(x11, sp, -2);
4824 __ Addvl(sp, x11, 2);
4825 __ Addvl(x12, sp, -42);
4826
4827 // Restore the value of sp.
4828 __ Addvl(sp, x11, 39);
4829 __ Addvl(sp, sp, 5);
4830
4831 // Adjust x11 and x12 to make the test sp-agnostic.
4832 __ Sub(x11, sp, x11);
4833 __ Sub(x12, sp, x12);
4834
4835 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4836 __ Mov(x20, x30);
4837 __ Mov(x21, x30);
4838 __ Mov(x22, x30);
4839 __ Addvl(x20, x20, 4);
4840 __ Addvl(x21, x21, 42);
4841 __ Addvl(x22, x22, -0x0080000000000000);
4842
4843 END();
4844
4845 if (CAN_RUN()) {
4846 RUN();
4847
4848 uint64_t vl = config->sve_vl_in_bytes();
4849
4850 ASSERT_EQUAL_64(base + (vl * 0), x0);
4851 ASSERT_EQUAL_64(base + (vl * 1), x1);
4852 ASSERT_EQUAL_64(base + (vl * 31), x2);
4853 ASSERT_EQUAL_64(base + (vl * -1), x3);
4854 ASSERT_EQUAL_64(base + (vl * -32), x4);
4855
4856 ASSERT_EQUAL_64(base + (vl * 32), x5);
4857 ASSERT_EQUAL_64(base + (vl * -33), x6);
4858
4859 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
4860 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
4861
4862 ASSERT_EQUAL_64(vl * 8, x9);
4863 ASSERT_EQUAL_64(vl * 42, x10);
4864
4865 ASSERT_EQUAL_64(vl * 44, x11);
4866 ASSERT_EQUAL_64(vl * 84, x12);
4867
4868 ASSERT_EQUAL_64(base + (vl * 4), x20);
4869 ASSERT_EQUAL_64(base + (vl * 42), x21);
4870 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
4871
4872 ASSERT_EQUAL_64(base, x30);
4873 }
4874}
4875
4876TEST_SVE(sve_addpl) {
4877 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4878 START();
4879
4880 uint64_t base = 0x1234567800000000;
4881 __ Mov(x30, base);
4882
4883 // Encodable multipliers.
4884 __ Addpl(x0, x30, 0);
4885 __ Addpl(x1, x30, 1);
4886 __ Addpl(x2, x30, 31);
4887 __ Addpl(x3, x30, -1);
4888 __ Addpl(x4, x30, -32);
4889
4890 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
4891 // it falls back to `Rdvl` and `Add`.
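  // (For example, 32 * PL == 4 * VL, so `Addpl(x5, x30, 32)` can reasonably
  // be implemented as `Addvl(x5, x30, 4)`; -33 has no such equivalent, hence
  // the fallback.)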
4892 __ Addpl(x5, x30, 32);
4893 __ Addpl(x6, x30, -33);
4894
4895 // Test the limits of the multiplier supported by the `Rdvl` macro.
4896 __ Addpl(x7, x30, 0x007fffffffffffff);
4897 __ Addpl(x8, x30, -0x0080000000000000);
4898
4899 // Check that xzr behaves correctly.
4900 __ Addpl(x9, xzr, 8);
4901 __ Addpl(x10, xzr, 42);
4902
4903 // Check that sp behaves correctly with encodable and unencodable multipliers.
4904 __ Addpl(sp, sp, -5);
4905 __ Addpl(sp, sp, -37);
4906 __ Addpl(x11, sp, -2);
4907 __ Addpl(sp, x11, 2);
4908 __ Addpl(x12, sp, -42);
4909
4910 // Restore the value of sp.
4911 __ Addpl(sp, x11, 39);
4912 __ Addpl(sp, sp, 5);
4913
4914 // Adjust x11 and x12 to make the test sp-agnostic.
4915 __ Sub(x11, sp, x11);
4916 __ Sub(x12, sp, x12);
4917
4918 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4919 __ Mov(x20, x30);
4920 __ Mov(x21, x30);
4921 __ Mov(x22, x30);
4922 __ Addpl(x20, x20, 4);
4923 __ Addpl(x21, x21, 42);
4924 __ Addpl(x22, x22, -0x0080000000000000);
4925
4926 END();
4927
4928 if (CAN_RUN()) {
4929 RUN();
4930
4931 uint64_t vl = config->sve_vl_in_bytes();
4932 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4933 uint64_t pl = vl / kZRegBitsPerPRegBit;
4934
4935 ASSERT_EQUAL_64(base + (pl * 0), x0);
4936 ASSERT_EQUAL_64(base + (pl * 1), x1);
4937 ASSERT_EQUAL_64(base + (pl * 31), x2);
4938 ASSERT_EQUAL_64(base + (pl * -1), x3);
4939 ASSERT_EQUAL_64(base + (pl * -32), x4);
4940
4941 ASSERT_EQUAL_64(base + (pl * 32), x5);
4942 ASSERT_EQUAL_64(base + (pl * -33), x6);
4943
4944 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
4945 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
4946
4947 ASSERT_EQUAL_64(pl * 8, x9);
4948 ASSERT_EQUAL_64(pl * 42, x10);
4949
4950 ASSERT_EQUAL_64(pl * 44, x11);
4951 ASSERT_EQUAL_64(pl * 84, x12);
4952
4953 ASSERT_EQUAL_64(base + (pl * 4), x20);
4954 ASSERT_EQUAL_64(base + (pl * 42), x21);
4955 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
4956
4957 ASSERT_EQUAL_64(base, x30);
4958 }
4959}
4960
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004961TEST_SVE(sve_calculate_sve_address) {
4962 // Shadow the `MacroAssembler` type so that the test macros work without
4963 // modification.
4964 typedef CalculateSVEAddressMacroAssembler MacroAssembler;
4965
Jacob Bramley1314c462019-08-08 10:54:16 +01004966 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004967 START(); // NOLINT(clang-diagnostic-local-type-template-args)
Jacob Bramley1314c462019-08-08 10:54:16 +01004968
4969 uint64_t base = 0x1234567800000000;
4970 __ Mov(x28, base);
4971 __ Mov(x29, 48);
4972 __ Mov(x30, -48);
4973
4974 // Simple scalar (or equivalent) cases.
4975
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004976 __ CalculateSVEAddress(x0, SVEMemOperand(x28));
4977 __ CalculateSVEAddress(x1, SVEMemOperand(x28, 0));
4978 __ CalculateSVEAddress(x2, SVEMemOperand(x28, 0, SVE_MUL_VL));
4979 __ CalculateSVEAddress(x3, SVEMemOperand(x28, 0, SVE_MUL_VL), 3);
4980 __ CalculateSVEAddress(x4, SVEMemOperand(x28, xzr));
4981 __ CalculateSVEAddress(x5, SVEMemOperand(x28, xzr, LSL, 42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004982
4983 // scalar-plus-immediate
4984
4985 // Unscaled immediates, handled with `Add`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004986 __ CalculateSVEAddress(x6, SVEMemOperand(x28, 42));
4987 __ CalculateSVEAddress(x7, SVEMemOperand(x28, -42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004988 // Scaled immediates, handled with `Addvl` or `Addpl`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004989 __ CalculateSVEAddress(x8, SVEMemOperand(x28, 31, SVE_MUL_VL), 0);
4990 __ CalculateSVEAddress(x9, SVEMemOperand(x28, -32, SVE_MUL_VL), 0);
Jacob Bramley1314c462019-08-08 10:54:16 +01004991 // Out of `addvl` or `addpl` range.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004992 __ CalculateSVEAddress(x10, SVEMemOperand(x28, 42, SVE_MUL_VL), 0);
4993 __ CalculateSVEAddress(x11, SVEMemOperand(x28, -42, SVE_MUL_VL), 0);
4994 // As above, for VL-based accesses smaller than a Z register.
4995 VIXL_STATIC_ASSERT(kZRegBitsPerPRegBitLog2 == 3);
4996 __ CalculateSVEAddress(x12, SVEMemOperand(x28, -32 * 8, SVE_MUL_VL), 3);
4997 __ CalculateSVEAddress(x13, SVEMemOperand(x28, -42 * 8, SVE_MUL_VL), 3);
4998 __ CalculateSVEAddress(x14, SVEMemOperand(x28, -32 * 4, SVE_MUL_VL), 2);
4999 __ CalculateSVEAddress(x15, SVEMemOperand(x28, -42 * 4, SVE_MUL_VL), 2);
5000 __ CalculateSVEAddress(x18, SVEMemOperand(x28, -32 * 2, SVE_MUL_VL), 1);
5001 __ CalculateSVEAddress(x19, SVEMemOperand(x28, -42 * 2, SVE_MUL_VL), 1);
Jacob Bramley1314c462019-08-08 10:54:16 +01005002
5003 // scalar-plus-scalar
5004
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005005 __ CalculateSVEAddress(x20, SVEMemOperand(x28, x29));
5006 __ CalculateSVEAddress(x21, SVEMemOperand(x28, x30));
5007 __ CalculateSVEAddress(x22, SVEMemOperand(x28, x29, LSL, 8));
5008 __ CalculateSVEAddress(x23, SVEMemOperand(x28, x30, LSL, 8));
Jacob Bramley1314c462019-08-08 10:54:16 +01005009
5010 // In-place updates, to stress scratch register allocation.
5011
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005012 __ Mov(x24, 0xabcd000000000000);
5013 __ Mov(x25, 0xabcd101100000000);
5014 __ Mov(x26, 0xabcd202200000000);
5015 __ Mov(x27, 0xabcd303300000000);
5016 __ Mov(x28, 0xabcd404400000000);
5017 __ Mov(x29, 0xabcd505500000000);
Jacob Bramley1314c462019-08-08 10:54:16 +01005018
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005019 __ CalculateSVEAddress(x24, SVEMemOperand(x24));
5020 __ CalculateSVEAddress(x25, SVEMemOperand(x25, 0x42));
5021 __ CalculateSVEAddress(x26, SVEMemOperand(x26, 3, SVE_MUL_VL), 0);
5022 __ CalculateSVEAddress(x27, SVEMemOperand(x27, 0x42, SVE_MUL_VL), 3);
5023 __ CalculateSVEAddress(x28, SVEMemOperand(x28, x30));
5024 __ CalculateSVEAddress(x29, SVEMemOperand(x29, x30, LSL, 4));
Jacob Bramley1314c462019-08-08 10:54:16 +01005025
5026 END();
5027
5028 if (CAN_RUN()) {
5029 RUN();
5030
5031 uint64_t vl = config->sve_vl_in_bytes();
5032 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5033 uint64_t pl = vl / kZRegBitsPerPRegBit;
5034
5035 // Simple scalar (or equivalent) cases.
5036 ASSERT_EQUAL_64(base, x0);
5037 ASSERT_EQUAL_64(base, x1);
5038 ASSERT_EQUAL_64(base, x2);
5039 ASSERT_EQUAL_64(base, x3);
5040 ASSERT_EQUAL_64(base, x4);
5041 ASSERT_EQUAL_64(base, x5);
5042
5043 // scalar-plus-immediate
5044 ASSERT_EQUAL_64(base + 42, x6);
5045 ASSERT_EQUAL_64(base - 42, x7);
5046 ASSERT_EQUAL_64(base + (31 * vl), x8);
5047 ASSERT_EQUAL_64(base - (32 * vl), x9);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005048 ASSERT_EQUAL_64(base + (42 * vl), x10);
5049 ASSERT_EQUAL_64(base - (42 * vl), x11);
5050 ASSERT_EQUAL_64(base - (32 * vl), x12);
Jacob Bramley1314c462019-08-08 10:54:16 +01005051 ASSERT_EQUAL_64(base - (42 * vl), x13);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005052 ASSERT_EQUAL_64(base - (32 * vl), x14);
5053 ASSERT_EQUAL_64(base - (42 * vl), x15);
5054 ASSERT_EQUAL_64(base - (32 * vl), x18);
5055 ASSERT_EQUAL_64(base - (42 * vl), x19);
Jacob Bramley1314c462019-08-08 10:54:16 +01005056
5057 // scalar-plus-scalar
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005058 ASSERT_EQUAL_64(base + 48, x20);
5059 ASSERT_EQUAL_64(base - 48, x21);
5060 ASSERT_EQUAL_64(base + (48 << 8), x22);
5061 ASSERT_EQUAL_64(base - (48 << 8), x23);
Jacob Bramley1314c462019-08-08 10:54:16 +01005062
5063 // In-place updates.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005064 ASSERT_EQUAL_64(0xabcd000000000000, x24);
5065 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x25);
5066 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x26);
5067 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x27);
5068 ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28);
5069 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29);
Jacob Bramley1314c462019-08-08 10:54:16 +01005070 }
5071}
5072
TatWai Chong4f28df72019-08-14 17:50:30 -07005073TEST_SVE(sve_permute_vector_unpredicated) {
5074 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5075 START();
5076
Jacob Bramleye4983d42019-10-08 10:56:15 +01005077 // Initialise registers with known values first.
5078 __ Dup(z1.VnB(), 0x11);
5079 __ Dup(z2.VnB(), 0x22);
5080 __ Dup(z3.VnB(), 0x33);
5081 __ Dup(z4.VnB(), 0x44);
5082
TatWai Chong4f28df72019-08-14 17:50:30 -07005083 __ Mov(x0, 0x0123456789abcdef);
5084 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
5085 __ Insr(z1.VnS(), w0);
5086 __ Insr(z2.VnD(), x0);
5087 __ Insr(z3.VnH(), h0);
5088 __ Insr(z4.VnD(), d0);
5089
5090 uint64_t inputs[] = {0xfedcba9876543210,
5091 0x0123456789abcdef,
5092 0x8f8e8d8c8b8a8988,
5093 0x8786858483828180};
5094
5095 // Initialise the whole register to a distinguishable value first.
5096 __ Dup(z9.VnB(), 0xff);
5097 InsrHelper(&masm, z9.VnD(), inputs);
5098
5099 __ Rev(z5.VnB(), z9.VnB());
5100 __ Rev(z6.VnH(), z9.VnH());
5101 __ Rev(z7.VnS(), z9.VnS());
5102 __ Rev(z8.VnD(), z9.VnD());
5103
5104 int index[7] = {22, 7, 7, 3, 1, 1, 63};
5105 // Broadcast an element from within the input array.
5106 __ Dup(z10.VnB(), z9.VnB(), index[0]);
5107 __ Dup(z11.VnH(), z9.VnH(), index[1]);
5108 __ Dup(z12.VnS(), z9.VnS(), index[2]);
5109 __ Dup(z13.VnD(), z9.VnD(), index[3]);
5110 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
5111 // Test dst == src
5112 __ Mov(z15, z9);
5113 __ Dup(z15.VnS(), z15.VnS(), index[5]);
5114 // Select an element beyond the input array.
5115 __ Dup(z16.VnB(), z9.VnB(), index[6]);
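  // (The checks below expect zero lanes whenever the selected index lies
  // outside the current vector length.)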
5116
5117 END();
5118
5119 if (CAN_RUN()) {
5120 RUN();
5121
5122 // Insr
Jacob Bramleye4983d42019-10-08 10:56:15 +01005123 uint64_t z1_expected[] = {0x1111111111111111, 0x1111111189abcdef};
5124 uint64_t z2_expected[] = {0x2222222222222222, 0x0123456789abcdef};
5125 uint64_t z3_expected[] = {0x3333333333333333, 0x3333333333333456};
5126 uint64_t z4_expected[] = {0x4444444444444444, 0x7ffaaaaa22223456};
TatWai Chong4f28df72019-08-14 17:50:30 -07005127 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
5128 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
5129 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
5130 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
5131
5132 // Rev
5133 int lane_count = core.GetSVELaneCount(kBRegSize);
5134 for (int i = 0; i < lane_count; i++) {
5135 uint64_t expected =
5136 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
5137 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
5138 ASSERT_EQUAL_64(expected, input);
5139 }
5140
5141 lane_count = core.GetSVELaneCount(kHRegSize);
5142 for (int i = 0; i < lane_count; i++) {
5143 uint64_t expected =
5144 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
5145 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
5146 ASSERT_EQUAL_64(expected, input);
5147 }
5148
5149 lane_count = core.GetSVELaneCount(kSRegSize);
5150 for (int i = 0; i < lane_count; i++) {
5151 uint64_t expected =
5152 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
5153 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
5154 ASSERT_EQUAL_64(expected, input);
5155 }
5156
5157 lane_count = core.GetSVELaneCount(kDRegSize);
5158 for (int i = 0; i < lane_count; i++) {
5159 uint64_t expected =
5160 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
5161 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
5162 ASSERT_EQUAL_64(expected, input);
5163 }
5164
5165 // Dup
5166 unsigned vl = config->sve_vl_in_bits();
5167 lane_count = core.GetSVELaneCount(kBRegSize);
5168 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
5169 for (int i = 0; i < lane_count; i++) {
5170 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
5171 }
5172
5173 lane_count = core.GetSVELaneCount(kHRegSize);
5174 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
5175 for (int i = 0; i < lane_count; i++) {
5176 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
5177 }
5178
5179 lane_count = core.GetSVELaneCount(kSRegSize);
5180 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
5181 for (int i = 0; i < lane_count; i++) {
5182 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
5183 }
5184
5185 lane_count = core.GetSVELaneCount(kDRegSize);
5186 uint64_t expected_z13 =
5187 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
5188 for (int i = 0; i < lane_count; i++) {
5189 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
5190 }
5191
5192 lane_count = core.GetSVELaneCount(kDRegSize);
5193 uint64_t expected_z14_lo = 0;
5194 uint64_t expected_z14_hi = 0;
5195 if (vl > (index[4] * kQRegSize)) {
5196 expected_z14_lo = 0x0123456789abcdef;
5197 expected_z14_hi = 0xfedcba9876543210;
5198 }
5199 for (int i = 0; i < lane_count; i += 2) {
5200 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
5201 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
5202 }
5203
5204 lane_count = core.GetSVELaneCount(kSRegSize);
5205 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
5206 for (int i = 0; i < lane_count; i++) {
5207 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
5208 }
5209
5210 lane_count = core.GetSVELaneCount(kBRegSize);
5211 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
5212 for (int i = 0; i < lane_count; i++) {
5213 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
5214 }
5215 }
5216}
5217
5218TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
5219 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5220 START();
5221
5222 uint64_t z9_inputs[] = {0xfedcba9876543210,
5223 0x0123456789abcdef,
5224 0x8f8e8d8c8b8a8988,
5225 0x8786858483828180};
5226 InsrHelper(&masm, z9.VnD(), z9_inputs);
5227
5228 __ Sunpkhi(z10.VnH(), z9.VnB());
5229 __ Sunpkhi(z11.VnS(), z9.VnH());
5230 __ Sunpkhi(z12.VnD(), z9.VnS());
5231
5232 __ Sunpklo(z13.VnH(), z9.VnB());
5233 __ Sunpklo(z14.VnS(), z9.VnH());
5234 __ Sunpklo(z15.VnD(), z9.VnS());
5235
5236 __ Uunpkhi(z16.VnH(), z9.VnB());
5237 __ Uunpkhi(z17.VnS(), z9.VnH());
5238 __ Uunpkhi(z18.VnD(), z9.VnS());
5239
5240 __ Uunpklo(z19.VnH(), z9.VnB());
5241 __ Uunpklo(z20.VnS(), z9.VnH());
5242 __ Uunpklo(z21.VnD(), z9.VnS());
5243
5244 END();
5245
5246 if (CAN_RUN()) {
5247 RUN();
5248
5249 // Sunpkhi
5250 int lane_count = core.GetSVELaneCount(kHRegSize);
5251 for (int i = lane_count - 1; i >= 0; i--) {
5252 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
5253 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5254 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5255 ASSERT_EQUAL_64(expected, input);
5256 }
5257
5258 lane_count = core.GetSVELaneCount(kSRegSize);
5259 for (int i = lane_count - 1; i >= 0; i--) {
5260 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
5261 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5262 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5263 ASSERT_EQUAL_64(expected, input);
5264 }
5265
5266 lane_count = core.GetSVELaneCount(kDRegSize);
5267 for (int i = lane_count - 1; i >= 0; i--) {
5268 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
5269 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5270 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5271 ASSERT_EQUAL_64(expected, input);
5272 }
5273
5274 // Sunpklo
5275 lane_count = core.GetSVELaneCount(kHRegSize);
5276 for (int i = lane_count - 1; i >= 0; i--) {
5277 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
5278 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5279 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5280 ASSERT_EQUAL_64(expected, input);
5281 }
5282
5283 lane_count = core.GetSVELaneCount(kSRegSize);
5284 for (int i = lane_count - 1; i >= 0; i--) {
5285 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
5286 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5287 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5288 ASSERT_EQUAL_64(expected, input);
5289 }
5290
5291 lane_count = core.GetSVELaneCount(kDRegSize);
5292 for (int i = lane_count - 1; i >= 0; i--) {
5293 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
5294 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5295 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5296 ASSERT_EQUAL_64(expected, input);
5297 }
5298
5299 // Uunpkhi
5300 lane_count = core.GetSVELaneCount(kHRegSize);
5301 for (int i = lane_count - 1; i >= 0; i--) {
5302 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
5303 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5304 ASSERT_EQUAL_64(expected, input);
5305 }
5306
5307 lane_count = core.GetSVELaneCount(kSRegSize);
5308 for (int i = lane_count - 1; i >= 0; i--) {
5309 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
5310 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5311 ASSERT_EQUAL_64(expected, input);
5312 }
5313
5314 lane_count = core.GetSVELaneCount(kDRegSize);
5315 for (int i = lane_count - 1; i >= 0; i--) {
5316 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
5317 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5318 ASSERT_EQUAL_64(expected, input);
5319 }
5320
5321 // Uunpklo
5322 lane_count = core.GetSVELaneCount(kHRegSize);
5323 for (int i = lane_count - 1; i >= 0; i--) {
5324 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
5325 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5326 ASSERT_EQUAL_64(expected, input);
5327 }
5328
5329 lane_count = core.GetSVELaneCount(kSRegSize);
5330 for (int i = lane_count - 1; i >= 0; i--) {
5331 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
5332 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5333 ASSERT_EQUAL_64(expected, input);
5334 }
5335
5336 lane_count = core.GetSVELaneCount(kDRegSize);
5337 for (int i = lane_count - 1; i >= 0; i--) {
5338 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
5339 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5340 ASSERT_EQUAL_64(expected, input);
5341 }
5342 }
5343}
5344
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005345TEST_SVE(sve_cnot_not) {
5346 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5347 START();
5348
5349 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
5350
5351 // For simplicity, we re-use the same pg for various lane sizes.
5352 // For D lanes: 1, 1, 0
5353 // For S lanes: 1, 1, 1, 0, 0
5354 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
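  // (For lanes wider than a byte, only the predicate bit governing the
  // lowest-numbered byte of each lane is significant, which is how the
  // per-lane-size patterns above are derived from pg_in.)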
5355 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5356 Initialise(&masm, p0.VnB(), pg_in);
5357 PRegisterM pg = p0.Merging();
5358
5359 // These are merging operations, so we have to initialise the result register.
5360 // We use a mixture of constructive and destructive operations.
5361
5362 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005363 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005364 __ Mov(z30, z31);
5365
5366 // For constructive operations, use a different initial result value.
5367 __ Index(z29.VnB(), 0, -1);
5368
5369 __ Mov(z0, z31);
5370 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
5371 __ Mov(z1, z29);
5372 __ Cnot(z1.VnH(), pg, z31.VnH());
5373 __ Mov(z2, z31);
5374 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
5375 __ Mov(z3, z29);
5376 __ Cnot(z3.VnD(), pg, z31.VnD());
5377
5378 __ Mov(z4, z29);
5379 __ Not(z4.VnB(), pg, z31.VnB());
5380 __ Mov(z5, z31);
5381 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
5382 __ Mov(z6, z29);
5383 __ Not(z6.VnS(), pg, z31.VnS());
5384 __ Mov(z7, z31);
5385 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
5386
5387 END();
5388
5389 if (CAN_RUN()) {
5390 RUN();
5391
5392 // Check that constructive operations preserve their inputs.
5393 ASSERT_EQUAL_SVE(z30, z31);
5394
5395 // clang-format off
5396
5397 // Cnot (B) destructive
5398 uint64_t expected_z0[] =
5399 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5400 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
5401 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5402
5403 // Cnot (H)
5404 uint64_t expected_z1[] =
5405 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5406 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
5407 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5408
5409 // Cnot (S) destructive
5410 uint64_t expected_z2[] =
5411 // pg: 0 1 1 1 0 0
5412 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
5413 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5414
5415 // Cnot (D)
5416 uint64_t expected_z3[] =
5417 // pg: 1 1 0
5418 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
5419 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5420
5421 // Not (B)
5422 uint64_t expected_z4[] =
5423 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5424 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
5425 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5426
5427 // Not (H) destructive
5428 uint64_t expected_z5[] =
5429 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5430 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
5431 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5432
5433 // Not (S)
5434 uint64_t expected_z6[] =
5435 // pg: 0 1 1 1 0 0
5436 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
5437 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
5438
5439 // Not (D) destructive
5440 uint64_t expected_z7[] =
5441 // pg: 1 1 0
5442 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
5443 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
5444
5445 // clang-format on
5446 }
5447}
5448
5449TEST_SVE(sve_fabs_fneg) {
5450 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5451 START();
5452
5453 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
5454 // NaNs, but fabs and fneg do not.
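  // (For example, 0xfc01 is an FP16 signalling NaN; Fabs should simply clear
  // the sign bit to give 0x7c01, leaving the payload intact.)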
5455 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
5456 0xfff00000ff80fc01, // Signalling NaNs.
5457 0x123456789abcdef0};
5458
5459 // For simplicity, we re-use the same pg for various lane sizes.
5460 // For D lanes: 1, 1, 0
5461 // For S lanes: 1, 1, 1, 0, 0
5462 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5463 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5464 Initialise(&masm, p0.VnB(), pg_in);
5465 PRegisterM pg = p0.Merging();
5466
5467 // These are merging operations, so we have to initialise the result register.
5468 // We use a mixture of constructive and destructive operations.
5469
5470 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005471 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005472 __ Mov(z30, z31);
5473
5474 // For constructive operations, use a different initial result value.
5475 __ Index(z29.VnB(), 0, -1);
5476
5477 __ Mov(z0, z29);
5478 __ Fabs(z0.VnH(), pg, z31.VnH());
5479 __ Mov(z1, z31);
5480 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
5481 __ Mov(z2, z29);
5482 __ Fabs(z2.VnD(), pg, z31.VnD());
5483
5484 __ Mov(z3, z31);
5485 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
5486 __ Mov(z4, z29);
5487 __ Fneg(z4.VnS(), pg, z31.VnS());
5488 __ Mov(z5, z31);
5489 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
5490
5491 END();
5492
5493 if (CAN_RUN()) {
5494 RUN();
5495
5496 // Check that constructive operations preserve their inputs.
5497 ASSERT_EQUAL_SVE(z30, z31);
5498
5499 // clang-format off
5500
5501 // Fabs (H)
5502 uint64_t expected_z0[] =
5503 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5504 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
5505 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5506
5507 // Fabs (S) destructive
5508 uint64_t expected_z1[] =
5509 // pg: 0 1 1 1 0 0
5510 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
5511 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5512
5513 // Fabs (D)
5514 uint64_t expected_z2[] =
5515 // pg: 1 1 0
5516 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
5517 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5518
5519 // Fneg (H) destructive
5520 uint64_t expected_z3[] =
5521 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5522 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
5523 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5524
5525 // Fneg (S)
5526 uint64_t expected_z4[] =
5527 // pg: 0 1 1 1 0 0
5528 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
5529 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5530
5531 // Fneg (D) destructive
5532 uint64_t expected_z5[] =
5533 // pg: 1 1 0
5534 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
5535 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5536
5537 // clang-format on
5538 }
5539}
5540
5541TEST_SVE(sve_cls_clz_cnt) {
5542 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5543 START();
5544
5545 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5546
5547 // For simplicity, we re-use the same pg for various lane sizes.
5548 // For D lanes: 1, 1, 0
5549 // For S lanes: 1, 1, 1, 0, 0
5550 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5551 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5552 Initialise(&masm, p0.VnB(), pg_in);
5553 PRegisterM pg = p0.Merging();
5554
5555 // These are merging operations, so we have to initialise the result register.
5556 // We use a mixture of constructive and destructive operations.
5557
5558 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005559 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005560 __ Mov(z30, z31);
5561
5562 // For constructive operations, use a different initial result value.
5563 __ Index(z29.VnB(), 0, -1);
5564
5565 __ Mov(z0, z29);
5566 __ Cls(z0.VnB(), pg, z31.VnB());
5567 __ Mov(z1, z31);
5568 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
5569 __ Mov(z2, z29);
5570 __ Cnt(z2.VnS(), pg, z31.VnS());
5571 __ Mov(z3, z31);
5572 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
5573
5574 END();
5575
5576 if (CAN_RUN()) {
5577 RUN();
5578 // Check that constructive operations preserve their inputs.
5579 ASSERT_EQUAL_SVE(z30, z31);
5580
5581 // clang-format off
5582
5583 // cls (B)
5584 uint8_t expected_z0[] =
5585 // pg: 0 0 0 0 1 0 1 1
5586 // pg: 1 0 0 1 0 1 1 1
5587 // pg: 0 0 1 0 1 1 1 0
5588 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
5589 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
5590 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
5591 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
5592
5593 // clz (H) destructive
5594 uint16_t expected_z1[] =
5595 // pg: 0 0 0 1
5596 // pg: 0 1 1 1
5597 // pg: 0 0 1 0
5598 {0x0000, 0x0000, 0x0000, 16,
5599 0xfefc, 0, 0, 0,
5600 0x1234, 0x5678, 0, 0xdef0};
5601 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
5602
5603 // cnt (S)
5604 uint32_t expected_z2[] =
5605 // pg: 0 1
5606 // pg: 1 1
5607 // pg: 0 0
5608 {0xe9eaebec, 0,
5609 22, 16,
5610 0xf9fafbfc, 0xfdfeff00};
5611 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
5612
5613 // cnt (D) destructive
5614 uint64_t expected_z3[] =
5615 // pg: 1 1 0
5616 { 0, 38, 0x123456789abcdef0};
5617 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5618
5619 // clang-format on
5620 }
5621}
5622
5623TEST_SVE(sve_sxt) {
5624 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5625 START();
5626
5627 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5628
5629 // For simplicity, we re-use the same pg for various lane sizes.
5630 // For D lanes: 1, 1, 0
5631 // For S lanes: 1, 1, 1, 0, 0
5632 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5633 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5634 Initialise(&masm, p0.VnB(), pg_in);
5635 PRegisterM pg = p0.Merging();
5636
5637 // These are merging operations, so we have to initialise the result register.
5638 // We use a mixture of constructive and destructive operations.
5639
5640 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005641 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005642 __ Mov(z30, z31);
5643
5644 // For constructive operations, use a different initial result value.
5645 __ Index(z29.VnB(), 0, -1);
5646
5647 __ Mov(z0, z31);
5648 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
5649 __ Mov(z1, z29);
5650 __ Sxtb(z1.VnS(), pg, z31.VnS());
5651 __ Mov(z2, z31);
5652 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
5653 __ Mov(z3, z29);
5654 __ Sxth(z3.VnS(), pg, z31.VnS());
5655 __ Mov(z4, z31);
5656 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
5657 __ Mov(z5, z29);
5658 __ Sxtw(z5.VnD(), pg, z31.VnD());
5659
5660 END();
5661
5662 if (CAN_RUN()) {
5663 RUN();
5664 // Check that constructive operations preserve their inputs.
5665 ASSERT_EQUAL_SVE(z30, z31);
5666
5667 // clang-format off
5668
5669 // Sxtb (H) destructive
5670 uint64_t expected_z0[] =
5671 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5672 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
5673 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5674
5675 // Sxtb (S)
5676 uint64_t expected_z1[] =
5677 // pg: 0 1 1 1 0 0
5678 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
5679 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5680
5681 // Sxtb (D) destructive
5682 uint64_t expected_z2[] =
5683 // pg: 1 1 0
5684 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
5685 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5686
5687 // Sxth (S)
5688 uint64_t expected_z3[] =
5689 // pg: 0 1 1 1 0 0
5690 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
5691 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5692
5693 // Sxth (D) destructive
5694 uint64_t expected_z4[] =
5695 // pg: 1 1 0
5696 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
5697 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5698
5699 // Sxtw (D)
5700 uint64_t expected_z5[] =
5701 // pg: 1 1 0
5702 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
5703 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5704
5705 // clang-format on
5706 }
5707}
5708
5709TEST_SVE(sve_uxt) {
5710 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5711 START();
5712
5713 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5714
5715 // For simplicity, we re-use the same pg for various lane sizes.
5716 // For D lanes: 1, 1, 0
5717 // For S lanes: 1, 1, 1, 0, 0
5718 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5719 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5720 Initialise(&masm, p0.VnB(), pg_in);
5721 PRegisterM pg = p0.Merging();
5722
5723 // These are merging operations, so we have to initialise the result register.
5724 // We use a mixture of constructive and destructive operations.
5725
5726 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005727 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005728 __ Mov(z30, z31);
5729
5730 // For constructive operations, use a different initial result value.
5731 __ Index(z29.VnB(), 0, -1);
5732
5733 __ Mov(z0, z29);
5734 __ Uxtb(z0.VnH(), pg, z31.VnH());
5735 __ Mov(z1, z31);
5736 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
5737 __ Mov(z2, z29);
5738 __ Uxtb(z2.VnD(), pg, z31.VnD());
5739 __ Mov(z3, z31);
5740 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
5741 __ Mov(z4, z29);
5742 __ Uxth(z4.VnD(), pg, z31.VnD());
5743 __ Mov(z5, z31);
5744 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
5745
5746 END();
5747
5748 if (CAN_RUN()) {
5749 RUN();
5750 // clang-format off
5751
5752 // Uxtb (H)
5753 uint64_t expected_z0[] =
5754 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5755 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
5756 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5757
5758 // Uxtb (S) destructive
5759 uint64_t expected_z1[] =
5760 // pg: 0 1 1 1 0 0
5761 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
5762 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5763
5764 // Uxtb (D)
5765 uint64_t expected_z2[] =
5766 // pg: 1 1 0
5767 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
5768 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5769
5770 // Uxth (S) destructive
5771 uint64_t expected_z3[] =
5772 // pg: 0 1 1 1 0 0
5773 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
5774 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5775
5776 // Uxth (D)
5777 uint64_t expected_z4[] =
5778 // pg: 1 1 0
5779 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
5780 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5781
5782 // Uxtw (D) destructive
5783 uint64_t expected_z5[] =
5784 // pg: 1 1 0
5785 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
5786 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5787
5788 // clang-format on
5789 }
5790}
5791
5792TEST_SVE(sve_abs_neg) {
5793 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5794 START();
5795
5796 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5797
5798 // For simplicity, we re-use the same pg for various lane sizes.
5799 // For D lanes: 1, 1, 0
5800 // For S lanes: 1, 1, 1, 0, 0
5801 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5802 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5803 Initialise(&masm, p0.VnB(), pg_in);
5804 PRegisterM pg = p0.Merging();
5805
5806 InsrHelper(&masm, z31.VnD(), in);
5807
5808 // These are merging operations, so we have to initialise the result register.
5809 // We use a mixture of constructive and destructive operations.
5810
5811 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005812 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005813 __ Mov(z30, z31);
5814
5815 // For constructive operations, use a different initial result value.
5816 __ Index(z29.VnB(), 0, -1);
5817
5818 __ Mov(z0, z31);
5819 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
5820 __ Mov(z1, z29);
5821 __ Abs(z1.VnB(), pg, z31.VnB());
5822
5823 __ Mov(z2, z31);
5824 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
5825 __ Mov(z3, z29);
5826 __ Neg(z3.VnS(), pg, z31.VnS());
5827
Jacob Bramleyc0066272019-09-30 16:30:47 +01005828 // The unpredicated form of `Neg` is implemented using `subr`.
5829 __ Mov(z4, z31);
5830 __ Neg(z4.VnB(), z4.VnB()); // destructive
5831 __ Mov(z5, z29);
5832 __ Neg(z5.VnD(), z31.VnD());
5833
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005834 END();
5835
5836 if (CAN_RUN()) {
5837 RUN();
Jacob Bramleyc0066272019-09-30 16:30:47 +01005838
5839 ASSERT_EQUAL_SVE(z30, z31);
5840
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005841 // clang-format off
5842
5843 // Abs (D) destructive
5844 uint64_t expected_z0[] =
5845 // pg: 1 1 0
5846 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
5847 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5848
5849 // Abs (B)
5850 uint64_t expected_z1[] =
5851 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5852 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
5853 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5854
5855 // Neg (H) destructive
5856 uint64_t expected_z2[] =
5857 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5858 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
5859 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5860
5861 // Neg (S)
5862 uint64_t expected_z3[] =
5863 // pg: 0 1 1 1 0 0
5864 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
5865 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5866
Jacob Bramleyc0066272019-09-30 16:30:47 +01005867 // Neg (B) destructive, unpredicated
5868 uint64_t expected_z4[] =
5869 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
5870 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5871
5872 // Neg (D) unpredicated
5873 uint64_t expected_z5[] =
5874 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
5875 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5876
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005877 // clang-format on
5878 }
5879}
5880
Jacob Bramley0093bb92019-10-04 15:54:10 +01005881TEST_SVE(sve_cpy) {
5882 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5883 START();
5884
5885 // For simplicity, we re-use the same pg for various lane sizes.
5886 // For D lanes: 0, 1, 1
5887 // For S lanes: 0, 1, 1, 0, 1
5888 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5889 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5890
5891 PRegisterM pg = p7.Merging();
5892 Initialise(&masm, pg.VnB(), pg_in);
5893
5894 // These are merging operations, so we have to initialise the result registers
5895 // for each operation.
5896 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5897 __ Index(ZRegister(i, kBRegSize), 0, -1);
5898 }
5899
5900 // Recognisable values to copy.
5901 __ Mov(x0, 0xdeadbeefdeadbe42);
5902 __ Mov(x1, 0xdeadbeefdead8421);
5903 __ Mov(x2, 0xdeadbeef80042001);
5904 __ Mov(x3, 0x8000000420000001);
5905
 5906  // Use NEON moves to avoid testing SVE `cpy` against itself.
5907 __ Dup(v28.V2D(), x0);
5908 __ Dup(v29.V2D(), x1);
5909 __ Dup(v30.V2D(), x2);
5910 __ Dup(v31.V2D(), x3);
5911
5912 // Register forms (CPY_z_p_r)
5913 __ Cpy(z0.VnB(), pg, w0);
5914 __ Cpy(z1.VnH(), pg, x1); // X registers are accepted for small lanes.
5915 __ Cpy(z2.VnS(), pg, w2);
5916 __ Cpy(z3.VnD(), pg, x3);
5917
5918 // VRegister forms (CPY_z_p_v)
5919 __ Cpy(z4.VnB(), pg, b28);
5920 __ Cpy(z5.VnH(), pg, h29);
5921 __ Cpy(z6.VnS(), pg, s30);
5922 __ Cpy(z7.VnD(), pg, d31);
5923
5924 // Check that we can copy the stack pointer.
5925 __ Mov(x10, sp);
5926 __ Mov(sp, 0xabcabcabcabcabca); // Set sp to a known value.
5927 __ Cpy(z16.VnB(), pg, sp);
5928 __ Cpy(z17.VnH(), pg, wsp);
5929 __ Cpy(z18.VnS(), pg, wsp);
5930 __ Cpy(z19.VnD(), pg, sp);
5931 __ Mov(sp, x10); // Restore sp.
5932
5933 END();
5934
5935 if (CAN_RUN()) {
5936 RUN();
5937 // clang-format off
5938
5939 uint64_t expected_b[] =
5940 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5941 {0xe9eaebec424242f0, 0x42f2f34242f64242, 0xf942fbfcfdfeff42};
5942 ASSERT_EQUAL_SVE(expected_b, z0.VnD());
5943 ASSERT_EQUAL_SVE(expected_b, z4.VnD());
5944
5945 uint64_t expected_h[] =
5946 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5947 {0xe9eaebec8421eff0, 0xf1f28421f5f68421, 0x8421fbfcfdfe8421};
5948 ASSERT_EQUAL_SVE(expected_h, z1.VnD());
5949 ASSERT_EQUAL_SVE(expected_h, z5.VnD());
5950
5951 uint64_t expected_s[] =
5952 // pg: 0 0 1 1 0 1
5953 {0xe9eaebecedeeeff0, 0x8004200180042001, 0xf9fafbfc80042001};
5954 ASSERT_EQUAL_SVE(expected_s, z2.VnD());
5955 ASSERT_EQUAL_SVE(expected_s, z6.VnD());
5956
5957 uint64_t expected_d[] =
5958 // pg: 0 1 1
5959 {0xe9eaebecedeeeff0, 0x8000000420000001, 0x8000000420000001};
5960 ASSERT_EQUAL_SVE(expected_d, z3.VnD());
5961 ASSERT_EQUAL_SVE(expected_d, z7.VnD());
5962
5963
5964 uint64_t expected_b_sp[] =
5965 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5966 {0xe9eaebeccacacaf0, 0xcaf2f3cacaf6caca, 0xf9cafbfcfdfeffca};
5967 ASSERT_EQUAL_SVE(expected_b_sp, z16.VnD());
5968
5969 uint64_t expected_h_sp[] =
5970 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5971 {0xe9eaebecabcaeff0, 0xf1f2abcaf5f6abca, 0xabcafbfcfdfeabca};
5972 ASSERT_EQUAL_SVE(expected_h_sp, z17.VnD());
5973
5974 uint64_t expected_s_sp[] =
5975 // pg: 0 0 1 1 0 1
5976 {0xe9eaebecedeeeff0, 0xcabcabcacabcabca, 0xf9fafbfccabcabca};
5977 ASSERT_EQUAL_SVE(expected_s_sp, z18.VnD());
5978
5979 uint64_t expected_d_sp[] =
5980 // pg: 0 1 1
5981 {0xe9eaebecedeeeff0, 0xabcabcabcabcabca, 0xabcabcabcabcabca};
5982 ASSERT_EQUAL_SVE(expected_d_sp, z19.VnD());
5983
5984 // clang-format on
5985 }
5986}
5987
Jacob Bramley0f62eab2019-10-23 17:07:47 +01005988TEST_SVE(sve_cpy_imm) {
5989 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5990 START();
5991
5992 // For simplicity, we re-use the same pg for various lane sizes.
5993 // For D lanes: 0, 1, 1
5994 // For S lanes: 0, 1, 1, 0, 1
5995 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5996 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5997
5998 PRegister pg = p7;
5999 Initialise(&masm, pg.VnB(), pg_in);
6000
6001 // These are (mostly) merging operations, so we have to initialise the result
6002 // registers for each operation.
6003 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6004 __ Index(ZRegister(i, kBRegSize), 0, -1);
6005 }
6006
6007 // Encodable integer forms (CPY_z_p_i)
6008 __ Cpy(z0.VnB(), pg.Merging(), 0);
6009 __ Cpy(z1.VnB(), pg.Zeroing(), 42);
6010 __ Cpy(z2.VnB(), pg.Merging(), -42);
6011 __ Cpy(z3.VnB(), pg.Zeroing(), 0xff);
6012 __ Cpy(z4.VnH(), pg.Merging(), 127);
6013 __ Cpy(z5.VnS(), pg.Zeroing(), -128);
6014 __ Cpy(z6.VnD(), pg.Merging(), -1);
6015
6016 // Forms encodable using fcpy.
6017 __ Cpy(z7.VnH(), pg.Merging(), Float16ToRawbits(Float16(-31.0)));
6018 __ Cpy(z8.VnS(), pg.Zeroing(), FloatToRawbits(2.0f));
6019 __ Cpy(z9.VnD(), pg.Merging(), DoubleToRawbits(-4.0));
6020
6021 // Other forms use a scratch register.
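  // (0xff is not encodable for H lanes because the eight-bit immediate is
  // sign-extended to the lane size, and the 64-bit constant fits no immediate
  // form.)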
6022 __ Cpy(z10.VnH(), pg.Merging(), 0xff);
6023 __ Cpy(z11.VnD(), pg.Zeroing(), 0x0123456789abcdef);
6024
6025 END();
6026
6027 if (CAN_RUN()) {
6028 RUN();
6029 // clang-format off
6030
6031 uint64_t expected_z0[] =
6032 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6033 {0xe9eaebec000000f0, 0x00f2f30000f60000, 0xf900fbfcfdfeff00};
6034 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6035
6036 uint64_t expected_z1[] =
6037 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6038 {0x000000002a2a2a00, 0x2a00002a2a002a2a, 0x002a00000000002a};
6039 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6040
6041 uint64_t expected_z2[] =
6042 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6043 {0xe9eaebecd6d6d6f0, 0xd6f2f3d6d6f6d6d6, 0xf9d6fbfcfdfeffd6};
6044 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6045
6046 uint64_t expected_z3[] =
6047 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6048 {0x00000000ffffff00, 0xff0000ffff00ffff, 0x00ff0000000000ff};
6049 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6050
6051 uint64_t expected_z4[] =
6052 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6053 {0xe9eaebec007feff0, 0xf1f2007ff5f6007f, 0x007ffbfcfdfe007f};
6054 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6055
6056 uint64_t expected_z5[] =
6057 // pg: 0 0 1 1 0 1
6058 {0x0000000000000000, 0xffffff80ffffff80, 0x00000000ffffff80};
6059 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6060
6061 uint64_t expected_z6[] =
6062 // pg: 0 1 1
6063 {0xe9eaebecedeeeff0, 0xffffffffffffffff, 0xffffffffffffffff};
6064 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6065
6066 uint64_t expected_z7[] =
6067 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6068 {0xe9eaebeccfc0eff0, 0xf1f2cfc0f5f6cfc0, 0xcfc0fbfcfdfecfc0};
6069 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6070
6071 uint64_t expected_z8[] =
6072 // pg: 0 0 1 1 0 1
6073 {0x0000000000000000, 0x4000000040000000, 0x0000000040000000};
6074 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6075
6076 uint64_t expected_z9[] =
6077 // pg: 0 1 1
6078 {0xe9eaebecedeeeff0, 0xc010000000000000, 0xc010000000000000};
6079 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6080
6081 uint64_t expected_z10[] =
6082 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6083 {0xe9eaebec00ffeff0, 0xf1f200fff5f600ff, 0x00fffbfcfdfe00ff};
6084 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6085
6086 uint64_t expected_z11[] =
6087 // pg: 0 1 1
6088 {0x0000000000000000, 0x0123456789abcdef, 0x0123456789abcdef};
6089 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6090
6091 // clang-format on
6092 }
6093}
6094
6095TEST_SVE(sve_fcpy_imm) {
6096 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6097 START();
6098
6099 // For simplicity, we re-use the same pg for various lane sizes.
6100 // For D lanes: 0, 1, 1
6101 // For S lanes: 0, 1, 1, 0, 1
6102 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6103 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6104
6105 PRegister pg = p7;
6106 Initialise(&masm, pg.VnB(), pg_in);
6107
6108 // These are (mostly) merging operations, so we have to initialise the result
6109 // registers for each operation.
6110 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6111 __ Index(ZRegister(i, kBRegSize), 0, -1);
6112 }
6113
6114 // Encodable floating-point forms (FCPY_z_p_i)
6115 __ Fcpy(z1.VnH(), pg.Merging(), Float16(1.0));
6116 __ Fcpy(z2.VnH(), pg.Merging(), -2.0f);
6117 __ Fcpy(z3.VnH(), pg.Merging(), 3.0);
6118 __ Fcpy(z4.VnS(), pg.Merging(), Float16(-4.0));
6119 __ Fcpy(z5.VnS(), pg.Merging(), 5.0f);
6120 __ Fcpy(z6.VnS(), pg.Merging(), 6.0);
6121 __ Fcpy(z7.VnD(), pg.Merging(), Float16(7.0));
6122 __ Fcpy(z8.VnD(), pg.Merging(), 8.0f);
6123 __ Fcpy(z9.VnD(), pg.Merging(), -9.0);
6124
6125 // Unencodable immediates.
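  // (The FP immediate form can only encode values of the form
  // +/-(1 + m/16) * 2^n with n in [-3, 4], which excludes 0.0, 42.0, NaNs and
  // infinities.)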
6126 __ Fcpy(z10.VnS(), pg.Merging(), 0.0);
6127 __ Fcpy(z11.VnH(), pg.Merging(), Float16(42.0));
6128 __ Fcpy(z12.VnD(), pg.Merging(), RawbitsToDouble(0x7ff0000012340000)); // NaN
6129 __ Fcpy(z13.VnH(), pg.Merging(), kFP64NegativeInfinity);
6130
6131 END();
6132
6133 if (CAN_RUN()) {
6134 RUN();
6135 // clang-format off
6136
6137 // 1.0 as FP16: 0x3c00
6138 uint64_t expected_z1[] =
6139 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6140 {0xe9eaebec3c00eff0, 0xf1f23c00f5f63c00, 0x3c00fbfcfdfe3c00};
6141 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6142
6143 // -2.0 as FP16: 0xc000
6144 uint64_t expected_z2[] =
6145 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6146 {0xe9eaebecc000eff0, 0xf1f2c000f5f6c000, 0xc000fbfcfdfec000};
6147 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6148
6149 // 3.0 as FP16: 0x4200
6150 uint64_t expected_z3[] =
6151 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6152 {0xe9eaebec4200eff0, 0xf1f24200f5f64200, 0x4200fbfcfdfe4200};
6153 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6154
6155 // -4.0 as FP32: 0xc0800000
6156 uint64_t expected_z4[] =
6157 // pg: 0 0 1 1 0 1
6158 {0xe9eaebecedeeeff0, 0xc0800000c0800000, 0xf9fafbfcc0800000};
6159 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6160
6161 // 5.0 as FP32: 0x40a00000
6162 uint64_t expected_z5[] =
6163 // pg: 0 0 1 1 0 1
6164 {0xe9eaebecedeeeff0, 0x40a0000040a00000, 0xf9fafbfc40a00000};
6165 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6166
6167 // 6.0 as FP32: 0x40c00000
6168 uint64_t expected_z6[] =
6169 // pg: 0 0 1 1 0 1
6170 {0xe9eaebecedeeeff0, 0x40c0000040c00000, 0xf9fafbfc40c00000};
6171 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6172
6173 // 7.0 as FP64: 0x401c000000000000
6174 uint64_t expected_z7[] =
6175 // pg: 0 1 1
6176 {0xe9eaebecedeeeff0, 0x401c000000000000, 0x401c000000000000};
6177 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6178
6179 // 8.0 as FP64: 0x4020000000000000
6180 uint64_t expected_z8[] =
6181 // pg: 0 1 1
6182 {0xe9eaebecedeeeff0, 0x4020000000000000, 0x4020000000000000};
6183 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6184
6185 // -9.0 as FP64: 0xc022000000000000
6186 uint64_t expected_z9[] =
6187 // pg: 0 1 1
6188 {0xe9eaebecedeeeff0, 0xc022000000000000, 0xc022000000000000};
6189 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6190
6191 // 0.0 as FP32: 0x00000000
6192 uint64_t expected_z10[] =
6193 // pg: 0 0 1 1 0 1
6194 {0xe9eaebecedeeeff0, 0x0000000000000000, 0xf9fafbfc00000000};
6195 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6196
6197 // 42.0 as FP16: 0x5140
6198 uint64_t expected_z11[] =
6199 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6200 {0xe9eaebec5140eff0, 0xf1f25140f5f65140, 0x5140fbfcfdfe5140};
6201 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6202
6203 // Signalling NaN (with payload): 0x7ff0000012340000
6204 uint64_t expected_z12[] =
6205 // pg: 0 1 1
6206 {0xe9eaebecedeeeff0, 0x7ff0000012340000, 0x7ff0000012340000};
6207 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
6208
6209 // -infinity as FP16: 0xfc00
6210 uint64_t expected_z13[] =
6211 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6212 {0xe9eaebecfc00eff0, 0xf1f2fc00f5f6fc00, 0xfc00fbfcfdfefc00};
6213 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
6214
6215 // clang-format on
6216 }
6217}
6218
TatWai Chong4f28df72019-08-14 17:50:30 -07006219TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
6220 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6221 START();
6222
6223 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
6224
6225 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
6226
6227 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
6228
6229 int index_s[] = {1, 3, 2, 31, -1};
6230
6231 int index_d[] = {31, 1};
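  // Indices beyond the number of lanes for the current vector length (such as
  // 255, 31 or -1 here) are expected to read as zero; the checks below allow
  // for this.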
6232
 6233  // Initialise the register with a value that doesn't exist in the table.
6234 __ Dup(z9.VnB(), 0x1f);
6235 InsrHelper(&masm, z9.VnD(), table_inputs);
6236
6237 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
6238 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
6239 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
6240 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
6241
6242 InsrHelper(&masm, ind_b, index_b);
6243 InsrHelper(&masm, ind_h, index_h);
6244 InsrHelper(&masm, ind_s, index_s);
6245 InsrHelper(&masm, ind_d, index_d);
6246
6247 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
6248
6249 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
6250
6251 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
6252
6253 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
6254
6255 END();
6256
6257 if (CAN_RUN()) {
6258 RUN();
6259
6260 // clang-format off
6261 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
6262 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
6263
6264 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
6265 0x5544, 0x7766, 0xddcc, 0x9988};
6266
6267 unsigned z28_expected[] =
6268 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
6269
6270 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
6271 // clang-format on
6272
6273 unsigned vl = config->sve_vl_in_bits();
6274 for (size_t i = 0; i < ArrayLength(index_b); i++) {
6275 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
6276 if (!core.HasSVELane(z26.VnB(), lane)) break;
6277 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
6278 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
6279 }
6280
6281 for (size_t i = 0; i < ArrayLength(index_h); i++) {
6282 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
6283 if (!core.HasSVELane(z27.VnH(), lane)) break;
6284 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
6285 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
6286 }
6287
6288 for (size_t i = 0; i < ArrayLength(index_s); i++) {
6289 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
6290 if (!core.HasSVELane(z28.VnS(), lane)) break;
6291 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
6292 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
6293 }
6294
6295 for (size_t i = 0; i < ArrayLength(index_d); i++) {
6296 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
6297 if (!core.HasSVELane(z29.VnD(), lane)) break;
6298 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
6299 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
6300 }
6301 }
6302}
6303
Jacob Bramley199339d2019-08-05 18:49:13 +01006304TEST_SVE(ldr_str_z_bi) {
6305 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6306 START();
6307
6308 int vl = config->sve_vl_in_bytes();
6309
6310 // The immediate can address [-256, 255] times the VL, so allocate enough
6311 // space to exceed that in both directions.
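  // (vl * 1024, with the base set half-way through, spans roughly
  // [-512, +512) times the VL.)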
6312 int data_size = vl * 1024;
6313
6314 uint8_t* data = new uint8_t[data_size];
6315 memset(data, 0, data_size);
6316
6317 // Set the base half-way through the buffer so we can use negative indices.
6318 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6319
6320 __ Index(z1.VnB(), 1, 3);
6321 __ Index(z2.VnB(), 2, 5);
6322 __ Index(z3.VnB(), 3, 7);
6323 __ Index(z4.VnB(), 4, 11);
6324 __ Index(z5.VnB(), 5, 13);
6325 __ Index(z6.VnB(), 6, 2);
6326 __ Index(z7.VnB(), 7, 3);
6327 __ Index(z8.VnB(), 8, 5);
6328 __ Index(z9.VnB(), 9, 7);
6329
6330 // Encodable cases.
6331 __ Str(z1, SVEMemOperand(x0));
6332 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
6333 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
6334 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
6335 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
6336
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006337 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01006338 __ Str(z6, SVEMemOperand(x0, 6 * vl));
6339 __ Str(z7, SVEMemOperand(x0, -7 * vl));
6340 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
6341 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
6342
6343 // Corresponding loads.
6344 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
6345 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
6346 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
6347 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
6348 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
6349
6350 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
6351 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
6352 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
6353 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
6354
6355 END();
6356
6357 if (CAN_RUN()) {
6358 RUN();
6359
6360 uint8_t* expected = new uint8_t[data_size];
6361 memset(expected, 0, data_size);
6362 uint8_t* middle = &expected[data_size / 2];
6363
6364 for (int i = 0; i < vl; i++) {
6365 middle[i] = (1 + (3 * i)) & 0xff; // z1
6366 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
6367 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
6368 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
6369 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
6370 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
6371 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
6372 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
6373 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
6374 }
6375
Jacob Bramley33c99f92019-10-08 15:24:12 +01006376 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01006377
6378 ASSERT_EQUAL_SVE(z1, z11);
6379 ASSERT_EQUAL_SVE(z2, z12);
6380 ASSERT_EQUAL_SVE(z3, z13);
6381 ASSERT_EQUAL_SVE(z4, z14);
6382 ASSERT_EQUAL_SVE(z5, z15);
6383 ASSERT_EQUAL_SVE(z6, z16);
6384 ASSERT_EQUAL_SVE(z7, z17);
6385 ASSERT_EQUAL_SVE(z8, z18);
6386 ASSERT_EQUAL_SVE(z9, z19);
6387
6388 delete[] expected;
6389 }
6390 delete[] data;
6391}
6392
6393TEST_SVE(ldr_str_p_bi) {
6394 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6395 START();
6396
6397 int vl = config->sve_vl_in_bytes();
6398 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
6399 int pl = vl / kZRegBitsPerPRegBit;
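  // A predicate register holds one bit per vector byte, so the predicate
  // length (PL) is VL / 8.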
6400
6401 // The immediate can address [-256, 255] times the PL, so allocate enough
6402 // space to exceed that in both directions.
6403 int data_size = pl * 1024;
6404
6405 uint8_t* data = new uint8_t[data_size];
6406 memset(data, 0, data_size);
6407
6408 // Set the base half-way through the buffer so we can use negative indices.
6409 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6410
6411 uint64_t pattern[4] = {0x1010101011101111,
6412 0x0010111011000101,
6413 0x1001101110010110,
6414 0x1010110101100011};
6415 for (int i = 8; i <= 15; i++) {
6416 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
6417 Initialise(&masm,
6418 PRegister(i),
6419 pattern[3] * i,
6420 pattern[2] * i,
6421 pattern[1] * i,
6422 pattern[0] * i);
6423 }
6424
6425 // Encodable cases.
6426 __ Str(p8, SVEMemOperand(x0));
6427 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
6428 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
6429 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
6430
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006431 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01006432 __ Str(p12, SVEMemOperand(x0, 6 * pl));
6433 __ Str(p13, SVEMemOperand(x0, -7 * pl));
6434 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
6435 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
6436
6437 // Corresponding loads.
6438 __ Ldr(p0, SVEMemOperand(x0));
6439 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
6440 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
6441 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
6442
6443 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
6444 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
6445 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
6446 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
6447
6448 END();
6449
6450 if (CAN_RUN()) {
6451 RUN();
6452
6453 uint8_t* expected = new uint8_t[data_size];
6454 memset(expected, 0, data_size);
6455 uint8_t* middle = &expected[data_size / 2];
6456
6457 for (int i = 0; i < pl; i++) {
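      // Extract byte `i` of the value formed by concatenating `pattern`:
      // pattern[i / 8] holds the 64-bit chunk and (i % 8) selects the byte
      // within it. For example, i = 9 gives index 1 and bit_index 8.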
6458 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
6459 size_t index = i / sizeof(pattern[0]);
6460 VIXL_ASSERT(index < ArrayLength(pattern));
6461 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
6462 // Each byte of `pattern` can be multiplied by 15 without carry.
6463 VIXL_ASSERT((byte * 15) <= 0xff);
6464
6465 middle[i] = byte * 8; // p8
6466 middle[(2 * pl) + i] = byte * 9; // p9
6467 middle[(-3 * pl) + i] = byte * 10; // p10
6468 middle[(255 * pl) + i] = byte * 11; // p11
6469 middle[(6 * pl) + i] = byte * 12; // p12
6470 middle[(-7 * pl) + i] = byte * 13; // p13
6471 middle[(314 * pl) + i] = byte * 14; // p14
6472 middle[(-314 * pl) + i] = byte * 15; // p15
6473 }
6474
Jacob Bramley33c99f92019-10-08 15:24:12 +01006475 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01006476
6477 ASSERT_EQUAL_SVE(p0, p8);
6478 ASSERT_EQUAL_SVE(p1, p9);
6479 ASSERT_EQUAL_SVE(p2, p10);
6480 ASSERT_EQUAL_SVE(p3, p11);
6481 ASSERT_EQUAL_SVE(p4, p12);
6482 ASSERT_EQUAL_SVE(p5, p13);
6483 ASSERT_EQUAL_SVE(p6, p14);
6484 ASSERT_EQUAL_SVE(p7, p15);
6485
6486 delete[] expected;
6487 }
6488 delete[] data;
6489}
6490
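// Write one element of type T at `base + offset + (index * sizeof(T))`. This
// is used to build the expected memory images for the store tests below.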
Jacob Bramleye668b202019-08-14 17:57:34 +01006491template <typename T>
6492static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
6493 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
6494}
6495
6496TEST_SVE(sve_ld1_st1_contiguous) {
6497 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6498 START();
6499
6500 int vl = config->sve_vl_in_bytes();
6501
6502 // The immediate can address [-8, 7] times the VL, so allocate enough space to
6503 // exceed that in both directions.
6504 int data_size = vl * 128;
6505
6506 uint8_t* data = new uint8_t[data_size];
6507 memset(data, 0, data_size);
6508
 6509  // Set the base half-way through the buffer so we can use negative indices.
6510 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6511
Jacob Bramleye668b202019-08-14 17:57:34 +01006512 // Encodable scalar-plus-immediate cases.
6513 __ Index(z1.VnB(), 1, -3);
6514 __ Ptrue(p1.VnB());
6515 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
6516
6517 __ Index(z2.VnH(), -2, 5);
6518 __ Ptrue(p2.VnH(), SVE_MUL3);
6519 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
6520
6521 __ Index(z3.VnS(), 3, -7);
6522 __ Ptrue(p3.VnS(), SVE_POW2);
6523 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
6524
6525 // Encodable scalar-plus-scalar cases.
6526 __ Index(z4.VnD(), -4, 11);
6527 __ Ptrue(p4.VnD(), SVE_VL3);
6528 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
6529 __ Mov(x2, 17);
6530 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
6531
6532 __ Index(z5.VnD(), 6, -2);
6533 __ Ptrue(p5.VnD(), SVE_VL16);
TatWai Chong6205eb42019-09-24 10:07:20 +01006534 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
6535 __ Mov(x4, 6);
6536 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
Jacob Bramleye668b202019-08-14 17:57:34 +01006537
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006538 // Unencodable cases fall back on `CalculateSVEAddress`.
Jacob Bramleye668b202019-08-14 17:57:34 +01006539 __ Index(z6.VnS(), -7, 3);
6540 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
6541 // predicate bits when handling larger lanes.
6542 __ Ptrue(p6.VnB(), SVE_ALL);
6543 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
6544
TatWai Chong6205eb42019-09-24 10:07:20 +01006545 __ Index(z7.VnD(), 32, -11);
6546 __ Ptrue(p7.VnD(), SVE_MUL4);
6547 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
Jacob Bramleye668b202019-08-14 17:57:34 +01006548
TatWai Chong6205eb42019-09-24 10:07:20 +01006549 // Corresponding loads.
6550 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
6551 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
6552 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
6553 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
6554 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
6555 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
6556
6557 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
6558 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
6559 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
6560 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
6561
6562 // We can test ld1 by comparing the value loaded with the value stored. In
6563 // most cases, there are two complications:
6564 // - Loads have zeroing predication, so we have to clear the inactive
6565 // elements on our reference.
6566 // - We have to replicate any sign- or zero-extension.
6567
6568 // Ld1b(z8.VnB(), ...)
6569 __ Dup(z18.VnB(), 0);
6570 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
6571
6572 // Ld1b(z9.VnH(), ...)
6573 __ Dup(z19.VnH(), 0);
6574 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
6575
6576 // Ld1h(z10.VnS(), ...)
6577 __ Dup(z20.VnS(), 0);
6578 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
6579
6580 // Ld1b(z11.VnD(), ...)
6581 __ Dup(z21.VnD(), 0);
6582 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
6583
6584 // Ld1d(z12.VnD(), ...)
6585 __ Dup(z22.VnD(), 0);
6586 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
6587
6588 // Ld1w(z13.VnS(), ...)
6589 __ Dup(z23.VnS(), 0);
6590 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
6591
6592 // Ld1sb(z14.VnH(), ...)
6593 __ Dup(z24.VnH(), 0);
6594 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
6595
6596 // Ld1sh(z15.VnS(), ...)
6597 __ Dup(z25.VnS(), 0);
6598 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
6599
6600 // Ld1sb(z16.VnD(), ...)
6601 __ Dup(z26.VnD(), 0);
6602 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
6603
6604 // Ld1sw(z17.VnD(), ...)
6605 __ Dup(z27.VnD(), 0);
6606 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
Jacob Bramleye668b202019-08-14 17:57:34 +01006607
6608 END();
6609
6610 if (CAN_RUN()) {
6611 RUN();
6612
6613 uint8_t* expected = new uint8_t[data_size];
6614 memset(expected, 0, data_size);
6615 uint8_t* middle = &expected[data_size / 2];
6616
6617 int vl_b = vl / kBRegSizeInBytes;
6618 int vl_h = vl / kHRegSizeInBytes;
6619 int vl_s = vl / kSRegSizeInBytes;
6620 int vl_d = vl / kDRegSizeInBytes;
6621
6622 // Encodable cases.
6623
6624 // st1b { z1.b }, SVE_ALL
6625 for (int i = 0; i < vl_b; i++) {
6626 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
6627 }
6628
6629 // st1b { z2.h }, SVE_MUL3
6630 int vl_h_mul3 = vl_h - (vl_h % 3);
6631 for (int i = 0; i < vl_h_mul3; i++) {
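      // For st1b with H lanes, one SVE_MUL_VL unit is (vl / 2) bytes: vl_h
      // one-byte elements.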
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006632 int64_t offset = 7 * static_cast<int>(vl / (kHRegSize / kBRegSize));
6633 MemoryWrite(middle, offset, i, static_cast<uint8_t>(-2 + (5 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01006634 }
6635
6636 // st1h { z3.s }, SVE_POW2
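    // (SVE_POW2 activates the largest power-of-two number of lanes that fits
    // in the vector.)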
6637 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
6638 for (int i = 0; i < vl_s_pow2; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006639 int64_t offset = -8 * static_cast<int>(vl / (kSRegSize / kHRegSize));
6640 MemoryWrite(middle, offset, i, static_cast<uint16_t>(3 - (7 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01006641 }
6642
6643 // st1b { z4.d }, SVE_VL3
6644 if (vl_d >= 3) {
6645 for (int i = 0; i < 3; i++) {
6646 MemoryWrite(middle,
6647 (8 * vl) + 17,
6648 i,
6649 static_cast<uint8_t>(-4 + (11 * i)));
6650 }
6651 }
6652
6653 // st1d { z5.d }, SVE_VL16
6654 if (vl_d >= 16) {
6655 for (int i = 0; i < 16; i++) {
6656 MemoryWrite(middle,
6657 (10 * vl) + (6 * kDRegSizeInBytes),
6658 i,
6659 static_cast<uint64_t>(6 - (2 * i)));
6660 }
6661 }
6662
6663 // Unencodable cases.
6664
6665 // st1w { z6.s }, SVE_ALL
6666 for (int i = 0; i < vl_s; i++) {
6667 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
6668 }
6669
TatWai Chong6205eb42019-09-24 10:07:20 +01006670 // st1w { z7.d }, SVE_MUL4
6671 int vl_d_mul4 = vl_d - (vl_d % 4);
6672 for (int i = 0; i < vl_d_mul4; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006673 int64_t offset = 22 * static_cast<int>(vl / (kDRegSize / kWRegSize));
6674 MemoryWrite(middle, offset, i, static_cast<uint32_t>(32 + (-11 * i)));
TatWai Chong6205eb42019-09-24 10:07:20 +01006675 }
6676
Jacob Bramley33c99f92019-10-08 15:24:12 +01006677 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramleye668b202019-08-14 17:57:34 +01006678
TatWai Chong6205eb42019-09-24 10:07:20 +01006679 // Check that we loaded back the expected values.
6680
6681 ASSERT_EQUAL_SVE(z18, z8);
6682 ASSERT_EQUAL_SVE(z19, z9);
6683 ASSERT_EQUAL_SVE(z20, z10);
6684 ASSERT_EQUAL_SVE(z21, z11);
6685 ASSERT_EQUAL_SVE(z22, z12);
6686 ASSERT_EQUAL_SVE(z23, z13);
6687 ASSERT_EQUAL_SVE(z24, z14);
6688 ASSERT_EQUAL_SVE(z25, z15);
6689 ASSERT_EQUAL_SVE(z26, z16);
6690 ASSERT_EQUAL_SVE(z27, z17);
6691
Jacob Bramleye668b202019-08-14 17:57:34 +01006692 delete[] expected;
6693 }
6694 delete[] data;
6695}
6696
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006697TEST_SVE(sve_ld2_st2_scalar_plus_imm) {
6698 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6699 START();
6700
6701 int vl = config->sve_vl_in_bytes();
6702
6703 // The immediate can address [-16, 14] times the VL, so allocate enough space
6704 // to exceed that in both directions.
6705 int data_size = vl * 128;
6706
6707 uint8_t* data = new uint8_t[data_size];
6708 memset(data, 0, data_size);
6709
 6710  // Set the base half-way through the buffer so we can use negative indices.
6711 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6712
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006713 __ Index(z14.VnB(), 1, -3);
6714 __ Index(z15.VnB(), 2, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006715 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006716 __ St2b(z14.VnB(), z15.VnB(), p0, SVEMemOperand(x0));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006717
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006718 __ Index(z16.VnH(), -2, 5);
6719 __ Index(z17.VnH(), -3, 5);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006720 __ Ptrue(p1.VnH(), SVE_MUL3);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006721 __ St2h(z16.VnH(), z17.VnH(), p1, SVEMemOperand(x0, 8, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006722
6723 // Wrap around from z31 to z0.
6724 __ Index(z31.VnS(), 3, -7);
6725 __ Index(z0.VnS(), 4, -7);
6726 __ Ptrue(p2.VnS(), SVE_POW2);
6727 __ St2w(z31.VnS(), z0.VnS(), p2, SVEMemOperand(x0, -12, SVE_MUL_VL));
6728
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006729 __ Index(z18.VnD(), -7, 3);
6730 __ Index(z19.VnD(), -8, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006731 // Sparse predication, including some irrelevant bits (0xe). To make the
6732 // results easy to check, activate each lane <n> where n is a multiple of 5.
6733 Initialise(&masm,
6734 p3,
6735 0xeee10000000001ee,
6736 0xeeeeeee100000000,
6737 0x01eeeeeeeee10000,
6738 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006739 __ St2d(z18.VnD(), z19.VnD(), p3, SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006740
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006741 // We can test ld2 by comparing the values loaded with the values stored.
6742 // There are two complications:
6743 // - Loads have zeroing predication, so we have to clear the inactive
6744 // elements on our reference.
6745 // - We want to test both loads and stores that span { z31, z0 }, so we have
6746 // to move some values around.
6747 //
6748 // Registers z4-z11 will hold as-stored values (with inactive elements
6749 // cleared). Registers z20-z27 will hold the values that were loaded.
6750
6751 // Ld2b(z14.VnB(), z15.VnB(), ...)
6752 __ Dup(z4.VnB(), 0);
6753 __ Dup(z5.VnB(), 0);
6754 __ Mov(z4.VnB(), p0.Merging(), z14.VnB());
6755 __ Mov(z5.VnB(), p0.Merging(), z15.VnB());
6756
6757 // Ld2h(z16.VnH(), z17.VnH(), ...)
6758 __ Dup(z6.VnH(), 0);
6759 __ Dup(z7.VnH(), 0);
6760 __ Mov(z6.VnH(), p1.Merging(), z16.VnH());
6761 __ Mov(z7.VnH(), p1.Merging(), z17.VnH());
6762
6763 // Ld2w(z31.VnS(), z0.VnS(), ...)
6764 __ Dup(z8.VnS(), 0);
6765 __ Dup(z9.VnS(), 0);
6766 __ Mov(z8.VnS(), p2.Merging(), z31.VnS());
6767 __ Mov(z9.VnS(), p2.Merging(), z0.VnS());
6768
6769 // Ld2d(z18.VnD(), z19.VnD(), ...)
6770 __ Dup(z10.VnD(), 0);
6771 __ Dup(z11.VnD(), 0);
6772 __ Mov(z10.VnD(), p3.Merging(), z18.VnD());
6773 __ Mov(z11.VnD(), p3.Merging(), z19.VnD());
6774
6775 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
6776 __ Ld2b(z31.VnB(), z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));
6777 __ Mov(z20, z31);
6778 __ Mov(z21, z0);
6779
6780 __ Ld2h(z22.VnH(), z23.VnH(), p1.Zeroing(), SVEMemOperand(x0, 8, SVE_MUL_VL));
6781 __ Ld2w(z24.VnS(),
6782 z25.VnS(),
6783 p2.Zeroing(),
6784 SVEMemOperand(x0, -12, SVE_MUL_VL));
6785 __ Ld2d(z26.VnD(),
6786 z27.VnD(),
6787 p3.Zeroing(),
6788 SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006789
6790 END();
6791
6792 if (CAN_RUN()) {
6793 RUN();
6794
6795 uint8_t* expected = new uint8_t[data_size];
6796 memset(expected, 0, data_size);
6797 uint8_t* middle = &expected[data_size / 2];
6798
6799 int vl_b = vl / kBRegSizeInBytes;
6800 int vl_h = vl / kHRegSizeInBytes;
6801 int vl_s = vl / kSRegSizeInBytes;
6802 int vl_d = vl / kDRegSizeInBytes;
6803
6804 int reg_count = 2;
6805
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006806 // st2b { z14.b, z15.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006807 for (int i = 0; i < vl_b; i++) {
6808 uint8_t lane0 = 1 - (3 * i);
6809 uint8_t lane1 = 2 - (3 * i);
6810 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
6811 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
6812 }
6813
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006814 // st2h { z16.h, z17.h }, SVE_MUL3
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006815 int vl_h_mul3 = vl_h - (vl_h % 3);
6816 for (int i = 0; i < vl_h_mul3; i++) {
6817 int64_t offset = 8 * vl;
6818 uint16_t lane0 = -2 + (5 * i);
6819 uint16_t lane1 = -3 + (5 * i);
6820 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6821 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6822 }
6823
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006824 // st2w { z31.s, z0.s }, SVE_POW2
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006825 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
6826 for (int i = 0; i < vl_s_pow2; i++) {
6827 int64_t offset = -12 * vl;
6828 uint32_t lane0 = 3 - (7 * i);
6829 uint32_t lane1 = 4 - (7 * i);
6830 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6831 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6832 }
6833
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006834 // st2d { z18.d, z19.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006835 for (int i = 0; i < vl_d; i++) {
6836 if ((i % 5) == 0) {
6837 int64_t offset = 14 * vl;
6838 uint64_t lane0 = -7 + (3 * i);
6839 uint64_t lane1 = -8 + (3 * i);
6840 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6841 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6842 }
6843 }
6844
6845 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
6846
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006847 // Check that we loaded back the expected values.
6848
6849 // st2b/ld2b
6850 ASSERT_EQUAL_SVE(z4, z20);
6851 ASSERT_EQUAL_SVE(z5, z21);
6852
6853 // st2h/ld2h
6854 ASSERT_EQUAL_SVE(z6, z22);
6855 ASSERT_EQUAL_SVE(z7, z23);
6856
6857 // st2w/ld2w
6858 ASSERT_EQUAL_SVE(z8, z24);
6859 ASSERT_EQUAL_SVE(z9, z25);
6860
6861 // st2d/ld2d
6862 ASSERT_EQUAL_SVE(z10, z26);
6863 ASSERT_EQUAL_SVE(z11, z27);
6864
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006865 delete[] expected;
6866 }
6867 delete[] data;
6868}
6869
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006870TEST_SVE(sve_ld2_st2_scalar_plus_scalar) {
6871 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6872 START();
6873
6874 int vl = config->sve_vl_in_bytes();
6875
6876 // Allocate plenty of space to enable indexing in both directions.
6877 int data_size = vl * 128;
6878
6879 uint8_t* data = new uint8_t[data_size];
6880 memset(data, 0, data_size);
6881
 6882  // Set the base half-way through the buffer so we can use negative indices.
6883 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6884
Jacob Bramleye483ce52019-11-05 16:52:29 +00006885 __ Index(z10.VnB(), -4, 11);
6886 __ Index(z11.VnB(), -5, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006887 __ Ptrue(p7.VnB(), SVE_MUL4);
6888 __ Mov(x1, 0);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006889 __ St2b(z10.VnB(), z11.VnB(), p7, SVEMemOperand(x0, x1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006890
Jacob Bramleye483ce52019-11-05 16:52:29 +00006891 __ Index(z12.VnH(), 6, -2);
6892 __ Index(z13.VnH(), 7, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006893 __ Ptrue(p6.VnH(), SVE_VL16);
6894 __ Rdvl(x2, 3); // Make offsets VL-dependent so we can avoid overlap.
Jacob Bramleye483ce52019-11-05 16:52:29 +00006895 __ St2h(z12.VnH(), z13.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006896
Jacob Bramleye483ce52019-11-05 16:52:29 +00006897 __ Index(z14.VnS(), -7, 3);
6898 __ Index(z15.VnS(), -8, 3);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006899 // Sparse predication, including some irrelevant bits (0xe). To make the
6900 // results easy to check, activate each lane <n> where n is a multiple of 5.
6901 Initialise(&masm,
6902 p5,
6903 0xeee1000010000100,
6904 0x001eeee100001000,
6905 0x0100001eeee10000,
6906 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006907 __ Rdvl(x3, -3);
6908 __ St2w(z14.VnS(), z15.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006909
6910 // Wrap around from z31 to z0.
6911 __ Index(z31.VnD(), 32, -11);
6912 __ Index(z0.VnD(), 33, -11);
6913 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006914 __ Rdvl(x4, 1);
6915 __ St2d(z31.VnD(), z0.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006916
Jacob Bramleye483ce52019-11-05 16:52:29 +00006917 // We can test ld2 by comparing the values loaded with the values stored.
6918 // There are two complications:
6919 // - Loads have zeroing predication, so we have to clear the inactive
6920 // elements on our reference.
6921 // - We want to test both loads and stores that span { z31, z0 }, so we have
6922 // to move some values around.
6923 //
6924 // Registers z4-z11 will hold as-stored values (with inactive elements
6925 // cleared). Registers z20-z27 will hold the values that were loaded.
6926
6927 // Ld2b(z20.VnB(), z21.VnB(), ...)
6928 __ Dup(z4.VnB(), 0);
6929 __ Dup(z5.VnB(), 0);
6930 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
6931 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
6932
6933 // Ld2h(z22.VnH(), z23.VnH(), ...)
6934 __ Dup(z6.VnH(), 0);
6935 __ Dup(z7.VnH(), 0);
6936 __ Mov(z6.VnH(), p6.Merging(), z12.VnH());
6937 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
6938
6939 // Ld2w(z24.VnS(), z25.VnS(), ...)
6940 __ Dup(z8.VnS(), 0);
6941 __ Dup(z9.VnS(), 0);
6942 __ Mov(z8.VnS(), p5.Merging(), z14.VnS());
6943 __ Mov(z9.VnS(), p5.Merging(), z15.VnS());
6944
6945 // Ld2d(z31.VnD(), z0.VnD(), ...)
6946 __ Dup(z10.VnD(), 0);
6947 __ Dup(z11.VnD(), 0);
6948 __ Mov(z10.VnD(), p4.Merging(), z31.VnD());
6949 __ Mov(z11.VnD(), p4.Merging(), z0.VnD());
6950
6951 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
6952 __ Ld2b(z31.VnB(), z0.VnB(), p7.Zeroing(), SVEMemOperand(x0, x1));
6953 __ Mov(z20, z31);
6954 __ Mov(z21, z0);
6955
6956 __ Ld2h(z22.VnH(), z23.VnH(), p6.Zeroing(), SVEMemOperand(x0, x2, LSL, 1));
6957 __ Ld2w(z24.VnS(), z25.VnS(), p5.Zeroing(), SVEMemOperand(x0, x3, LSL, 2));
6958 __ Ld2d(z26.VnD(), z27.VnD(), p4.Zeroing(), SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006959
6960 END();
6961
6962 if (CAN_RUN()) {
6963 RUN();
6964
6965 uint8_t* expected = new uint8_t[data_size];
6966 memset(expected, 0, data_size);
6967 uint8_t* middle = &expected[data_size / 2];
6968
6969 int vl_b = vl / kBRegSizeInBytes;
6970 int vl_h = vl / kHRegSizeInBytes;
6971 int vl_s = vl / kSRegSizeInBytes;
6972 int vl_d = vl / kDRegSizeInBytes;
6973
6974 int reg_count = 2;
6975
Jacob Bramleye483ce52019-11-05 16:52:29 +00006976 // st2b { z10.b, z11.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006977 int vl_b_mul4 = vl_b - (vl_b % 4);
6978 for (int i = 0; i < vl_b_mul4; i++) {
6979 uint8_t lane0 = -4 + (11 * i);
6980 uint8_t lane1 = -5 + (11 * i);
6981 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
6982 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
6983 }
6984
Jacob Bramleye483ce52019-11-05 16:52:29 +00006985 // st2h { z12.h, z13.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006986 if (vl_h >= 16) {
6987 for (int i = 0; i < 16; i++) {
6988 int64_t offset = (3 << kHRegSizeInBytesLog2) * vl;
6989 uint16_t lane0 = 6 - (2 * i);
6990 uint16_t lane1 = 7 - (2 * i);
6991 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6992 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6993 }
6994 }
6995
Jacob Bramleye483ce52019-11-05 16:52:29 +00006996 // st2w { z14.s, z15.s }, ((i % 5) == 0)
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006997 for (int i = 0; i < vl_s; i++) {
6998 if ((i % 5) == 0) {
6999 int64_t offset = -(3 << kSRegSizeInBytesLog2) * vl;
7000 uint32_t lane0 = -7 + (3 * i);
7001 uint32_t lane1 = -8 + (3 * i);
7002 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7003 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7004 }
7005 }
7006
 7007    // st2d { z31.d, z0.d }, SVE_MUL3
7008 int vl_d_mul3 = vl_d - (vl_d % 3);
7009 for (int i = 0; i < vl_d_mul3; i++) {
7010 int64_t offset = (1 << kDRegSizeInBytesLog2) * vl;
7011 uint64_t lane0 = 32 - (11 * i);
7012 uint64_t lane1 = 33 - (11 * i);
7013 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7014 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7015 }
7016
7017 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7018
Jacob Bramleye483ce52019-11-05 16:52:29 +00007019 // Check that we loaded back the expected values.
7020
7021 // st2b/ld2b
7022 ASSERT_EQUAL_SVE(z4, z20);
7023 ASSERT_EQUAL_SVE(z5, z21);
7024
7025 // st2h/ld2h
7026 ASSERT_EQUAL_SVE(z6, z22);
7027 ASSERT_EQUAL_SVE(z7, z23);
7028
7029 // st2w/ld2w
7030 ASSERT_EQUAL_SVE(z8, z24);
7031 ASSERT_EQUAL_SVE(z9, z25);
7032
7033 // st2d/ld2d
7034 ASSERT_EQUAL_SVE(z10, z26);
7035 ASSERT_EQUAL_SVE(z11, z27);
7036
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007037 delete[] expected;
7038 }
7039 delete[] data;
7040}
7041
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007042TEST_SVE(sve_ld3_st3_scalar_plus_imm) {
7043 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7044 START();
7045
7046 int vl = config->sve_vl_in_bytes();
7047
7048 // The immediate can address [-24, 21] times the VL, so allocate enough space
7049 // to exceed that in both directions.
7050 int data_size = vl * 128;
7051
7052 uint8_t* data = new uint8_t[data_size];
7053 memset(data, 0, data_size);
7054
 7055  // Set the base half-way through the buffer so we can use negative indices.
7056 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7057
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007058 // We can test ld3 by comparing the values loaded with the values stored.
7059 // There are two complications:
7060 // - Loads have zeroing predication, so we have to clear the inactive
7061 // elements on our reference.
7062 // - We want to test both loads and stores that span { z31, z0 }, so we have
7063 // to move some values around.
7064 //
7065 // Registers z4-z15 will hold as-stored values (with inactive elements
7066 // cleared). Registers z16-z27 will hold the values that were loaded.
7067
7068 __ Index(z10.VnB(), 1, -3);
7069 __ Index(z11.VnB(), 2, -3);
7070 __ Index(z12.VnB(), 3, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007071 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007072 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p0, SVEMemOperand(x0));
7073 // Save the stored values for ld3 tests.
7074 __ Dup(z4.VnB(), 0);
7075 __ Dup(z5.VnB(), 0);
7076 __ Dup(z6.VnB(), 0);
7077 __ Mov(z4.VnB(), p0.Merging(), z10.VnB());
7078 __ Mov(z5.VnB(), p0.Merging(), z11.VnB());
7079 __ Mov(z6.VnB(), p0.Merging(), z12.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007080
7081 // Wrap around from z31 to z0.
7082 __ Index(z31.VnH(), -2, 5);
7083 __ Index(z0.VnH(), -3, 5);
7084 __ Index(z1.VnH(), -4, 5);
7085 __ Ptrue(p1.VnH(), SVE_MUL3);
7086 __ St3h(z31.VnH(), z0.VnH(), z1.VnH(), p1, SVEMemOperand(x0, 9, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007087 // Save the stored values for ld3 tests.
7088 __ Dup(z7.VnH(), 0);
7089 __ Dup(z8.VnH(), 0);
7090 __ Dup(z9.VnH(), 0);
7091 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
7092 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
7093 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007094
7095 __ Index(z30.VnS(), 3, -7);
7096 __ Index(z31.VnS(), 4, -7);
7097 __ Index(z0.VnS(), 5, -7);
7098 __ Ptrue(p2.VnS(), SVE_POW2);
7099 __ St3w(z30.VnS(),
7100 z31.VnS(),
7101 z0.VnS(),
7102 p2,
7103 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007104 // Save the stored values for ld3 tests.
7105 __ Dup(z10.VnS(), 0);
7106 __ Dup(z11.VnS(), 0);
7107 __ Dup(z12.VnS(), 0);
7108 __ Mov(z10.VnS(), p2.Merging(), z30.VnS());
7109 __ Mov(z11.VnS(), p2.Merging(), z31.VnS());
7110 __ Mov(z12.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007111
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007112 __ Index(z0.VnD(), -7, 3);
7113 __ Index(z1.VnD(), -8, 3);
7114 __ Index(z2.VnD(), -9, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007115 // Sparse predication, including some irrelevant bits (0xee). To make the
7116 // results easy to check, activate each lane <n> where n is a multiple of 5.
7117 Initialise(&masm,
7118 p3,
7119 0xeee10000000001ee,
7120 0xeeeeeee100000000,
7121 0x01eeeeeeeee10000,
7122 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007123 __ St3d(z0.VnD(), z1.VnD(), z2.VnD(), p3, SVEMemOperand(x0, 15, SVE_MUL_VL));
7124 // Save the stored values for ld3 tests.
7125 __ Dup(z13.VnD(), 0);
7126 __ Dup(z14.VnD(), 0);
7127 __ Dup(z15.VnD(), 0);
7128 __ Mov(z13.VnD(), p3.Merging(), z0.VnD());
7129 __ Mov(z14.VnD(), p3.Merging(), z1.VnD());
7130 __ Mov(z15.VnD(), p3.Merging(), z2.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007131
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007132 // Corresponding loads.
7133 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7134 __ Ld3b(z31.VnB(), z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(x0));
7135 __ Mov(z16, z31);
7136 __ Mov(z17, z0);
7137 __ Mov(z18, z1);
7138 __ Ld3h(z30.VnH(),
7139 z31.VnH(),
7140 z0.VnH(),
7141 p1.Zeroing(),
7142 SVEMemOperand(x0, 9, SVE_MUL_VL));
7143 __ Mov(z19, z30);
7144 __ Mov(z20, z31);
7145 __ Mov(z21, z0);
7146 __ Ld3w(z22.VnS(),
7147 z23.VnS(),
7148 z24.VnS(),
7149 p2.Zeroing(),
7150 SVEMemOperand(x0, -12, SVE_MUL_VL));
7151 __ Ld3d(z25.VnD(),
7152 z26.VnD(),
7153 z27.VnD(),
7154 p3.Zeroing(),
7155 SVEMemOperand(x0, 15, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007156
7157 END();
7158
7159 if (CAN_RUN()) {
7160 RUN();
7161
7162 uint8_t* expected = new uint8_t[data_size];
7163 memset(expected, 0, data_size);
7164 uint8_t* middle = &expected[data_size / 2];
7165
7166 int vl_b = vl / kBRegSizeInBytes;
7167 int vl_h = vl / kHRegSizeInBytes;
7168 int vl_s = vl / kSRegSizeInBytes;
7169 int vl_d = vl / kDRegSizeInBytes;
7170
7171 int reg_count = 3;
7172
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007173 // st3b { z10.b, z11.b, z12.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007174 for (int i = 0; i < vl_b; i++) {
7175 uint8_t lane0 = 1 - (3 * i);
7176 uint8_t lane1 = 2 - (3 * i);
7177 uint8_t lane2 = 3 - (3 * i);
7178 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7179 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7180 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
7181 }
7182
7183 // st3h { z31.h, z0.h, z1.h }, SVE_MUL3
7184 int vl_h_mul3 = vl_h - (vl_h % 3);
7185 for (int i = 0; i < vl_h_mul3; i++) {
7186 int64_t offset = 9 * vl;
7187 uint16_t lane0 = -2 + (5 * i);
7188 uint16_t lane1 = -3 + (5 * i);
7189 uint16_t lane2 = -4 + (5 * i);
7190 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7191 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7192 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7193 }
7194
7195 // st3w { z30.s, z31.s, z0.s }, SVE_POW2
7196 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7197 for (int i = 0; i < vl_s_pow2; i++) {
7198 int64_t offset = -12 * vl;
7199 uint32_t lane0 = 3 - (7 * i);
7200 uint32_t lane1 = 4 - (7 * i);
7201 uint32_t lane2 = 5 - (7 * i);
7202 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7203 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7204 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7205 }
7206
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007207 // st3d { z0.d, z1.d, z2.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007208 for (int i = 0; i < vl_d; i++) {
7209 if ((i % 5) == 0) {
7210 int64_t offset = 15 * vl;
7211 uint64_t lane0 = -7 + (3 * i);
7212 uint64_t lane1 = -8 + (3 * i);
7213 uint64_t lane2 = -9 + (3 * i);
7214 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7215 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7216 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7217 }
7218 }
7219
7220 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7221
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007222 // Check that we loaded back the expected values.
7223
7224 // st3b/ld3b
7225 ASSERT_EQUAL_SVE(z4, z16);
7226 ASSERT_EQUAL_SVE(z5, z17);
7227 ASSERT_EQUAL_SVE(z6, z18);
7228
7229 // st3h/ld3h
7230 ASSERT_EQUAL_SVE(z7, z19);
7231 ASSERT_EQUAL_SVE(z8, z20);
7232 ASSERT_EQUAL_SVE(z9, z21);
7233
7234 // st3w/ld3w
7235 ASSERT_EQUAL_SVE(z10, z22);
7236 ASSERT_EQUAL_SVE(z11, z23);
7237 ASSERT_EQUAL_SVE(z12, z24);
7238
7239 // st3d/ld3d
7240 ASSERT_EQUAL_SVE(z13, z25);
7241 ASSERT_EQUAL_SVE(z14, z26);
7242 ASSERT_EQUAL_SVE(z15, z27);
7243
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007244 delete[] expected;
7245 }
7246 delete[] data;
7247}
7248
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007249TEST_SVE(sve_ld3_st3_scalar_plus_scalar) {
7250 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7251 START();
7252
7253 int vl = config->sve_vl_in_bytes();
7254
7255 // Allocate plenty of space to enable indexing in both directions.
7256 int data_size = vl * 128;
7257
7258 uint8_t* data = new uint8_t[data_size];
7259 memset(data, 0, data_size);
7260
 7261  // Set the base half-way through the buffer so we can use negative indices.
7262 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7263
Jacob Bramleye483ce52019-11-05 16:52:29 +00007264 // We can test ld3 by comparing the values loaded with the values stored.
7265 // There are two complications:
7266 // - Loads have zeroing predication, so we have to clear the inactive
7267 // elements on our reference.
7268 // - We want to test both loads and stores that span { z31, z0 }, so we have
7269 // to move some values around.
7270 //
7271 // Registers z4-z15 will hold as-stored values (with inactive elements
7272 // cleared). Registers z16-z27 will hold the values that were loaded.
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007273
Jacob Bramleye483ce52019-11-05 16:52:29 +00007274 __ Index(z10.VnB(), -4, 11);
7275 __ Index(z11.VnB(), -5, 11);
7276 __ Index(z12.VnB(), -6, 11);
7277 __ Ptrue(p7.VnB(), SVE_MUL4);
7278 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
7279 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p7, SVEMemOperand(x0, x1, LSL, 0));
7280 // Save the stored values for ld3 tests.
7281 __ Dup(z4.VnB(), 0);
7282 __ Dup(z5.VnB(), 0);
7283 __ Dup(z6.VnB(), 0);
7284 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
7285 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
7286 __ Mov(z6.VnB(), p7.Merging(), z12.VnB());
7287
7288 __ Index(z13.VnH(), 6, -2);
7289 __ Index(z14.VnH(), 7, -2);
7290 __ Index(z15.VnH(), 8, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007291 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007292 __ Rdvl(x2, 5); // (5 * vl) << 1 = 10 * vl
7293 __ St3h(z13.VnH(), z14.VnH(), z15.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
7294 // Save the stored values for ld3 tests.
7295 __ Dup(z7.VnH(), 0);
7296 __ Dup(z8.VnH(), 0);
7297 __ Dup(z9.VnH(), 0);
7298 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
7299 __ Mov(z8.VnH(), p6.Merging(), z14.VnH());
7300 __ Mov(z9.VnH(), p6.Merging(), z15.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007301
7302 // Wrap around from z31 to z0.
7303 __ Index(z30.VnS(), -7, 3);
7304 __ Index(z31.VnS(), -8, 3);
7305 __ Index(z0.VnS(), -9, 3);
7306 // Sparse predication, including some irrelevant bits (0xe). To make the
7307 // results easy to check, activate each lane <n> where n is a multiple of 5.
7308 Initialise(&masm,
7309 p5,
7310 0xeee1000010000100,
7311 0x001eeee100001000,
7312 0x0100001eeee10000,
7313 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007314 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
7315 __ St3w(z30.VnS(), z31.VnS(), z0.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
7316 // Save the stored values for ld3 tests.
7317 __ Dup(z10.VnS(), 0);
7318 __ Dup(z11.VnS(), 0);
7319 __ Dup(z12.VnS(), 0);
7320 __ Mov(z10.VnS(), p5.Merging(), z30.VnS());
7321 __ Mov(z11.VnS(), p5.Merging(), z31.VnS());
7322 __ Mov(z12.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007323
7324 __ Index(z31.VnD(), 32, -11);
7325 __ Index(z0.VnD(), 33, -11);
7326 __ Index(z1.VnD(), 34, -11);
7327 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007328 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
7329 __ St3d(z31.VnD(), z0.VnD(), z1.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
7330 // Save the stored values for ld3 tests.
7331 __ Dup(z13.VnD(), 0);
7332 __ Dup(z14.VnD(), 0);
7333 __ Dup(z15.VnD(), 0);
7334 __ Mov(z13.VnD(), p4.Merging(), z31.VnD());
7335 __ Mov(z14.VnD(), p4.Merging(), z0.VnD());
7336 __ Mov(z15.VnD(), p4.Merging(), z1.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007337
Jacob Bramleye483ce52019-11-05 16:52:29 +00007338 // Corresponding loads.
7339 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7340 __ Ld3b(z31.VnB(),
7341 z0.VnB(),
7342 z1.VnB(),
7343 p7.Zeroing(),
7344 SVEMemOperand(x0, x1, LSL, 0));
7345 __ Mov(z16, z31);
7346 __ Mov(z17, z0);
7347 __ Mov(z18, z1);
7348 __ Ld3h(z30.VnH(),
7349 z31.VnH(),
7350 z0.VnH(),
7351 p6.Zeroing(),
7352 SVEMemOperand(x0, x2, LSL, 1));
7353 __ Mov(z19, z30);
7354 __ Mov(z20, z31);
7355 __ Mov(z21, z0);
7356 __ Ld3w(z22.VnS(),
7357 z23.VnS(),
7358 z24.VnS(),
7359 p5.Zeroing(),
7360 SVEMemOperand(x0, x3, LSL, 2));
7361 __ Ld3d(z25.VnD(),
7362 z26.VnD(),
7363 z27.VnD(),
7364 p4.Zeroing(),
7365 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007366
7367 END();
7368
7369 if (CAN_RUN()) {
7370 RUN();
7371
7372 uint8_t* expected = new uint8_t[data_size];
7373 memset(expected, 0, data_size);
7374 uint8_t* middle = &expected[data_size / 2];
7375
7376 int vl_b = vl / kBRegSizeInBytes;
7377 int vl_h = vl / kHRegSizeInBytes;
7378 int vl_s = vl / kSRegSizeInBytes;
7379 int vl_d = vl / kDRegSizeInBytes;
7380
7381 int reg_count = 3;
7382
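    // st3 interleaves its three registers in memory: for element index i, the
    // values from the first, second and third registers occupy structure slots
    // (3 * i), (3 * i + 1) and (3 * i + 2), each slot one element wide. The
    // loops below rebuild that layout in `expected`.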
Jacob Bramleye483ce52019-11-05 16:52:29 +00007383 // st3b { z10.b, z11.b, z12.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007384 int vl_b_mul4 = vl_b - (vl_b % 4);
7385 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007386 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007387 uint8_t lane0 = -4 + (11 * i);
7388 uint8_t lane1 = -5 + (11 * i);
7389 uint8_t lane2 = -6 + (11 * i);
7390 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7391 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7392 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7393 }
7394
Jacob Bramleye483ce52019-11-05 16:52:29 +00007395 // st3h { z13.h, z14.h, z15.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007396 if (vl_h >= 16) {
7397 for (int i = 0; i < 16; i++) {
7398 int64_t offset = (5 << kHRegSizeInBytesLog2) * vl;
7399 uint16_t lane0 = 6 - (2 * i);
7400 uint16_t lane1 = 7 - (2 * i);
7401 uint16_t lane2 = 8 - (2 * i);
7402 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7403 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7404 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7405 }
7406 }
7407
7408 // st3w { z30.s, z31.s, z0.s }, ((i % 5) == 0)
7409 for (int i = 0; i < vl_s; i++) {
7410 if ((i % 5) == 0) {
7411 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
7412 uint32_t lane0 = -7 + (3 * i);
7413 uint32_t lane1 = -8 + (3 * i);
7414 uint32_t lane2 = -9 + (3 * i);
7415 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7416 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7417 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7418 }
7419 }
7420
7421 // st3d { z31.d, z0.d, z1.d }, SVE_MUL3
7422 int vl_d_mul3 = vl_d - (vl_d % 3);
7423 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007424 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007425 uint64_t lane0 = 32 - (11 * i);
7426 uint64_t lane1 = 33 - (11 * i);
7427 uint64_t lane2 = 34 - (11 * i);
7428 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7429 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7430 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7431 }
7432
7433 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7434
Jacob Bramleye483ce52019-11-05 16:52:29 +00007435 // Check that we loaded back the expected values.
7436
7437 // st3b/ld3b
7438 ASSERT_EQUAL_SVE(z4, z16);
7439 ASSERT_EQUAL_SVE(z5, z17);
7440 ASSERT_EQUAL_SVE(z6, z18);
7441
7442 // st3h/ld3h
7443 ASSERT_EQUAL_SVE(z7, z19);
7444 ASSERT_EQUAL_SVE(z8, z20);
7445 ASSERT_EQUAL_SVE(z9, z21);
7446
7447 // st3w/ld3w
7448 ASSERT_EQUAL_SVE(z10, z22);
7449 ASSERT_EQUAL_SVE(z11, z23);
7450 ASSERT_EQUAL_SVE(z12, z24);
7451
7452 // st3d/ld3d
7453 ASSERT_EQUAL_SVE(z13, z25);
7454 ASSERT_EQUAL_SVE(z14, z26);
7455 ASSERT_EQUAL_SVE(z15, z27);
7456
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007457 delete[] expected;
7458 }
7459 delete[] data;
7460}
7461
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007462TEST_SVE(sve_ld4_st4_scalar_plus_imm) {
7463 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7464 START();
7465
7466 int vl = config->sve_vl_in_bytes();
7467
7468 // The immediate can address [-32, 28] times the VL (a multiple of four), so
7469 // allocate enough space to exceed that in both directions.
7470 int data_size = vl * 128;
7471
7472 uint8_t* data = new uint8_t[data_size];
7473 memset(data, 0, data_size);
7474
7475 // Set the base half-way through the buffer so we can use negative indices.
7476 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7477
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007478 // We can test ld4 by comparing the values loaded with the values stored.
7479 // There are two complications:
7480 // - Loads have zeroing predication, so we have to clear the inactive
7481 // elements on our reference.
7482 // - We want to test both loads and stores that span { z31, z0 }, so we have
7483 // to move some values around.
7484 //
7485 // Registers z3-z18 will hold as-stored values (with inactive elements
7486 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
7487 // loaded.
7488
7489 __ Index(z10.VnB(), 1, -7);
7490 __ Index(z11.VnB(), 2, -7);
7491 __ Index(z12.VnB(), 3, -7);
7492 __ Index(z13.VnB(), 4, -7);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007493 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007494 __ St4b(z10.VnB(), z11.VnB(), z12.VnB(), z13.VnB(), p0, SVEMemOperand(x0));
7495 // Save the stored values for ld4 tests.
7496 __ Dup(z3.VnB(), 0);
7497 __ Dup(z4.VnB(), 0);
7498 __ Dup(z5.VnB(), 0);
7499 __ Dup(z6.VnB(), 0);
7500 __ Mov(z3.VnB(), p0.Merging(), z10.VnB());
7501 __ Mov(z4.VnB(), p0.Merging(), z11.VnB());
7502 __ Mov(z5.VnB(), p0.Merging(), z12.VnB());
7503 __ Mov(z6.VnB(), p0.Merging(), z13.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007504
7505 // Wrap around from z31 to z0.
7506 __ Index(z31.VnH(), -2, 5);
7507 __ Index(z0.VnH(), -3, 5);
7508 __ Index(z1.VnH(), -4, 5);
7509 __ Index(z2.VnH(), -5, 5);
7510 __ Ptrue(p1.VnH(), SVE_MUL3);
7511 __ St4h(z31.VnH(),
7512 z0.VnH(),
7513 z1.VnH(),
7514 z2.VnH(),
7515 p1,
7516 SVEMemOperand(x0, 4, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007517 // Save the stored values for ld4 tests.
7518 __ Dup(z7.VnH(), 0);
7519 __ Dup(z8.VnH(), 0);
7520 __ Dup(z9.VnH(), 0);
7521 __ Dup(z10.VnH(), 0);
7522 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
7523 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
7524 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
7525 __ Mov(z10.VnH(), p1.Merging(), z2.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007526
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007527 // Wrap around from z31 to z0.
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007528 __ Index(z29.VnS(), 2, -7);
7529 __ Index(z30.VnS(), 3, -7);
7530 __ Index(z31.VnS(), 4, -7);
7531 __ Index(z0.VnS(), 5, -7);
7532 __ Ptrue(p2.VnS(), SVE_POW2);
7533 __ St4w(z29.VnS(),
7534 z30.VnS(),
7535 z31.VnS(),
7536 z0.VnS(),
7537 p2,
7538 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007539 // Save the stored values for ld4 tests.
7540 __ Dup(z11.VnS(), 0);
7541 __ Dup(z12.VnS(), 0);
7542 __ Dup(z13.VnS(), 0);
7543 __ Dup(z14.VnS(), 0);
7544 __ Mov(z11.VnS(), p2.Merging(), z29.VnS());
7545 __ Mov(z12.VnS(), p2.Merging(), z30.VnS());
7546 __ Mov(z13.VnS(), p2.Merging(), z31.VnS());
7547 __ Mov(z14.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007548
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007549 __ Index(z20.VnD(), -7, 8);
7550 __ Index(z21.VnD(), -8, 8);
7551 __ Index(z22.VnD(), -9, 8);
7552 __ Index(z23.VnD(), -10, 8);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007553 // Sparse predication, including some irrelevant bits (0xee). To make the
7554 // results easy to check, activate each lane <n> where n is a multiple of 5.
7555 Initialise(&masm,
7556 p3,
7557 0xeee10000000001ee,
7558 0xeeeeeee100000000,
7559 0x01eeeeeeeee10000,
7560 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007561 __ St4d(z20.VnD(),
7562 z21.VnD(),
7563 z22.VnD(),
7564 z23.VnD(),
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007565 p3,
7566 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007567 // Save the stored values for ld4 tests.
7568 __ Dup(z15.VnD(), 0);
7569 __ Dup(z16.VnD(), 0);
7570 __ Dup(z17.VnD(), 0);
7571 __ Dup(z18.VnD(), 0);
7572 __ Mov(z15.VnD(), p3.Merging(), z20.VnD());
7573 __ Mov(z16.VnD(), p3.Merging(), z21.VnD());
7574 __ Mov(z17.VnD(), p3.Merging(), z22.VnD());
7575 __ Mov(z18.VnD(), p3.Merging(), z23.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007576
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007577 // Corresponding loads.
7578 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7579 __ Ld4b(z31.VnB(),
7580 z0.VnB(),
7581 z1.VnB(),
7582 z2.VnB(),
7583 p0.Zeroing(),
7584 SVEMemOperand(x0));
7585 __ Mov(z19, z31);
7586 __ Mov(z20, z0);
7587 __ Mov(z21, z1);
7588 __ Mov(z22, z2);
7589 __ Ld4h(z23.VnH(),
7590 z24.VnH(),
7591 z25.VnH(),
7592 z26.VnH(),
7593 p1.Zeroing(),
7594 SVEMemOperand(x0, 4, SVE_MUL_VL));
7595 __ Ld4w(z27.VnS(),
7596 z28.VnS(),
7597 z29.VnS(),
7598 z30.VnS(),
7599 p2.Zeroing(),
7600 SVEMemOperand(x0, -12, SVE_MUL_VL));
7601 // Wrap around from z31 to z0.
7602 __ Ld4d(z31.VnD(),
7603 z0.VnD(),
7604 z1.VnD(),
7605 z2.VnD(),
7606 p3.Zeroing(),
7607 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007608
7609 END();
7610
7611 if (CAN_RUN()) {
7612 RUN();
7613
7614 uint8_t* expected = new uint8_t[data_size];
7615 memset(expected, 0, data_size);
7616 uint8_t* middle = &expected[data_size / 2];
7617
7618 int vl_b = vl / kBRegSizeInBytes;
7619 int vl_h = vl / kHRegSizeInBytes;
7620 int vl_s = vl / kSRegSizeInBytes;
7621 int vl_d = vl / kDRegSizeInBytes;
7622
7623 int reg_count = 4;
7624
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007625 // st4b { z10.b, z11.b, z12.b, z13.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007626 for (int i = 0; i < vl_b; i++) {
7627 uint8_t lane0 = 1 - (7 * i);
7628 uint8_t lane1 = 2 - (7 * i);
7629 uint8_t lane2 = 3 - (7 * i);
7630 uint8_t lane3 = 4 - (7 * i);
7631 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7632 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7633 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
7634 MemoryWrite(middle, 0, (i * reg_count) + 3, lane3);
7635 }
7636
7637 // st4h { z31.h, z0.h, z1.h, z2.h }, SVE_MUL3
7638 int vl_h_mul3 = vl_h - (vl_h % 3);
7639 for (int i = 0; i < vl_h_mul3; i++) {
7640 int64_t offset = 4 * vl;
7641 uint16_t lane0 = -2 + (5 * i);
7642 uint16_t lane1 = -3 + (5 * i);
7643 uint16_t lane2 = -4 + (5 * i);
7644 uint16_t lane3 = -5 + (5 * i);
7645 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7646 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7647 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7648 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7649 }
7650
7651 // st4w { z29.s, z30.s, z31.s, z0.s }, SVE_POW2
7652 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7653 for (int i = 0; i < vl_s_pow2; i++) {
7654 int64_t offset = -12 * vl;
7655 uint32_t lane0 = 2 - (7 * i);
7656 uint32_t lane1 = 3 - (7 * i);
7657 uint32_t lane2 = 4 - (7 * i);
7658 uint32_t lane3 = 5 - (7 * i);
7659 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7660 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7661 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7662 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7663 }
7664
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007665 // st4d { z20.d, z21.d, z22.d, z23.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007666 for (int i = 0; i < vl_d; i++) {
7667 if ((i % 5) == 0) {
7668 int64_t offset = 16 * vl;
7669 uint64_t lane0 = -7 + (8 * i);
7670 uint64_t lane1 = -8 + (8 * i);
7671 uint64_t lane2 = -9 + (8 * i);
7672 uint64_t lane3 = -10 + (8 * i);
7673 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7674 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7675 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7676 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7677 }
7678 }
7679
7680 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7681
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007682 // Check that we loaded back the expected values.
7683
7684 // st4b/ld4b
7685 ASSERT_EQUAL_SVE(z3, z19);
7686 ASSERT_EQUAL_SVE(z4, z20);
7687 ASSERT_EQUAL_SVE(z5, z21);
7688 ASSERT_EQUAL_SVE(z6, z22);
7689
7690 // st4h/ld4h
7691 ASSERT_EQUAL_SVE(z7, z23);
7692 ASSERT_EQUAL_SVE(z8, z24);
7693 ASSERT_EQUAL_SVE(z9, z25);
7694 ASSERT_EQUAL_SVE(z10, z26);
7695
7696 // st4w/ld4w
7697 ASSERT_EQUAL_SVE(z11, z27);
7698 ASSERT_EQUAL_SVE(z12, z28);
7699 ASSERT_EQUAL_SVE(z13, z29);
7700 ASSERT_EQUAL_SVE(z14, z30);
7701
7702 // st4d/ld4d
7703 ASSERT_EQUAL_SVE(z15, z31);
7704 ASSERT_EQUAL_SVE(z16, z0);
7705 ASSERT_EQUAL_SVE(z17, z1);
7706 ASSERT_EQUAL_SVE(z18, z2);
7707
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007708 delete[] expected;
7709 }
7710 delete[] data;
7711}
7712
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007713TEST_SVE(sve_ld4_st4_scalar_plus_scalar) {
7714 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7715 START();
7716
7717 int vl = config->sve_vl_in_bytes();
7718
7719 // Allocate plenty of space to enable indexing in both directions.
7720 int data_size = vl * 128;
7721
7722 uint8_t* data = new uint8_t[data_size];
7723 memset(data, 0, data_size);
7724
7725 // Set the base half-way through the buffer so we can use negative indices.
7726 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7727
Jacob Bramleye483ce52019-11-05 16:52:29 +00007728 // We can test ld4 by comparing the values loaded with the values stored.
7729 // There are two complications:
7730 // - Loads have zeroing predication, so we have to clear the inactive
7731 // elements on our reference.
7732 // - We want to test both loads and stores that span { z31, z0 }, so we have
7733 // to move some values around.
7734 //
7735 // Registers z3-z18 will hold as-stored values (with inactive elements
7736 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
7737 // loaded.
7738
7739 __ Index(z19.VnB(), -4, 11);
7740 __ Index(z20.VnB(), -5, 11);
7741 __ Index(z21.VnB(), -6, 11);
7742 __ Index(z22.VnB(), -7, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007743 __ Ptrue(p7.VnB(), SVE_MUL4);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007744 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
7745 __ St4b(z19.VnB(),
7746 z20.VnB(),
7747 z21.VnB(),
7748 z22.VnB(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007749 p7,
7750 SVEMemOperand(x0, x1, LSL, 0));
Jacob Bramleye483ce52019-11-05 16:52:29 +00007751 // Save the stored values for ld4 tests.
7752 __ Dup(z3.VnB(), 0);
7753 __ Dup(z4.VnB(), 0);
7754 __ Dup(z5.VnB(), 0);
7755 __ Dup(z6.VnB(), 0);
7756 __ Mov(z3.VnB(), p7.Merging(), z19.VnB());
7757 __ Mov(z4.VnB(), p7.Merging(), z20.VnB());
7758 __ Mov(z5.VnB(), p7.Merging(), z21.VnB());
7759 __ Mov(z6.VnB(), p7.Merging(), z22.VnB());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007760
Jacob Bramleye483ce52019-11-05 16:52:29 +00007761 __ Index(z23.VnH(), 6, -2);
7762 __ Index(z24.VnH(), 7, -2);
7763 __ Index(z25.VnH(), 8, -2);
7764 __ Index(z26.VnH(), 9, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007765 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007766 __ Rdvl(x2, 7); // (7 * vl) << 1 = 14 * vl
7767 __ St4h(z23.VnH(),
7768 z24.VnH(),
7769 z25.VnH(),
7770 z26.VnH(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007771 p6,
7772 SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleye483ce52019-11-05 16:52:29 +00007773 // Save the stored values for ld4 tests.
7774 __ Dup(z7.VnH(), 0);
7775 __ Dup(z8.VnH(), 0);
7776 __ Dup(z9.VnH(), 0);
7777 __ Dup(z10.VnH(), 0);
7778 __ Mov(z7.VnH(), p6.Merging(), z23.VnH());
7779 __ Mov(z8.VnH(), p6.Merging(), z24.VnH());
7780 __ Mov(z9.VnH(), p6.Merging(), z25.VnH());
7781 __ Mov(z10.VnH(), p6.Merging(), z26.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007782
7783 // Wrap around from z31 to z0.
7784 __ Index(z29.VnS(), -6, 7);
7785 __ Index(z30.VnS(), -7, 7);
7786 __ Index(z31.VnS(), -8, 7);
7787 __ Index(z0.VnS(), -9, 7);
7788 // Sparse predication, including some irrelevant bits (0xe). To make the
7789 // results easy to check, activate each lane <n> where n is a multiple of 5.
7790 Initialise(&masm,
7791 p5,
7792 0xeee1000010000100,
7793 0x001eeee100001000,
7794 0x0100001eeee10000,
7795 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007796 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007797 __ St4w(z29.VnS(),
7798 z30.VnS(),
7799 z31.VnS(),
7800 z0.VnS(),
7801 p5,
Jacob Bramleye483ce52019-11-05 16:52:29 +00007802 SVEMemOperand(x0, x3, LSL, 2));
7803 // Save the stored values for ld4 tests.
7804 __ Dup(z11.VnS(), 0);
7805 __ Dup(z12.VnS(), 0);
7806 __ Dup(z13.VnS(), 0);
7807 __ Dup(z14.VnS(), 0);
7808 __ Mov(z11.VnS(), p5.Merging(), z29.VnS());
7809 __ Mov(z12.VnS(), p5.Merging(), z30.VnS());
7810 __ Mov(z13.VnS(), p5.Merging(), z31.VnS());
7811 __ Mov(z14.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007812
7813 __ Index(z31.VnD(), 32, -11);
7814 __ Index(z0.VnD(), 33, -11);
7815 __ Index(z1.VnD(), 34, -11);
7816 __ Index(z2.VnD(), 35, -11);
7817 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007818 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007819 __ St4d(z31.VnD(),
7820 z0.VnD(),
7821 z1.VnD(),
7822 z2.VnD(),
7823 p4,
Jacob Bramleye483ce52019-11-05 16:52:29 +00007824 SVEMemOperand(x0, x4, LSL, 3));
7825 // Save the stored values for ld4 tests.
7826 __ Dup(z15.VnD(), 0);
7827 __ Dup(z16.VnD(), 0);
7828 __ Dup(z17.VnD(), 0);
7829 __ Dup(z18.VnD(), 0);
7830 __ Mov(z15.VnD(), p4.Merging(), z31.VnD());
7831 __ Mov(z16.VnD(), p4.Merging(), z0.VnD());
7832 __ Mov(z17.VnD(), p4.Merging(), z1.VnD());
7833 __ Mov(z18.VnD(), p4.Merging(), z2.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007834
Jacob Bramleye483ce52019-11-05 16:52:29 +00007835 // Corresponding loads.
7836 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7837 __ Ld4b(z31.VnB(),
7838 z0.VnB(),
7839 z1.VnB(),
7840 z2.VnB(),
7841 p7.Zeroing(),
7842 SVEMemOperand(x0, x1, LSL, 0));
7843 __ Mov(z19, z31);
7844 __ Mov(z20, z0);
7845 __ Mov(z21, z1);
7846 __ Mov(z22, z2);
7847 __ Ld4h(z23.VnH(),
7848 z24.VnH(),
7849 z25.VnH(),
7850 z26.VnH(),
7851 p6.Zeroing(),
7852 SVEMemOperand(x0, x2, LSL, 1));
7853 __ Ld4w(z27.VnS(),
7854 z28.VnS(),
7855 z29.VnS(),
7856 z30.VnS(),
7857 p5.Zeroing(),
7858 SVEMemOperand(x0, x3, LSL, 2));
7859 // Wrap around from z31 to z0.
7860 __ Ld4d(z31.VnD(),
7861 z0.VnD(),
7862 z1.VnD(),
7863 z2.VnD(),
7864 p4.Zeroing(),
7865 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007866
7867 END();
7868
7869 if (CAN_RUN()) {
7870 RUN();
7871
7872 uint8_t* expected = new uint8_t[data_size];
7873 memset(expected, 0, data_size);
7874 uint8_t* middle = &expected[data_size / 2];
7875
7876 int vl_b = vl / kBRegSizeInBytes;
7877 int vl_h = vl / kHRegSizeInBytes;
7878 int vl_s = vl / kSRegSizeInBytes;
7879 int vl_d = vl / kDRegSizeInBytes;
7880
7881 int reg_count = 4;
7882
Jacob Bramleye483ce52019-11-05 16:52:29 +00007883 // st4b { z19.b, z20.b, z21.b, z22.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007884 int vl_b_mul4 = vl_b - (vl_b % 4);
7885 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007886 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007887 uint8_t lane0 = -4 + (11 * i);
7888 uint8_t lane1 = -5 + (11 * i);
7889 uint8_t lane2 = -6 + (11 * i);
7890 uint8_t lane3 = -7 + (11 * i);
7891 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7892 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7893 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7894 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7895 }
7896
Jacob Bramleye483ce52019-11-05 16:52:29 +00007897 // st4h { z23.h, z24.h, z25.h, z26.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007898 if (vl_h >= 16) {
7899 for (int i = 0; i < 16; i++) {
7900 int64_t offset = (7 << kHRegSizeInBytesLog2) * vl;
7901 uint16_t lane0 = 6 - (2 * i);
7902 uint16_t lane1 = 7 - (2 * i);
7903 uint16_t lane2 = 8 - (2 * i);
7904 uint16_t lane3 = 9 - (2 * i);
7905 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7906 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7907 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7908 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7909 }
7910 }
7911
7912 // st4w { z29.s, z30.s, z31.s, z0.s }, ((i % 5) == 0)
7913 for (int i = 0; i < vl_s; i++) {
7914 if ((i % 5) == 0) {
7915 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
7916 uint32_t lane0 = -6 + (7 * i);
7917 uint32_t lane1 = -7 + (7 * i);
7918 uint32_t lane2 = -8 + (7 * i);
7919 uint32_t lane3 = -9 + (7 * i);
7920 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7921 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7922 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7923 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7924 }
7925 }
7926
7927 // st4d { z31.d, z0.d, z1.d, z2.d }, SVE_MUL3
7928 int vl_d_mul3 = vl_d - (vl_d % 3);
7929 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007930 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007931 uint64_t lane0 = 32 - (11 * i);
7932 uint64_t lane1 = 33 - (11 * i);
7933 uint64_t lane2 = 34 - (11 * i);
7934 uint64_t lane3 = 35 - (11 * i);
7935 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7936 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7937 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7938 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7939 }
7940
7941 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7942
Jacob Bramleye483ce52019-11-05 16:52:29 +00007943 // Check that we loaded back the expected values.
7944
7945 // st4b/ld4b
7946 ASSERT_EQUAL_SVE(z3, z19);
7947 ASSERT_EQUAL_SVE(z4, z20);
7948 ASSERT_EQUAL_SVE(z5, z21);
7949 ASSERT_EQUAL_SVE(z6, z22);
7950
7951 // st4h/ld4h
7952 ASSERT_EQUAL_SVE(z7, z23);
7953 ASSERT_EQUAL_SVE(z8, z24);
7954 ASSERT_EQUAL_SVE(z9, z25);
7955 ASSERT_EQUAL_SVE(z10, z26);
7956
7957 // st4w/ld4w
7958 ASSERT_EQUAL_SVE(z11, z27);
7959 ASSERT_EQUAL_SVE(z12, z28);
7960 ASSERT_EQUAL_SVE(z13, z29);
7961 ASSERT_EQUAL_SVE(z14, z30);
7962
7963 // st4d/ld4d
7964 ASSERT_EQUAL_SVE(z15, z31);
7965 ASSERT_EQUAL_SVE(z16, z0);
7966 ASSERT_EQUAL_SVE(z17, z1);
7967 ASSERT_EQUAL_SVE(z18, z2);
7968
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007969 delete[] expected;
7970 }
7971 delete[] data;
7972}
7973
7974TEST_SVE(sve_ld234_st234_scalar_plus_scalar_sp) {
7975 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7976 START();
7977
7978 // Check that the simulator correctly interprets rn == 31 as sp.
7979 // The indexing logic is the same regardless, so we just check one load and
7980 // store of each type.
7981
7982 // There are no pre- or post-indexing modes, so reserve space first.
7983 __ ClaimVL(2 + 3 + 4);
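  // The stores below fill the claimed area contiguously: st2b data at sp + 0
  // (2 VLs), st3h data at sp + 2 * VL (3 VLs) and st4w data at sp + 5 * VL
  // (4 VLs).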
7984
7985 __ Index(z0.VnB(), 42, 2);
7986 __ Index(z1.VnB(), 43, 2);
7987 __ Ptrue(p0.VnB(), SVE_VL7);
7988 __ Rdvl(x0, 0);
7989 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, x0));
7990
7991 __ Index(z4.VnH(), 42, 3);
7992 __ Index(z5.VnH(), 43, 3);
7993 __ Index(z6.VnH(), 44, 3);
7994 __ Ptrue(p1.VnH(), SVE_POW2);
7995 __ Rdvl(x1, 2);
7996 __ Lsr(x1, x1, 1);
7997 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, x1, LSL, 1));
7998
7999 __ Index(z8.VnS(), 42, 4);
8000 __ Index(z9.VnS(), 43, 4);
8001 __ Index(z10.VnS(), 44, 4);
8002 __ Index(z11.VnS(), 45, 4);
8003 __ Ptrue(p2.VnS());
8004 __ Rdvl(x2, 2 + 3);
8005 __ Lsr(x2, x2, 2);
8006 __ St4w(z8.VnS(),
8007 z9.VnS(),
8008 z10.VnS(),
8009 z11.VnS(),
8010 p2,
8011 SVEMemOperand(sp, x2, LSL, 2));
8012
Jacob Bramleye483ce52019-11-05 16:52:29 +00008013 // Corresponding loads.
8014 // We have to explicitly zero inactive lanes in the reference values because
8015 // loads have zeroing predication.
8016 __ Dup(z12.VnB(), 0);
8017 __ Dup(z13.VnB(), 0);
8018 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8019 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8020 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, x0));
8021
8022 __ Dup(z16.VnH(), 0);
8023 __ Dup(z17.VnH(), 0);
8024 __ Dup(z18.VnH(), 0);
8025 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8026 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8027 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8028 __ Ld3h(z4.VnH(),
8029 z5.VnH(),
8030 z6.VnH(),
8031 p1.Zeroing(),
8032 SVEMemOperand(sp, x1, LSL, 1));
8033
8034 __ Dup(z20.VnS(), 0);
8035 __ Dup(z21.VnS(), 0);
8036 __ Dup(z22.VnS(), 0);
8037 __ Dup(z23.VnS(), 0);
8038 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8039 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8040 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8041 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8042 __ Ld4w(z8.VnS(),
8043 z9.VnS(),
8044 z10.VnS(),
8045 z11.VnS(),
8046 p2.Zeroing(),
8047 SVEMemOperand(sp, x2, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008048
8049 __ DropVL(2 + 3 + 4);
8050
8051 END();
8052
8053 if (CAN_RUN()) {
8054 RUN();
8055
8056 // The most likely failure mode is that the simulator reads sp as xzr and
8057 // crashes on execution. We already test the address calculations separately
8058 // and sp doesn't change this, so just test that we load the values we
8059 // stored.
Jacob Bramleye483ce52019-11-05 16:52:29 +00008060
8061 // st2b/ld2b
8062 ASSERT_EQUAL_SVE(z0, z12);
8063 ASSERT_EQUAL_SVE(z1, z13);
8064
8065 // st3h/ld3h
8066 ASSERT_EQUAL_SVE(z4, z16);
8067 ASSERT_EQUAL_SVE(z5, z17);
8068 ASSERT_EQUAL_SVE(z6, z18);
8069
8070 // st4w/ld4w
8071 ASSERT_EQUAL_SVE(z8, z20);
8072 ASSERT_EQUAL_SVE(z9, z21);
8073 ASSERT_EQUAL_SVE(z10, z22);
8074 ASSERT_EQUAL_SVE(z11, z23);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008075 }
8076}
8077
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008078TEST_SVE(sve_ld234_st234_scalar_plus_imm_sp) {
8079 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8080 START();
8081
8082 // Check that the simulator correctly interprets rn == 31 as sp.
8083 // The indexing logic is the same regardless, so we just check one load and
8084 // store of each type.
8085
8086 // There are no pre- or post-indexing modes, so reserve space first.
8087 // Note that the stores are ordered so that each immediate offset is a
8088 // multiple of the number of registers being stored.
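  // The resulting layout is: st4w data at sp + 0 (4 VLs), st2b data at
  // sp + 4 * VL (2 VLs) and st3h data at sp + 6 * VL (3 VLs), filling the
  // claimed 9 VLs exactly.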
8089 __ ClaimVL(4 + 2 + 3);
8090
8091 __ Index(z0.VnB(), 42, 2);
8092 __ Index(z1.VnB(), 43, 2);
8093 __ Ptrue(p0.VnB(), SVE_POW2);
8094 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, 4, SVE_MUL_VL));
8095
8096 __ Index(z4.VnH(), 42, 3);
8097 __ Index(z5.VnH(), 43, 3);
8098 __ Index(z6.VnH(), 44, 3);
8099 __ Ptrue(p1.VnH(), SVE_VL7);
8100 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, 6, SVE_MUL_VL));
8101
8102 __ Index(z8.VnS(), 42, 4);
8103 __ Index(z9.VnS(), 43, 4);
8104 __ Index(z10.VnS(), 44, 4);
8105 __ Index(z11.VnS(), 45, 4);
8106 __ Ptrue(p2.VnS());
8107 __ St4w(z8.VnS(), z9.VnS(), z10.VnS(), z11.VnS(), p2, SVEMemOperand(sp));
8108
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008109 // Corresponding loads.
8110 // We have to explicitly zero inactive lanes in the reference values because
8111 // loads have zeroing predication.
8112 __ Dup(z12.VnB(), 0);
8113 __ Dup(z13.VnB(), 0);
8114 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8115 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8116 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, 4, SVE_MUL_VL));
8117
8118 __ Dup(z16.VnH(), 0);
8119 __ Dup(z17.VnH(), 0);
8120 __ Dup(z18.VnH(), 0);
8121 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8122 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8123 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8124 __ Ld3h(z4.VnH(),
8125 z5.VnH(),
8126 z6.VnH(),
8127 p1.Zeroing(),
8128 SVEMemOperand(sp, 6, SVE_MUL_VL));
8129
8130 __ Dup(z20.VnS(), 0);
8131 __ Dup(z21.VnS(), 0);
8132 __ Dup(z22.VnS(), 0);
8133 __ Dup(z23.VnS(), 0);
8134 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8135 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8136 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8137 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8138 __ Ld4w(z8.VnS(),
8139 z9.VnS(),
8140 z10.VnS(),
8141 z11.VnS(),
8142 p2.Zeroing(),
8143 SVEMemOperand(sp));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008144
8145 __ DropVL(4 + 2 + 3);
8146
8147 END();
8148
8149 if (CAN_RUN()) {
8150 RUN();
8151
8152 // The most likely failure mode is that the simulator reads sp as xzr and
8153 // crashes on execution. We already test the address calculations separately
8154 // and sp doesn't change this, so just test that we load the values we
8155 // stored.
8156 // Check the loaded values against the merged reference copies saved above.
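
    // st2b/ld2b
    ASSERT_EQUAL_SVE(z0, z12);
    ASSERT_EQUAL_SVE(z1, z13);

    // st3h/ld3h
    ASSERT_EQUAL_SVE(z4, z16);
    ASSERT_EQUAL_SVE(z5, z17);
    ASSERT_EQUAL_SVE(z6, z18);

    // st4w/ld4w
    ASSERT_EQUAL_SVE(z8, z20);
    ASSERT_EQUAL_SVE(z9, z21);
    ASSERT_EQUAL_SVE(z10, z22);
    ASSERT_EQUAL_SVE(z11, z23);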
8157 }
8158}
8159
TatWai Chong6995bfd2019-09-26 10:48:05 +01008160typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
8161 const ZRegister& zn,
8162 const IntegerOperand imm);
8163
8164template <typename F, typename Td, typename Tn>
8165static void IntWideImmHelper(Test* config,
8166 F macro,
8167 unsigned lane_size_in_bits,
8168 const Tn& zn_inputs,
8169 IntegerOperand imm,
8170 const Td& zd_expected) {
8171 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8172 START();
8173
8174 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
8175 InsrHelper(&masm, zd1, zn_inputs);
8176
8177 // Also test with a different zn, to test the movprfx case.
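  // (With a distinct destination, the macro is presumably expected to emit a
  // movprfx before the destructive immediate form, rather than requiring
  // zd == zn.)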
8178 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
8179 InsrHelper(&masm, zn, zn_inputs);
8180 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
8181 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
8182
8183 // Make a copy so we can check that constructive operations preserve zn.
8184 __ Mov(zn_copy, zn);
8185
8186 {
8187 UseScratchRegisterScope temps(&masm);
8188 // The MacroAssembler needs a P scratch register for some of these macros,
8189 // and it doesn't have one by default.
8190 temps.Include(p3);
8191
8192 (masm.*macro)(zd1, zd1, imm);
8193 (masm.*macro)(zd2, zn, imm);
8194 }
8195
8196 END();
8197
8198 if (CAN_RUN()) {
8199 RUN();
8200
8201 ASSERT_EQUAL_SVE(zd_expected, zd1);
8202
8203 // Check that the movprfx form (zd2 != zn) gives the same result as the
8204 // destructive form.
8205 ASSERT_EQUAL_SVE(zd_expected, zd2);
8206
8207 ASSERT_EQUAL_SVE(zn_copy, zn);
8208 }
8209}
8210
8211TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
8212 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
8213 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
8214 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
8215 int64_t in_d[] = {1, 10, 10000, 1000000};
8216
8217 IntWideImmFn fn = &MacroAssembler::Smax;
8218
8219 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
8220 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
8221 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
8222 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
8223
8224 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
8225 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
8226 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
8227 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8228
8229 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
8230 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
8231 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
8232
8233 // The immediate is in the range [-128, 127], but the macro is able to
8234 // synthesise unencodable immediates.
8235 // B-sized lanes cannot take an immediate out of the range [-128, 127].
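  // (An unencodable immediate is presumably materialised into a scratch
  // Z register and applied with the predicated vector form, which is why
  // IntWideImmHelper makes a P scratch register available.)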
8236 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8237 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8238 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8239}
8240
8241TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
8242 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
8243 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
8244 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
8245 int64_t in_d[] = {1, 10, 10000, 1000000};
8246
8247 IntWideImmFn fn = &MacroAssembler::Smin;
8248
8249 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
8250 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
8251 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
8252 int64_t exp_d_1[] = {1, 10, 99, 99};
8253
8254 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
8255 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
8256 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
8257 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8258
8259 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
8260 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
8261 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
8262
8263 // The immediate is in the range [-128, 127], but the macro is able to
8264 // synthesise unencodable immediates.
8265 // B-sized lanes cannot take an immediate out of the range [-128, 127].
8266 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8267 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8268 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8269}
8270
8271TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
8272 int in_b[] = {0, 255, 127, 0x80, 1, 55};
8273 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
8274 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
8275 int64_t in_d[] = {1, 10, 10000, 1000000};
8276
8277 IntWideImmFn fn = &MacroAssembler::Umax;
8278
8279 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
8280 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
8281 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
8282 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
8283
8284 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
8285 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
8286 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
8287 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8288
8289 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
8290 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
8291 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
8292
8293 // The immediate is in the range [0, 255], but the macro is able to
8294 // synthesise unencodable immediates.
8295 // B-sized lanes cannot take an immediate out of the range [0, 255].
8296 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
8297 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8298 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8299}
8300
8301TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
8302 int in_b[] = {0, 255, 127, 0x80, 1, 55};
8303 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
8304 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
8305 int64_t in_d[] = {1, 10, 10000, 1000000};
8306
8307 IntWideImmFn fn = &MacroAssembler::Umin;
8308
8309 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
8310 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
8311 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
8312 int64_t exp_d_1[] = {1, 10, 99, 99};
8313
8314 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
8315 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
8316 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
8317 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8318
8319 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
8320 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
8321 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
8322
8323 // The immediate is in the range [0, 255], but the macro is able to
8324 // synthesise unencodable immediates.
8325 // B-sized lanes cannot take an immediate out of the range [0, 255].
8326 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
8327 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8328 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8329}
8330
8331TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
8332 int in_b[] = {11, -1, 7, -3};
8333 int in_h[] = {111, -1, 17, -123};
8334 int in_s[] = {11111, -1, 117, -12345};
8335 int64_t in_d[] = {0x7fffffff, 0x80000000};
8336
8337 IntWideImmFn fn = &MacroAssembler::Mul;
8338
8339 int exp_b_1[] = {66, -6, 42, -18};
8340 int exp_h_1[] = {-14208, 128, -2176, 15744};
8341 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
8342 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
8343
8344 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
8345 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
8346 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8347 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
8348
8349 int exp_h_2[] = {-28305, 255, -4335, 31365};
8350 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
8351 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
8352
8353 // The immediate is in the range [-128, 127], but the macro is able to
8354 // synthesise unencodable immediates.
8355 // B-sized lanes cannot take an immediate out of the range [-128, 127].
8356 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8357 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8358 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
8359
8360 // Integer overflow on multiplication.
8361 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
8362
8363 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
8364}
8365
8366TEST_SVE(sve_int_wide_imm_unpredicated_add) {
8367 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8368 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8369 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8370 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8371
8372 IntWideImmFn fn = &MacroAssembler::Add;
8373
8374 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
8375 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8376 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8377 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8378
8379 // Encodable with `add` (shift 0).
8380 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8381 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8382 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8383 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
8384
8385 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
8386 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8387 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8388
8389 // Encodable with `add` (shift 8).
8390 // B-sized lanes cannot take a shift of 8.
8391 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8392 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8393 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
8394
8395 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
8396
8397 // The macro is able to synthesise unencodable immediates.
8398 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01008399
8400 unsigned exp_b_4[] = {0x61, 0x5f, 0xf0, 0xdf};
8401 unsigned exp_h_4[] = {0x6181, 0x5f7f, 0xf010, 0x8aaa};
8402 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
8403 uint64_t exp_d_4[] = {0x8000000180018180, 0x7fffffff7fff7f7e};
8404
8405 // Negative immediates use `sub`.
8406 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
8407 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
8408 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
8409 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008410}
8411
8412TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
8413 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8414 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8415 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8416 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8417
8418 IntWideImmFn fn = &MacroAssembler::Sqadd;
8419
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008420 unsigned exp_b_1[] = {0x02, 0x7f, 0x7f, 0x7f};
TatWai Chong6995bfd2019-09-26 10:48:05 +01008421 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8422 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8423 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8424
8425 // Encodable with `sqadd` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008426 // Note that encodable immediates are unsigned, even for signed saturation.
8427 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008428 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8429 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008430 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008431
8432 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
8433 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8434 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8435
8436 // Encodable with `sqadd` (shift 8).
8437 // B-sized lanes cannot take a shift of 8.
8438 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8439 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8440 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008441}
8442
8443TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
8444 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8445 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8446 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8447 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8448
8449 IntWideImmFn fn = &MacroAssembler::Uqadd;
8450
8451 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
8452 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8453 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8454 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8455
8456 // Encodable with `uqadd` (shift 0).
8457 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8458 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8459 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8460 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
8461
8462 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
8463 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8464 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8465
8466 // Encodable with `uqadd` (shift 8).
8467 // B-sized lanes cannot take a shift of 8.
8468 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8469 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8470 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008471}
8472
8473TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
8474 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8475 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8476 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8477 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8478
8479 IntWideImmFn fn = &MacroAssembler::Sub;
8480
8481 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
8482 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8483 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8484 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8485
8486 // Encodable with `sub` (shift 0).
8487 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8488 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8489 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8490 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
8491
8492 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
8493 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8494 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8495
8496 // Encodable with `sub` (shift 8).
8497 // B-sized lanes cannot take a shift of 8.
8498 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8499 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8500 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
8501
8502 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
8503
8504 // The macro is able to synthesise unencodable immediates.
8505 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01008506
8507 unsigned exp_b_4[] = {0xa1, 0x9f, 0x30, 0x1f};
8508 unsigned exp_h_4[] = {0xa181, 0x9f7f, 0x3010, 0xcaaa};
8509 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
8510 uint64_t exp_d_4[] = {0x8000000180018182, 0x7fffffff7fff7f80};
8511
8512 // Negative immediates use `add`.
8513 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
8514 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
8515 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
8516 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008517}
8518
8519TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
8520 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8521 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8522 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8523 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8524
8525 IntWideImmFn fn = &MacroAssembler::Sqsub;
8526
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008527 unsigned exp_b_1[] = {0x80, 0xfe, 0x8f, 0x80};
TatWai Chong6995bfd2019-09-26 10:48:05 +01008528 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8529 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8530 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8531
8532 // Encodable with `sqsub` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008533 // Note that encodable immediates are unsigned, even for signed saturation.
8534 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008535 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8536 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008537 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008538
8539 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
8540 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8541 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8542
8543 // Encodable with `sqsub` (shift 8).
8544 // B-sized lanes cannot take a shift of 8.
8545 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8546 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8547 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008548}
8549
8550TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
8551 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8552 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8553 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8554 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8555
8556 IntWideImmFn fn = &MacroAssembler::Uqsub;
8557
8558 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
8559 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8560 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8561 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8562
8563 // Encodable with `uqsub` (shift 0).
8564 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8565 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8566 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8567 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
8568
8569 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
8570 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8571 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8572
8573 // Encodable with `uqsub` (shift 8).
8574 // B-sized lanes cannot take a shift of 8.
8575 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8576 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8577 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008578}
8579
8580TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
8581 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8582 START();
8583
8584 // Encodable with `subr` (shift 0).
8585 __ Index(z0.VnD(), 1, 1);
8586 __ Sub(z0.VnD(), 100, z0.VnD());
8587 __ Index(z1.VnS(), 0x7f, 1);
8588 __ Sub(z1.VnS(), 0xf7, z1.VnS());
8589 __ Index(z2.VnH(), 0xaaaa, 0x2222);
8590 __ Sub(z2.VnH(), 0x80, z2.VnH());
8591 __ Index(z3.VnB(), 133, 1);
8592 __ Sub(z3.VnB(), 255, z3.VnB());
8593
8594 // Encodable with `subr` (shift 8).
8595 __ Index(z4.VnD(), 256, -1);
8596 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
8597 __ Index(z5.VnS(), 0x7878, 1);
8598 __ Sub(z5.VnS(), 0x8000, z5.VnS());
8599 __ Index(z6.VnH(), 0x30f0, -1);
8600 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
8601 // B-sized lanes cannot take a shift of 8.
8602
8603 // Select with movprfx.
8604 __ Index(z31.VnD(), 256, 4001);
8605 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
8606
8607 // Outside the encodable immediate range of `subr`.
8608 __ Index(z30.VnS(), 0x11223344, 1);
8609 __ Sub(z8.VnS(), 0x88776655, z30.VnS());
8610
8611 END();
8612
8613 if (CAN_RUN()) {
8614 RUN();
8615
8616 int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
8617 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
8618
8619 int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
8620 ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
8621
8622 int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
8623 ASSERT_EQUAL_SVE(expected_z2, z2.VnH());
8624
8625 int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
8626 ASSERT_EQUAL_SVE(expected_z3, z3.VnB());
8627
8628 int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
8629 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
8630
8631 int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
8632 ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
8633
8634 int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
8635 ASSERT_EQUAL_SVE(expected_z6, z6.VnH());
8636
8637 int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
8638 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
8639
8640 int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
8641 ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
8642 }
8643}
8644
8645TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
8646 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8647 START();
8648
8649 // Immediates which can be encoded in the instructions.
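  // (The encodable set is +/-(n / 16) * 2^r for integer n in [16, 31] and
  // r in [-3, 4], so values such as 2.0, 3.875 and -13.0 fit, but 0.0,
  // infinities and 255.0 do not.)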
8650 __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
8651 __ Fdup(z1.VnS(), Float16(2.0));
8652 __ Fdup(z2.VnD(), Float16(3.875));
8653 __ Fdup(z3.VnH(), 8.0f);
8654 __ Fdup(z4.VnS(), -4.75f);
8655 __ Fdup(z5.VnD(), 0.5f);
8656 __ Fdup(z6.VnH(), 1.0);
8657 __ Fdup(z7.VnS(), 2.125);
8658 __ Fdup(z8.VnD(), -13.0);
8659
8660 // Immediates which cannot be encoded in the instructions.
8661 __ Fdup(z10.VnH(), Float16(0.0));
8662 __ Fdup(z11.VnH(), kFP16PositiveInfinity);
8663 __ Fdup(z12.VnS(), 255.0f);
8664 __ Fdup(z13.VnS(), kFP32NegativeInfinity);
8665 __ Fdup(z14.VnD(), 12.3456);
8666 __ Fdup(z15.VnD(), kFP64PositiveInfinity);
8667
8668 END();
8669
8670 if (CAN_RUN()) {
8671 RUN();
8672
8673 ASSERT_EQUAL_SVE(0xc500, z0.VnH());
8674 ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
8675 ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
8676 ASSERT_EQUAL_SVE(0x4800, z3.VnH());
8677 ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
8678 ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
8679 ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
8680 ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
8681 ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());
8682
8683 ASSERT_EQUAL_SVE(0x0000, z10.VnH());
8684 ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
8685 ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
8686 ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
8687 ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
8688 ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
8689 }
8690}
8691
TatWai Chong6f111bc2019-10-07 09:20:37 +01008692TEST_SVE(sve_andv_eorv_orv) {
8693 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8694 START();
8695
8696 uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
8697 InsrHelper(&masm, z31.VnD(), in);
8698
8699 // For simplicity, we re-use the same pg for various lane sizes.
8700 // For D lanes: 1, 1, 0
8701 // For S lanes: 1, 1, 1, 0, 0
8702 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
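  // (A wider lane is active iff the predicate bit of its lowest-numbered byte
  // is set, which is how the B-lane pattern below folds down to the H, S and
  // D patterns above.)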
8703 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
8704 Initialise(&masm, p0.VnB(), pg_in);
8705
8706 // Make a copy so we can check that constructive operations preserve zn.
8707 __ Mov(z0, z31);
8708 __ Andv(b0, p0, z0.VnB()); // destructive
8709 __ Andv(h1, p0, z31.VnH());
8710 __ Mov(z2, z31);
8711 __ Andv(s2, p0, z2.VnS()); // destructive
8712 __ Andv(d3, p0, z31.VnD());
8713
8714 __ Eorv(b4, p0, z31.VnB());
8715 __ Mov(z5, z31);
8716 __ Eorv(h5, p0, z5.VnH()); // destructive
8717 __ Eorv(s6, p0, z31.VnS());
8718 __ Mov(z7, z31);
8719 __ Eorv(d7, p0, z7.VnD()); // destructive
8720
8721 __ Mov(z8, z31);
8722 __ Orv(b8, p0, z8.VnB()); // destructive
8723 __ Orv(h9, p0, z31.VnH());
8724 __ Mov(z10, z31);
8725 __ Orv(s10, p0, z10.VnS()); // destructive
8726 __ Orv(d11, p0, z31.VnD());
8727
8728 END();
8729
8730 if (CAN_RUN()) {
8731 RUN();
8732
8733 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
8734 ASSERT_EQUAL_64(0x10, d0);
8735 ASSERT_EQUAL_64(0x1010, d1);
8736 ASSERT_EQUAL_64(0x33331111, d2);
8737 ASSERT_EQUAL_64(0x7777555533331111, d3);
8738 ASSERT_EQUAL_64(0xbf, d4);
8739 ASSERT_EQUAL_64(0xedcb, d5);
8740 ASSERT_EQUAL_64(0x44444444, d6);
8741 ASSERT_EQUAL_64(0x7777555533331111, d7);
8742 ASSERT_EQUAL_64(0xff, d8);
8743 ASSERT_EQUAL_64(0xffff, d9);
8744 ASSERT_EQUAL_64(0x77775555, d10);
8745 ASSERT_EQUAL_64(0x7777555533331111, d11);
8746 } else {
8747 ASSERT_EQUAL_64(0, d0);
8748 ASSERT_EQUAL_64(0x0010, d1);
8749 ASSERT_EQUAL_64(0x00110011, d2);
8750 ASSERT_EQUAL_64(0x0011001100110011, d3);
8751 ASSERT_EQUAL_64(0x62, d4);
8752 ASSERT_EQUAL_64(0x0334, d5);
8753 ASSERT_EQUAL_64(0x8899aabb, d6);
8754 ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
8755 ASSERT_EQUAL_64(0xff, d8);
8756 ASSERT_EQUAL_64(0xffff, d9);
8757 ASSERT_EQUAL_64(0xffffffff, d10);
8758 ASSERT_EQUAL_64(0xffffffffffffffff, d11);
8759 }
8760
8761 // Check the upper lanes above the top of the V register are all clear.
8762 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
8763 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
8764 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
8765 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
8766 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
8767 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
8768 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
8769 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
8770 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
8771 ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
8772 ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
8773 ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
8774 ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
8775 }
8776 }
8777}
8778
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07008779
8780TEST_SVE(sve_saddv_uaddv) {
8781 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8782 START();
8783
8784 uint64_t in[] = {0x8899aabbccddeeff, 0x8182838485868788, 0x0807060504030201};
8785 InsrHelper(&masm, z31.VnD(), in);
8786
8787 // For simplicity, we re-use the same pg for various lane sizes.
8788 // For D lanes: 1, 1, 0
8789 // For S lanes: 1, 1, 1, 0, 0
8790 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
8791 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
8792 Initialise(&masm, p0.VnB(), pg_in);
8793
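// Saddv sign-extends each active element into a 64-bit accumulator and has no
// D-element form, so the D-lane case below uses Uaddv (which zero-extends)
// instead.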
8794 // Make a copy so we can check that constructive operations preserve zn.
8795 __ Mov(z0, z31);
8796 __ Saddv(b0, p0, z0.VnB()); // destructive
8797 __ Saddv(h1, p0, z31.VnH());
8798 __ Mov(z2, z31);
8799 __ Saddv(s2, p0, z2.VnS()); // destructive
8800
8801 __ Uaddv(b4, p0, z31.VnB());
8802 __ Mov(z5, z31);
8803 __ Uaddv(h5, p0, z5.VnH()); // destructive
8804 __ Uaddv(s6, p0, z31.VnS());
8805 __ Mov(z7, z31);
8806 __ Uaddv(d7, p0, z7.VnD()); // destructive
8807
8808 END();
8809
8810 if (CAN_RUN()) {
8811 RUN();
8812
8813 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
8814 // Saddv
8815 ASSERT_EQUAL_64(0xfffffffffffffda9, d0);
8816 ASSERT_EQUAL_64(0xfffffffffffe9495, d1);
8817 ASSERT_EQUAL_64(0xffffffff07090b0c, d2);
8818 // Uaddv
8819 ASSERT_EQUAL_64(0x00000000000002a9, d4);
8820 ASSERT_EQUAL_64(0x0000000000019495, d5);
8821 ASSERT_EQUAL_64(0x0000000107090b0c, d6);
8822 ASSERT_EQUAL_64(0x8182838485868788, d7);
8823 } else {
8824 // Saddv
8825 ASSERT_EQUAL_64(0xfffffffffffffd62, d0);
8826 ASSERT_EQUAL_64(0xfffffffffffe8394, d1);
8827 ASSERT_EQUAL_64(0xfffffffed3e6fa0b, d2);
8828 // Uaddv
8829 ASSERT_EQUAL_64(0x0000000000000562, d4);
8830 ASSERT_EQUAL_64(0x0000000000028394, d5);
8831 ASSERT_EQUAL_64(0x00000001d3e6fa0b, d6);
8832 ASSERT_EQUAL_64(0x0a1c2e4052647687, d7);
8833 }
8834
8835 // Check the upper lanes above the top of the V register are all clear.
8836 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
8837 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
8838 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
8839 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
8840 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
8841 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
8842 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
8843 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
8844 }
8845 }
8846}
8847
8848
8849TEST_SVE(sve_sminv_uminv) {
8850 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8851 START();
8852
8853 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
8854 InsrHelper(&masm, z31.VnD(), in);
8855
8856 // For simplicity, we re-use the same pg for various lane sizes.
8857 // For D lanes: 1, 0, 1
8858 // For S lanes: 1, 1, 0, 0, 1
8859 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
8860 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
8861 Initialise(&masm, p0.VnB(), pg_in);
8862
8863 // Make a copy so we can check that constructive operations preserve zn.
8864 __ Mov(z0, z31);
8865 __ Sminv(b0, p0, z0.VnB()); // destructive
8866 __ Sminv(h1, p0, z31.VnH());
8867 __ Mov(z2, z31);
8868 __ Sminv(s2, p0, z2.VnS()); // destructive
8869 __ Sminv(d3, p0, z31.VnD());
8870
8871 __ Uminv(b4, p0, z31.VnB());
8872 __ Mov(z5, z31);
8873 __ Uminv(h5, p0, z5.VnH()); // destructive
8874 __ Uminv(s6, p0, z31.VnS());
8875 __ Mov(z7, z31);
8876 __ Uminv(d7, p0, z7.VnD()); // destructive
8877
8878 END();
8879
8880 if (CAN_RUN()) {
8881 RUN();
8882
8883 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
8884 // Sminv
8885 ASSERT_EQUAL_64(0xaa, d0);
8886 ASSERT_EQUAL_64(0xaabb, d1);
8887 ASSERT_EQUAL_64(0xaabbfc00, d2);
8888 ASSERT_EQUAL_64(0x00112233aabbfc00, d3); // The smaller lane is inactive.
8889 // Uminv
8890 ASSERT_EQUAL_64(0, d4);
8891 ASSERT_EQUAL_64(0x2233, d5);
8892 ASSERT_EQUAL_64(0x112233, d6);
8893 ASSERT_EQUAL_64(0x00112233aabbfc00, d7); // The smaller lane is inactive.
8894 } else {
8895 // Sminv
8896 ASSERT_EQUAL_64(0xaa, d0);
8897 ASSERT_EQUAL_64(0xaaaa, d1);
8898 ASSERT_EQUAL_64(0xaaaaaaaa, d2);
8899 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d3);
8900 // Uminv
8901 ASSERT_EQUAL_64(0, d4);
8902 ASSERT_EQUAL_64(0x2233, d5);
8903 ASSERT_EQUAL_64(0x112233, d6);
8904 ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
8905 }
8906
8907 // Check the upper lanes above the top of the V register are all clear.
8908 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
8909 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
8910 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
8911 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
8912 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
8913 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
8914 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
8915 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
8916 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
8917 }
8918 }
8919}
8920
8921TEST_SVE(sve_smaxv_umaxv) {
8922 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8923 START();
8924
8925 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
8926 InsrHelper(&masm, z31.VnD(), in);
8927
8928 // For simplicity, we re-use the same pg for various lane sizes.
8929 // For D lanes: 1, 0, 1
8930 // For S lanes: 1, 1, 0, 0, 1
8931 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
8932 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
8933 Initialise(&masm, p0.VnB(), pg_in);
8934
8935 // Make a copy so we can check that constructive operations preserve zn.
8936 __ Mov(z0, z31);
8937 __ Smaxv(b0, p0, z0.VnB()); // destructive
8938 __ Smaxv(h1, p0, z31.VnH());
8939 __ Mov(z2, z31);
8940 __ Smaxv(s2, p0, z2.VnS()); // destructive
8941 __ Smaxv(d3, p0, z31.VnD());
8942
8943 __ Umaxv(b4, p0, z31.VnB());
8944 __ Mov(z5, z31);
8945 __ Umaxv(h5, p0, z5.VnH()); // destructive
8946 __ Umaxv(s6, p0, z31.VnS());
8947 __ Mov(z7, z31);
8948 __ Umaxv(d7, p0, z7.VnD()); // destructive
8949
8950 END();
8951
8952 if (CAN_RUN()) {
8953 RUN();
8954
8955 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
8956 // Smaxv
8957 ASSERT_EQUAL_64(0x33, d0);
8958 ASSERT_EQUAL_64(0x44aa, d1);
8959 ASSERT_EQUAL_64(0x112233, d2);
8960 ASSERT_EQUAL_64(0x112233aabbfc00, d3);
8961 // Umaxv
8962 ASSERT_EQUAL_64(0xfe, d4);
8963 ASSERT_EQUAL_64(0xfc00, d5);
8964 ASSERT_EQUAL_64(0xaabbfc00, d6);
8965 ASSERT_EQUAL_64(0x112233aabbfc00, d7);
8966 } else {
8967 // Smaxv
8968 ASSERT_EQUAL_64(0x33, d0);
8969 ASSERT_EQUAL_64(0x44aa, d1);
8970 ASSERT_EQUAL_64(0x112233, d2);
8971 ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
8972 // Umaxv
8973 ASSERT_EQUAL_64(0xfe, d4);
8974 ASSERT_EQUAL_64(0xfc00, d5);
8975 ASSERT_EQUAL_64(0xaabbfc00, d6);
8976 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d7);
8977 }
8978
8979 // Check the upper lanes above the top of the V register are all clear.
8980 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
8981 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
8982 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
8983 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
8984 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
8985 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
8986 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
8987 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
8988 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
8989 }
8990 }
8991}
8992
TatWai Chong4d2a4e92019-10-23 16:19:32 -07008993typedef void (MacroAssembler::*SdotUdotFn)(const ZRegister& zd,
8994 const ZRegister& za,
8995 const ZRegister& zn,
8996 const ZRegister& zm);
8997
8998template <typename Td, typename Ts, typename Te>
8999static void SdotUdotHelper(Test* config,
9000 SdotUdotFn macro,
9001 unsigned lane_size_in_bits,
9002 const Td& zd_inputs,
9003 const Td& za_inputs,
9004 const Ts& zn_inputs,
9005 const Ts& zm_inputs,
9006 const Te& zd_expected,
9007 const Te& zdnm_expected) {
9008 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9009 START();
9010
9011 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
9012 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
9013 ZRegister zn = z2.WithLaneSize(lane_size_in_bits / 4);
9014 ZRegister zm = z3.WithLaneSize(lane_size_in_bits / 4);
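// The dot-product instructions accumulate four products of quarter-width
// elements into each destination lane:
//   zd[i] = za[i] + zn[4*i+0]*zm[4*i+0] + ... + zn[4*i+3]*zm[4*i+3]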
9015
9016 InsrHelper(&masm, zd, zd_inputs);
9017 InsrHelper(&masm, za, za_inputs);
9018 InsrHelper(&masm, zn, zn_inputs);
9019 InsrHelper(&masm, zm, zm_inputs);
9020
9021 // The Dot macro handles arbitrarily-aliased registers in the argument list.
9022 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
9023 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
9024 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
9025 ZRegister dnm_result = z13.WithLaneSize(lane_size_in_bits);
9026 ZRegister d_result = z14.WithLaneSize(lane_size_in_bits);
9027
9028 __ Mov(da_result, za);
9029 // zda = zda + (zn . zm)
9030 (masm.*macro)(da_result, da_result, zn, zm);
9031
9032 __ Mov(dn_result, zn);
9033 // zdn = za + (zdn . zm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009034 (masm.*macro)(dn_result, za, dn_result.WithSameLaneSizeAs(zn), zm);
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009035
9036 __ Mov(dm_result, zm);
9037 // zdm = za + (zn . zdm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009038 (masm.*macro)(dm_result, za, zn, dm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009039
9040 __ Mov(d_result, zd);
9041 // zd = za + (zn . zm)
9042 (masm.*macro)(d_result, za, zn, zm);
9043
9044 __ Mov(dnm_result, zn);
 9045  // zdnm = za + (zdnm . zdnm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009046 (masm.*macro)(dnm_result,
9047 za,
9048 dnm_result.WithSameLaneSizeAs(zn),
9049 dnm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009050
9051 END();
9052
9053 if (CAN_RUN()) {
9054 RUN();
9055
9056 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
9057 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits / 4));
9058 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits / 4));
9059
9060 ASSERT_EQUAL_SVE(zd_expected, da_result);
9061 ASSERT_EQUAL_SVE(zd_expected, dn_result);
9062 ASSERT_EQUAL_SVE(zd_expected, dm_result);
9063 ASSERT_EQUAL_SVE(zd_expected, d_result);
9064
9065 ASSERT_EQUAL_SVE(zdnm_expected, dnm_result);
9066 }
9067}
9068
9069TEST_SVE(sve_sdot) {
9070 int zd_inputs[] = {0x33, 0xee, 0xff};
9071 int za_inputs[] = {INT32_MAX, -3, 2};
9072 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
9073 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
9074
9075 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
9076 int32_t zd_expected_s[] = {-2147418113, -183, 133}; // 0x8000ffff
9077 int64_t zd_expected_d[] = {2147549183, -183, 133}; // 0x8000ffff
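// Worked example for the highest S lane: za is INT32_MAX and all four byte
// pairs multiply to (-128 * -128) = 16384, so the accumulator gains 0x10000
// and wraps to 0x8000ffff (-2147418113). The 64-bit form does not wrap,
// giving 2147549183.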
9078
9079 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
9080 int32_t zdnm_expected_s[] = {-2147418113, 980, 572};
9081 int64_t zdnm_expected_d[] = {2147549183, 980, 572};
9082
9083 SdotUdotHelper(config,
9084 &MacroAssembler::Sdot,
9085 kSRegSize,
9086 zd_inputs,
9087 za_inputs,
9088 zn_inputs,
9089 zm_inputs,
9090 zd_expected_s,
9091 zdnm_expected_s);
9092 SdotUdotHelper(config,
9093 &MacroAssembler::Sdot,
9094 kDRegSize,
9095 zd_inputs,
9096 za_inputs,
9097 zn_inputs,
9098 zm_inputs,
9099 zd_expected_d,
9100 zdnm_expected_d);
9101}
9102
9103TEST_SVE(sve_udot) {
9104 int zd_inputs[] = {0x33, 0xee, 0xff};
9105 int za_inputs[] = {INT32_MAX, -3, 2};
9106 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
9107 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
9108
9109 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
9110 uint32_t zd_expected_s[] = {0x8000ffff, 0x00001749, 0x0000f085};
9111 uint64_t zd_expected_d[] = {0x000000047c00ffff,
9112 0x000000000017ff49,
9113 0x00000000fff00085};
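// For the D-element form the sources are 16-bit elements, so -128 is read as
// 0xff80 (65408) by Udot; the highest lane is then
// 0x7fffffff + 4 * 65408 * 65408 = 0x47c00ffff.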
9114
9115 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
9116 uint32_t zdnm_expected_s[] = {0x8000ffff, 0x000101d4, 0x0001d03c};
9117 uint64_t zdnm_expected_d[] = {0x000000047c00ffff,
9118 0x00000000fffe03d4,
9119 0x00000001ffce023c};
9120
9121 SdotUdotHelper(config,
9122 &MacroAssembler::Udot,
9123 kSRegSize,
9124 zd_inputs,
9125 za_inputs,
9126 zn_inputs,
9127 zm_inputs,
9128 zd_expected_s,
9129 zdnm_expected_s);
9130 SdotUdotHelper(config,
9131 &MacroAssembler::Udot,
9132 kDRegSize,
9133 zd_inputs,
9134 za_inputs,
9135 zn_inputs,
9136 zm_inputs,
9137 zd_expected_d,
9138 zdnm_expected_d);
9139}
9140
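// Convert each floating-point input to its raw encoding at the requested lane
// width, so that one double-typed input array can drive the H, S and D tests.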
TatWai Chong7a0d3672019-10-23 17:35:18 -07009141template <typename T, size_t N>
9142static void FPToRawbitsWithSize(const T (&inputs)[N],
9143 uint64_t* outputs,
9144 unsigned size_in_bits) {
TatWai Chongfe536042019-10-23 16:34:11 -07009145 for (size_t i = 0; i < N; i++) {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009146 outputs[i] = vixl::FPToRawbitsWithSize(size_in_bits, inputs[i]);
TatWai Chongfe536042019-10-23 16:34:11 -07009147 }
9148}
9149
TatWai Chong7a0d3672019-10-23 17:35:18 -07009150template <typename Ti, typename Te, size_t N>
9151static void FPBinArithHelper(Test* config,
9152 ArithFn macro,
9153 int lane_size_in_bits,
9154 const Ti (&zn_inputs)[N],
9155 const Ti (&zm_inputs)[N],
9156 const Te (&zd_expected)[N]) {
TatWai Chongfe536042019-10-23 16:34:11 -07009157 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong7a0d3672019-10-23 17:35:18 -07009158
TatWai Chongfe536042019-10-23 16:34:11 -07009159 START();
9160
9161 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
9162 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
9163 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
9164
9165 uint64_t zn_rawbits[N];
9166 uint64_t zm_rawbits[N];
9167
TatWai Chong7a0d3672019-10-23 17:35:18 -07009168 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
9169 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
TatWai Chongfe536042019-10-23 16:34:11 -07009170
9171 InsrHelper(&masm, zn, zn_rawbits);
9172 InsrHelper(&masm, zm, zm_rawbits);
9173
9174 (masm.*macro)(zd, zn, zm);
9175
9176 END();
9177
9178 if (CAN_RUN()) {
9179 RUN();
9180
9181 ASSERT_EQUAL_SVE(zd_expected, zd);
9182 }
9183}
9184
9185TEST_SVE(sve_fp_arithmetic_unpredicated_fadd) {
9186 double zn_inputs[] = {24.0,
9187 5.5,
9188 0.0,
9189 3.875,
9190 2.125,
9191 kFP64PositiveInfinity,
9192 kFP64NegativeInfinity};
9193
9194 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
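// The helper converts these inputs to the destination lane width before the
// operation, so the H and S cases operate on rounded versions of inexact
// constants such as 0.1 and 12.34.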
9195
TatWai Chong7a0d3672019-10-23 17:35:18 -07009196 ArithFn fn = &MacroAssembler::Fadd;
TatWai Chongfe536042019-10-23 16:34:11 -07009197
9198 uint16_t expected_h[] = {Float16ToRawbits(Float16(1048.0)),
9199 Float16ToRawbits(Float16(2053.5)),
9200 Float16ToRawbits(Float16(0.1)),
9201 Float16ToRawbits(Float16(-0.875)),
9202 Float16ToRawbits(Float16(14.465)),
9203 Float16ToRawbits(kFP16PositiveInfinity),
9204 Float16ToRawbits(kFP16NegativeInfinity)};
9205
TatWai Chong7a0d3672019-10-23 17:35:18 -07009206 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009207
9208 uint32_t expected_s[] = {FloatToRawbits(1048.0f),
9209 FloatToRawbits(2053.5f),
9210 FloatToRawbits(0.1f),
9211 FloatToRawbits(-0.875f),
9212 FloatToRawbits(14.465f),
9213 FloatToRawbits(kFP32PositiveInfinity),
9214 FloatToRawbits(kFP32NegativeInfinity)};
9215
TatWai Chong7a0d3672019-10-23 17:35:18 -07009216 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009217
9218 uint64_t expected_d[] = {DoubleToRawbits(1048.0),
9219 DoubleToRawbits(2053.5),
9220 DoubleToRawbits(0.1),
9221 DoubleToRawbits(-0.875),
9222 DoubleToRawbits(14.465),
9223 DoubleToRawbits(kFP64PositiveInfinity),
9224 DoubleToRawbits(kFP64NegativeInfinity)};
9225
TatWai Chong7a0d3672019-10-23 17:35:18 -07009226 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009227}
9228
9229TEST_SVE(sve_fp_arithmetic_unpredicated_fsub) {
9230 double zn_inputs[] = {24.0,
9231 5.5,
9232 0.0,
9233 3.875,
9234 2.125,
9235 kFP64PositiveInfinity,
9236 kFP64NegativeInfinity};
9237
9238 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
9239
TatWai Chong7a0d3672019-10-23 17:35:18 -07009240 ArithFn fn = &MacroAssembler::Fsub;
TatWai Chongfe536042019-10-23 16:34:11 -07009241
9242 uint16_t expected_h[] = {Float16ToRawbits(Float16(-1000.0)),
9243 Float16ToRawbits(Float16(-2042.5)),
9244 Float16ToRawbits(Float16(-0.1)),
9245 Float16ToRawbits(Float16(8.625)),
9246 Float16ToRawbits(Float16(-10.215)),
9247 Float16ToRawbits(kFP16PositiveInfinity),
9248 Float16ToRawbits(kFP16NegativeInfinity)};
9249
TatWai Chong7a0d3672019-10-23 17:35:18 -07009250 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009251
9252 uint32_t expected_s[] = {FloatToRawbits(-1000.0),
9253 FloatToRawbits(-2042.5),
9254 FloatToRawbits(-0.1),
9255 FloatToRawbits(8.625),
9256 FloatToRawbits(-10.215),
9257 FloatToRawbits(kFP32PositiveInfinity),
9258 FloatToRawbits(kFP32NegativeInfinity)};
9259
TatWai Chong7a0d3672019-10-23 17:35:18 -07009260 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009261
9262 uint64_t expected_d[] = {DoubleToRawbits(-1000.0),
9263 DoubleToRawbits(-2042.5),
9264 DoubleToRawbits(-0.1),
9265 DoubleToRawbits(8.625),
9266 DoubleToRawbits(-10.215),
9267 DoubleToRawbits(kFP64PositiveInfinity),
9268 DoubleToRawbits(kFP64NegativeInfinity)};
9269
TatWai Chong7a0d3672019-10-23 17:35:18 -07009270 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009271}
9272
9273TEST_SVE(sve_fp_arithmetic_unpredicated_fmul) {
9274 double zn_inputs[] = {24.0,
9275 5.5,
9276 0.0,
9277 3.875,
9278 2.125,
9279 kFP64PositiveInfinity,
9280 kFP64NegativeInfinity};
9281
9282 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
9283
TatWai Chong7a0d3672019-10-23 17:35:18 -07009284 ArithFn fn = &MacroAssembler::Fmul;
TatWai Chongfe536042019-10-23 16:34:11 -07009285
9286 uint16_t expected_h[] = {Float16ToRawbits(Float16(24576.0)),
9287 Float16ToRawbits(Float16(11264.0)),
9288 Float16ToRawbits(Float16(0.0)),
9289 Float16ToRawbits(Float16(-18.4)),
9290 Float16ToRawbits(Float16(26.23)),
9291 Float16ToRawbits(kFP16PositiveInfinity),
9292 Float16ToRawbits(kFP16PositiveInfinity)};
9293
TatWai Chong7a0d3672019-10-23 17:35:18 -07009294 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009295
9296 uint32_t expected_s[] = {FloatToRawbits(24576.0),
9297 FloatToRawbits(11264.0),
9298 FloatToRawbits(0.0),
9299 FloatToRawbits(-18.40625),
9300 FloatToRawbits(26.2225),
9301 FloatToRawbits(kFP32PositiveInfinity),
9302 FloatToRawbits(kFP32PositiveInfinity)};
9303
TatWai Chong7a0d3672019-10-23 17:35:18 -07009304 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009305
9306 uint64_t expected_d[] = {DoubleToRawbits(24576.0),
9307 DoubleToRawbits(11264.0),
9308 DoubleToRawbits(0.0),
9309 DoubleToRawbits(-18.40625),
9310 DoubleToRawbits(26.2225),
9311 DoubleToRawbits(kFP64PositiveInfinity),
9312 DoubleToRawbits(kFP64PositiveInfinity)};
9313
TatWai Chong7a0d3672019-10-23 17:35:18 -07009314 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009315}
9316
TatWai Chong7a0d3672019-10-23 17:35:18 -07009317typedef void (MacroAssembler::*FPArithPredicatedFn)(
9318 const ZRegister& zd,
9319 const PRegisterM& pg,
9320 const ZRegister& zn,
9321 const ZRegister& zm,
9322 FPMacroNaNPropagationOption nan_option);
9323
9324template <typename Ti, typename Te, size_t N>
9325static void FPBinArithHelper(
9326 Test* config,
9327 FPArithPredicatedFn macro,
9328 unsigned lane_size_in_bits,
9329 const Ti (&zd_inputs)[N],
9330 const int (&pg_inputs)[N],
9331 const Ti (&zn_inputs)[N],
9332 const Ti (&zm_inputs)[N],
9333 const Te (&zd_expected)[N],
9334 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
TatWai Chongd316c5e2019-10-16 12:22:10 -07009335 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9336 START();
9337
TatWai Chong7a0d3672019-10-23 17:35:18 -07009338 // Avoid choosing default scratch registers.
9339 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
9340 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
9341 ZRegister zm = z28.WithLaneSize(lane_size_in_bits);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009342
TatWai Chong7a0d3672019-10-23 17:35:18 -07009343 uint64_t zn_inputs_rawbits[N];
9344 uint64_t zm_inputs_rawbits[N];
9345 uint64_t zd_inputs_rawbits[N];
TatWai Chongd316c5e2019-10-16 12:22:10 -07009346
TatWai Chong7a0d3672019-10-23 17:35:18 -07009347 FPToRawbitsWithSize(zn_inputs, zn_inputs_rawbits, lane_size_in_bits);
9348 FPToRawbitsWithSize(zm_inputs, zm_inputs_rawbits, lane_size_in_bits);
9349 FPToRawbitsWithSize(zd_inputs, zd_inputs_rawbits, lane_size_in_bits);
9350
9351 InsrHelper(&masm, zn, zn_inputs_rawbits);
9352 InsrHelper(&masm, zm, zm_inputs_rawbits);
9353 InsrHelper(&masm, zd, zd_inputs_rawbits);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009354
9355 PRegisterWithLaneSize pg = p0.WithLaneSize(lane_size_in_bits);
9356 Initialise(&masm, pg, pg_inputs);
9357
9358 // `instr` zdn, pg, zdn, zm
9359 ZRegister dn_result = z0.WithLaneSize(lane_size_in_bits);
9360 __ Mov(dn_result, zn);
TatWai Chong7a0d3672019-10-23 17:35:18 -07009361 (masm.*macro)(dn_result, pg.Merging(), dn_result, zm, nan_option);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009362
 9363  // When zd and zm are aliased, the macro (`Instr`) swaps the operand order if
 9364  // the operation is commutative; otherwise it falls back to the reversed form
 9365  // of the instruction, such as fdivr.
9366 // `instr` zdm, pg, zn, zdm
9367 ZRegister dm_result = z1.WithLaneSize(lane_size_in_bits);
9368 __ Mov(dm_result, zm);
TatWai Chong7a0d3672019-10-23 17:35:18 -07009369 (masm.*macro)(dm_result, pg.Merging(), zn, dm_result, nan_option);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009370
9371 // The macro of instructions (`Instr`) automatically selects between `instr`
9372 // and movprfx + `instr` based on whether zd and zn registers are aliased.
9373 // A generated movprfx instruction is predicated that using the same
9374 // governing predicate register. In order to keep the result constant,
9375 // initialize the destination register first.
9376 // `instr` zd, pg, zn, zm
9377 ZRegister d_result = z2.WithLaneSize(lane_size_in_bits);
9378 __ Mov(d_result, zd);
TatWai Chong7a0d3672019-10-23 17:35:18 -07009379 (masm.*macro)(d_result, pg.Merging(), zn, zm, nan_option);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009380
9381 END();
9382
9383 if (CAN_RUN()) {
9384 RUN();
9385
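// With merging predication the inactive lanes of the destination are left
// unchanged, so dn_result keeps zn's value and dm_result keeps zm's value in
// those lanes.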
9386 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
9387 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
9388 if (!core.HasSVELane(dn_result, lane)) break;
9389 if ((pg_inputs[i] & 1) != 0) {
9390 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dn_result, lane);
9391 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009392 ASSERT_EQUAL_SVE_LANE(zn_inputs_rawbits[i], dn_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009393 }
9394 }
9395
9396 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
9397 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
9398 if (!core.HasSVELane(dm_result, lane)) break;
9399 if ((pg_inputs[i] & 1) != 0) {
9400 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dm_result, lane);
9401 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009402 ASSERT_EQUAL_SVE_LANE(zm_inputs_rawbits[i], dm_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009403 }
9404 }
9405
9406 ASSERT_EQUAL_SVE(zd_expected, d_result);
9407 }
9408}
9409
9410TEST_SVE(sve_binary_arithmetic_predicated_fdiv) {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009411 // The inputs are shared with different precision tests.
TatWai Chongd316c5e2019-10-16 12:22:10 -07009412 double zd_in[] = {0.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
9413
9414 double zn_in[] = {24.0,
9415 24.0,
9416 -2.0,
9417 -2.0,
9418 5.5,
9419 5.5,
9420 kFP64PositiveInfinity,
9421 kFP64PositiveInfinity,
9422 kFP64NegativeInfinity,
9423 kFP64NegativeInfinity};
9424
9425 double zm_in[] = {-2.0, -2.0, 24.0, 24.0, 0.5, 0.5, 0.65, 0.65, 24.0, 24.0};
9426
TatWai Chongd316c5e2019-10-16 12:22:10 -07009427 int pg_in[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
9428
TatWai Chong7a0d3672019-10-23 17:35:18 -07009429 uint16_t exp_h[] = {Float16ToRawbits(Float16(0.1)),
TatWai Chongd316c5e2019-10-16 12:22:10 -07009430 Float16ToRawbits(Float16(-12.0)),
9431 Float16ToRawbits(Float16(2.2)),
9432 Float16ToRawbits(Float16(-0.0833)),
9433 Float16ToRawbits(Float16(4.4)),
9434 Float16ToRawbits(Float16(11.0)),
9435 Float16ToRawbits(Float16(6.6)),
9436 Float16ToRawbits(kFP16PositiveInfinity),
9437 Float16ToRawbits(Float16(8.8)),
9438 Float16ToRawbits(kFP16NegativeInfinity)};
9439
TatWai Chong7a0d3672019-10-23 17:35:18 -07009440 FPBinArithHelper(config,
9441 &MacroAssembler::Fdiv,
9442 kHRegSize,
9443 zd_in,
9444 pg_in,
9445 zn_in,
9446 zm_in,
9447 exp_h);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009448
9449 uint32_t exp_s[] = {FloatToRawbits(0.1),
9450 FloatToRawbits(-12.0),
9451 FloatToRawbits(2.2),
9452 0xbdaaaaab,
9453 FloatToRawbits(4.4),
9454 FloatToRawbits(11.0),
9455 FloatToRawbits(6.6),
9456 FloatToRawbits(kFP32PositiveInfinity),
9457 FloatToRawbits(8.8),
9458 FloatToRawbits(kFP32NegativeInfinity)};
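// 0xbdaaaaab is the binary32 encoding of -2.0 / 24.0 = -1/12, which is not
// exactly representable; the binary64 encoding below is 0xbfb5555555555555.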
9459
TatWai Chong7a0d3672019-10-23 17:35:18 -07009460 FPBinArithHelper(config,
9461 &MacroAssembler::Fdiv,
9462 kSRegSize,
9463 zd_in,
9464 pg_in,
9465 zn_in,
9466 zm_in,
9467 exp_s);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009468
9469 uint64_t exp_d[] = {DoubleToRawbits(0.1),
9470 DoubleToRawbits(-12.0),
9471 DoubleToRawbits(2.2),
9472 0xbfb5555555555555,
9473 DoubleToRawbits(4.4),
9474 DoubleToRawbits(11.0),
9475 DoubleToRawbits(6.6),
9476 DoubleToRawbits(kFP64PositiveInfinity),
9477 DoubleToRawbits(8.8),
9478 DoubleToRawbits(kFP64NegativeInfinity)};
9479
TatWai Chong7a0d3672019-10-23 17:35:18 -07009480 FPBinArithHelper(config,
9481 &MacroAssembler::Fdiv,
9482 kDRegSize,
9483 zd_in,
9484 pg_in,
9485 zn_in,
9486 zm_in,
9487 exp_d);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009488}
9489
Martyn Capewell9cc3f142019-10-29 14:06:35 +00009490TEST_SVE(sve_select) {
9491 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9492 START();
9493
9494 uint64_t in0[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
9495 uint64_t in1[] = {0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa};
9496
9497 // For simplicity, we re-use the same pg for various lane sizes.
9498 // For D lanes: 1, 1, 0
9499 // For S lanes: 1, 1, 1, 0, 0
9500 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9501 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9502 Initialise(&masm, p0.VnB(), pg_in);
9503 PRegisterM pg = p0.Merging();
9504
9505 InsrHelper(&masm, z30.VnD(), in0);
9506 InsrHelper(&masm, z31.VnD(), in1);
9507
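// Sel selects between the two sources lane by lane:
//   zd[i] = pg[i] ? zn[i] : zm[i]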
9508 __ Sel(z0.VnB(), pg, z30.VnB(), z31.VnB());
9509 __ Sel(z1.VnH(), pg, z30.VnH(), z31.VnH());
9510 __ Sel(z2.VnS(), pg, z30.VnS(), z31.VnS());
9511 __ Sel(z3.VnD(), pg, z30.VnD(), z31.VnD());
9512
9513 END();
9514
9515 if (CAN_RUN()) {
9516 RUN();
9517
9518 uint64_t expected_z0[] = {0xaaaaaaaa05aa07f8,
9519 0xfeaaaaf0aac3870f,
9520 0xaaaa56aa9abcdeaa};
9521 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
9522
9523 uint64_t expected_z1[] = {0xaaaaaaaaaaaa07f8,
9524 0xaaaaf8f0e1c3870f,
9525 0xaaaaaaaa9abcaaaa};
9526 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
9527
9528 uint64_t expected_z2[] = {0xaaaaaaaa05f607f8,
9529 0xfefcf8f0e1c3870f,
9530 0xaaaaaaaaaaaaaaaa};
9531 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
9532
9533 uint64_t expected_z3[] = {0x01f203f405f607f8,
9534 0xfefcf8f0e1c3870f,
9535 0xaaaaaaaaaaaaaaaa};
9536 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
9537 }
9538}
TatWai Chongd316c5e2019-10-16 12:22:10 -07009539
TatWai Chong7a0d3672019-10-23 17:35:18 -07009540// Execute a number of instructions which all use ProcessNaNs, and check that
9541// they all propagate NaNs correctly.
9542template <typename Ti, typename Te, size_t N>
9543static void ProcessNaNsHelper(Test* config,
9544 int lane_size_in_bits,
9545 const Ti (&zn_inputs)[N],
9546 const Ti (&zm_inputs)[N],
9547 const Te (&zd_expected)[N],
9548 FPMacroNaNPropagationOption nan_option) {
9549 ArithFn unpredicated_macro[] = {&MacroAssembler::Fadd,
9550 &MacroAssembler::Fsub,
9551 &MacroAssembler::Fmul};
9552
9553 for (size_t i = 0; i < ArrayLength(unpredicated_macro); i++) {
9554 FPBinArithHelper(config,
9555 unpredicated_macro[i],
9556 lane_size_in_bits,
9557 zn_inputs,
9558 zm_inputs,
9559 zd_expected);
9560 }
9561
9562 FPArithPredicatedFn predicated_macro[] = {&MacroAssembler::Fdiv,
9563 &MacroAssembler::Fmax,
9564 &MacroAssembler::Fmin};
9565 int pg_inputs[N];
9566 // With an all-true predicate, this helper aims to compare with special
9567 // numbers.
9568 for (size_t i = 0; i < N; i++) {
9569 pg_inputs[i] = 1;
9570 }
9571
9572 for (size_t i = 0; i < ArrayLength(predicated_macro); i++) {
9573 FPBinArithHelper(config,
9574 predicated_macro[i],
9575 lane_size_in_bits,
9576 // With an all-true predicate, the value in zd is
9577 // irrelevant to the operations.
9578 zn_inputs,
9579 pg_inputs,
9580 zn_inputs,
9581 zm_inputs,
9582 zd_expected,
9583 nan_option);
9584 }
9585}
9586
9587TEST_SVE(sve_process_nans_double) {
9588 // Use non-standard NaNs to check that the payload bits are preserved.
9589 double sn = RawbitsToDouble(0x7ff5555511111111);
9590 double sm = RawbitsToDouble(0x7ff5555522222222);
9591 double qn = RawbitsToDouble(0x7ffaaaaa11111111);
9592 double qm = RawbitsToDouble(0x7ffaaaaa22222222);
9593 VIXL_ASSERT(IsSignallingNaN(sn));
9594 VIXL_ASSERT(IsSignallingNaN(sm));
9595 VIXL_ASSERT(IsQuietNaN(qn));
9596 VIXL_ASSERT(IsQuietNaN(qm));
9597
9598 // The input NaNs after passing through ProcessNaN.
9599 uint64_t sn_proc = 0x7ffd555511111111;
9600 uint64_t sm_proc = 0x7ffd555522222222;
9601 uint64_t qn_proc = DoubleToRawbits(qn);
9602 uint64_t qm_proc = DoubleToRawbits(qm);
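// The signalling NaNs have been quietened by setting the most significant
// fraction bit (bit 51 for binary64), with the rest of the payload preserved.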
9603
9604 // Quiet NaNs are propagated.
9605 double zn_inputs_1[] = {qn, 0.0, 0.0, qm, qn, qm};
9606 double zm_inputs_1[] = {0.0, qn, qm, 0.0, qm, qn};
9607 uint64_t zd_expected_1[] =
9608 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
9609 ProcessNaNsHelper(config,
9610 kDRegSize,
9611 zn_inputs_1,
9612 zm_inputs_1,
9613 zd_expected_1,
9614 StrictNaNPropagation);
9615
9616 // Signalling NaNs are propagated.
9617 double zn_inputs_2[] = {sn, 0.0, 0.0, sm, sn, sm};
9618 double zm_inputs_2[] = {0.0, sn, sm, 0.0, sm, sn};
9619 uint64_t zd_expected_2[] =
9620 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
9621 ProcessNaNsHelper(config,
9622 kDRegSize,
9623 zn_inputs_2,
9624 zm_inputs_2,
9625 zd_expected_2,
9626 StrictNaNPropagation);
9627
9628 // Signalling NaNs take precedence over quiet NaNs.
9629 double zn_inputs_3[] = {sn, qn, sn, sn, qn};
9630 double zm_inputs_3[] = {qm, sm, sm, qn, sn};
9631 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
9632 ProcessNaNsHelper(config,
9633 kDRegSize,
9634 zn_inputs_3,
9635 zm_inputs_3,
9636 zd_expected_3,
9637 StrictNaNPropagation);
9638}
9639
9640TEST_SVE(sve_process_nans_float) {
9641 // Use non-standard NaNs to check that the payload bits are preserved.
9642 float sn = RawbitsToFloat(0x7f951111);
9643 float sm = RawbitsToFloat(0x7f952222);
9644 float qn = RawbitsToFloat(0x7fea1111);
9645 float qm = RawbitsToFloat(0x7fea2222);
9646 VIXL_ASSERT(IsSignallingNaN(sn));
9647 VIXL_ASSERT(IsSignallingNaN(sm));
9648 VIXL_ASSERT(IsQuietNaN(qn));
9649 VIXL_ASSERT(IsQuietNaN(qm));
9650
9651 // The input NaNs after passing through ProcessNaN.
9652 uint32_t sn_proc = 0x7fd51111;
9653 uint32_t sm_proc = 0x7fd52222;
9654 uint32_t qn_proc = FloatToRawbits(qn);
9655 uint32_t qm_proc = FloatToRawbits(qm);
9656
9657 // Quiet NaNs are propagated.
9658 float zn_inputs_1[] = {qn, 0.0f, 0.0f, qm, qn, qm};
9659 float zm_inputs_1[] = {0.0f, qn, qm, 0.0f, qm, qn};
9660 uint64_t zd_expected_1[] =
9661 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
9662 ProcessNaNsHelper(config,
9663 kSRegSize,
9664 zn_inputs_1,
9665 zm_inputs_1,
9666 zd_expected_1,
9667 StrictNaNPropagation);
9668
9669 // Signalling NaNs are propagated.
9670 float zn_inputs_2[] = {sn, 0.0f, 0.0f, sm, sn, sm};
9671 float zm_inputs_2[] = {0.0f, sn, sm, 0.0f, sm, sn};
9672 uint64_t zd_expected_2[] =
9673 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
9674 ProcessNaNsHelper(config,
9675 kSRegSize,
9676 zn_inputs_2,
9677 zm_inputs_2,
9678 zd_expected_2,
9679 StrictNaNPropagation);
9680
9681 // Signalling NaNs take precedence over quiet NaNs.
9682 float zn_inputs_3[] = {sn, qn, sn, sn, qn};
9683 float zm_inputs_3[] = {qm, sm, sm, qn, sn};
9684 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
9685 ProcessNaNsHelper(config,
9686 kSRegSize,
9687 zn_inputs_3,
9688 zm_inputs_3,
9689 zd_expected_3,
9690 StrictNaNPropagation);
9691}
9692
9693TEST_SVE(sve_process_nans_half) {
9694 // Use non-standard NaNs to check that the payload bits are preserved.
9695 Float16 sn(RawbitsToFloat16(0x7c11));
9696 Float16 sm(RawbitsToFloat16(0xfc22));
9697 Float16 qn(RawbitsToFloat16(0x7e33));
9698 Float16 qm(RawbitsToFloat16(0xfe44));
9699 VIXL_ASSERT(IsSignallingNaN(sn));
9700 VIXL_ASSERT(IsSignallingNaN(sm));
9701 VIXL_ASSERT(IsQuietNaN(qn));
9702 VIXL_ASSERT(IsQuietNaN(qm));
9703
9704 // The input NaNs after passing through ProcessNaN.
9705 uint16_t sn_proc = 0x7e11;
9706 uint16_t sm_proc = 0xfe22;
9707 uint16_t qn_proc = Float16ToRawbits(qn);
9708 uint16_t qm_proc = Float16ToRawbits(qm);
9709
9710 // Quiet NaNs are propagated.
9711 Float16 zn_inputs_1[] = {qn, Float16(0.0), Float16(0.0), qm, qn, qm};
9712 Float16 zm_inputs_1[] = {Float16(0.0), qn, qm, Float16(0.0), qm, qn};
9713 uint64_t zd_expected_1[] =
9714 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
9715 ProcessNaNsHelper(config,
9716 kHRegSize,
9717 zn_inputs_1,
9718 zm_inputs_1,
9719 zd_expected_1,
9720 StrictNaNPropagation);
9721
9722 // Signalling NaNs are propagated.
9723 Float16 zn_inputs_2[] = {sn, Float16(0.0), Float16(0.0), sm, sn, sm};
9724 Float16 zm_inputs_2[] = {Float16(0.0), sn, sm, Float16(0.0), sm, sn};
9725 uint64_t zd_expected_2[] =
9726 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
9727 ProcessNaNsHelper(config,
9728 kHRegSize,
9729 zn_inputs_2,
9730 zm_inputs_2,
9731 zd_expected_2,
9732 StrictNaNPropagation);
9733
9734 // Signalling NaNs take precedence over quiet NaNs.
9735 Float16 zn_inputs_3[] = {sn, qn, sn, sn, qn};
9736 Float16 zm_inputs_3[] = {qm, sm, sm, qn, sn};
9737 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
9738 ProcessNaNsHelper(config,
9739 kHRegSize,
9740 zn_inputs_3,
9741 zm_inputs_3,
9742 zd_expected_3,
9743 StrictNaNPropagation);
9744}
9745
9746TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_h) {
9747 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
9748 double zn_inputs[] = {-2.1,
9749 8.5,
9750 225.5,
9751 0.0,
9752 8.8,
9753 -4.75,
9754 kFP64PositiveInfinity,
9755 kFP64NegativeInfinity};
9756 double zm_inputs[] = {-2.0,
9757 -13.0,
9758 24.0,
9759 0.01,
9760 0.5,
9761 300.75,
9762 kFP64NegativeInfinity,
9763 kFP64PositiveInfinity};
9764 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
9765
9766 uint16_t zd_expected_max[] = {Float16ToRawbits(Float16(-2.0)),
9767 Float16ToRawbits(Float16(8.5)),
9768 Float16ToRawbits(Float16(3.3)),
9769 Float16ToRawbits(Float16(0.01)),
9770 Float16ToRawbits(Float16(5.5)),
9771 Float16ToRawbits(Float16(300.75)),
9772 Float16ToRawbits(kFP16PositiveInfinity),
9773 Float16ToRawbits(kFP16PositiveInfinity)};
9774 FPBinArithHelper(config,
9775 &MacroAssembler::Fmax,
9776 kHRegSize,
9777 zd_inputs,
9778 pg_inputs,
9779 zn_inputs,
9780 zm_inputs,
9781 zd_expected_max);
9782
9783 uint16_t zd_expected_min[] = {Float16ToRawbits(Float16(-2.1)),
9784 Float16ToRawbits(Float16(-13.0)),
9785 Float16ToRawbits(Float16(3.3)),
9786 Float16ToRawbits(Float16(0.0)),
9787 Float16ToRawbits(Float16(5.5)),
9788 Float16ToRawbits(Float16(-4.75)),
9789 Float16ToRawbits(kFP16NegativeInfinity),
9790 Float16ToRawbits(kFP16NegativeInfinity)};
9791 FPBinArithHelper(config,
9792 &MacroAssembler::Fmin,
9793 kHRegSize,
9794 zd_inputs,
9795 pg_inputs,
9796 zn_inputs,
9797 zm_inputs,
9798 zd_expected_min);
9799}
9800
9801TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_s) {
9802 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
9803 double zn_inputs[] = {-2.1,
9804 8.5,
9805 225.5,
9806 0.0,
9807 8.8,
9808 -4.75,
9809 kFP64PositiveInfinity,
9810 kFP64NegativeInfinity};
9811 double zm_inputs[] = {-2.0,
9812 -13.0,
9813 24.0,
9814 0.01,
9815 0.5,
9816 300.75,
9817 kFP64NegativeInfinity,
9818 kFP64PositiveInfinity};
9819 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
9820
9821 uint32_t zd_expected_max[] = {FloatToRawbits(-2.0),
9822 FloatToRawbits(8.5),
9823 FloatToRawbits(3.3),
9824 FloatToRawbits(0.01),
9825 FloatToRawbits(5.5),
9826 FloatToRawbits(300.75),
9827 FloatToRawbits(kFP32PositiveInfinity),
9828 FloatToRawbits(kFP32PositiveInfinity)};
9829 FPBinArithHelper(config,
9830 &MacroAssembler::Fmax,
9831 kSRegSize,
9832 zd_inputs,
9833 pg_inputs,
9834 zn_inputs,
9835 zm_inputs,
9836 zd_expected_max);
9837
9838 uint32_t zd_expected_min[] = {FloatToRawbits(-2.1),
9839 FloatToRawbits(-13.0),
9840 FloatToRawbits(3.3),
9841 FloatToRawbits(0.0),
9842 FloatToRawbits(5.5),
9843 FloatToRawbits(-4.75),
9844 FloatToRawbits(kFP32NegativeInfinity),
9845 FloatToRawbits(kFP32NegativeInfinity)};
9846 FPBinArithHelper(config,
9847 &MacroAssembler::Fmin,
9848 kSRegSize,
9849 zd_inputs,
9850 pg_inputs,
9851 zn_inputs,
9852 zm_inputs,
9853 zd_expected_min);
9854}
9855
9856TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_d) {
9857 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
9858 double zn_inputs[] = {-2.1,
9859 8.5,
9860 225.5,
9861 0.0,
9862 8.8,
9863 -4.75,
9864 kFP64PositiveInfinity,
9865 kFP64NegativeInfinity};
9866 double zm_inputs[] = {-2.0,
9867 -13.0,
9868 24.0,
9869 0.01,
9870 0.5,
9871 300.75,
9872 kFP64NegativeInfinity,
9873 kFP64PositiveInfinity};
9874 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
9875
9876 uint64_t zd_expected_max[] = {DoubleToRawbits(-2.0),
9877 DoubleToRawbits(8.5),
9878 DoubleToRawbits(3.3),
9879 DoubleToRawbits(0.01),
9880 DoubleToRawbits(5.5),
9881 DoubleToRawbits(300.75),
9882 DoubleToRawbits(kFP64PositiveInfinity),
9883 DoubleToRawbits(kFP64PositiveInfinity)};
9884 FPBinArithHelper(config,
9885 &MacroAssembler::Fmax,
9886 kDRegSize,
9887 zd_inputs,
9888 pg_inputs,
9889 zn_inputs,
9890 zm_inputs,
9891 zd_expected_max);
9892
9893 uint64_t zd_expected_min[] = {DoubleToRawbits(-2.1),
9894 DoubleToRawbits(-13.0),
9895 DoubleToRawbits(3.3),
9896 DoubleToRawbits(0.0),
9897 DoubleToRawbits(5.5),
9898 DoubleToRawbits(-4.75),
9899 DoubleToRawbits(kFP64NegativeInfinity),
9900 DoubleToRawbits(kFP64NegativeInfinity)};
9901 FPBinArithHelper(config,
9902 &MacroAssembler::Fmin,
9903 kDRegSize,
9904 zd_inputs,
9905 pg_inputs,
9906 zn_inputs,
9907 zm_inputs,
9908 zd_expected_min);
9909}
TatWai Chong29a0c432019-11-06 22:20:44 -08009910
9911template <typename T, size_t N>
9912static void BitwiseShiftImmHelper(Test* config,
9913 int lane_size_in_bits,
9914 const T (&zn_inputs)[N],
9915 int shift) {
9916 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9917 START();
9918
9919 ZRegister zd_asr = z25.WithLaneSize(lane_size_in_bits);
9920 ZRegister zd_lsr = z26.WithLaneSize(lane_size_in_bits);
9921 ZRegister zd_lsl = z27.WithLaneSize(lane_size_in_bits);
9922 ZRegister zn = z28.WithLaneSize(lane_size_in_bits);
9923
9924 InsrHelper(&masm, zn, zn_inputs);
9925
9926 __ Asr(zd_asr, zn, shift);
9927 __ Lsr(zd_lsr, zn, shift);
9928 __ Lsl(zd_lsl, zn, shift);
9929
9930 END();
9931
9932 if (CAN_RUN()) {
9933 RUN();
9934
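// A shift by the full lane width is expected to clear the lane for LSR and
// LSL, and to fill it with copies of the sign bit for ASR; the reference
// computations below model this.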
9935 const uint64_t mask = GetUintMask(lane_size_in_bits);
9936 for (int i = 0; i < static_cast<int>(N); i++) {
9937 int lane = N - i - 1;
9938 if (!core.HasSVELane(zd_asr, lane)) break;
9939 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
9940 uint64_t result;
9941 if (shift >= lane_size_in_bits) {
9942 result = is_negative ? mask : 0;
9943 } else {
9944 result = zn_inputs[i] >> shift;
9945 if (is_negative) {
9946 result |= mask << (lane_size_in_bits - shift);
9947 result &= mask;
9948 }
9949 }
9950 ASSERT_EQUAL_SVE_LANE(result, zd_asr, lane);
9951 }
9952
9953 for (int i = 0; i < static_cast<int>(N); i++) {
9954 int lane = N - i - 1;
9955 if (!core.HasSVELane(zd_lsr, lane)) break;
9956 uint64_t result =
9957 (shift >= lane_size_in_bits) ? 0 : zn_inputs[i] >> shift;
9958 ASSERT_EQUAL_SVE_LANE(result, zd_lsr, lane);
9959 }
9960
9961 for (int i = 0; i < static_cast<int>(N); i++) {
9962 int lane = N - i - 1;
9963 if (!core.HasSVELane(zd_lsl, lane)) break;
9964 uint64_t result = (shift >= lane_size_in_bits) ? 0 : zn_inputs[i]
9965 << shift;
9966 ASSERT_EQUAL_SVE_LANE(result & mask, zd_lsl, lane);
9967 }
9968 }
9969}
9970
9971TEST_SVE(sve_bitwise_shift_imm_unpredicated) {
9972 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
9973 int shift_b[] = {1, 3, 5, 8};
9974 for (size_t i = 0; i < ArrayLength(shift_b); i++) {
9975 BitwiseShiftImmHelper(config, kBRegSize, inputs_b, shift_b[i]);
9976 }
9977
9978 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233};
9979 int shift_h[] = {1, 8, 11, 16};
9980 for (size_t i = 0; i < ArrayLength(shift_h); i++) {
9981 BitwiseShiftImmHelper(config, kHRegSize, inputs_h, shift_h[i]);
9982 }
9983
9984 uint64_t inputs_s[] = {0xfedcba98, 0xfffa55aa, 0x00112233};
9985 int shift_s[] = {1, 9, 17, 32};
9986 for (size_t i = 0; i < ArrayLength(shift_s); i++) {
9987 BitwiseShiftImmHelper(config, kSRegSize, inputs_s, shift_s[i]);
9988 }
9989
9990 uint64_t inputs_d[] = {0xfedcba98fedcba98,
9991 0xfffa5555aaaaaaaa,
9992 0x0011223344aafe80};
9993 int shift_d[] = {1, 23, 45, 64};
9994 for (size_t i = 0; i < ArrayLength(shift_d); i++) {
9995 BitwiseShiftImmHelper(config, kDRegSize, inputs_d, shift_d[i]);
9996 }
9997}
9998
9999template <typename T, typename R, size_t N>
10000static void BitwiseShiftWideElementsHelper(Test* config,
10001 Shift shift_type,
10002 int lane_size_in_bits,
10003 const T (&zn_inputs)[N],
10004 const R& zm_inputs,
10005 const T (&zd_expected)[N]) {
10006 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10007 START();
10008
10009 ArithFn macro;
 10010  // A logical shift left or right by the full lane width produces 0, so
 10011  // initialize the array to 0 for convenience.
10012 uint64_t zd_expected_max_shift_amount[N] = {0};
10013 switch (shift_type) {
10014 case ASR: {
10015 macro = &MacroAssembler::Asr;
10016 uint64_t mask = GetUintMask(lane_size_in_bits);
10017 for (size_t i = 0; i < ArrayLength(zn_inputs); i++) {
10018 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
10019 zd_expected_max_shift_amount[i] = is_negative ? mask : 0;
10020 }
10021 break;
10022 }
10023 case LSR:
10024 macro = &MacroAssembler::Lsr;
10025 break;
10026 case LSL:
10027 macro = &MacroAssembler::Lsl;
10028 break;
10029 default:
10030 VIXL_UNIMPLEMENTED();
10031 macro = NULL;
10032 break;
10033 }
10034
10035 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
10036 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
10037 ZRegister zm = z28.WithLaneSize(kDRegSize);
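// In the wide-element form, zm holds D-sized elements; each one supplies the
// shift amount for all of the zn elements in the same 64-bit chunk.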
10038
10039 InsrHelper(&masm, zn, zn_inputs);
10040 InsrHelper(&masm, zm, zm_inputs);
10041
10042 (masm.*macro)(zd, zn, zm);
10043
10044 ZRegister zm_max_shift_amount = z25.WithLaneSize(kDRegSize);
10045 ZRegister zd_max_shift_amount = z24.WithLaneSize(lane_size_in_bits);
10046
10047 __ Dup(zm_max_shift_amount, lane_size_in_bits);
10048 (masm.*macro)(zd_max_shift_amount, zn, zm_max_shift_amount);
10049
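// Shift amounts larger than the lane width are expected to behave like a
// shift by exactly the lane width; this is checked by comparing
// zd_out_of_range against zd_max_shift_amount below.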
10050 ZRegister zm_out_of_range = z23.WithLaneSize(kDRegSize);
10051 ZRegister zd_out_of_range = z22.WithLaneSize(lane_size_in_bits);
10052
10053 __ Dup(zm_out_of_range, GetUintMask(lane_size_in_bits));
10054 (masm.*macro)(zd_out_of_range, zn, zm_out_of_range);
10055
10056 END();
10057
10058 if (CAN_RUN()) {
10059 RUN();
10060
10061 ASSERT_EQUAL_SVE(zd_expected, zd);
10062 ASSERT_EQUAL_SVE(zd_expected_max_shift_amount, zd_max_shift_amount);
10063 ASSERT_EQUAL_SVE(zd_max_shift_amount, zd_out_of_range);
10064 }
10065}
10066
10067TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_asr) {
10068 // clang-format off
10069 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10070 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10071 int shift_b[] = {1, 3};
10072 uint64_t expected_b[] = {0xff, 0xee, 0xdd, 0xcc, 0xff, 0x2a, 0xd5, 0xc0,
10073 0xff, 0xfb, 0xf7, 0xf3, 0xff, 0x0a, 0xf5, 0xf0};
10074 BitwiseShiftWideElementsHelper(config,
10075 ASR,
10076 kBRegSize,
10077 inputs_b,
10078 shift_b,
10079 expected_b);
10080
10081 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10082 0xfedc, 0xfa55, 0x0011, 0x2233,
10083 0xfedc, 0xfa55, 0x0011, 0x2233};
10084 int shift_h[] = {1, 8, 11};
10085 uint64_t expected_h[] = {0xff6e, 0xfd2a, 0x0008, 0x1119,
10086 0xfffe, 0xfffa, 0x0000, 0x0022,
10087 0xffff, 0xffff, 0x0000, 0x0004};
10088 BitwiseShiftWideElementsHelper(config,
10089 ASR,
10090 kHRegSize,
10091 inputs_h,
10092 shift_h,
10093 expected_h);
10094
10095 uint64_t inputs_s[] =
10096 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10097 int shift_s[] = {1, 9, 23};
10098 uint64_t expected_s[] =
10099 {0xff6e5d4c, 0xfffd2ad5, 0x00000891, 0x000091a2, 0xffffff55, 0xffffff11};
10100 BitwiseShiftWideElementsHelper(config,
10101 ASR,
10102 kSRegSize,
10103 inputs_s,
10104 shift_s,
10105 expected_s);
10106 // clang-format on
10107}
10108
10109TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsr) {
10110 // clang-format off
10111 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10112 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10113 int shift_b[] = {1, 3};
10114 uint64_t expected_b[] = {0x7f, 0x6e, 0x5d, 0x4c, 0x7f, 0x2a, 0x55, 0x40,
10115 0x1f, 0x1b, 0x17, 0x13, 0x1f, 0x0a, 0x15, 0x10};
10116
10117 BitwiseShiftWideElementsHelper(config,
10118 LSR,
10119 kBRegSize,
10120 inputs_b,
10121 shift_b,
10122 expected_b);
10123
10124 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10125 0xfedc, 0xfa55, 0x0011, 0x2233,
10126 0xfedc, 0xfa55, 0x0011, 0x2233};
10127 int shift_h[] = {1, 8, 11};
10128 uint64_t expected_h[] = {0x7f6e, 0x7d2a, 0x0008, 0x1119,
10129 0x00fe, 0x00fa, 0x0000, 0x0022,
10130 0x001f, 0x001f, 0x0000, 0x0004};
10131 BitwiseShiftWideElementsHelper(config,
10132 LSR,
10133 kHRegSize,
10134 inputs_h,
10135 shift_h,
10136 expected_h);
10137
10138 uint64_t inputs_s[] =
10139 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10140 int shift_s[] = {1, 9, 23};
10141 uint64_t expected_s[] =
10142 {0x7f6e5d4c, 0x7ffd2ad5, 0x00000891, 0x000091a2, 0x00000155, 0x00000111};
10143 BitwiseShiftWideElementsHelper(config,
10144 LSR,
10145 kSRegSize,
10146 inputs_s,
10147 shift_s,
10148 expected_s);
10149 // clang-format on
10150}
10151
10152TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsl) {
10153 // clang-format off
10154 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10155 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10156 int shift_b[] = {1, 5};
10157
10158 uint64_t expected_b[] = {0xfc, 0xb8, 0x74, 0x30, 0xfe, 0xaa, 0x54, 0x00,
10159 0xc0, 0x80, 0x40, 0x00, 0xe0, 0xa0, 0x40, 0x00};
10160
10161 BitwiseShiftWideElementsHelper(config,
10162 LSL,
10163 kBRegSize,
10164 inputs_b,
10165 shift_b,
10166 expected_b);
10167 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10168 0xfedc, 0xfa55, 0x0011, 0x2233,
10169 0xfedc, 0xfa55, 0x0011, 0x2233};
10170 int shift_h[] = {1, 2, 14};
10171
10172 uint64_t expected_h[] = {0xfdb8, 0xf4aa, 0x0022, 0x4466,
10173 0xfb70, 0xe954, 0x0044, 0x88cc,
10174 0x0000, 0x4000, 0x4000, 0xc000};
10175 BitwiseShiftWideElementsHelper(config,
10176 LSL,
10177 kHRegSize,
10178 inputs_h,
10179 shift_h,
10180 expected_h);
10181 uint64_t inputs_s[] =
10182 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10183 int shift_s[] = {1, 19, 26};
10184 uint64_t expected_s[] =
10185 {0xfdb97530, 0xfff4ab54, 0x11980000, 0x2b380000, 0xa8000000, 0x20000000};
10186 BitwiseShiftWideElementsHelper(config,
10187 LSL,
10188 kSRegSize,
10189 inputs_s,
10190 shift_s,
10191 expected_s);
10192 // clang-format on
10193}
10194
TatWai Chong4023d7a2019-11-18 14:16:28 -080010195TEST_SVE(sve_setffr) {
10196 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10197 START();
10198
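// Setffr sets every bit of the first-fault register, so reading it back with
// Rdffr should match an all-true predicate.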
10199 __ Ptrue(p15.VnB());
10200 __ Setffr();
10201 __ Rdffr(p14.VnB());
10202
10203 END();
10204
10205 if (CAN_RUN()) {
10206 RUN();
10207
10208 ASSERT_EQUAL_SVE(p14.VnB(), p15.VnB());
10209 }
10210}
10211
10212static void WrffrHelper(Test* config, unsigned active_lanes) {
10213 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10214 START();
10215
10216 int inputs[kPRegMaxSize] = {0};
10217 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
10218 for (unsigned i = 0; i < active_lanes; i++) {
10219 // The rightmost (highest-indexed) array element maps to the lowest-numbered
10220 // lane.
10221 inputs[kPRegMaxSize - i - 1] = 1;
10222 }
10223
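// Note that only patterns with a contiguous run of active low-numbered lanes
// are written here, matching the shape the FFR typically takes after a
// first-faulting load that started with a fully-set FFR.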
10224 Initialise(&masm, p1.VnB(), inputs);
10225 __ Wrffr(p1.VnB());
10226 __ Rdffr(p2.VnB());
10227
10228 END();
10229
10230 if (CAN_RUN()) {
10231 RUN();
10232
10233 ASSERT_EQUAL_SVE(p1.VnB(), p2.VnB());
10234 }
10235}
10236
10237TEST_SVE(sve_wrffr) {
10238 int active_lanes_inputs[] = {0, 1, 7, 10, 32, 48, kPRegMaxSize};
10239 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
10240 WrffrHelper(config, active_lanes_inputs[i]);
10241 }
10242}
10243
Jacob Bramleyd77a8e42019-02-12 16:52:24 +000010244} // namespace aarch64
10245} // namespace vixl