// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
  // We never free this memory, but we need it to live for as long as the
  // static linked list of tests, and this is the easiest way to do it.
  Test* test = new Test(name, fn);
  test->set_sve_vl_in_bits(vl);
  return test;
}

// The TEST_SVE macro works just like the usual TEST macro, but the resulting
// function receives a `Test* config` argument, to allow it to query the
// vector length.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// On the Simulator, run SVE tests with several vector lengths, including the
// extreme values and an intermediate value that isn't a power of two.

#define TEST_SVE(name)                                                  \
  void Test##name(Test* config);                                        \
  Test* test_##name##_list[] =                                          \
      {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name),    \
       MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name),    \
       MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  simulator.SetVectorLengthInBits(config->sve_vl_in_bits())

#else
// Otherwise, just use whatever the hardware provides.
static const int kSVEVectorLengthInBits =
    CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
        ? CPU::ReadSVEVectorLengthInBits()
        : 0;

#define TEST_SVE(name)                                                      \
  void Test##name(Test* config);                                            \
  Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits,          \
                                           "AARCH64_ASM_" #name "_vlauto",  \
                                           &Test##name);                    \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  USE(config)

#endif
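
// A typical test body therefore looks like the sketch below. (This is an
// illustration only; START, END, CAN_RUN and RUN come from the shared
// assembler test infrastructure, and the input/check helpers are defined
// later in this file.)
//
//   TEST_SVE(example) {
//     SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
//     START();
//     __ Index(z0.VnB(), 0, 1);  // Generate the code under test.
//     END();
//     if (CAN_RUN()) {
//       RUN();
//       // ASSERT_EQUAL_SVE(...) checks go here.
//     }
//   }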

// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
// is optimised for call-site clarity, not generated code quality, so it doesn't
// exist in the MacroAssembler itself.
//
// Usage:
//
//    int values[] = { 42, 43, 44 };
//    InsrHelper(&masm, z0.VnS(), values);  // Sets z0.S = { ..., 42, 43, 44 }
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane.
template <typename T, size_t N>
void InsrHelper(MacroAssembler* masm,
                const ZRegister& zdn,
                const T (&values)[N]) {
  for (size_t i = 0; i < N; i++) {
    masm->Insr(zdn, values[i]);
  }
}

// Conveniently initialise P registers with scalar bit patterns. The destination
// lane size is ignored. This is optimised for call-site clarity, not generated
// code quality.
//
// Usage:
//
//    Initialise(&masm, p0, 0x1234);  // Sets p0 = 0b'0001'0010'0011'0100
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value3,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  // Generate a literal pool, as in the array form.
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  masm->Adr(temp, &data);
  masm->Ldr(pd, SVEMemOperand(temp));
  masm->B(&done);
  {
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    masm->dc64(value0);
    masm->dc64(value1);
    masm->dc64(value2);
    masm->dc64(value3);
  }
  masm->Bind(&done);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, value2, value1, value0);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, 0, value1, value0);
}
void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
  Initialise(masm, pd, 0, 0, 0, value0);
}

// Conveniently initialise P registers by lane. This is optimised for call-site
// clarity, not generated code quality.
//
// Usage:
//
//    int values[] = { 0x0, 0x1, 0x2 };
//    Initialise(&masm, p0.VnS(), values);  // Sets p0 = 0b'0000'0001'0010
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane. Unspecified lanes are set to 0 (inactive).
//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lower bit, and writes zero to the upper bits,
// but other (encodable) values can be specified if required by the test.
template <typename T, size_t N>
void Initialise(MacroAssembler* masm,
                const PRegisterWithLaneSize& pd,
                const T (&values)[N]) {
  // Turn the array into 64-bit chunks.
  uint64_t chunks[4] = {0, 0, 0, 0};
  VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);

  int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT((64 % p_bits_per_lane) == 0);
  VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);

  uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);

  VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
  size_t bit = 0;
  for (int n = static_cast<int>(N - 1); n >= 0; n--) {
    VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
    uint64_t value = values[n] & p_lane_mask;
    chunks[bit / 64] |= value << (bit % 64);
    bit += p_bits_per_lane;
  }

  Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
}
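
// As a worked example of the packing above: with S-sized lanes (four P bits
// per lane), an input array of {0xc, 0x7, 0x9} is packed as 0b1100'0111'1001,
// so chunks[0] == 0xc79 and the remaining chunks stay zero.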

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  __ Mov(x0, 0x0123456789abcdef);

  // Test basic `Insr` behaviour.
  __ Insr(z0.VnB(), 1);
  __ Insr(z0.VnB(), 2);
  __ Insr(z0.VnB(), x0);
  __ Insr(z0.VnB(), -42);
  __ Insr(z0.VnB(), 0);

  // Test array inputs.
  int z1_inputs[] = {3, 4, 5, -42, 0};
  InsrHelper(&masm, z1.VnH(), z1_inputs);

  // Test that sign-extension works as intended for various lane sizes.
  __ Dup(z2.VnD(), 0);            // Clear the register first.
  __ Insr(z2.VnB(), -42);         // 0xd6
  __ Insr(z2.VnB(), 0xfe);        // 0xfe
  __ Insr(z2.VnH(), -42);         // 0xffd6
  __ Insr(z2.VnH(), 0xfedc);      // 0xfedc
  __ Insr(z2.VnS(), -42);         // 0xffffffd6
  __ Insr(z2.VnS(), 0xfedcba98);  // 0xfedcba98
  // Use another register for VnD(), so we can support 128-bit Z registers.
  __ Insr(z3.VnD(), -42);                 // 0xffffffffffffffd6
  __ Insr(z3.VnD(), 0xfedcba9876543210);  // 0xfedcba9876543210

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that array checks work properly on a register initialised
    // lane-by-lane.
    int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
    ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());

    // Test that lane-by-lane checks work properly on a register initialised
    // by array.
    for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
      // The rightmost (highest-indexed) array element maps to the
      // lowest-numbered lane.
      int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
    }

    uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
    ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
    uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
    ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
  }
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_p) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Simple cases: move boolean (0 or 1) values.

  int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
  Initialise(&masm, p1.VnH(), p1_inputs);

  int p2_inputs[] = {1, 1, 0, 1};
  Initialise(&masm, p2.VnS(), p2_inputs);

  int p3_inputs[] = {0, 1};
  Initialise(&masm, p3.VnD(), p3_inputs);

  // Advanced cases: move numeric value into architecturally-ignored bits.

  // B-sized lanes get one bit in a P register, so there are no ignored bits.

  // H-sized lanes get two bits in a P register.
  int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
  Initialise(&masm, p4.VnH(), p4_inputs);

  // S-sized lanes get four bits in a P register.
  int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
  Initialise(&masm, p5.VnS(), p5_inputs);

  // D-sized lanes get eight bits in a P register.
  int p6_inputs[] = {0x81, 0xcc, 0x55};
  Initialise(&masm, p6.VnD(), p6_inputs);

  // The largest possible P register has 32 bytes.
  int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
                     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                     0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
  Initialise(&masm, p7.VnD(), p7_inputs);

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that lane-by-lane checks work properly. The rightmost
    // (highest-indexed) array element maps to the lowest-numbered lane.
    for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
    }

    // Test that array checks work properly on predicates initialised with a
    // possibly-different lane size.
    // 0b...11'10'01'00'01'10'11
    int p4_expected[] = {0x39, 0x1b};
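    // (That packed value is 0x391b; read eight predicate bits at a time,
    // D lane 0 holds 0x1b and D lane 1 holds 0x39.)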
    ASSERT_EQUAL_SVE(p4_expected, p4.VnD());

    ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());

    // 0b...10000001'11001100'01010101
    int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnH());

    // 0b...10011100'10011101'10011110'10011111
    int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
                         1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
  }
}

// Test that writes to V registers clear the high bits of the corresponding Z
// register.
TEST_SVE(sve_v_write_clear) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                          CPUFeatures::kFP,
                          CPUFeatures::kSVE);
  START();

  // The Simulator has two mechanisms for writing V registers:
  //  - Write*Register, calling through to SimRegisterBase::Write.
  //  - LogicVRegister::ClearForWrite followed by one or more lane updates.
  // Try to cover both variants.

  // Prepare some known inputs.
  uint8_t data[kQRegSizeInBytes];
  for (size_t i = 0; i < kQRegSizeInBytes; i++) {
    data[i] = 42 + i;
  }
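  // (data[] therefore holds 42, 43, ..., 42 + kQRegSizeInBytes - 1, so every
  // byte loaded into q4 below is distinguishable.)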
  __ Mov(x10, reinterpret_cast<uintptr_t>(data));
  __ Fmov(d30, 42.0);

  // Use Index to label the lane indices, so failures are easy to detect and
  // diagnose.
  __ Index(z0.VnB(), 0, 1);
  __ Index(z1.VnB(), 0, 1);
  __ Index(z2.VnB(), 0, 1);
  __ Index(z3.VnB(), 0, 1);
  __ Index(z4.VnB(), 0, 1);

  __ Index(z10.VnB(), 0, -1);
  __ Index(z11.VnB(), 0, -1);
  __ Index(z12.VnB(), 0, -1);
  __ Index(z13.VnB(), 0, -1);
  __ Index(z14.VnB(), 0, -1);

  // Instructions using Write*Register (and SimRegisterBase::Write).
  __ Ldr(b0, MemOperand(x10));
  __ Fcvt(h1, d30);
  __ Fmov(s2, 1.5f);
  __ Fmov(d3, d30);
  __ Ldr(q4, MemOperand(x10));

  // Instructions using LogicVRegister::ClearForWrite.
  // These also (incidentally) test that across-lane instructions correctly
  // ignore the high-order Z register lanes.
  __ Sminv(b10, v10.V16B());
  __ Addv(h11, v11.V4H());
  __ Saddlv(s12, v12.V8H());
  __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
  __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Check the Q part first.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1);  // 42.0 (f16)
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2);  // 1.5 (f32)
    ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3);  // 42.0 (f64)
    ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10);  // -15
    // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
    // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13);  // [-8] x 8
    // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
    ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);

    // Check that the upper lanes are all clear.
    for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
      ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
    }
  }
}

static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
  int za_inputs[] = {-39, 1, -3, 2};
  int zn_inputs[] = {-5, -20, 9, 8};
  int zm_inputs[] = {9, -5, 4, 5};

  ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
  ZRegister za = z1.WithLaneSize(lane_size_in_bits);
  ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
  ZRegister zm = z3.WithLaneSize(lane_size_in_bits);

  // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
  InsrHelper(&masm, zd, zd_inputs);
  InsrHelper(&masm, za, za_inputs);
  InsrHelper(&masm, zn, zn_inputs);
  InsrHelper(&masm, zm, zm_inputs);

  int p0_inputs[] = {1, 1, 0, 1};
  int p1_inputs[] = {1, 0, 1, 1};
  int p2_inputs[] = {0, 1, 1, 1};
  int p3_inputs[] = {1, 1, 1, 0};

  Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
  Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
  Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
  Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);

  // The Mla macro automatically selects between mla, mad and movprfx + mla
  // based on what registers are aliased.
  ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
  ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);

  __ Mov(mla_da_result, za);
  __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);

  __ Mov(mla_dn_result, zn);
  __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);

  __ Mov(mla_dm_result, zm);
  __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);

  __ Mov(mla_d_result, zd);
  __ Mla(mla_d_result, p3.Merging(), za, zn, zm);

  // The Mls macro automatically selects between mls, msb and movprfx + mls
  // based on what registers are aliased.
  ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
  ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);

  __ Mov(mls_da_result, za);
  __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);

  __ Mov(mls_dn_result, zn);
  __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);

  __ Mov(mls_dm_result, zm);
  __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);

  __ Mov(mls_d_result, zd);
  __ Mls(mls_d_result, p3.Merging(), za, zn, zm);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));

    int mla[] = {-84, 101, 33, 42};
    int mls[] = {6, -99, -39, -38};
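    // (For example, in lane 0: mla[0] = za + (zn * zm) = -39 + (-5 * 9) = -84,
    // and mls[0] = za - (zn * zm) = -39 - (-5 * 9) = 6.)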

    int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);

    int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);

    int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);

    int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);

    int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);

    int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);

    int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);

    int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
  }
}

TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }

TEST_SVE(sve_bitwise_unpredicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  InsrHelper(&masm, z8.VnD(), z8_inputs);
  uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
  InsrHelper(&masm, z15.VnD(), z15_inputs);

  __ And(z1.VnD(), z8.VnD(), z15.VnD());
  __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
  __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
  __ Orr(z4.VnD(), z8.VnD(), z15.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
    uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
    uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
    uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
    uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
  }
}

TEST_SVE(sve_predicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // 0b...01011010'10110111
  int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1};  // Pm
  // 0b...11011001'01010010
  int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0};  // Pn
  // 0b...01010101'10110010
  int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};  // pg

  Initialise(&masm, p10.VnB(), p10_inputs);
  Initialise(&masm, p11.VnB(), p11_inputs);
  Initialise(&masm, p12.VnB(), p12_inputs);

  __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x0, NZCV);
  __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x1, NZCV);
  __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0b...01010000'00010010
    int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...00000001'00000000
    int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...00000001'10100000
    int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000101'10100000
    int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000100'00000000
    int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...01010101'00010010
    int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...01010001'10110010
    int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
    // 0b...01011011'00010111
    int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};

    ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
    ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
    ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
    ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
    ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());

    ASSERT_EQUAL_32(SVEFirstFlag, w0);
    ASSERT_EQUAL_32(SVENotLastFlag, w1);
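    // (For flag-setting predicate operations, N reports "the first active
    // element of the result is true" and C reports "the last active element is
    // false"; these correspond to the SVEFirstFlag and SVENotLastFlag values
    // checked above.)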
  }
}

TEST_SVE(sve_int_compare_vectors) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
  int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
  int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z10.VnB(), z10_inputs);
  InsrHelper(&masm, z11.VnB(), z11_inputs);
  Initialise(&masm, p0.VnB(), p0_inputs);

  __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
  __ Mrs(x6, NZCV);

  uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
  uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
  int p1_inputs[] = {1, 1};
  InsrHelper(&masm, z12.VnD(), z12_inputs);
  InsrHelper(&masm, z13.VnD(), z13_inputs);
  Initialise(&masm, p1.VnD(), p1_inputs);

  __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
  __ Mrs(x7, NZCV);

  int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
  int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};

  int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z14.VnH(), z14_inputs);
  InsrHelper(&masm, z15.VnH(), z15_inputs);
  Initialise(&masm, p2.VnH(), p2_inputs);

  __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x8, NZCV);

  __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x9, NZCV);

  int z16_inputs[] = {0, -1, 0, 0};
  int z17_inputs[] = {0, 0, 2147483647, -2147483648};
  int p3_inputs[] = {1, 1, 1, 1};
  InsrHelper(&masm, z16.VnS(), z16_inputs);
  InsrHelper(&masm, z17.VnS(), z17_inputs);
  Initialise(&masm, p3.VnS(), p3_inputs);

  __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x10, NZCV);

  __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x11, NZCV);

  // Architectural aliases testing.
  __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB());  // HS
  __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD());  // HI
  __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH());  // GE
  __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS());  // GT

  END();

  if (CAN_RUN()) {
    RUN();

    int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
    for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
      int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
    }

    int p7_expected[] = {1, 0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnD());

    int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnH());

    int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnH());

    int p10_expected[] = {0, 0, 0, 1};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0, 1, 1, 1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    // Reuse the expected results to verify the architectural aliases.
    ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
    ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
    ASSERT_EQUAL_SVE(p10_expected, p15.VnS());

    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(NoFlag, w7);
    ASSERT_EQUAL_32(NoFlag, w8);
    ASSERT_EQUAL_32(NoFlag, w9);
    ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
  }
}

TEST_SVE(sve_int_compare_vectors_wide_elements) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
  int src2_inputs_1[] = {0, -1};
  int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_1);
  InsrHelper(&masm, z19.VnD(), src2_inputs_1);
  Initialise(&masm, p0.VnB(), mask_inputs_1);
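  // (In these wide-element forms, each narrow lane of the first source is
  // compared with the D-sized lane of z19 that covers the same 64-bit segment,
  // so the lowest eight B lanes here are all compared with -1.)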

  __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x2, NZCV);
  __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x3, NZCV);

  int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
  int src2_inputs_2[] = {0, -32767};
  int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z13.VnH(), src1_inputs_2);
  InsrHelper(&masm, z19.VnD(), src2_inputs_2);
  Initialise(&masm, p0.VnH(), mask_inputs_2);

  __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x4, NZCV);
  __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x5, NZCV);

  int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
  int src2_inputs_3[] = {0, -2147483648};
  int mask_inputs_3[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_3);
  InsrHelper(&masm, z19.VnD(), src2_inputs_3);
  Initialise(&masm, p0.VnS(), mask_inputs_3);

  __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x6, NZCV);
  __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x7, NZCV);

  int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
  int src2_inputs_4[] = {0x00, 0x7f};
  int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_4);
  InsrHelper(&masm, z19.VnD(), src2_inputs_4);
  Initialise(&masm, p0.VnB(), mask_inputs_4);

  __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x8, NZCV);
  __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x9, NZCV);

  int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
  int src2_inputs_5[] = {0x8000, 0xffff};
  int mask_inputs_5[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_5);
  InsrHelper(&masm, z19.VnD(), src2_inputs_5);
  Initialise(&masm, p0.VnS(), mask_inputs_5);

  __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x10, NZCV);
  __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x11, NZCV);

  END();

  if (CAN_RUN()) {
    RUN();
    int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());

    int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());

    int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnH());

    int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p5_expected, p5.VnH());

    int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnS());

    int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnS());

    int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnB());

    int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnB());

    int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    ASSERT_EQUAL_32(NoFlag, w2);
    ASSERT_EQUAL_32(NoFlag, w3);
    ASSERT_EQUAL_32(NoFlag, w4);
    ASSERT_EQUAL_32(SVENotLastFlag, w5);
    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(SVENotLastFlag, w7);
    ASSERT_EQUAL_32(SVEFirstFlag, w8);
    ASSERT_EQUAL_32(SVEFirstFlag, w9);
    ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
    ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
  }
}

TEST_SVE(sve_bitwise_imm) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // clang-format off
  uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
  uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
                           0x0123, 0x4567, 0x89ab, 0xcdef};
  uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
                          0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
  // clang-format on

  InsrHelper(&masm, z1.VnD(), z21_inputs);
  InsrHelper(&masm, z2.VnS(), z22_inputs);
  InsrHelper(&masm, z3.VnH(), z23_inputs);
  InsrHelper(&masm, z4.VnB(), z24_inputs);

  __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
  __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
  __ And(z3.VnH(), z3.VnH(), 0x0ff0);
  __ And(z4.VnB(), z4.VnB(), 0x3f);
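  // (These immediate forms accept only bitmask immediates - repeating patterns
  // built from rotated runs of ones - encoded in the same way as the base A64
  // logical-immediate instructions.)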

  InsrHelper(&masm, z5.VnD(), z21_inputs);
  InsrHelper(&masm, z6.VnS(), z22_inputs);
  InsrHelper(&masm, z7.VnH(), z23_inputs);
  InsrHelper(&masm, z8.VnB(), z24_inputs);

  __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
  __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
  __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
  __ Eor(z8.VnB(), z8.VnB(), 0x3f);

  InsrHelper(&masm, z9.VnD(), z21_inputs);
  InsrHelper(&masm, z10.VnS(), z22_inputs);
  InsrHelper(&masm, z11.VnH(), z23_inputs);
  InsrHelper(&masm, z12.VnB(), z24_inputs);

  __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
  __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
  __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
  __ Orr(z12.VnB(), z12.VnB(), 0x3f);

  {
    // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
    // so here we test `dupm` directly.
    ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
    __ dupm(z13.VnD(), 0x7ffffff800000000);
    __ dupm(z14.VnS(), 0x7ffc7ffc);
    __ dupm(z15.VnH(), 0x3ffc);
    __ dupm(z16.VnB(), 0xc3);
  }

  END();

  if (CAN_RUN()) {
    RUN();

    // clang-format off
    uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
    uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
    uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
                              0x0120, 0x0560, 0x09a0, 0x0de0};
    uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
                             0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnB());

    uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
    uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
    uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
                              0x0ed3, 0x4a97, 0x865b, 0xc21f};
    uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
                             0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};

    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
    ASSERT_EQUAL_SVE(z8_expected, z8.VnB());

    uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
    uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
    uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
                               0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
    uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
                              0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};

    ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
    ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
    ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
    ASSERT_EQUAL_SVE(z12_expected, z12.VnB());

    uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
    uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
    uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
                               0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc};
    ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
    ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
    ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
    // clang-format on
  }
}

TEST_SVE(sve_dup_imm) {
  // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
  // unencodable immediates.

  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Encodable with `dup` (shift 0).
  __ Dup(z0.VnD(), -1);
  __ Dup(z1.VnS(), 0x7f);
  __ Dup(z2.VnH(), -0x80);
  __ Dup(z3.VnB(), 42);

  // Encodable with `dup` (shift 8).
  __ Dup(z4.VnD(), -42 * 256);
  __ Dup(z5.VnS(), -0x8000);
  __ Dup(z6.VnH(), 0x7f00);
  // B-sized lanes cannot take a shift of 8.

  // Encodable with `dupm` (but not `dup`).
  __ Dup(z10.VnD(), 0x3fc);
  __ Dup(z11.VnS(), -516097);  // 0xfff81fff, as a signed int.
  __ Dup(z12.VnH(), 0x0001);
  // All values that fit B-sized lanes are encodable with `dup`.

  // Cases that require immediate synthesis.
  __ Dup(z20.VnD(), 0x1234);
  __ Dup(z21.VnD(), -4242);
  __ Dup(z22.VnD(), 0xfedcba9876543210);
  __ Dup(z23.VnS(), 0x01020304);
  __ Dup(z24.VnS(), -0x01020304);
  __ Dup(z25.VnH(), 0x3c38);
  // All values that fit B-sized lanes are directly encodable.

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
    ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
    ASSERT_EQUAL_SVE(0xff80, z2.VnH());
    ASSERT_EQUAL_SVE(0x2a, z3.VnB());

    ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
    ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
    ASSERT_EQUAL_SVE(0x7f00, z6.VnH());

    ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
    ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
    ASSERT_EQUAL_SVE(0x0001, z12.VnH());

    ASSERT_EQUAL_SVE(0x1234, z20.VnD());
    ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
    ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
    ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
    ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
    ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
  }
}

TEST_SVE(sve_inc_dec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;
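  // (These counts are the number of active lanes in p0 when it is read with
  // each lane size; only the lowest bit of each lane-sized field matters.)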

  // 64-bit operations preserve their high bits.
  __ Mov(x0, 0x123456780000002a);
  __ Decp(x0, p0.VnB());

  __ Mov(x1, 0x123456780000002a);
  __ Incp(x1, p0.VnH());

  // Check that saturation does not occur.
  __ Mov(x10, 1);
  __ Decp(x10, p0.VnS());

  __ Mov(x11, UINT64_MAX);
  __ Incp(x11, p0.VnD());

  __ Mov(x12, INT64_MAX);
  __ Incp(x12, p0.VnB());

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Decp(x20, p15.VnB());

  __ Mov(x21, 0x4000000000000000);
  __ Incp(x21, p15.VnH());

  END();
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
    ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);

    ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
    ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
    ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);

    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
  }
}

TEST_SVE(sve_sqinc_sqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Sqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Sqincp(x1, p0.VnH());

  // 32-bit operations sign-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Sqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Sqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 1);
  __ Sqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high - 1);
  __ Sqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 0x8000000000000001);  // INT64_MIN + 1
  __ Sqdecp(x10, p0.VnB(), x10);
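  // (For example, x10 starts at INT64_MIN + 1 and would be decremented by
  // p0_b_count (9), so it saturates to INT64_MIN instead of wrapping.)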

  __ Mov(x11, dummy_high + 0x80000001);  // INT32_MIN + 1
  __ Sqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 1);
  __ Sqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 1);
  __ Sqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Sqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Sqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0xffffffffffffffff);
  __ Sqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0xffffffff);
  __ Sqincp(x19, p0.VnD(), w19);

  __ Mov(x20, dummy_high + 0xffffffff);
  __ Sqdecp(x20, p0.VnB(), w20);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x21, 0);
  __ Sqdecp(x21, p15.VnB(), x21);

  __ Mov(x22, 0);
  __ Sqincp(x22, p15.VnH(), x22);

  __ Mov(x23, dummy_high);
  __ Sqdecp(x23, p15.VnS(), w23);

  __ Mov(x24, dummy_high);
  __ Sqincp(x24, p15.VnD(), w24);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations sign-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
    ASSERT_EQUAL_64(p0_d_count - 1, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(INT64_MIN, x10);
    ASSERT_EQUAL_64(INT32_MIN, x11);
    ASSERT_EQUAL_64(1 - p0_s_count, x12);
    ASSERT_EQUAL_64(1 - p0_d_count, x13);
    ASSERT_EQUAL_64(INT64_MAX, x14);
    ASSERT_EQUAL_64(INT32_MAX, x15);
    ASSERT_EQUAL_64(p0_s_count - 1, x18);
    ASSERT_EQUAL_64(p0_d_count - 1, x19);
    ASSERT_EQUAL_64(-1 - p0_b_count, x20);

    // Check all-true predicates.
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
  }
}

TEST_SVE(sve_uqinc_uqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Uqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Uqincp(x1, p0.VnH());

  // 32-bit operations zero-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Uqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Uqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 0x80000001);
  __ Uqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high + 0x7fffffff);
  __ Uqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 1);
  __ Uqdecp(x10, p0.VnB(), x10);

  __ Mov(x11, dummy_high + 1);
  __ Uqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 0x8000000000000000);  // INT64_MAX + 1
  __ Uqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 0x80000000);  // INT32_MAX + 1
  __ Uqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0xfffffffffffffffe);  // UINT64_MAX - 1
  __ Uqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0xfffffffe);  // UINT32_MAX - 1
  __ Uqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Uqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Uqincp(x19, p0.VnD(), w19);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Uqdecp(x20, p15.VnB(), x20);

  __ Mov(x21, 0x4000000000000000);
  __ Uqincp(x21, p15.VnH(), x21);

  __ Mov(x22, dummy_high + 0x40000000);
  __ Uqdecp(x22, p15.VnS(), w22);

  __ Mov(x23, dummy_high + 0x40000000);
  __ Uqincp(x23, p15.VnD(), w23);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations zero-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
    ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(0, x10);
    ASSERT_EQUAL_64(0, x11);
    ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
    ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
    ASSERT_EQUAL_64(UINT64_MAX, x14);
    ASSERT_EQUAL_64(UINT32_MAX, x15);
    ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
    ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);

    // Check all-true predicates.
    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
    ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
    ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
  }
}

TEST_SVE(sve_inc_dec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  // Check that saturation does not occur.

  int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
  InsrHelper(&masm, z0.VnD(), z0_inputs);

  int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
  InsrHelper(&masm, z1.VnD(), z1_inputs);

  int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
  InsrHelper(&masm, z2.VnS(), z2_inputs);

  int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
  InsrHelper(&masm, z3.VnH(), z3_inputs);

  // The MacroAssembler implements non-destructive operations using movprfx.
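  // (Roughly speaking, `Decp(z10.VnD(), p0, z0.VnD())` below is expected to
  // become a `movprfx` of z0 into z10 followed by a destructive `decp` on
  // z10.)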
  __ Decp(z10.VnD(), p0, z0.VnD());
  __ Decp(z11.VnD(), p0, z1.VnD());
  __ Decp(z12.VnS(), p0, z2.VnS());
  __ Decp(z13.VnH(), p0, z3.VnH());

  __ Incp(z14.VnD(), p0, z0.VnD());
  __ Incp(z15.VnD(), p0, z1.VnD());
  __ Incp(z16.VnS(), p0, z2.VnS());
  __ Incp(z17.VnH(), p0, z3.VnH());

  // Also test destructive forms.
  __ Mov(z4, z0);
  __ Mov(z5, z1);
  __ Mov(z6, z2);
  __ Mov(z7, z3);

  __ Decp(z0.VnD(), p0);
  __ Decp(z1.VnD(), p0);
  __ Decp(z2.VnS(), p0);
  __ Decp(z3.VnH(), p0);

  __ Incp(z4.VnD(), p0);
  __ Incp(z5.VnD(), p0);
  __ Incp(z6.VnS(), p0);
  __ Incp(z7.VnH(), p0);

  END();
  if (CAN_RUN()) {
    RUN();

    // z0_inputs[...] - number of active D lanes (2)
    int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());

    // z1_inputs[...] - number of active D lanes (2)
    int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());

    // z2_inputs[...] - number of active S lanes (3)
    int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());

    // z3_inputs[...] - number of active H lanes (5)
    int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());

    // z0_inputs[...] + number of active D lanes (2)
    uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());

    // z1_inputs[...] + number of active D lanes (2)
    uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());

    // z2_inputs[...] + number of active S lanes (3)
    uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());

    // z3_inputs[...] + number of active H lanes (5)
    uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());

    // Check that the non-destructive macros produced the same results.
    ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
  }
}

TEST_SVE(sve_inc_dec_ptrue_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Dup(z0.VnD(), 0);
  __ Decp(z0.VnD(), p15);

  __ Dup(z1.VnS(), 0);
  __ Decp(z1.VnS(), p15);

  __ Dup(z2.VnH(), 0);
  __ Decp(z2.VnH(), p15);

  __ Dup(z3.VnD(), 0);
  __ Incp(z3.VnD(), p15);

  __ Dup(z4.VnS(), 0);
  __ Incp(z4.VnS(), p15);

  __ Dup(z5.VnH(), 0);
  __ Incp(z5.VnH(), p15);

  END();
  if (CAN_RUN()) {
    RUN();

    int d_lane_count = core.GetSVELaneCount(kDRegSize);
    int s_lane_count = core.GetSVELaneCount(kSRegSize);
    int h_lane_count = core.GetSVELaneCount(kHRegSize);

    for (int i = 0; i < d_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
    }

    for (int i = 0; i < s_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
      ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
    }

    for (int i = 0; i < h_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
      ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
    }
  }
}

TEST_SVE(sve_sqinc_sqdec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  // Check that saturation behaves correctly.

  int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
  InsrHelper(&masm, z0.VnD(), z0_inputs);

  int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
  InsrHelper(&masm, z1.VnD(), z1_inputs);

  int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
  InsrHelper(&masm, z2.VnS(), z2_inputs);

  int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
  InsrHelper(&masm, z3.VnH(), z3_inputs);

  // The MacroAssembler implements non-destructive operations using movprfx.
  __ Sqdecp(z10.VnD(), p0, z0.VnD());
  __ Sqdecp(z11.VnD(), p0, z1.VnD());
  __ Sqdecp(z12.VnS(), p0, z2.VnS());
  __ Sqdecp(z13.VnH(), p0, z3.VnH());

  __ Sqincp(z14.VnD(), p0, z0.VnD());
  __ Sqincp(z15.VnD(), p0, z1.VnD());
  __ Sqincp(z16.VnS(), p0, z2.VnS());
  __ Sqincp(z17.VnH(), p0, z3.VnH());

  // Also test destructive forms.
  __ Mov(z4, z0);
  __ Mov(z5, z1);
  __ Mov(z6, z2);
  __ Mov(z7, z3);

  __ Sqdecp(z0.VnD(), p0);
  __ Sqdecp(z1.VnD(), p0);
  __ Sqdecp(z2.VnS(), p0);
  __ Sqdecp(z3.VnH(), p0);

  __ Sqincp(z4.VnD(), p0);
  __ Sqincp(z5.VnD(), p0);
  __ Sqincp(z6.VnS(), p0);
  __ Sqincp(z7.VnH(), p0);

  END();
  if (CAN_RUN()) {
    RUN();

    // z0_inputs[...] - number of active D lanes (2)
    int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());

    // z1_inputs[...] - number of active D lanes (2)
    int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());

    // z2_inputs[...] - number of active S lanes (3)
    int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());

    // z3_inputs[...] - number of active H lanes (5)
    int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());

    // z0_inputs[...] + number of active D lanes (2)
    uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());

    // z1_inputs[...] + number of active D lanes (2)
    uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());

    // z2_inputs[...] + number of active S lanes (3)
    uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());

    // z3_inputs[...] + number of active H lanes (5)
    uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());

    // Check that the non-destructive macros produced the same results.
    ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
  }
}

Jacob Bramleye8289202019-07-31 11:25:23 +01001534TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
1535 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001536 START();
1537
1538 // With an all-true predicate, these instructions increment or decrement by
1539 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001540 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001541
1542 __ Dup(z0.VnD(), 0);
1543 __ Sqdecp(z0.VnD(), p15);
1544
1545 __ Dup(z1.VnS(), 0);
1546 __ Sqdecp(z1.VnS(), p15);
1547
1548 __ Dup(z2.VnH(), 0);
1549 __ Sqdecp(z2.VnH(), p15);
1550
1551 __ Dup(z3.VnD(), 0);
1552 __ Sqincp(z3.VnD(), p15);
1553
1554 __ Dup(z4.VnS(), 0);
1555 __ Sqincp(z4.VnS(), p15);
1556
1557 __ Dup(z5.VnH(), 0);
1558 __ Sqincp(z5.VnH(), p15);
1559
1560 END();
1561 if (CAN_RUN()) {
1562 RUN();
1563
1564 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1565 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1566 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1567
1568 for (int i = 0; i < d_lane_count; i++) {
1569 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1570 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1571 }
1572
1573 for (int i = 0; i < s_lane_count; i++) {
1574 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1575 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1576 }
1577
1578 for (int i = 0; i < h_lane_count; i++) {
1579 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1580 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1581 }
1582 }
1583}
1584
Jacob Bramleye8289202019-07-31 11:25:23 +01001585TEST_SVE(sve_uqinc_uqdec_p_vector) {
1586 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001587 START();
1588
1589 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1590 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1591 Initialise(&masm, p0.VnB(), p0_inputs);
1592
1593 // Check that saturation behaves correctly.
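// Uqdecp and Uqincp saturate to the unsigned range of the lane, so 1 - 3
// saturates to 0 and UINT64_MAX + 2 saturates to UINT64_MAX.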
1594
1595 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
1596 InsrHelper(&masm, z0.VnD(), z0_inputs);
1597
1598 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
1599 InsrHelper(&masm, z1.VnD(), z1_inputs);
1600
1601 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
1602 InsrHelper(&masm, z2.VnS(), z2_inputs);
1603
1604 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
1605 InsrHelper(&masm, z3.VnH(), z3_inputs);
1606
1607 // The MacroAssembler implements non-destructive operations using movprfx.
1608 __ Uqdecp(z10.VnD(), p0, z0.VnD());
1609 __ Uqdecp(z11.VnD(), p0, z1.VnD());
1610 __ Uqdecp(z12.VnS(), p0, z2.VnS());
1611 __ Uqdecp(z13.VnH(), p0, z3.VnH());
1612
1613 __ Uqincp(z14.VnD(), p0, z0.VnD());
1614 __ Uqincp(z15.VnD(), p0, z1.VnD());
1615 __ Uqincp(z16.VnS(), p0, z2.VnS());
1616 __ Uqincp(z17.VnH(), p0, z3.VnH());
1617
1618 // Also test destructive forms.
1619 __ Mov(z4, z0);
1620 __ Mov(z5, z1);
1621 __ Mov(z6, z2);
1622 __ Mov(z7, z3);
1623
1624 __ Uqdecp(z0.VnD(), p0);
1625 __ Uqdecp(z1.VnD(), p0);
1626 __ Uqdecp(z2.VnS(), p0);
1627 __ Uqdecp(z3.VnH(), p0);
1628
1629 __ Uqincp(z4.VnD(), p0);
1630 __ Uqincp(z5.VnD(), p0);
1631 __ Uqincp(z6.VnS(), p0);
1632 __ Uqincp(z7.VnH(), p0);
1633
1634 END();
1635 if (CAN_RUN()) {
1636 RUN();
1637
1638 // z0_inputs[...] - number of active D lanes (2)
1639 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
1640 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1641
1642 // z1_inputs[...] - number of active D lanes (2)
1643 uint64_t z1_expected[] = {0x12345678ffffff28,
1644 0,
1645 0xfffffffffffffffd,
1646 0x7ffffffffffffffd};
1647 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1648
1649 // z2_inputs[...] - number of active S lanes (3)
1650 uint32_t z2_expected[] =
1651 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
1652 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1653
1654 // z3_inputs[...] - number of active H lanes (5)
1655 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
1656 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1657
1658 // z0_inputs[...] + number of active D lanes (2)
1659 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1660 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1661
1662 // z1_inputs[...] + number of active D lanes (2)
1663 uint64_t z5_expected[] = {0x12345678ffffff2c,
1664 2,
1665 UINT64_MAX,
1666 0x8000000000000001};
1667 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1668
1669 // z2_inputs[...] + number of active S lanes (3)
1670 uint32_t z6_expected[] =
1671 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
1672 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1673
1674 // z3_inputs[...] + number of active H lanes (5)
1675 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
1676 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1677
1678 // Check that the non-destructive macros produced the same results.
1679 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1680 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1681 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1682 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1683 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1684 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1685 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1686 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1687 }
1688}
1689
Jacob Bramleye8289202019-07-31 11:25:23 +01001690TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
1691 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001692 START();
1693
1694 // With an all-true predicate, these instructions increment or decrement by
1695 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001696 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001697
1698 __ Mov(x0, 0x1234567800000000);
1699 __ Mov(x1, 0x12340000);
1700 __ Mov(x2, 0x1200);
1701
1702 __ Dup(z0.VnD(), x0);
1703 __ Uqdecp(z0.VnD(), p15);
1704
1705 __ Dup(z1.VnS(), x1);
1706 __ Uqdecp(z1.VnS(), p15);
1707
1708 __ Dup(z2.VnH(), x2);
1709 __ Uqdecp(z2.VnH(), p15);
1710
1711 __ Dup(z3.VnD(), x0);
1712 __ Uqincp(z3.VnD(), p15);
1713
1714 __ Dup(z4.VnS(), x1);
1715 __ Uqincp(z4.VnS(), p15);
1716
1717 __ Dup(z5.VnH(), x2);
1718 __ Uqincp(z5.VnH(), p15);
1719
1720 END();
1721 if (CAN_RUN()) {
1722 RUN();
1723
1724 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1725 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1726 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1727
1728 for (int i = 0; i < d_lane_count; i++) {
1729 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
1730 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
1731 }
1732
1733 for (int i = 0; i < s_lane_count; i++) {
1734 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
1735 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
1736 }
1737
1738 for (int i = 0; i < h_lane_count; i++) {
1739 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
1740 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
1741 }
1742 }
1743}
1744
Jacob Bramleye8289202019-07-31 11:25:23 +01001745TEST_SVE(sve_index) {
1746 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleycd8148c2019-07-11 18:43:20 +01001747 START();
1748
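// Index(zd, start, step) sets zd[i] = start + (i * step) for each lane i,
// truncated to the lane width.
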
1749 // Simple cases.
1750 __ Index(z0.VnB(), 0, 1);
1751 __ Index(z1.VnH(), 1, 1);
1752 __ Index(z2.VnS(), 2, 1);
1753 __ Index(z3.VnD(), 3, 1);
1754
1755 // Synthesised immediates.
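// The immediate form of `index` only encodes 5-bit signed values (-16 to 15),
// so out-of-range operands such as 42 are synthesised by the MacroAssembler.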
1756 __ Index(z4.VnB(), 42, -1);
1757 __ Index(z5.VnH(), -1, 42);
1758 __ Index(z6.VnS(), 42, 42);
1759
1760 // Register arguments.
1761 __ Mov(x0, 42);
1762 __ Mov(x1, -3);
1763 __ Index(z10.VnD(), x0, x1);
1764 __ Index(z11.VnB(), w0, w1);
1765 // The register size should correspond to the lane size, but VIXL allows any
1766 // register at least as big as the lane size.
1767 __ Index(z12.VnB(), x0, x1);
1768 __ Index(z13.VnH(), w0, x1);
1769 __ Index(z14.VnS(), x0, w1);
1770
1771 // Integer overflow.
1772 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
1773 __ Index(z21.VnH(), 7, -3);
1774 __ Index(z22.VnS(), INT32_MAX - 2, 1);
1775 __ Index(z23.VnD(), INT64_MIN + 6, -7);
1776
1777 END();
1778
1779 if (CAN_RUN()) {
1780 RUN();
1781
1782 int b_lane_count = core.GetSVELaneCount(kBRegSize);
1783 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1784 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1785 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1786
1787 uint64_t b_mask = GetUintMask(kBRegSize);
1788 uint64_t h_mask = GetUintMask(kHRegSize);
1789 uint64_t s_mask = GetUintMask(kSRegSize);
1790 uint64_t d_mask = GetUintMask(kDRegSize);
1791
1792 // Simple cases.
1793 for (int i = 0; i < b_lane_count; i++) {
1794 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
1795 }
1796 for (int i = 0; i < h_lane_count; i++) {
1797 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
1798 }
1799 for (int i = 0; i < s_lane_count; i++) {
1800 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
1801 }
1802 for (int i = 0; i < d_lane_count; i++) {
1803 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
1804 }
1805
1806 // Synthesised immediates.
1807 for (int i = 0; i < b_lane_count; i++) {
1808 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
1809 }
1810 for (int i = 0; i < h_lane_count; i++) {
1811 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
1812 }
1813 for (int i = 0; i < s_lane_count; i++) {
1814 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
1815 }
1816
1817 // Register arguments.
1818 for (int i = 0; i < d_lane_count; i++) {
1819 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
1820 }
1821 for (int i = 0; i < b_lane_count; i++) {
1822 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
1823 }
1824 for (int i = 0; i < b_lane_count; i++) {
1825 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
1826 }
1827 for (int i = 0; i < h_lane_count; i++) {
1828 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
1829 }
1830 for (int i = 0; i < s_lane_count; i++) {
1831 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
1832 }
1833
1834 // Integer overflow.
1835 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
1836 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
1837 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
1838 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
1839 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
1840 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
1841 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
1842 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
1843 }
1844}
1845
TatWai Chongc844bb22019-06-10 15:32:53 -07001846TEST(sve_int_compare_count_and_limit_scalars) {
1847 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1848 START();
1849
1850 __ Mov(w20, 0xfffffffd);
1851 __ Mov(w21, 0xffffffff);
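// Interpreted as signed values, w20 = -3 and w21 = -1. Whilele(pd, wn, wm)
// activates lane i while (wn + i) <= wm, so the first three lanes of p0 and p1
// are active.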
1852
1853 __ Whilele(p0.VnB(), w20, w21);
1854 __ Mrs(x0, NZCV);
1855 __ Whilele(p1.VnH(), w20, w21);
1856 __ Mrs(x1, NZCV);
1857
1858 __ Mov(w20, 0xffffffff);
1859 __ Mov(w21, 0x00000000);
1860
1861 __ Whilelt(p2.VnS(), w20, w21);
1862 __ Mrs(x2, NZCV);
1863 __ Whilelt(p3.VnD(), w20, w21);
1864 __ Mrs(x3, NZCV);
1865
1866 __ Mov(w20, 0xfffffffd);
1867 __ Mov(w21, 0xffffffff);
1868
1869 __ Whilels(p4.VnB(), w20, w21);
1870 __ Mrs(x4, NZCV);
1871 __ Whilels(p5.VnH(), w20, w21);
1872 __ Mrs(x5, NZCV);
1873
1874 __ Mov(w20, 0xffffffff);
1875 __ Mov(w21, 0x00000000);
1876
1877 __ Whilelo(p6.VnS(), w20, w21);
1878 __ Mrs(x6, NZCV);
1879 __ Whilelo(p7.VnD(), w20, w21);
1880 __ Mrs(x7, NZCV);
1881
1882 __ Mov(x20, 0xfffffffffffffffd);
1883 __ Mov(x21, 0xffffffffffffffff);
1884
1885 __ Whilele(p8.VnB(), x20, x21);
1886 __ Mrs(x8, NZCV);
1887 __ Whilele(p9.VnH(), x20, x21);
1888 __ Mrs(x9, NZCV);
1889
1890 __ Mov(x20, 0xffffffffffffffff);
1891 __ Mov(x21, 0x0000000000000000);
1892
1893 __ Whilelt(p10.VnS(), x20, x21);
1894 __ Mrs(x10, NZCV);
1895 __ Whilelt(p11.VnD(), x20, x21);
1896 __ Mrs(x11, NZCV);
1897
1898 __ Mov(x20, 0xfffffffffffffffd);
1899 __ Mov(x21, 0xffffffffffffffff);
1900
1901 __ Whilels(p12.VnB(), x20, x21);
1902 __ Mrs(x12, NZCV);
1903 __ Whilels(p13.VnH(), x20, x21);
1904 __ Mrs(x13, NZCV);
1905
1906 __ Mov(x20, 0xffffffffffffffff);
1907 __ Mov(x21, 0x0000000000000000);
1908
1909 __ Whilelo(p14.VnS(), x20, x21);
1910 __ Mrs(x14, NZCV);
1911 __ Whilelo(p15.VnD(), x20, x21);
1912 __ Mrs(x15, NZCV);
1913
1914 END();
1915
1916 if (CAN_RUN()) {
1917 RUN();
1918
1919 // 0b...00000000'00000111
1920 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1921 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1922
1923 // 0b...00000000'00010101
1924 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1925 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
1926
1927 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
1928 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
1929
1930 int p3_expected[] = {0x00, 0x01};
1931 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
1932
1933 // 0b...11111111'11111111
1934 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1935 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1936
1937 // 0b...01010101'01010101
1938 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1939 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1940
1941 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
1942 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1943
1944 int p7_expected[] = {0x00, 0x00};
1945 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1946
1947 // 0b...00000000'00000111
1948 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1949 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1950
1951 // 0b...00000000'00010101
1952 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1953 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1954
1955 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
1956 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1957
1958 int p11_expected[] = {0x00, 0x01};
1959 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
1960
1961 // 0b...11111111'11111111
1962 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1963 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
1964
1965 // 0b...01010101'01010101
1966 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1967 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
1968
1969 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
1970 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
1971
1972 int p15_expected[] = {0x00, 0x00};
1973 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
1974
1975 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
1976 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
1977 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
1978 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
1979 ASSERT_EQUAL_32(SVEFirstFlag, w4);
1980 ASSERT_EQUAL_32(SVEFirstFlag, w5);
1981 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
1982 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
1983 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
1984 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
1985 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1986 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
1987 ASSERT_EQUAL_32(SVEFirstFlag, w12);
1988 ASSERT_EQUAL_32(SVEFirstFlag, w13);
1989 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
1990 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
1991 }
1992}
1993
TatWai Chong302729c2019-06-14 16:18:51 -07001994TEST(sve_int_compare_vectors_signed_imm) {
1995 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1996 START();
1997
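// Each compare sets pd[i] = pg[i] && (zn[i] <op> #imm), and sets NZCV as a
// predicate test of the result against the governing predicate.
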
1998 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
1999 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2000 InsrHelper(&masm, z13.VnB(), z13_inputs);
2001 Initialise(&masm, p0.VnB(), mask_inputs1);
2002
2003 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2004 __ Mrs(x2, NZCV);
2005 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2006
2007 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2008 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2009 InsrHelper(&masm, z14.VnH(), z14_inputs);
2010 Initialise(&masm, p0.VnH(), mask_inputs2);
2011
2012 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2013 __ Mrs(x4, NZCV);
2014 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2015
2016 int z15_inputs[] = {0, 1, -1, INT_MIN};
2017 int mask_inputs3[] = {0, 1, 1, 1};
2018 InsrHelper(&masm, z15.VnS(), z15_inputs);
2019 Initialise(&masm, p0.VnS(), mask_inputs3);
2020
2021 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2022 __ Mrs(x6, NZCV);
2023 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2024
2025 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2026 __ Mrs(x8, NZCV);
2027 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2028
2029 int64_t z16_inputs[] = {0, -1};
2030 int mask_inputs4[] = {1, 1};
2031 InsrHelper(&masm, z16.VnD(), z16_inputs);
2032 Initialise(&masm, p0.VnD(), mask_inputs4);
2033
2034 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2035 __ Mrs(x10, NZCV);
2036 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2037
2038 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2039 __ Mrs(x12, NZCV);
2040 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2041
2042 END();
2043
2044 if (CAN_RUN()) {
2045 RUN();
2046
2047 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2048 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2049
2050 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2051 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2052
2053 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2054 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2055
2056 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2057 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2058
2059 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2060 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2061
2062 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2063 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2064
2065 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2066 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2067
2068 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2069 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2070
2071 int p10_expected[] = {0x00, 0x01};
2072 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2073
2074 int p11_expected[] = {0x00, 0x00};
2075 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2076
2077 int p12_expected[] = {0x01, 0x00};
2078 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2079
2080 int p13_expected[] = {0x01, 0x01};
2081 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2082
2083 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2084 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2085 ASSERT_EQUAL_32(NoFlag, w6);
2086 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2087 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2088 ASSERT_EQUAL_32(NoFlag, w12);
2089 }
2090}
2091
2092TEST(sve_int_compare_vectors_unsigned_imm) {
2093 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2094 START();
2095
2096 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2097 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2098 InsrHelper(&masm, z13.VnB(), src1_inputs);
2099 Initialise(&masm, p0.VnB(), mask_inputs1);
2100
2101 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2102 __ Mrs(x2, NZCV);
2103 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2104
2105 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2106 int mask_inputs2[] = {1, 1, 1, 1, 0};
2107 InsrHelper(&masm, z13.VnH(), src2_inputs);
2108 Initialise(&masm, p0.VnH(), mask_inputs2);
2109
2110 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2111 __ Mrs(x4, NZCV);
2112 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2113
2114 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2115 int mask_inputs3[] = {1, 1, 1, 1};
2116 InsrHelper(&masm, z13.VnS(), src3_inputs);
2117 Initialise(&masm, p0.VnS(), mask_inputs3);
2118
2119 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2120 __ Mrs(x6, NZCV);
2121 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2122
2123 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2124 int mask_inputs4[] = {1, 1};
2125 InsrHelper(&masm, z13.VnD(), src4_inputs);
2126 Initialise(&masm, p0.VnD(), mask_inputs4);
2127
2128 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2129 __ Mrs(x8, NZCV);
2130 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2131
2132 END();
2133
2134 if (CAN_RUN()) {
2135 RUN();
2136
2137 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2138 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2139
2140 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2141 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2142
2143 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2144 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2145
2146 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2147 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2148
2149 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2150 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2151
2152 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2153 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2154
2155 int p8_expected[] = {0x00, 0x01};
2156 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2157
2158 int p9_expected[] = {0x00, 0x01};
2159 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2160
2161 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2162 ASSERT_EQUAL_32(NoFlag, w4);
2163 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2164 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2165 }
2166}
2167
TatWai Chongc844bb22019-06-10 15:32:53 -07002168TEST(sve_int_compare_conditionally_terminate_scalars) {
2169 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2170 START();
2171
2172 __ Mov(x0, 0xfedcba9887654321);
2173 __ Mov(x1, 0x1000100010001000);
2174
2175 __ Ctermeq(w0, w0);
2176 __ Mrs(x2, NZCV);
2177 __ Ctermeq(x0, x1);
2178 __ Mrs(x3, NZCV);
2179 __ Ctermne(x0, x0);
2180 __ Mrs(x4, NZCV);
2181 __ Ctermne(w0, w1);
2182 __ Mrs(x5, NZCV);
2183
2184 END();
2185
2186 if (CAN_RUN()) {
2187 RUN();
2188
2189 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2190 ASSERT_EQUAL_32(VFlag, w3);
2191 ASSERT_EQUAL_32(VFlag, w4);
2192 ASSERT_EQUAL_32(SVEFirstFlag, w5);
2193 }
2194}
2195
Jacob Bramley0ce75842019-07-17 18:12:50 +01002196// Work out what the architectural `PredTest` pseudocode should produce for the
2197// given result and governing predicate.
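// SVEFirstFlag is set if the first active lane of the result is true,
// SVENoneFlag if no active lane of the result is true, and SVENotLastFlag if
// the last active lane of the result is false.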
2198template <typename Tg, typename Td, int N>
2199static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2200 const Tg (&pg)[N],
2201 int vl) {
2202 int first = -1;
2203 int last = -1;
2204 bool any_active = false;
2205
2206 // Only consider potentially-active lanes.
2207 int start = (N > vl) ? (N - vl) : 0;
2208 for (int i = start; i < N; i++) {
2209 if ((pg[i] & 1) == 1) {
2210 // Look for the first and last active lanes.
2211 // Note that the 'first' lane is the one with the highest index.
2212 if (last < 0) last = i;
2213 first = i;
2214 // Look for any active lanes that are also active in pd.
2215 if ((pd[i] & 1) == 1) any_active = true;
2216 }
2217 }
2218
2219 uint32_t flags = 0;
2220 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2221 if (!any_active) flags |= SVENoneFlag;
2222 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2223 return static_cast<StatusFlags>(flags);
2224}
2225
2226typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2227 const PRegister& pg,
2228 const PRegisterWithLaneSize& pn);
2229template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002230static void PfirstPnextHelper(Test* config,
2231 PfirstPnextFn macro,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002232 unsigned lane_size_in_bits,
2233 const Tg& pg_inputs,
2234 const Tn& pn_inputs,
2235 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002236 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002237 START();
2238
2239 PRegister pg = p15;
2240 PRegister pn = p14;
2241 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2242 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2243
2244 // Initialise NZCV to an impossible value, to check that we actually write it.
2245 __ Mov(x10, NZCVFlag);
2246
2247 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2248 // the Assembler.
2249 __ Msr(NZCV, x10);
2250 __ Mov(p0, pn);
2251 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2252 pg,
2253 p0.WithLaneSize(lane_size_in_bits));
2254 __ Mrs(x0, NZCV);
2255
2256 // The MacroAssembler supports non-destructive use.
2257 __ Msr(NZCV, x10);
2258 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2259 pg,
2260 pn.WithLaneSize(lane_size_in_bits));
2261 __ Mrs(x1, NZCV);
2262
2263 // If pd.Aliases(pg), the macro requires a scratch register.
2264 {
2265 UseScratchRegisterScope temps(&masm);
2266 temps.Include(p13);
2267 __ Msr(NZCV, x10);
2268 __ Mov(p2, p15);
2269 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2270 p2,
2271 pn.WithLaneSize(lane_size_in_bits));
2272 __ Mrs(x2, NZCV);
2273 }
2274
2275 END();
2276
2277 if (CAN_RUN()) {
2278 RUN();
2279
2280 // Check that the inputs weren't modified.
2281 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2282 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2283
2284 // Check the primary operation.
2285 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2286 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2287 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2288
2289 // Check that the flags were properly set.
2290 StatusFlags nzcv_expected =
2291 GetPredTestFlags(pd_expected,
2292 pg_inputs,
2293 core.GetSVELaneCount(kBRegSize));
2294 ASSERT_EQUAL_64(nzcv_expected, x0);
2295 ASSERT_EQUAL_64(nzcv_expected, x1);
2296 ASSERT_EQUAL_64(nzcv_expected, x2);
2297 }
2298}
2299
2300template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002301static void PfirstHelper(Test* config,
2302 const Tg& pg_inputs,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002303 const Tn& pn_inputs,
2304 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002305 PfirstPnextHelper(config,
2306 &MacroAssembler::Pfirst,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002307 kBRegSize, // pfirst only accepts B-sized lanes.
2308 pg_inputs,
2309 pn_inputs,
2310 pd_expected);
2311}
2312
2313template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002314static void PnextHelper(Test* config,
2315 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002316 const Tg& pg_inputs,
2317 const Tn& pn_inputs,
2318 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002319 PfirstPnextHelper(config,
2320 &MacroAssembler::Pnext,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002321 lane_size_in_bits,
2322 pg_inputs,
2323 pn_inputs,
2324 pd_expected);
2325}
2326
Jacob Bramleye8289202019-07-31 11:25:23 +01002327TEST_SVE(sve_pfirst) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002328 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2329 // large VL), but few enough to make the test easy to read.
2330 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2331 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2332 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2333 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2334 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2335 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2336
2337 // Pfirst finds the first active lane in pg, and activates the corresponding
2338 // lane in pn (if it isn't already active).
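// The flags are set as for a predicate test of the result against pg, so
// SVEFirstFlag is expected whenever pg has at least one active lane.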
2339
2340 // The first active lane in in1 is here. |
2341 // v
2342 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2343 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2344 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2345 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002346 PfirstHelper(config, in1, in0, exp10);
2347 PfirstHelper(config, in1, in2, exp12);
2348 PfirstHelper(config, in1, in3, exp13);
2349 PfirstHelper(config, in1, in4, exp14);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002350
2351 // The first active lane in in2 is here. |
2352 // v
2353 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2354 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2355 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2356 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002357 PfirstHelper(config, in2, in0, exp20);
2358 PfirstHelper(config, in2, in1, exp21);
2359 PfirstHelper(config, in2, in3, exp23);
2360 PfirstHelper(config, in2, in4, exp24);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002361
2362 // The first active lane in in3 is here. |
2363 // v
2364 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2365 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2366 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2367 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002368 PfirstHelper(config, in3, in0, exp30);
2369 PfirstHelper(config, in3, in1, exp31);
2370 PfirstHelper(config, in3, in2, exp32);
2371 PfirstHelper(config, in3, in4, exp34);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002372
2373 // | The first active lane in in4 is here.
2374 // v
2375 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2376 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2377 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2378 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002379 PfirstHelper(config, in4, in0, exp40);
2380 PfirstHelper(config, in4, in1, exp41);
2381 PfirstHelper(config, in4, in2, exp42);
2382 PfirstHelper(config, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002383
2384 // If pg is all inactive, the input is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002385 PfirstHelper(config, in0, in0, in0);
2386 PfirstHelper(config, in0, in1, in1);
2387 PfirstHelper(config, in0, in2, in2);
2388 PfirstHelper(config, in0, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002389
2390 // If the values of pg and pn match, the value is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002391 PfirstHelper(config, in0, in0, in0);
2392 PfirstHelper(config, in1, in1, in1);
2393 PfirstHelper(config, in2, in2, in2);
2394 PfirstHelper(config, in3, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002395}
2396
Jacob Bramleye8289202019-07-31 11:25:23 +01002397TEST_SVE(sve_pfirst_alias) {
2398 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002399 START();
2400
2401 // Check that the Simulator behaves correctly when all arguments are aliased.
2402 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2403 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2404 int in_s[] = {0, 1, 1, 0};
2405 int in_d[] = {1, 1};
2406
2407 Initialise(&masm, p0.VnB(), in_b);
2408 Initialise(&masm, p1.VnH(), in_h);
2409 Initialise(&masm, p2.VnS(), in_s);
2410 Initialise(&masm, p3.VnD(), in_d);
2411
2412 // Initialise NZCV to an impossible value, to check that we actually write it.
2413 __ Mov(x10, NZCVFlag);
2414
2415 __ Msr(NZCV, x10);
2416 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2417 __ Mrs(x0, NZCV);
2418
2419 __ Msr(NZCV, x10);
2420 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
2421 __ Mrs(x1, NZCV);
2422
2423 __ Msr(NZCV, x10);
2424 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
2425 __ Mrs(x2, NZCV);
2426
2427 __ Msr(NZCV, x10);
2428 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
2429 __ Mrs(x3, NZCV);
2430
2431 END();
2432
2433 if (CAN_RUN()) {
2434 RUN();
2435
2436 // The first lane from pg is already active in pdn, so the P register should
2437 // be unchanged.
2438 ASSERT_EQUAL_SVE(in_b, p0.VnB());
2439 ASSERT_EQUAL_SVE(in_h, p1.VnH());
2440 ASSERT_EQUAL_SVE(in_s, p2.VnS());
2441 ASSERT_EQUAL_SVE(in_d, p3.VnD());
2442
2443 ASSERT_EQUAL_64(SVEFirstFlag, x0);
2444 ASSERT_EQUAL_64(SVEFirstFlag, x1);
2445 ASSERT_EQUAL_64(SVEFirstFlag, x2);
2446 ASSERT_EQUAL_64(SVEFirstFlag, x3);
2447 }
2448}
2449
Jacob Bramleye8289202019-07-31 11:25:23 +01002450TEST_SVE(sve_pnext_b) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002451 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2452 // (to check propagation if we have a large VL), but few enough to make the
2453 // test easy to read.
2454 // For now, we just use kPRegMinSize so that the test works anywhere.
2455 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2456 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2457 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2458 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
2459 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2460
2461 // Pnext activates the next element that is true in pg, after the last-active
2462 // element in pn. If all pn elements are false (as in in0), it starts looking
2463 // at element 0.
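// The result therefore has at most one active lane, and the flags are set as
// for a predicate test of the result against pg.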
2464
2465 // There are no active lanes in in0, so the result is simply the first active
2466 // lane from pg.
2467 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2468 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2469 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2470 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2471 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2472
2473 // The last active lane in in1 is here. |
2474 // v
2475 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2476 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2477 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2478 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2479 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2480
2481 // | The last active lane in in2 is here.
2482 // v
2483 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2484 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2485 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2486 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2487 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2488
2489 // | The last active lane in in3 is here.
2490 // v
2491 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2492 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2493 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2494 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2495 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2496
2497 // | The last active lane in in4 is here.
2498 // v
2499 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2500 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2501 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2502 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2503 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2504
Jacob Bramleye8289202019-07-31 11:25:23 +01002505 PnextHelper(config, kBRegSize, in0, in0, exp00);
2506 PnextHelper(config, kBRegSize, in1, in0, exp10);
2507 PnextHelper(config, kBRegSize, in2, in0, exp20);
2508 PnextHelper(config, kBRegSize, in3, in0, exp30);
2509 PnextHelper(config, kBRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002510
Jacob Bramleye8289202019-07-31 11:25:23 +01002511 PnextHelper(config, kBRegSize, in0, in1, exp01);
2512 PnextHelper(config, kBRegSize, in1, in1, exp11);
2513 PnextHelper(config, kBRegSize, in2, in1, exp21);
2514 PnextHelper(config, kBRegSize, in3, in1, exp31);
2515 PnextHelper(config, kBRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002516
Jacob Bramleye8289202019-07-31 11:25:23 +01002517 PnextHelper(config, kBRegSize, in0, in2, exp02);
2518 PnextHelper(config, kBRegSize, in1, in2, exp12);
2519 PnextHelper(config, kBRegSize, in2, in2, exp22);
2520 PnextHelper(config, kBRegSize, in3, in2, exp32);
2521 PnextHelper(config, kBRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002522
Jacob Bramleye8289202019-07-31 11:25:23 +01002523 PnextHelper(config, kBRegSize, in0, in3, exp03);
2524 PnextHelper(config, kBRegSize, in1, in3, exp13);
2525 PnextHelper(config, kBRegSize, in2, in3, exp23);
2526 PnextHelper(config, kBRegSize, in3, in3, exp33);
2527 PnextHelper(config, kBRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002528
Jacob Bramleye8289202019-07-31 11:25:23 +01002529 PnextHelper(config, kBRegSize, in0, in4, exp04);
2530 PnextHelper(config, kBRegSize, in1, in4, exp14);
2531 PnextHelper(config, kBRegSize, in2, in4, exp24);
2532 PnextHelper(config, kBRegSize, in3, in4, exp34);
2533 PnextHelper(config, kBRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002534}
2535
Jacob Bramleye8289202019-07-31 11:25:23 +01002536TEST_SVE(sve_pnext_h) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002537 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2538 // (to check propagation if we have a large VL), but few enough to make the
2539 // test easy to read.
2540 // For now, we just use kPRegMinSize so that the test works anywhere.
2541 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
2542 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
2543 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
2544 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
2545 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
2546
2547 // Pnext activates the next element that is true in pg, after the last-active
2548 // element in pn. If all pn elements are false (as in in0), it starts looking
2549 // at element 0.
2550 //
2551 // As for other SVE instructions, elements are only considered to be active if
2552 // the _first_ bit in each field is one. Other bits are ignored.
2553
2554 // There are no active lanes in in0, so the result is simply the first active
2555 // lane from pg.
2556 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
2557 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
2558 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
2559 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
2560 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
2561
2562 // | The last active lane in in1 is here.
2563 // v
2564 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
2565 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
2566 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
2567 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
2568 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
2569
2570 // | The last active lane in in2 is here.
2571 // v
2572 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
2573 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
2574 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
2575 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
2576 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
2577
2578 // | The last active lane in in3 is here.
2579 // v
2580 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
2581 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
2582 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
2583 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
2584 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
2585
2586 // | The last active lane in in4 is here.
2587 // v
2588 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
2589 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
2590 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
2591 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
2592 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
2593
Jacob Bramleye8289202019-07-31 11:25:23 +01002594 PnextHelper(config, kHRegSize, in0, in0, exp00);
2595 PnextHelper(config, kHRegSize, in1, in0, exp10);
2596 PnextHelper(config, kHRegSize, in2, in0, exp20);
2597 PnextHelper(config, kHRegSize, in3, in0, exp30);
2598 PnextHelper(config, kHRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002599
Jacob Bramleye8289202019-07-31 11:25:23 +01002600 PnextHelper(config, kHRegSize, in0, in1, exp01);
2601 PnextHelper(config, kHRegSize, in1, in1, exp11);
2602 PnextHelper(config, kHRegSize, in2, in1, exp21);
2603 PnextHelper(config, kHRegSize, in3, in1, exp31);
2604 PnextHelper(config, kHRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002605
Jacob Bramleye8289202019-07-31 11:25:23 +01002606 PnextHelper(config, kHRegSize, in0, in2, exp02);
2607 PnextHelper(config, kHRegSize, in1, in2, exp12);
2608 PnextHelper(config, kHRegSize, in2, in2, exp22);
2609 PnextHelper(config, kHRegSize, in3, in2, exp32);
2610 PnextHelper(config, kHRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002611
Jacob Bramleye8289202019-07-31 11:25:23 +01002612 PnextHelper(config, kHRegSize, in0, in3, exp03);
2613 PnextHelper(config, kHRegSize, in1, in3, exp13);
2614 PnextHelper(config, kHRegSize, in2, in3, exp23);
2615 PnextHelper(config, kHRegSize, in3, in3, exp33);
2616 PnextHelper(config, kHRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002617
Jacob Bramleye8289202019-07-31 11:25:23 +01002618 PnextHelper(config, kHRegSize, in0, in4, exp04);
2619 PnextHelper(config, kHRegSize, in1, in4, exp14);
2620 PnextHelper(config, kHRegSize, in2, in4, exp24);
2621 PnextHelper(config, kHRegSize, in3, in4, exp34);
2622 PnextHelper(config, kHRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002623}
2624
Jacob Bramleye8289202019-07-31 11:25:23 +01002625TEST_SVE(sve_pnext_s) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002626 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2627 // (to check propagation if we have a large VL), but few enough to make the
2628 // test easy to read.
2629 // For now, we just use kPRegMinSize so that the test works anywhere.
2630 int in0[] = {0xe, 0xc, 0x8, 0x0};
2631 int in1[] = {0x0, 0x2, 0x0, 0x1};
2632 int in2[] = {0x0, 0x1, 0xf, 0x0};
2633 int in3[] = {0xf, 0x0, 0x0, 0x0};
2634
2635 // Pnext activates the next element that is true in pg, after the last-active
2636 // element in pn. If all pn elements are false (as in in0), it starts looking
2637 // at element 0.
2638 //
2639 // As for other SVE instructions, elements are only considered to be active if
2640 // the _first_ bit in each field is one. Other bits are ignored.
2641
2642 // There are no active lanes in in0, so the result is simply the first active
2643 // lane from pg.
2644 int exp00[] = {0, 0, 0, 0};
2645 int exp10[] = {0, 0, 0, 1};
2646 int exp20[] = {0, 0, 1, 0};
2647 int exp30[] = {1, 0, 0, 0};
2648
2649 // | The last active lane in in1 is here.
2650 // v
2651 int exp01[] = {0, 0, 0, 0};
2652 int exp11[] = {0, 0, 0, 0};
2653 int exp21[] = {0, 0, 1, 0};
2654 int exp31[] = {1, 0, 0, 0};
2655
2656 // | The last active lane in in2 is here.
2657 // v
2658 int exp02[] = {0, 0, 0, 0};
2659 int exp12[] = {0, 0, 0, 0};
2660 int exp22[] = {0, 0, 0, 0};
2661 int exp32[] = {1, 0, 0, 0};
2662
2663 // | The last active lane in in3 is here.
2664 // v
2665 int exp03[] = {0, 0, 0, 0};
2666 int exp13[] = {0, 0, 0, 0};
2667 int exp23[] = {0, 0, 0, 0};
2668 int exp33[] = {0, 0, 0, 0};
2669
Jacob Bramleye8289202019-07-31 11:25:23 +01002670 PnextHelper(config, kSRegSize, in0, in0, exp00);
2671 PnextHelper(config, kSRegSize, in1, in0, exp10);
2672 PnextHelper(config, kSRegSize, in2, in0, exp20);
2673 PnextHelper(config, kSRegSize, in3, in0, exp30);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002674
Jacob Bramleye8289202019-07-31 11:25:23 +01002675 PnextHelper(config, kSRegSize, in0, in1, exp01);
2676 PnextHelper(config, kSRegSize, in1, in1, exp11);
2677 PnextHelper(config, kSRegSize, in2, in1, exp21);
2678 PnextHelper(config, kSRegSize, in3, in1, exp31);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002679
Jacob Bramleye8289202019-07-31 11:25:23 +01002680 PnextHelper(config, kSRegSize, in0, in2, exp02);
2681 PnextHelper(config, kSRegSize, in1, in2, exp12);
2682 PnextHelper(config, kSRegSize, in2, in2, exp22);
2683 PnextHelper(config, kSRegSize, in3, in2, exp32);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002684
Jacob Bramleye8289202019-07-31 11:25:23 +01002685 PnextHelper(config, kSRegSize, in0, in3, exp03);
2686 PnextHelper(config, kSRegSize, in1, in3, exp13);
2687 PnextHelper(config, kSRegSize, in2, in3, exp23);
2688 PnextHelper(config, kSRegSize, in3, in3, exp33);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002689}
2690
Jacob Bramleye8289202019-07-31 11:25:23 +01002691TEST_SVE(sve_pnext_d) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002692 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2693 // (to check propagation if we have a large VL), but few enough to make the
2694 // test easy to read.
2695 // For now, we just use kPRegMinSize so that the test works anywhere.
2696 int in0[] = {0xfe, 0xf0};
2697 int in1[] = {0x00, 0x55};
2698 int in2[] = {0x33, 0xff};
2699
2700 // Pnext activates the next element that is true in pg, after the last-active
2701 // element in pn. If all pn elements are false (as in in0), it starts looking
2702 // at element 0.
2703 //
2704 // As for other SVE instructions, elements are only considered to be active if
2705 // the _first_ bit in each field is one. Other bits are ignored.
2706
2707 // There are no active lanes in in0, so the result is simply the first active
2708 // lane from pg.
2709 int exp00[] = {0, 0};
2710 int exp10[] = {0, 1};
2711 int exp20[] = {0, 1};
2712
2713 // | The last active lane in in1 is here.
2714 // v
2715 int exp01[] = {0, 0};
2716 int exp11[] = {0, 0};
2717 int exp21[] = {1, 0};
2718
2719 // | The last active lane in in2 is here.
2720 // v
2721 int exp02[] = {0, 0};
2722 int exp12[] = {0, 0};
2723 int exp22[] = {0, 0};
2724
Jacob Bramleye8289202019-07-31 11:25:23 +01002725 PnextHelper(config, kDRegSize, in0, in0, exp00);
2726 PnextHelper(config, kDRegSize, in1, in0, exp10);
2727 PnextHelper(config, kDRegSize, in2, in0, exp20);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002728
Jacob Bramleye8289202019-07-31 11:25:23 +01002729 PnextHelper(config, kDRegSize, in0, in1, exp01);
2730 PnextHelper(config, kDRegSize, in1, in1, exp11);
2731 PnextHelper(config, kDRegSize, in2, in1, exp21);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002732
Jacob Bramleye8289202019-07-31 11:25:23 +01002733 PnextHelper(config, kDRegSize, in0, in2, exp02);
2734 PnextHelper(config, kDRegSize, in1, in2, exp12);
2735 PnextHelper(config, kDRegSize, in2, in2, exp22);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002736}
2737
Jacob Bramleye8289202019-07-31 11:25:23 +01002738TEST_SVE(sve_pnext_alias) {
2739 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002740 START();
2741
2742 // Check that the Simulator behaves correctly when all arguments are aliased.
2743 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2744 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2745 int in_s[] = {0, 1, 1, 0};
2746 int in_d[] = {1, 1};
2747
2748 Initialise(&masm, p0.VnB(), in_b);
2749 Initialise(&masm, p1.VnH(), in_h);
2750 Initialise(&masm, p2.VnS(), in_s);
2751 Initialise(&masm, p3.VnD(), in_d);
2752
2753 // Initialise NZCV to an impossible value, to check that we actually write it.
2754 __ Mov(x10, NZCVFlag);
2755
2756 __ Msr(NZCV, x10);
2757 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
2758 __ Mrs(x0, NZCV);
2759
2760 __ Msr(NZCV, x10);
2761 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
2762 __ Mrs(x1, NZCV);
2763
2764 __ Msr(NZCV, x10);
2765 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
2766 __ Mrs(x2, NZCV);
2767
2768 __ Msr(NZCV, x10);
2769 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
2770 __ Mrs(x3, NZCV);
2771
2772 END();
2773
2774 if (CAN_RUN()) {
2775 RUN();
2776
2777 // Since pg.Is(pdn), there can be no active lanes in pg above the last
2778 // active lane in pdn, so the result should always be zero.
2779 ASSERT_EQUAL_SVE(0, p0.VnB());
2780 ASSERT_EQUAL_SVE(0, p1.VnH());
2781 ASSERT_EQUAL_SVE(0, p2.VnS());
2782 ASSERT_EQUAL_SVE(0, p3.VnD());
2783
2784 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
2785 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
2786 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
2787 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
2788 }
2789}
2790
Jacob Bramleye8289202019-07-31 11:25:23 +01002791static void PtrueHelper(Test* config,
2792 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002793 FlagsUpdate s = LeaveFlags) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002794 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002795 START();
2796
2797 PRegisterWithLaneSize p[kNumberOfPRegisters];
2798 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
2799 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
2800 }
2801
2802 // Initialise NZCV to an impossible value, to check that we actually write it.
2803 StatusFlags nzcv_unmodified = NZCVFlag;
2804 __ Mov(x20, nzcv_unmodified);
2805
2806 // We don't have enough registers to conveniently test every pattern, so take
2807 // samples from each group.
2808 __ Msr(NZCV, x20);
2809 __ Ptrue(p[0], SVE_POW2, s);
2810 __ Mrs(x0, NZCV);
2811
2812 __ Msr(NZCV, x20);
2813 __ Ptrue(p[1], SVE_VL1, s);
2814 __ Mrs(x1, NZCV);
2815
2816 __ Msr(NZCV, x20);
2817 __ Ptrue(p[2], SVE_VL2, s);
2818 __ Mrs(x2, NZCV);
2819
2820 __ Msr(NZCV, x20);
2821 __ Ptrue(p[3], SVE_VL5, s);
2822 __ Mrs(x3, NZCV);
2823
2824 __ Msr(NZCV, x20);
2825 __ Ptrue(p[4], SVE_VL6, s);
2826 __ Mrs(x4, NZCV);
2827
2828 __ Msr(NZCV, x20);
2829 __ Ptrue(p[5], SVE_VL8, s);
2830 __ Mrs(x5, NZCV);
2831
2832 __ Msr(NZCV, x20);
2833 __ Ptrue(p[6], SVE_VL16, s);
2834 __ Mrs(x6, NZCV);
2835
2836 __ Msr(NZCV, x20);
2837 __ Ptrue(p[7], SVE_VL64, s);
2838 __ Mrs(x7, NZCV);
2839
2840 __ Msr(NZCV, x20);
2841 __ Ptrue(p[8], SVE_VL256, s);
2842 __ Mrs(x8, NZCV);
2843
2844 {
2845 // We have to use the Assembler to encode values not defined by
2846 // SVEPredicateConstraint, so call `ptrue`/`ptrues` directly.
2847 typedef void (
2848 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
2849 int pattern);
2850 AssemblePtrueFn assemble =
2851 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
2852
2853 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
2854 __ msr(NZCV, x20);
2855 (masm.*assemble)(p[9], 0xe);
2856 __ mrs(x9, NZCV);
2857
2858 __ msr(NZCV, x20);
2859 (masm.*assemble)(p[10], 0x16);
2860 __ mrs(x10, NZCV);
2861
2862 __ msr(NZCV, x20);
2863 (masm.*assemble)(p[11], 0x1a);
2864 __ mrs(x11, NZCV);
2865
2866 __ msr(NZCV, x20);
2867 (masm.*assemble)(p[12], 0x1c);
2868 __ mrs(x12, NZCV);
2869 }
2870
2871 __ Msr(NZCV, x20);
2872 __ Ptrue(p[13], SVE_MUL4, s);
2873 __ Mrs(x13, NZCV);
2874
2875 __ Msr(NZCV, x20);
2876 __ Ptrue(p[14], SVE_MUL3, s);
2877 __ Mrs(x14, NZCV);
2878
2879 __ Msr(NZCV, x20);
2880 __ Ptrue(p[15], SVE_ALL, s);
2881 __ Mrs(x15, NZCV);
2882
2883 END();
2884
2885 if (CAN_RUN()) {
2886 RUN();
2887
2888 int all = core.GetSVELaneCount(lane_size_in_bits);
2889 int pow2 = 1 << HighestSetBitPosition(all);
2890 int mul4 = all - (all % 4);
2891 int mul3 = all - (all % 3);
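// PTRUE patterns select: POW2 - the largest power of two that fits; VLn -
// exactly n lanes, or none if n doesn't fit; MULn - the largest multiple of n;
// ALL - every lane. Unallocated pattern encodings (the raw #uimm5 values used
// above) select no lanes.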
2892
2893 // Check P register results.
2894 for (int i = 0; i < all; i++) {
2895 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
2896 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
2897 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
2898 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
2899 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
2900 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
2901 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
2902 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
2903 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
2904 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
2905 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
2906 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
2907 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
2908 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
2909 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
2910 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
2911 }
2912
2913 // Check NZCV results.
2914 if (s == LeaveFlags) {
2915 // No flags should have been updated.
2916 for (int i = 0; i <= 15; i++) {
2917 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
2918 }
2919 } else {
2920 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
2921 StatusFlags nonzero = SVEFirstFlag;
2922
2923 // POW2
2924 ASSERT_EQUAL_64(nonzero, x0);
2925 // VL*
2926 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
2927 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
2928 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
2929 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
2930 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
2931 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
2932 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
2933 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
2934 // #uimm5
2935 ASSERT_EQUAL_64(zero, x9);
2936 ASSERT_EQUAL_64(zero, x10);
2937 ASSERT_EQUAL_64(zero, x11);
2938 ASSERT_EQUAL_64(zero, x12);
2939 // MUL*
2940 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
2941 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
2942 // ALL
2943 ASSERT_EQUAL_64(nonzero, x15);
2944 }
2945 }
2946}
2947
Jacob Bramleye8289202019-07-31 11:25:23 +01002948TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
2949TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
2950TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
2951TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002952
Jacob Bramleye8289202019-07-31 11:25:23 +01002953TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
2954TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
2955TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
2956TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002957
Jacob Bramleye8289202019-07-31 11:25:23 +01002958TEST_SVE(sve_pfalse) {
2959 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002960 START();
2961
2962 // Initialise non-zero inputs.
2963 __ Ptrue(p0.VnB());
2964 __ Ptrue(p1.VnH());
2965 __ Ptrue(p2.VnS());
2966 __ Ptrue(p3.VnD());
2967
2968 // The instruction only supports B-sized lanes, but the lane size has no
2969 // logical effect, so the MacroAssembler accepts anything.
2970 __ Pfalse(p0.VnB());
2971 __ Pfalse(p1.VnH());
2972 __ Pfalse(p2.VnS());
2973 __ Pfalse(p3.VnD());
2974
2975 END();
2976
2977 if (CAN_RUN()) {
2978 RUN();
2979
2980 ASSERT_EQUAL_SVE(0, p0.VnB());
2981 ASSERT_EQUAL_SVE(0, p1.VnB());
2982 ASSERT_EQUAL_SVE(0, p2.VnB());
2983 ASSERT_EQUAL_SVE(0, p3.VnB());
2984 }
2985}
2986
Jacob Bramleye8289202019-07-31 11:25:23 +01002987TEST_SVE(sve_ptest) {
2988 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002989 START();
2990
2991 // Initialise NZCV to a known (impossible) value.
2992 StatusFlags nzcv_unmodified = NZCVFlag;
2993 __ Mov(x0, nzcv_unmodified);
2994 __ Msr(NZCV, x0);
2995
2996 // Construct some test inputs.
2997 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
2998 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
2999 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3000 __ Pfalse(p0.VnB());
3001 __ Ptrue(p1.VnB());
3002 Initialise(&masm, p2.VnB(), in2);
3003 Initialise(&masm, p3.VnB(), in3);
3004 Initialise(&masm, p4.VnB(), in4);
3005
3006 // All-inactive pg.
3007 __ Ptest(p0, p0.VnB());
3008 __ Mrs(x0, NZCV);
3009 __ Ptest(p0, p1.VnB());
3010 __ Mrs(x1, NZCV);
3011 __ Ptest(p0, p2.VnB());
3012 __ Mrs(x2, NZCV);
3013 __ Ptest(p0, p3.VnB());
3014 __ Mrs(x3, NZCV);
3015 __ Ptest(p0, p4.VnB());
3016 __ Mrs(x4, NZCV);
3017
3018 // All-active pg.
3019 __ Ptest(p1, p0.VnB());
3020 __ Mrs(x5, NZCV);
3021 __ Ptest(p1, p1.VnB());
3022 __ Mrs(x6, NZCV);
3023 __ Ptest(p1, p2.VnB());
3024 __ Mrs(x7, NZCV);
3025 __ Ptest(p1, p3.VnB());
3026 __ Mrs(x8, NZCV);
3027 __ Ptest(p1, p4.VnB());
3028 __ Mrs(x9, NZCV);
3029
3030 // Combinations of other inputs.
3031 __ Ptest(p2, p2.VnB());
3032 __ Mrs(x20, NZCV);
3033 __ Ptest(p2, p3.VnB());
3034 __ Mrs(x21, NZCV);
3035 __ Ptest(p2, p4.VnB());
3036 __ Mrs(x22, NZCV);
3037 __ Ptest(p3, p2.VnB());
3038 __ Mrs(x23, NZCV);
3039 __ Ptest(p3, p3.VnB());
3040 __ Mrs(x24, NZCV);
3041 __ Ptest(p3, p4.VnB());
3042 __ Mrs(x25, NZCV);
3043 __ Ptest(p4, p2.VnB());
3044 __ Mrs(x26, NZCV);
3045 __ Ptest(p4, p3.VnB());
3046 __ Mrs(x27, NZCV);
3047 __ Ptest(p4, p4.VnB());
3048 __ Mrs(x28, NZCV);
3049
3050 END();
3051
3052 if (CAN_RUN()) {
3053 RUN();
3054
3055 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
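    // Ptest sets the SVE view of the flags: N (FIRST) is set if the first
    // active lane of pn is true, Z (NONE) if no active lane of pn is true, and
    // C (!LAST) if the last active lane of pn is false. V is always cleared.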
3056
3057 // If pg is all inactive, the value of pn is irrelevant.
3058 ASSERT_EQUAL_64(zero, x0);
3059 ASSERT_EQUAL_64(zero, x1);
3060 ASSERT_EQUAL_64(zero, x2);
3061 ASSERT_EQUAL_64(zero, x3);
3062 ASSERT_EQUAL_64(zero, x4);
3063
3064 // All-active pg.
3065 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3066 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3067 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3068 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3069 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3070 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3071
3072 // Other inputs.
3073 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3074 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3075 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3076 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3077 x23); // pg: in3, pn: in2
3078 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3079 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3080 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3081 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3082 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3083 }
3084}
3085
Jacob Bramleye8289202019-07-31 11:25:23 +01003086TEST_SVE(sve_cntp) {
3087 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01003088 START();
3089
3090 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3091 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
3092 Initialise(&masm, p0.VnB(), p0_inputs);
3093
3094 // With an all-true predicate, these instructions measure the vector length.
3095 __ Ptrue(p10.VnB());
3096 __ Ptrue(p11.VnH());
3097 __ Ptrue(p12.VnS());
3098 __ Ptrue(p13.VnD());
3099
3100 // `ptrue p10.b` provides an all-active pg.
3101 __ Cntp(x10, p10, p10.VnB());
3102 __ Cntp(x11, p10, p11.VnH());
3103 __ Cntp(x12, p10, p12.VnS());
3104 __ Cntp(x13, p10, p13.VnD());
3105
3106 // Check that the predicate mask is applied properly.
3107 __ Cntp(x14, p10, p10.VnB());
3108 __ Cntp(x15, p11, p10.VnB());
3109 __ Cntp(x16, p12, p10.VnB());
3110 __ Cntp(x17, p13, p10.VnB());
3111
3112 // Check other patterns (including some ignored bits).
3113 __ Cntp(x0, p10, p0.VnB());
3114 __ Cntp(x1, p10, p0.VnH());
3115 __ Cntp(x2, p10, p0.VnS());
3116 __ Cntp(x3, p10, p0.VnD());
3117 __ Cntp(x4, p0, p10.VnB());
3118 __ Cntp(x5, p0, p10.VnH());
3119 __ Cntp(x6, p0, p10.VnS());
3120 __ Cntp(x7, p0, p10.VnD());
3121
3122 END();
3123
3124 if (CAN_RUN()) {
3125 RUN();
3126
3127 int vl_b = core.GetSVELaneCount(kBRegSize);
3128 int vl_h = core.GetSVELaneCount(kHRegSize);
3129 int vl_s = core.GetSVELaneCount(kSRegSize);
3130 int vl_d = core.GetSVELaneCount(kDRegSize);
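    // As a concrete example, a 128-bit vector has 16 B lanes and 2 D lanes,
    // so for that vector length x10 should read back as 16 and x13 as 2.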
3131
3132 // Check all-active predicates in various combinations.
3133 ASSERT_EQUAL_64(vl_b, x10);
3134 ASSERT_EQUAL_64(vl_h, x11);
3135 ASSERT_EQUAL_64(vl_s, x12);
3136 ASSERT_EQUAL_64(vl_d, x13);
3137
3138 ASSERT_EQUAL_64(vl_b, x14);
3139 ASSERT_EQUAL_64(vl_h, x15);
3140 ASSERT_EQUAL_64(vl_s, x16);
3141 ASSERT_EQUAL_64(vl_d, x17);
3142
3143 // Check that irrelevant bits are properly ignored.
3144 ASSERT_EQUAL_64(7, x0);
3145 ASSERT_EQUAL_64(5, x1);
3146 ASSERT_EQUAL_64(2, x2);
3147 ASSERT_EQUAL_64(1, x3);
3148
3149 ASSERT_EQUAL_64(7, x4);
3150 ASSERT_EQUAL_64(5, x5);
3151 ASSERT_EQUAL_64(2, x6);
3152 ASSERT_EQUAL_64(1, x7);
3153 }
3154}
3155
TatWai Chong13634762019-07-16 16:20:45 -07003156typedef void (MacroAssembler::*IntBinArithFn)(const ZRegister& zd,
3157 const PRegisterM& pg,
3158 const ZRegister& zn,
3159 const ZRegister& zm);
3160
3161template <typename Td, typename Tg, typename Tn>
3162static void IntBinArithHelper(Test* config,
3163 IntBinArithFn macro,
3164 unsigned lane_size_in_bits,
3165 const Tg& pg_inputs,
3166 const Tn& zn_inputs,
3167 const Tn& zm_inputs,
3168 const Td& zd_expected) {
3169 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3170 START();
3171
3172 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
3173 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
3174 InsrHelper(&masm, src_a, zn_inputs);
3175 InsrHelper(&masm, src_b, zm_inputs);
3176
3177 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
3178
3179 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
3180 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
3181 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
3182
3183 // `instr` zd, pg, zd (src_a), zm (src_b): the destination aliases the first source.
3184 __ Mov(zd_1, src_a);
3185 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
3186
3187 // `instr` zd, pg, zn (src_a), zd (src_b): the destination aliases the second source.
3188 // When zd and zm are aliased, the macro swaps the order of the operands if the
3189 // operation is commutative; otherwise it falls back to the reversed form of the
3190 // instruction, such as subr or divr.
3191 __ Mov(zd_2, src_b);
3192 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
3193
3194 // `instr` zd, pg, zn (src_a), zm (src_b): the destination aliases neither source.
3195 // The MacroAssembler automatically selects between `instr` alone and
3196 // movprfx + `instr`, based on whether zd and zn are aliased.
3197 // Any movprfx it generates is predicated, using the same governing predicate
3198 // register, so initialise the destination register first to keep the result
3199 // well-defined.
3200 __ Mov(zd_3, src_a);
3201 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
3202
3203 END();
3204
3205 if (CAN_RUN()) {
3206 RUN();
3207 ASSERT_EQUAL_SVE(zd_expected, zd_1);
3208
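    // Check the form where zd aliases the second source: inactive lanes
    // should have retained the original zm (src_b) values.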
3209 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
3210 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
3211 if (!core.HasSVELane(zd_2, lane)) break;
3212 if (pg_inputs[i] == 1) {
3213 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_2, lane);
3214 } else {
3215 ASSERT_EQUAL_SVE_LANE(zm_inputs[i], zd_2, lane);
3216 }
3217 }
3218
3219 ASSERT_EQUAL_SVE(zd_expected, zd_3);
3220 }
3221}
3222
3223TEST_SVE(sve_binary_arithmetic_predicated_add) {
3224 // clang-format off
3225 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
3226
3227 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
3228
3229 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
3230
3231 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
3232
3233 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
3234 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
3235
3236 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
3237 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
3238
3239 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
3240 0x1010101010101010, 0x8181818181818181,
3241 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
3242 0x0101010101010101, 0x7f7f7f7fffffffff};
3243
3244 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
3245 0x1010101010101010, 0x0000000000000000,
3246 0x8181818181818181, 0x8080808080808080,
3247 0xffffffffffffffff, 0xffffffffffffffff};
3248
3249 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3250 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3251 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3252 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3253
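  // These are merging operations, so inactive lanes keep the corresponding zn
  // value. For example, in the B-lane case the active lane with zn = 0xff and
  // zm = 0x81 wraps to 0x80, while the inactive lane with zn = 0x81 stays 0x81.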
3254 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
3255
3256 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
3257 0x8180, 0x8f8f, 0x0101, 0x7f7e};
3258
3259 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
3260 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
3261
3262 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
3263 0x2020202020202020, 0x8181818181818181,
3264 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
3265 0x0101010101010100, 0x7f7f7f7ffffffffe};
3266
3267 IntBinArithFn fn = &MacroAssembler::Add;
3268 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
3269 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
3270 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
3271 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
3272
3273 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
3274
3275 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
3276 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
3277
3278 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
3279 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
3280
3281 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
3282 0x0000000000000000, 0x8181818181818181,
3283 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
3284 0x0101010101010102, 0x7f7f7f8000000000};
3285
3286 fn = &MacroAssembler::Sub;
3287 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
3288 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
3289 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
3290 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
3291 // clang-format on
3292}
3293
3294TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
3295 // clang-format off
3296 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
3297
3298 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
3299
3300 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
3301 0xff00, 0xba98, 0x5555, 0x4567};
3302
3303 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
3304 0xfe00, 0xabab, 0xcdcd, 0x5678};
3305
3306 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
3307 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
3308
3309 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
3310 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
3311
3312 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
3313 0x5555555555555555, 0x0000000001234567};
3314
3315 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
3316 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3317
3318 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3319 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3320 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3321 int pg_d[] = {1, 0, 1, 1};
3322
3323 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
3324
3325 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
3326 0xff00, 0xba98, 0x5555, 0x5678};
3327
3328 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
3329 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
3330
3331 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3332 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3333
3334 IntBinArithFn fn = &MacroAssembler::Umax;
3335 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
3336 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
3337 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
3338 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
3339
3340 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
3341
3342 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
3343 0xfe00, 0xabab, 0x5555, 0x4567};
3344
3345 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
3346 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
3347
3348 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
3349 0x5555555555555555, 0x0000000001234567};
3350 fn = &MacroAssembler::Umin;
3351 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
3352 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
3353 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
3354 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
3355
3356 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
3357
3358 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
3359 0x0100, 0x0eed, 0x5555, 0x1111};
3360
3361 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
3362 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
3363
3364 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3365 0x7878787878787878, 0x0000000011111111};
3366
3367 fn = &MacroAssembler::Uabd;
3368 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
3369 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
3370 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
3371 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
3372 // clang-format on
3373}
3374
3375TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
3376 // clang-format off
3377 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
3378
3379 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
3380
3381 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
3382 INT16_MIN, INT16_MAX, INT16_MAX, 1};
3383
3384 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
3385 INT16_MAX, INT16_MAX - 1, -1, 0};
3386
3387 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
3388 INT32_MIN, INT32_MAX, INT32_MAX, 1};
3389
3390 int zm_s[] = {-1, 0, -1, -INT32_MAX,
3391 INT32_MAX, INT32_MAX - 1, -1, 0};
3392
3393 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3394 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3395
3396 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
3397 INT64_MAX, INT64_MAX - 1, -1, 0};
3398
3399 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3400 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3401 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3402 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3403
3404 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
3405
3406 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
3407 INT16_MAX, INT16_MAX, INT16_MAX, 1};
3408
3409 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
3410 INT32_MAX, INT32_MAX, INT32_MAX, 1};
3411
3412 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
3413 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3414
3415 IntBinArithFn fn = &MacroAssembler::Smax;
3416 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
3417 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
3418 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
3419 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
3420
3421 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
3422
3423 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
3424 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
3425
3426 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
3427 INT32_MIN, INT32_MAX, -1, 0};
3428
3429 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3430 INT64_MIN, INT64_MAX - 1, -1, 0};
3431
3432 fn = &MacroAssembler::Smin;
3433 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
3434 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
3435 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
3436 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
3437
3438 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
3439
3440 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
3441
3442 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
3443 0xffffffff, 0x7fffffff, 0x80000000, 1};
3444
3445 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
3446 0x8000000000000000, 1, 0x8000000000000000, 1};
3447
3448 fn = &MacroAssembler::Sabd;
3449 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
3450 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
3451 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
3452 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
3453 // clang-format on
3454}
3455
3456TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
3457 // clang-format off
3458 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3459
3460 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3461
3462 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
3463 0x8000, 0xff00, 0x5555, 0xaaaa};
3464
3465 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
3466 0x5555, 0xaaaa, 0x0001, 0x1234};
3467
3468 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3469 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
3470
3471 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3472 0x12345678, 0x22223333, 0x55556666, 0x77778888};
3473
3474 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
3475 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
3476
3477 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
3478 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
3479
3480 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3481 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3482 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3483 int pg_d[] = {1, 1, 0, 1};
3484
3485 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
3486
3487 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
3488 0x8000, 0xff00, 0x5555, 0x9e88};
3489
3490 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
3491 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
3492
3493 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
3494 0xffffffffffffffff, 0x38e38e38e38e38e4};
3495
3496 IntBinArithFn fn = &MacroAssembler::Mul;
3497 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
3498 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
3499 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
3500 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
3501
3502 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
3503
3504 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
3505 0x2aaa, 0xff00, 0x0000, 0x0c22};
3506
3507 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
3508 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
3509
3510 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
3511 0xffffffffffffffff, 0x71c71c71c71c71c6};
3512
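  // Umulh keeps only the upper half of the unsigned product. For example, in
  // the B-lane case, 0x20 * 0x80 = 0x1000, so the expected lane value is 0x10.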
3513 fn = &MacroAssembler::Umulh;
3514 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
3515 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
3516 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
3517 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
3518 // clang-format on
3519}
3520
3521TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
3522 // clang-format off
3523 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
3524
3525 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
3526
3527 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
3528
3529 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
3530
3531 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
3532
3533 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
3534
3535 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
3536
3537 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
3538
3539 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3540 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3541 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3542 int pg_d[] = {1, 1, 0, 1};
3543
3544 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
3545
3546 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
3547
3548 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
3549
3550 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
3551
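  // Smulh keeps only the upper half of the signed product. For example, in the
  // B-lane case, INT8_MIN * INT8_MAX = -16256 = 0xc080, so the expected lane
  // value is 0xc0 (-64).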
3552 IntBinArithFn fn = &MacroAssembler::Smulh;
3553 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
3554 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
3555 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3556 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3557 // clang-format on
3558}
3559
3560TEST_SVE(sve_binary_arithmetic_predicated_logical) {
3561 // clang-format off
3562 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3563 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3564
3565 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
3566 0x8000, 0xffff, 0x5555, 0xaaaa};
3567 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
3568 0x5555, 0xaaaa, 0x0000, 0x0800};
3569
3570 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
3571 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
3572
3573 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
3574 0x0001200880ff55aa, 0x0022446688aaccee};
3575 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
3576 0x7fcd80ff55aa0008, 0x1133557799bbddff};
3577
3578 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3579 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3580 int pg_s[] = {1, 1, 1, 0};
3581 int pg_d[] = {1, 1, 0, 1};
3582
3583 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
3584
3585 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
3586 0x0000, 0xffff, 0x0000, 0x0800};
3587
3588 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
3589
3590 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
3591 0x0001200880ff55aa, 0x0022446688aaccee};
3592
3593 IntBinArithFn fn = &MacroAssembler::And;
3594 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
3595 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
3596 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
3597 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
3598
3599 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
3600
3601 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
3602 0x8000, 0xffff, 0x5555, 0xa2aa};
3603
3604 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
3605
3606 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
3607 0x0001200880ff55aa, 0x0000000000000000};
3608
3609 fn = &MacroAssembler::Bic;
3610 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
3611 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
3612 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
3613 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
3614
3615 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
3616
3617 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
3618 0xd555, 0xffff, 0x5555, 0xa2aa};
3619
3620 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
3621
3622 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
3623 0x0001200880ff55aa, 0x1111111111111111};
3624
3625 fn = &MacroAssembler::Eor;
3626 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
3627 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
3628 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
3629 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
3630
3631 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
3632
3633 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
3634 0xd555, 0xffff, 0x5555, 0xaaaa};
3635
3636 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
3637
3638 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
3639 0x0001200880ff55aa, 0x1133557799bbddff};
3640
3641 fn = &MacroAssembler::Orr;
3642 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
3643 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
3644 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
3645 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
3646 // clang-format on
3647}
3648
3649TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
3650 // clang-format off
3651 int zn_s[] = {0, 1, -1, 2468,
3652 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
3653 -11111111, 87654321, 0, 0};
3654
3655 int zm_s[] = {1, -1, 1, 1234,
3656 -1, INT32_MIN, 1, -1,
3657 22222222, 80000000, -1, 0};
3658
3659 int64_t zn_d[] = {0, 1, -1, 2468,
3660 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
3661 -11111111, 87654321, 0, 0};
3662
3663 int64_t zm_d[] = {1, -1, 1, 1234,
3664 -1, INT64_MIN, 1, -1,
3665 22222222, 80000000, -1, 0};
3666
3667 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
3668 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
3669
3670 int exp_s[] = {0, 1, -1, 2,
3671 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
3672 0, 1, 0, 0};
3673
3674 int64_t exp_d[] = {0, -1, -1, 2,
3675 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
3676 0, 1, 0, 0};
3677
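  // SVE `sdiv` defines division by zero to return zero, and INT_MIN / -1 wraps
  // back to INT_MIN; that is why the INT_MIN / -1 lanes above expect INT_MIN.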
3678 IntBinArithFn fn = &MacroAssembler::Sdiv;
3679 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3680 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3681 // clang-format on
3682}
3683
3684TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
3685 // clang-format off
3686 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
3687 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
3688
3689 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
3690 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
3691
3692 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
3693 0xffffffffffffffff, 0x8000000000000000,
3694 0xffffffffffffffff, 0x8000000000000000,
3695 0xffffffffffffffff, 0xf0000000f0000000};
3696
3697 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
3698 0x8000000000000000, 0x0000000000000002,
3699 0x8888888888888888, 0x0000000000000001,
3700 0x0000000080000000, 0x00000000f0000000};
3701
3702 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
3703 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
3704
3705 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
3706 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
3707
3708 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
3709 0x0000000000000001, 0x4000000000000000,
3710 0x0000000000000001, 0x8000000000000000,
3711 0xffffffffffffffff, 0x0000000100000001};
3712
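  // SVE `udiv` also defines division by zero to return zero; the active
  // 0xffffffff / 0 lane above therefore expects 0.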
3713 IntBinArithFn fn = &MacroAssembler::Udiv;
3714 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3715 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3716 // clang-format on
3717}
3718
TatWai Chong845246b2019-08-08 00:01:58 -07003719typedef void (MacroAssembler::*IntArithFn)(const ZRegister& zd,
3720 const ZRegister& zn,
3721 const ZRegister& zm);
3722
3723template <typename T>
3724static void IntArithHelper(Test* config,
3725 IntArithFn macro,
3726 unsigned lane_size_in_bits,
3727 const T& zn_inputs,
3728 const T& zm_inputs,
3729 const T& zd_expected) {
3730 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3731 START();
3732
3733 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
3734 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
3735 InsrHelper(&masm, zn, zn_inputs);
3736 InsrHelper(&masm, zm, zm_inputs);
3737
3738 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
3739 (masm.*macro)(zd, zn, zm);
3740
3741 END();
3742
3743 if (CAN_RUN()) {
3744 RUN();
3745 ASSERT_EQUAL_SVE(zd_expected, zd);
3746 }
3747}
3748
3749TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
3750 // clang-format off
TatWai Chong6995bfd2019-09-26 10:48:05 +01003751 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
3752 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
3753 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
3754 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
TatWai Chong845246b2019-08-08 00:01:58 -07003755 0x1000000010001010, 0xf0000000f000f0f0};
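  // Each input is added to itself. `Add` wraps modulo the lane width, `Sqadd`
  // saturates the signed result and `Uqadd` saturates the unsigned result.
  // For example, for the B lane 0x81: 0x81 + 0x81 = 0x102, which wraps to
  // 0x02, saturates to 0x80 (INT8_MIN) for Sqadd, and saturates to 0xff for
  // Uqadd.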
3756
3757 IntArithFn fn = &MacroAssembler::Add;
3758
3759 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
3760 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
3761 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
3762 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
3763 0x2000000020002020, 0xe0000001e001e1e0};
3764
TatWai Chong6995bfd2019-09-26 10:48:05 +01003765 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
3766 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
3767 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
3768 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003769
3770 fn = &MacroAssembler::Sqadd;
3771
3772 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
3773 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
3774 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
3775 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3776 0x2000000020002020, 0xe0000001e001e1e0};
3777
TatWai Chong6995bfd2019-09-26 10:48:05 +01003778 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
3779 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
3780 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
3781 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003782
3783 fn = &MacroAssembler::Uqadd;
3784
3785 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
3786 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
3787 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
3788 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
3789 0x2000000020002020, 0xffffffffffffffff};
3790
TatWai Chong6995bfd2019-09-26 10:48:05 +01003791 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
3792 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
3793 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
3794 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003795 // clang-format on
3796}
3797
3798TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
3799 // clang-format off
3800
3801 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
3802 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
3803
3804 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
3805 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
3806
3807 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
3808 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
3809
3810 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
3811 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
3812 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
3813 0xf0000000f000f0f0, 0x5555555555555555};
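  // `Sub` wraps modulo the lane width, `Sqsub` saturates the signed result and
  // `Uqsub` saturates the unsigned result (flooring at zero). For example, for
  // the B lanes 0x81 and 0x10: 0x81 - 0x10 gives 0x71 for Sub and Uqsub but
  // saturates to 0x80 for Sqsub (-127 - 16 underflows INT8_MIN), while
  // 0x10 - 0x81 gives 0x8f, 0x7f and 0x00 respectively.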
3814
3815 IntArithFn fn = &MacroAssembler::Sub;
3816
3817 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
3818 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
3819 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
3820 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
3821 0x8eeeeeed8eed8d8e, 0x5555555555555555};
3822
3823 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
3824 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
3825 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
3826 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
3827
3828 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
3829 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
3830 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
3831 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
3832 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
3833
3834 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
3835 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
3836 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
3837 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
3838
3839 fn = &MacroAssembler::Sqsub;
3840
3841 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
3842 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
3843 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
3844 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3845 0x7fffffffffffffff, 0x8000000000000000};
3846
3847 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
3848 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
3849 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
3850 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
3851
3852 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
3853 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
3854 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
3855 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
3856 0x8000000000000000, 0x7fffffffffffffff};
3857
3858 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
3859 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
3860 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
3861 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
3862
3863 fn = &MacroAssembler::Uqsub;
3864
3865 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
3866 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
3867 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
3868 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
3869 0x0000000000000000, 0x5555555555555555};
3870
3871 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
3872 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
3873 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
3874 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
3875
3876 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
3877 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
3878 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
3879 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
3880 0x7111111271127272, 0x0000000000000000};
3881
3882 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
3883 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
3884 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
3885 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
3886 // clang-format on
3887}
3888
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01003889TEST_SVE(sve_rdvl) {
3890 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3891 START();
3892
3893 // Encodable multipliers.
3894 __ Rdvl(x0, 0);
3895 __ Rdvl(x1, 1);
3896 __ Rdvl(x2, 2);
3897 __ Rdvl(x3, 31);
3898 __ Rdvl(x4, -1);
3899 __ Rdvl(x5, -2);
3900 __ Rdvl(x6, -32);
3901
3902 // For unencodable multipliers, the MacroAssembler uses a sequence of
3903 // instructions.
3904 __ Rdvl(x10, 32);
3905 __ Rdvl(x11, -33);
3906 __ Rdvl(x12, 42);
3907 __ Rdvl(x13, -42);
3908
3909 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
3910 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
3911 // occurs in the macro.
3912 __ Rdvl(x14, 0x007fffffffffffff);
3913 __ Rdvl(x15, -0x0080000000000000);
3914
3915 END();
3916
3917 if (CAN_RUN()) {
3918 RUN();
3919
3920 uint64_t vl = config->sve_vl_in_bytes();
3921
3922 ASSERT_EQUAL_64(vl * 0, x0);
3923 ASSERT_EQUAL_64(vl * 1, x1);
3924 ASSERT_EQUAL_64(vl * 2, x2);
3925 ASSERT_EQUAL_64(vl * 31, x3);
3926 ASSERT_EQUAL_64(vl * -1, x4);
3927 ASSERT_EQUAL_64(vl * -2, x5);
3928 ASSERT_EQUAL_64(vl * -32, x6);
3929
3930 ASSERT_EQUAL_64(vl * 32, x10);
3931 ASSERT_EQUAL_64(vl * -33, x11);
3932 ASSERT_EQUAL_64(vl * 42, x12);
3933 ASSERT_EQUAL_64(vl * -42, x13);
3934
3935 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
3936 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
3937 }
3938}
3939
3940TEST_SVE(sve_rdpl) {
3941 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3942 START();
3943
3944 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
3945 // Addpl(xd, xzr, ...).
3946
3947 // Encodable multipliers (as `addvl`).
3948 __ Rdpl(x0, 0);
3949 __ Rdpl(x1, 8);
3950 __ Rdpl(x2, 248);
3951 __ Rdpl(x3, -8);
3952 __ Rdpl(x4, -256);
3953
3954 // Encodable multipliers (as `movz` + `addpl`).
3955 __ Rdpl(x7, 31);
3956 __ Rdpl(x8, -32);
3957
3958 // For unencodable multipliers, the MacroAssembler uses a sequence of
3959 // instructions.
3960 __ Rdpl(x10, 42);
3961 __ Rdpl(x11, -42);
3962
3963 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
3964 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
3965 // occurs in the macro.
3966 __ Rdpl(x12, 0x007fffffffffffff);
3967 __ Rdpl(x13, -0x0080000000000000);
3968
3969 END();
3970
3971 if (CAN_RUN()) {
3972 RUN();
3973
3974 uint64_t vl = config->sve_vl_in_bytes();
3975 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
3976 uint64_t pl = vl / kZRegBitsPerPRegBit;
3977
3978 ASSERT_EQUAL_64(pl * 0, x0);
3979 ASSERT_EQUAL_64(pl * 8, x1);
3980 ASSERT_EQUAL_64(pl * 248, x2);
3981 ASSERT_EQUAL_64(pl * -8, x3);
3982 ASSERT_EQUAL_64(pl * -256, x4);
3983
3984 ASSERT_EQUAL_64(pl * 31, x7);
3985 ASSERT_EQUAL_64(pl * -32, x8);
3986
3987 ASSERT_EQUAL_64(pl * 42, x10);
3988 ASSERT_EQUAL_64(pl * -42, x11);
3989
3990 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
3991 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
3992 }
3993}
3994
3995TEST_SVE(sve_addvl) {
3996 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3997 START();
3998
3999 uint64_t base = 0x1234567800000000;
4000 __ Mov(x30, base);
4001
4002 // Encodable multipliers.
4003 __ Addvl(x0, x30, 0);
4004 __ Addvl(x1, x30, 1);
4005 __ Addvl(x2, x30, 31);
4006 __ Addvl(x3, x30, -1);
4007 __ Addvl(x4, x30, -32);
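  // For example, with a 256-bit VL (32 bytes), `Addvl(x1, x30, 1)` produces
  // base + 32 and `Addvl(x4, x30, -32)` produces base - 1024.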
4008
4009 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
4010 __ Addvl(x5, x30, 32);
4011 __ Addvl(x6, x30, -33);
4012
4013 // Test the limits of the multiplier supported by the `Rdvl` macro.
4014 __ Addvl(x7, x30, 0x007fffffffffffff);
4015 __ Addvl(x8, x30, -0x0080000000000000);
4016
4017 // Check that xzr behaves correctly.
4018 __ Addvl(x9, xzr, 8);
4019 __ Addvl(x10, xzr, 42);
4020
4021 // Check that sp behaves correctly with encodable and unencodable multipliers.
4022 __ Addvl(sp, sp, -5);
4023 __ Addvl(sp, sp, -37);
4024 __ Addvl(x11, sp, -2);
4025 __ Addvl(sp, x11, 2);
4026 __ Addvl(x12, sp, -42);
4027
4028 // Restore the value of sp.
4029 __ Addvl(sp, x11, 39);
4030 __ Addvl(sp, sp, 5);
4031
4032 // Adjust x11 and x12 to make the test sp-agnostic.
4033 __ Sub(x11, sp, x11);
4034 __ Sub(x12, sp, x12);
4035
4036 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4037 __ Mov(x20, x30);
4038 __ Mov(x21, x30);
4039 __ Mov(x22, x30);
4040 __ Addvl(x20, x20, 4);
4041 __ Addvl(x21, x21, 42);
4042 __ Addvl(x22, x22, -0x0080000000000000);
4043
4044 END();
4045
4046 if (CAN_RUN()) {
4047 RUN();
4048
4049 uint64_t vl = config->sve_vl_in_bytes();
4050
4051 ASSERT_EQUAL_64(base + (vl * 0), x0);
4052 ASSERT_EQUAL_64(base + (vl * 1), x1);
4053 ASSERT_EQUAL_64(base + (vl * 31), x2);
4054 ASSERT_EQUAL_64(base + (vl * -1), x3);
4055 ASSERT_EQUAL_64(base + (vl * -32), x4);
4056
4057 ASSERT_EQUAL_64(base + (vl * 32), x5);
4058 ASSERT_EQUAL_64(base + (vl * -33), x6);
4059
4060 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
4061 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
4062
4063 ASSERT_EQUAL_64(vl * 8, x9);
4064 ASSERT_EQUAL_64(vl * 42, x10);
4065
4066 ASSERT_EQUAL_64(vl * 44, x11);
4067 ASSERT_EQUAL_64(vl * 84, x12);
4068
4069 ASSERT_EQUAL_64(base + (vl * 4), x20);
4070 ASSERT_EQUAL_64(base + (vl * 42), x21);
4071 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
4072
4073 ASSERT_EQUAL_64(base, x30);
4074 }
4075}
4076
4077TEST_SVE(sve_addpl) {
4078 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4079 START();
4080
4081 uint64_t base = 0x1234567800000000;
4082 __ Mov(x30, base);
4083
4084 // Encodable multipliers.
4085 __ Addpl(x0, x30, 0);
4086 __ Addpl(x1, x30, 1);
4087 __ Addpl(x2, x30, 31);
4088 __ Addpl(x3, x30, -1);
4089 __ Addpl(x4, x30, -32);
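  // PL is one eighth of VL, so with a 256-bit VL (32-byte VL, 4-byte PL),
  // `Addpl(x1, x30, 1)` produces base + 4.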
4090
4091 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
4092 // it falls back to `Rdvl` and `Add`.
4093 __ Addpl(x5, x30, 32);
4094 __ Addpl(x6, x30, -33);
4095
4096 // Test the limits of the multiplier supported by the `Rdvl` macro.
4097 __ Addpl(x7, x30, 0x007fffffffffffff);
4098 __ Addpl(x8, x30, -0x0080000000000000);
4099
4100 // Check that xzr behaves correctly.
4101 __ Addpl(x9, xzr, 8);
4102 __ Addpl(x10, xzr, 42);
4103
4104 // Check that sp behaves correctly with encodable and unencodable multipliers.
4105 __ Addpl(sp, sp, -5);
4106 __ Addpl(sp, sp, -37);
4107 __ Addpl(x11, sp, -2);
4108 __ Addpl(sp, x11, 2);
4109 __ Addpl(x12, sp, -42);
4110
4111 // Restore the value of sp.
4112 __ Addpl(sp, x11, 39);
4113 __ Addpl(sp, sp, 5);
4114
4115 // Adjust x11 and x12 to make the test sp-agnostic.
4116 __ Sub(x11, sp, x11);
4117 __ Sub(x12, sp, x12);
4118
4119 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4120 __ Mov(x20, x30);
4121 __ Mov(x21, x30);
4122 __ Mov(x22, x30);
4123 __ Addpl(x20, x20, 4);
4124 __ Addpl(x21, x21, 42);
4125 __ Addpl(x22, x22, -0x0080000000000000);
4126
4127 END();
4128
4129 if (CAN_RUN()) {
4130 RUN();
4131
4132 uint64_t vl = config->sve_vl_in_bytes();
4133 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4134 uint64_t pl = vl / kZRegBitsPerPRegBit;
4135
4136 ASSERT_EQUAL_64(base + (pl * 0), x0);
4137 ASSERT_EQUAL_64(base + (pl * 1), x1);
4138 ASSERT_EQUAL_64(base + (pl * 31), x2);
4139 ASSERT_EQUAL_64(base + (pl * -1), x3);
4140 ASSERT_EQUAL_64(base + (pl * -32), x4);
4141
4142 ASSERT_EQUAL_64(base + (pl * 32), x5);
4143 ASSERT_EQUAL_64(base + (pl * -33), x6);
4144
4145 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
4146 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
4147
4148 ASSERT_EQUAL_64(pl * 8, x9);
4149 ASSERT_EQUAL_64(pl * 42, x10);
4150
4151 ASSERT_EQUAL_64(pl * 44, x11);
4152 ASSERT_EQUAL_64(pl * 84, x12);
4153
4154 ASSERT_EQUAL_64(base + (pl * 4), x20);
4155 ASSERT_EQUAL_64(base + (pl * 42), x21);
4156 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
4157
4158 ASSERT_EQUAL_64(base, x30);
4159 }
4160}
4161
Jacob Bramley1314c462019-08-08 10:54:16 +01004162TEST_SVE(sve_adr_x) {
4163 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4164 START();
4165
4166 uint64_t base = 0x1234567800000000;
4167 __ Mov(x28, base);
4168 __ Mov(x29, 48);
4169 __ Mov(x30, -48);
4170
4171 // Simple scalar (or equivalent) cases.
4172
4173 __ Adr(x0, SVEMemOperand(x28));
4174 __ Adr(x1, SVEMemOperand(x28, 0));
4175 __ Adr(x2, SVEMemOperand(x28, 0, SVE_MUL_VL).ForZRegAccess());
4176 __ Adr(x3, SVEMemOperand(x28, 0, SVE_MUL_VL).ForPRegAccess());
4177 __ Adr(x4, SVEMemOperand(x28, xzr));
4178 __ Adr(x5, SVEMemOperand(x28, xzr, LSL, 42));
4179
4180 // scalar-plus-immediate
4181
4182 // Unscaled immediates, handled with `Add`.
4183 __ Adr(x6, SVEMemOperand(x28, 42));
4184 __ Adr(x7, SVEMemOperand(x28, -42));
4185 // Scaled immediates, handled with `Addvl` or `Addpl`.
4186 __ Adr(x8, SVEMemOperand(x28, 31, SVE_MUL_VL).ForZRegAccess());
4187 __ Adr(x9, SVEMemOperand(x28, -32, SVE_MUL_VL).ForZRegAccess());
4188 __ Adr(x10, SVEMemOperand(x28, 31, SVE_MUL_VL).ForPRegAccess());
4189 __ Adr(x11, SVEMemOperand(x28, -32, SVE_MUL_VL).ForPRegAccess());
4190 // Out of `addvl` or `addpl` range.
4191 __ Adr(x12, SVEMemOperand(x28, 42, SVE_MUL_VL).ForZRegAccess());
4192 __ Adr(x13, SVEMemOperand(x28, -42, SVE_MUL_VL).ForZRegAccess());
4193 __ Adr(x14, SVEMemOperand(x28, 42, SVE_MUL_VL).ForPRegAccess());
4194 __ Adr(x15, SVEMemOperand(x28, -42, SVE_MUL_VL).ForPRegAccess());
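  // With SVE_MUL_VL, the immediate is scaled by the vector length in bytes for
  // Z-register accesses, or by the predicate length in bytes for P-register
  // accesses, as the expected values below reflect.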
4195
4196 // scalar-plus-scalar
4197
4198 __ Adr(x18, SVEMemOperand(x28, x29));
4199 __ Adr(x19, SVEMemOperand(x28, x30));
4200 __ Adr(x20, SVEMemOperand(x28, x29, LSL, 8));
4201 __ Adr(x21, SVEMemOperand(x28, x30, LSL, 8));
4202
4203 // In-place updates, to stress scratch register allocation.
4204
4205 __ Mov(x22, 0xabcd000000000000);
4206 __ Mov(x23, 0xabcd101100000000);
4207 __ Mov(x24, 0xabcd202200000000);
4208 __ Mov(x25, 0xabcd303300000000);
4209 __ Mov(x26, 0xabcd404400000000);
4210 __ Mov(x27, 0xabcd505500000000);
4211
4212 __ Adr(x22, SVEMemOperand(x22));
4213 __ Adr(x23, SVEMemOperand(x23, 0x42));
4214 __ Adr(x24, SVEMemOperand(x24, 3, SVE_MUL_VL).ForZRegAccess());
4215 __ Adr(x25, SVEMemOperand(x25, 0x42, SVE_MUL_VL).ForPRegAccess());
4216 __ Adr(x26, SVEMemOperand(x26, x29));
4217 __ Adr(x27, SVEMemOperand(x27, x30, LSL, 4));
4218
4219 END();
4220
4221 if (CAN_RUN()) {
4222 RUN();
4223
4224 uint64_t vl = config->sve_vl_in_bytes();
4225 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4226 uint64_t pl = vl / kZRegBitsPerPRegBit;
4227
4228 // Simple scalar (or equivalent) cases.
4229 ASSERT_EQUAL_64(base, x0);
4230 ASSERT_EQUAL_64(base, x1);
4231 ASSERT_EQUAL_64(base, x2);
4232 ASSERT_EQUAL_64(base, x3);
4233 ASSERT_EQUAL_64(base, x4);
4234 ASSERT_EQUAL_64(base, x5);
4235
4236 // scalar-plus-immediate
4237 ASSERT_EQUAL_64(base + 42, x6);
4238 ASSERT_EQUAL_64(base - 42, x7);
4239 ASSERT_EQUAL_64(base + (31 * vl), x8);
4240 ASSERT_EQUAL_64(base - (32 * vl), x9);
4241 ASSERT_EQUAL_64(base + (31 * pl), x10);
4242 ASSERT_EQUAL_64(base - (32 * pl), x11);
4243 ASSERT_EQUAL_64(base + (42 * vl), x12);
4244 ASSERT_EQUAL_64(base - (42 * vl), x13);
4245 ASSERT_EQUAL_64(base + (42 * pl), x14);
4246 ASSERT_EQUAL_64(base - (42 * pl), x15);
4247
4248 // scalar-plus-scalar
4249 ASSERT_EQUAL_64(base + 48, x18);
4250 ASSERT_EQUAL_64(base - 48, x19);
4251 ASSERT_EQUAL_64(base + (48 << 8), x20);
4252 ASSERT_EQUAL_64(base - (48 << 8), x21);
4253
4254 // In-place updates.
4255 ASSERT_EQUAL_64(0xabcd000000000000, x22);
4256 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x23);
4257 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x24);
4258 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x25);
4259 ASSERT_EQUAL_64(0xabcd404400000000 + 48, x26);
4260 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x27);
4261
4262 // Check that the inputs were unmodified.
4263 ASSERT_EQUAL_64(base, x28);
4264 ASSERT_EQUAL_64(48, x29);
4265 ASSERT_EQUAL_64(-48, x30);
4266 }
4267}
4268
TatWai Chong4f28df72019-08-14 17:50:30 -07004269TEST_SVE(sve_permute_vector_unpredicated) {
4270 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
4271 START();
4272
4273 __ Mov(x0, 0x0123456789abcdef);
4274 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
4275 __ Insr(z1.VnS(), w0);
4276 __ Insr(z2.VnD(), x0);
4277 __ Insr(z3.VnH(), h0);
4278 __ Insr(z4.VnD(), d0);
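  // Insr shifts the existing lanes up by one position (discarding the top
  // lane) and writes the new value into lane 0, which is why lane 0 of z2 is
  // expected to hold 0x0123456789abcdef below.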
4279
4280 uint64_t inputs[] = {0xfedcba9876543210,
4281 0x0123456789abcdef,
4282 0x8f8e8d8c8b8a8988,
4283 0x8786858483828180};
4284
4285 // Initialize a distinguishable value throughout the register first.
4286 __ Dup(z9.VnB(), 0xff);
4287 InsrHelper(&masm, z9.VnD(), inputs);
4288
4289 __ Rev(z5.VnB(), z9.VnB());
4290 __ Rev(z6.VnH(), z9.VnH());
4291 __ Rev(z7.VnS(), z9.VnS());
4292 __ Rev(z8.VnD(), z9.VnD());
4293
4294 int index[7] = {22, 7, 7, 3, 1, 1, 63};
4295 // Broadcast data from within the input array.
4296 __ Dup(z10.VnB(), z9.VnB(), index[0]);
4297 __ Dup(z11.VnH(), z9.VnH(), index[1]);
4298 __ Dup(z12.VnS(), z9.VnS(), index[2]);
4299 __ Dup(z13.VnD(), z9.VnD(), index[3]);
4300 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
4301 // Test dst == src
4302 __ Mov(z15, z9);
4303 __ Dup(z15.VnS(), z15.VnS(), index[5]);
4304 // Select data beyond the input array.
4305 __ Dup(z16.VnB(), z9.VnB(), index[6]);
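  // An indexed Dup broadcasts the selected source lane to every lane of the
  // destination; if the index refers to a lane beyond the current vector
  // length, every destination lane becomes zero, as checked below.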
4306
4307 END();
4308
4309 if (CAN_RUN()) {
4310 RUN();
4311
4312 // Insr
4313 uint64_t z1_expected[] = {0x7f80f0017ff0f001, 0x7f80f00089abcdef};
4314 uint64_t z2_expected[] = {0x7ff0f0027f80f000, 0x0123456789abcdef};
4315 uint64_t z3_expected[] = {0xf0037f80f0017ff0, 0xf0037f80f0003456};
4316 uint64_t z4_expected[] = {0x7ff0f0047f80f000, 0x7ffaaaaa22223456};
4317 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
4318 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
4319 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
4320 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
4321
4322 // Rev
4323 int lane_count = core.GetSVELaneCount(kBRegSize);
4324 for (int i = 0; i < lane_count; i++) {
4325 uint64_t expected =
4326 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
4327 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
4328 ASSERT_EQUAL_64(expected, input);
4329 }
4330
4331 lane_count = core.GetSVELaneCount(kHRegSize);
4332 for (int i = 0; i < lane_count; i++) {
4333 uint64_t expected =
4334 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
4335 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
4336 ASSERT_EQUAL_64(expected, input);
4337 }
4338
4339 lane_count = core.GetSVELaneCount(kSRegSize);
4340 for (int i = 0; i < lane_count; i++) {
4341 uint64_t expected =
4342 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
4343 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
4344 ASSERT_EQUAL_64(expected, input);
4345 }
4346
4347 lane_count = core.GetSVELaneCount(kDRegSize);
4348 for (int i = 0; i < lane_count; i++) {
4349 uint64_t expected =
4350 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
4351 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
4352 ASSERT_EQUAL_64(expected, input);
4353 }
4354
4355 // Dup
4356 unsigned vl = config->sve_vl_in_bits();
4357 lane_count = core.GetSVELaneCount(kBRegSize);
4358 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
4359 for (int i = 0; i < lane_count; i++) {
4360 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
4361 }
4362
4363 lane_count = core.GetSVELaneCount(kHRegSize);
4364 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
4365 for (int i = 0; i < lane_count; i++) {
4366 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
4367 }
4368
4369 lane_count = core.GetSVELaneCount(kSRegSize);
4370 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
4371 for (int i = 0; i < lane_count; i++) {
4372 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
4373 }
4374
4375 lane_count = core.GetSVELaneCount(kDRegSize);
4376 uint64_t expected_z13 =
4377 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
4378 for (int i = 0; i < lane_count; i++) {
4379 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
4380 }
4381
4382 lane_count = core.GetSVELaneCount(kDRegSize);
4383 uint64_t expected_z14_lo = 0;
4384 uint64_t expected_z14_hi = 0;
4385 if (vl > (index[4] * kQRegSize)) {
4386 expected_z14_lo = 0x0123456789abcdef;
4387 expected_z14_hi = 0xfedcba9876543210;
4388 }
4389 for (int i = 0; i < lane_count; i += 2) {
4390 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
4391 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
4392 }
4393
4394 lane_count = core.GetSVELaneCount(kSRegSize);
4395 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
4396 for (int i = 0; i < lane_count; i++) {
4397 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
4398 }
4399
4400 lane_count = core.GetSVELaneCount(kBRegSize);
4401 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
4402 for (int i = 0; i < lane_count; i++) {
4403 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
4404 }
4405 }
4406}
4407
4408TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
4409 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4410 START();
4411
4412 uint64_t z9_inputs[] = {0xfedcba9876543210,
4413 0x0123456789abcdef,
4414 0x8f8e8d8c8b8a8988,
4415 0x8786858483828180};
4416 InsrHelper(&masm, z9.VnD(), z9_inputs);
4417
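  // The `lo` forms widen the elements in the low half of the source and the
  // `hi` forms widen those in the high half; the Sunpk* forms sign-extend and
  // the Uunpk* forms zero-extend each element to twice its width.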
4418 __ Sunpkhi(z10.VnH(), z9.VnB());
4419 __ Sunpkhi(z11.VnS(), z9.VnH());
4420 __ Sunpkhi(z12.VnD(), z9.VnS());
4421
4422 __ Sunpklo(z13.VnH(), z9.VnB());
4423 __ Sunpklo(z14.VnS(), z9.VnH());
4424 __ Sunpklo(z15.VnD(), z9.VnS());
4425
4426 __ Uunpkhi(z16.VnH(), z9.VnB());
4427 __ Uunpkhi(z17.VnS(), z9.VnH());
4428 __ Uunpkhi(z18.VnD(), z9.VnS());
4429
4430 __ Uunpklo(z19.VnH(), z9.VnB());
4431 __ Uunpklo(z20.VnS(), z9.VnH());
4432 __ Uunpklo(z21.VnD(), z9.VnS());
4433
4434 END();
4435
4436 if (CAN_RUN()) {
4437 RUN();
4438
4439 // Sunpkhi
4440 int lane_count = core.GetSVELaneCount(kHRegSize);
4441 for (int i = lane_count - 1; i >= 0; i--) {
4442 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
4443 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4444 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4445 ASSERT_EQUAL_64(expected, input);
4446 }
4447
4448 lane_count = core.GetSVELaneCount(kSRegSize);
4449 for (int i = lane_count - 1; i >= 0; i--) {
4450 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
4451 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4452 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4453 ASSERT_EQUAL_64(expected, input);
4454 }
4455
4456 lane_count = core.GetSVELaneCount(kDRegSize);
4457 for (int i = lane_count - 1; i >= 0; i--) {
4458 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
4459 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4460 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4461 ASSERT_EQUAL_64(expected, input);
4462 }
4463
4464 // Sunpklo
4465 lane_count = core.GetSVELaneCount(kHRegSize);
4466 for (int i = lane_count - 1; i >= 0; i--) {
4467 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
4468 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4469 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4470 ASSERT_EQUAL_64(expected, input);
4471 }
4472
4473 lane_count = core.GetSVELaneCount(kSRegSize);
4474 for (int i = lane_count - 1; i >= 0; i--) {
4475 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
4476 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4477 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4478 ASSERT_EQUAL_64(expected, input);
4479 }
4480
4481 lane_count = core.GetSVELaneCount(kDRegSize);
4482 for (int i = lane_count - 1; i >= 0; i--) {
4483 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
4484 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4485 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4486 ASSERT_EQUAL_64(expected, input);
4487 }
4488
4489 // Uunpkhi
4490 lane_count = core.GetSVELaneCount(kHRegSize);
4491 for (int i = lane_count - 1; i >= 0; i--) {
4492 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
4493 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4494 ASSERT_EQUAL_64(expected, input);
4495 }
4496
4497 lane_count = core.GetSVELaneCount(kSRegSize);
4498 for (int i = lane_count - 1; i >= 0; i--) {
4499 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
4500 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4501 ASSERT_EQUAL_64(expected, input);
4502 }
4503
4504 lane_count = core.GetSVELaneCount(kDRegSize);
4505 for (int i = lane_count - 1; i >= 0; i--) {
4506 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
4507 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4508 ASSERT_EQUAL_64(expected, input);
4509 }
4510
4511 // Uunpklo
4512 lane_count = core.GetSVELaneCount(kHRegSize);
4513 for (int i = lane_count - 1; i >= 0; i--) {
4514 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
4515 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4516 ASSERT_EQUAL_64(expected, input);
4517 }
4518
4519 lane_count = core.GetSVELaneCount(kSRegSize);
4520 for (int i = lane_count - 1; i >= 0; i--) {
4521 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
4522 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4523 ASSERT_EQUAL_64(expected, input);
4524 }
4525
4526 lane_count = core.GetSVELaneCount(kDRegSize);
4527 for (int i = lane_count - 1; i >= 0; i--) {
4528 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
4529 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4530 ASSERT_EQUAL_64(expected, input);
4531 }
4532 }
4533}
4534
4535TEST_SVE(sve_cnot_not) {
4536 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4537 START();
4538
4539 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
4540
4541 // For simplicity, we re-use the same pg for various lane sizes.
4542 // For D lanes: 1, 1, 0
4543 // For S lanes: 1, 1, 1, 0, 0
4544 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4545 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4546 Initialise(&masm, p0.VnB(), pg_in);
4547 PRegisterM pg = p0.Merging();
4548
4549 // These are merging operations, so we have to initialise the result register.
4550 // We use a mixture of constructive and destructive operations.
4551
4552 InsrHelper(&masm, z31.VnD(), in);
4553 // Make a copy so we can check that constructive operations preserve zn.
4554 __ Mov(z30, z31);
4555
4556 // For constructive operations, use a different initial result value.
4557 __ Index(z29.VnB(), 0, -1);
4558
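  // Cnot sets each active lane to 1 if the corresponding input lane is zero,
  // and to 0 otherwise; Not writes the bitwise inverse of the input lane.
  // Inactive lanes keep whatever is already in the destination (merging).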
4559 __ Mov(z0, z31);
4560 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
4561 __ Mov(z1, z29);
4562 __ Cnot(z1.VnH(), pg, z31.VnH());
4563 __ Mov(z2, z31);
4564 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
4565 __ Mov(z3, z29);
4566 __ Cnot(z3.VnD(), pg, z31.VnD());
4567
4568 __ Mov(z4, z29);
4569 __ Not(z4.VnB(), pg, z31.VnB());
4570 __ Mov(z5, z31);
4571 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
4572 __ Mov(z6, z29);
4573 __ Not(z6.VnS(), pg, z31.VnS());
4574 __ Mov(z7, z31);
4575 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
4576
4577 END();
4578
4579 if (CAN_RUN()) {
4580 RUN();
4581
4582 // Check that constructive operations preserve their inputs.
4583 ASSERT_EQUAL_SVE(z30, z31);
4584
4585 // clang-format off
4586
4587 // Cnot (B) destructive
4588 uint64_t expected_z0[] =
4589 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4590 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
4591 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4592
4593 // Cnot (H)
4594 uint64_t expected_z1[] =
4595 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4596 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
4597 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4598
4599 // Cnot (S) destructive
4600 uint64_t expected_z2[] =
4601 // pg: 0 1 1 1 0 0
4602 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
4603 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4604
4605 // Cnot (D)
4606 uint64_t expected_z3[] =
4607 // pg: 1 1 0
4608 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
4609 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4610
4611 // Not (B)
4612 uint64_t expected_z4[] =
4613 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4614 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
4615 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4616
4617 // Not (H) destructive
4618 uint64_t expected_z5[] =
4619 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4620 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
4621 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4622
4623 // Not (S)
4624 uint64_t expected_z6[] =
4625 // pg: 0 1 1 1 0 0
4626 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
4627 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
4628
4629 // Not (D) destructive
4630 uint64_t expected_z7[] =
4631 // pg: 1 1 0
4632 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
4633 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
4634
4635 // clang-format on
4636 }
4637}
4638
4639TEST_SVE(sve_fabs_fneg) {
4640 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4641 START();
4642
4643 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
4644 // NaNs, but fabs and fneg do not.
4645 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
4646 0xfff00000ff80fc01, // Signalling NaNs.
4647 0x123456789abcdef0};
4648
4649 // For simplicity, we re-use the same pg for various lane sizes.
4650 // For D lanes: 1, 1, 0
4651 // For S lanes: 1, 1, 1, 0, 0
4652 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4653 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4654 Initialise(&masm, p0.VnB(), pg_in);
4655 PRegisterM pg = p0.Merging();
4656
4657 // These are merging operations, so we have to initialise the result register.
4658 // We use a mixture of constructive and destructive operations.
4659
4660 InsrHelper(&masm, z31.VnD(), in);
4661 // Make a copy so we can check that constructive operations preserve zn.
4662 __ Mov(z30, z31);
4663
4664 // For constructive operations, use a different initial result value.
4665 __ Index(z29.VnB(), 0, -1);
4666
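  // Fabs clears the sign bit and Fneg inverts it; neither inspects the rest
  // of the encoding, so the signalling NaNs above pass through unquietened.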
4667 __ Mov(z0, z29);
4668 __ Fabs(z0.VnH(), pg, z31.VnH());
4669 __ Mov(z1, z31);
4670 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
4671 __ Mov(z2, z29);
4672 __ Fabs(z2.VnD(), pg, z31.VnD());
4673
4674 __ Mov(z3, z31);
4675 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
4676 __ Mov(z4, z29);
4677 __ Fneg(z4.VnS(), pg, z31.VnS());
4678 __ Mov(z5, z31);
4679 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
4680
4681 END();
4682
4683 if (CAN_RUN()) {
4684 RUN();
4685
4686 // Check that constructive operations preserve their inputs.
4687 ASSERT_EQUAL_SVE(z30, z31);
4688
4689 // clang-format off
4690
4691 // Fabs (H)
4692 uint64_t expected_z0[] =
4693 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4694 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
4695 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4696
4697 // Fabs (S) destructive
4698 uint64_t expected_z1[] =
4699 // pg: 0 1 1 1 0 0
4700 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
4701 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4702
4703 // Fabs (D)
4704 uint64_t expected_z2[] =
4705 // pg: 1 1 0
4706 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
4707 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4708
4709 // Fneg (H) destructive
4710 uint64_t expected_z3[] =
4711 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4712 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
4713 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4714
4715 // Fneg (S)
4716 uint64_t expected_z4[] =
4717 // pg: 0 1 1 1 0 0
4718 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
4719 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4720
4721 // Fneg (D) destructive
4722 uint64_t expected_z5[] =
4723 // pg: 1 1 0
4724 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
4725 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4726
4727 // clang-format on
4728 }
4729}
4730
4731TEST_SVE(sve_cls_clz_cnt) {
4732 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4733 START();
4734
4735 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4736
4737 // For simplicity, we re-use the same pg for various lane sizes.
4738 // For D lanes: 1, 1, 0
4739 // For S lanes: 1, 1, 1, 0, 0
4740 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4741 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4742 Initialise(&masm, p0.VnB(), pg_in);
4743 PRegisterM pg = p0.Merging();
4744
4745 // These are merging operations, so we have to initialise the result register.
4746 // We use a mixture of constructive and destructive operations.
4747
4748 InsrHelper(&masm, z31.VnD(), in);
4749 // Make a copy so we can check that constructive operations preserve zn.
4750 __ Mov(z30, z31);
4751
4752 // For constructive operations, use a different initial result value.
4753 __ Index(z29.VnB(), 0, -1);
4754
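  // Per active lane: Cls counts the leading bits that match the sign bit
  // (excluding the sign bit itself), Clz counts leading zero bits, and Cnt
  // counts set bits.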
4755 __ Mov(z0, z29);
4756 __ Cls(z0.VnB(), pg, z31.VnB());
4757 __ Mov(z1, z31);
4758 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
4759 __ Mov(z2, z29);
4760 __ Cnt(z2.VnS(), pg, z31.VnS());
4761 __ Mov(z3, z31);
4762 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
4763
4764 END();
4765
4766 if (CAN_RUN()) {
4767 RUN();
4768 // Check that constructive operations preserve their inputs.
4769 ASSERT_EQUAL_SVE(z30, z31);
4770
4771 // clang-format off
4772
4773 // cls (B)
4774 uint8_t expected_z0[] =
4775 // pg: 0 0 0 0 1 0 1 1
4776 // pg: 1 0 0 1 0 1 1 1
4777 // pg: 0 0 1 0 1 1 1 0
4778 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
4779 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
4780 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
4781 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
4782
4783 // clz (H) destructive
4784 uint16_t expected_z1[] =
4785 // pg: 0 0 0 1
4786 // pg: 0 1 1 1
4787 // pg: 0 0 1 0
4788 {0x0000, 0x0000, 0x0000, 16,
4789 0xfefc, 0, 0, 0,
4790 0x1234, 0x5678, 0, 0xdef0};
4791 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
4792
4793 // cnt (S)
4794 uint32_t expected_z2[] =
4795 // pg: 0 1
4796 // pg: 1 1
4797 // pg: 0 0
4798 {0xe9eaebec, 0,
4799 22, 16,
4800 0xf9fafbfc, 0xfdfeff00};
4801 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
4802
4803 // cnt (D) destructive
4804 uint64_t expected_z3[] =
4805 // pg: 1 1 0
4806 { 0, 38, 0x123456789abcdef0};
4807 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4808
4809 // clang-format on
4810 }
4811}
4812
4813TEST_SVE(sve_sxt) {
4814 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4815 START();
4816
4817 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4818
4819 // For simplicity, we re-use the same pg for various lane sizes.
4820 // For D lanes: 1, 1, 0
4821 // For S lanes: 1, 1, 1, 0, 0
4822 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4823 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4824 Initialise(&masm, p0.VnB(), pg_in);
4825 PRegisterM pg = p0.Merging();
4826
4827 // These are merging operations, so we have to initialise the result register.
4828 // We use a mixture of constructive and destructive operations.
4829
4830 InsrHelper(&masm, z31.VnD(), in);
4831 // Make a copy so we can check that constructive operations preserve zn.
4832 __ Mov(z30, z31);
4833
4834 // For constructive operations, use a different initial result value.
4835 __ Index(z29.VnB(), 0, -1);
4836
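  // Sxtb, Sxth and Sxtw sign-extend the low 8, 16 or 32 bits of each active
  // lane to the full lane width.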
4837 __ Mov(z0, z31);
4838 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
4839 __ Mov(z1, z29);
4840 __ Sxtb(z1.VnS(), pg, z31.VnS());
4841 __ Mov(z2, z31);
4842 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
4843 __ Mov(z3, z29);
4844 __ Sxth(z3.VnS(), pg, z31.VnS());
4845 __ Mov(z4, z31);
4846 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
4847 __ Mov(z5, z29);
4848 __ Sxtw(z5.VnD(), pg, z31.VnD());
4849
4850 END();
4851
4852 if (CAN_RUN()) {
4853 RUN();
4854 // Check that constructive operations preserve their inputs.
4855 ASSERT_EQUAL_SVE(z30, z31);
4856
4857 // clang-format off
4858
4859 // Sxtb (H) destructive
4860 uint64_t expected_z0[] =
4861 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4862 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
4863 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4864
4865 // Sxtb (S)
4866 uint64_t expected_z1[] =
4867 // pg: 0 1 1 1 0 0
4868 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
4869 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4870
4871 // Sxtb (D) destructive
4872 uint64_t expected_z2[] =
4873 // pg: 1 1 0
4874 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
4875 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4876
4877 // Sxth (S)
4878 uint64_t expected_z3[] =
4879 // pg: 0 1 1 1 0 0
4880 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
4881 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4882
4883 // Sxth (D) destructive
4884 uint64_t expected_z4[] =
4885 // pg: 1 1 0
4886 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
4887 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4888
4889 // Sxtw (D)
4890 uint64_t expected_z5[] =
4891 // pg: 1 1 0
4892 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
4893 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4894
4895 // clang-format on
4896 }
4897}
4898
4899TEST_SVE(sve_uxt) {
4900 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4901 START();
4902
4903 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4904
4905 // For simplicity, we re-use the same pg for various lane sizes.
4906 // For D lanes: 1, 1, 0
4907 // For S lanes: 1, 1, 1, 0, 0
4908 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4909 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4910 Initialise(&masm, p0.VnB(), pg_in);
4911 PRegisterM pg = p0.Merging();
4912
4913 // These are merging operations, so we have to initialise the result register.
4914 // We use a mixture of constructive and destructive operations.
4915
4916 InsrHelper(&masm, z31.VnD(), in);
4917 // Make a copy so we can check that constructive operations preserve zn.
4918 __ Mov(z30, z31);
4919
4920 // For constructive operations, use a different initial result value.
4921 __ Index(z29.VnB(), 0, -1);
4922
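  // Uxtb, Uxth and Uxtw zero-extend the low 8, 16 or 32 bits of each active
  // lane to the full lane width.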
4923 __ Mov(z0, z29);
4924 __ Uxtb(z0.VnH(), pg, z31.VnH());
4925 __ Mov(z1, z31);
4926 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
4927 __ Mov(z2, z29);
4928 __ Uxtb(z2.VnD(), pg, z31.VnD());
4929 __ Mov(z3, z31);
4930 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
4931 __ Mov(z4, z29);
4932 __ Uxth(z4.VnD(), pg, z31.VnD());
4933 __ Mov(z5, z31);
4934 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
4935
4936 END();
4937
4938 if (CAN_RUN()) {
4939 RUN();
4940 // clang-format off
4941
4942 // Uxtb (H)
4943 uint64_t expected_z0[] =
4944 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4945 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
4946 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4947
4948 // Uxtb (S) destructive
4949 uint64_t expected_z1[] =
4950 // pg: 0 1 1 1 0 0
4951 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
4952 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4953
4954 // Uxtb (D)
4955 uint64_t expected_z2[] =
4956 // pg: 1 1 0
4957 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
4958 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4959
4960 // Uxth (S) destructive
4961 uint64_t expected_z3[] =
4962 // pg: 0 1 1 1 0 0
4963 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
4964 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4965
4966 // Uxth (D)
4967 uint64_t expected_z4[] =
4968 // pg: 1 1 0
4969 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
4970 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4971
4972 // Uxtw (D) destructive
4973 uint64_t expected_z5[] =
4974 // pg: 1 1 0
4975 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
4976 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4977
4978 // clang-format on
4979 }
4980}
4981
4982TEST_SVE(sve_abs_neg) {
4983 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4984 START();
4985
4986 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4987
4988 // For simplicity, we re-use the same pg for various lane sizes.
4989 // For D lanes: 1, 1, 0
4990 // For S lanes: 1, 1, 1, 0, 0
4991 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4992 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4993 Initialise(&masm, p0.VnB(), pg_in);
4994 PRegisterM pg = p0.Merging();
4995
4997
4998 // These are merging operations, so we have to initialise the result register.
4999 // We use a mixture of constructive and destructive operations.
5000
5001 InsrHelper(&masm, z31.VnD(), in);
5002 // Make a copy so we can check that constructive operations preserve zn.
5003 __ Mov(z30, z31);
5004
5005 // For constructive operations, use a different initial result value.
5006 __ Index(z29.VnB(), 0, -1);
5007
5008 __ Mov(z0, z31);
5009 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
5010 __ Mov(z1, z29);
5011 __ Abs(z1.VnB(), pg, z31.VnB());
5012
5013 __ Mov(z2, z31);
5014 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
5015 __ Mov(z3, z29);
5016 __ Neg(z3.VnS(), pg, z31.VnS());
5017
5018 // The unpredicated form of `Neg` is implemented using `subr`.
5019 __ Mov(z4, z31);
5020 __ Neg(z4.VnB(), z4.VnB()); // destructive
5021 __ Mov(z5, z29);
5022 __ Neg(z5.VnD(), z31.VnD());
5023
5024 END();
5025
5026 if (CAN_RUN()) {
5027 RUN();
5028
5029 ASSERT_EQUAL_SVE(z30, z31);
5030
5031 // clang-format off
5032
5033 // Abs (D) destructive
5034 uint64_t expected_z0[] =
5035 // pg: 1 1 0
5036 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
5037 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5038
5039 // Abs (B)
5040 uint64_t expected_z1[] =
5041 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5042 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
5043 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5044
5045 // Neg (H) destructive
5046 uint64_t expected_z2[] =
5047 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5048 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
5049 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5050
5051 // Neg (S)
5052 uint64_t expected_z3[] =
5053 // pg: 0 1 1 1 0 0
5054 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
5055 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5056
5057 // Neg (B) destructive, unpredicated
5058 uint64_t expected_z4[] =
5059 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
5060 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5061
5062 // Neg (D) unpredicated
5063 uint64_t expected_z5[] =
5064 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
5065 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5066
5067 // clang-format on
5068 }
5069}
5070
5071TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
5072 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5073 START();
5074
5075 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
5076
5077 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
5078
5079 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
5080
5081 int index_s[] = {1, 3, 2, 31, -1};
5082
5083 int index_d[] = {31, 1};
5084
5085 // Initialise the register with a value that doesn't exist in the table.
5086 __ Dup(z9.VnB(), 0x1f);
5087 InsrHelper(&masm, z9.VnD(), table_inputs);
5088
5089 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
5090 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
5091 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
5092 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
5093
5094 InsrHelper(&masm, ind_b, index_b);
5095 InsrHelper(&masm, ind_h, index_h);
5096 InsrHelper(&masm, ind_s, index_s);
5097 InsrHelper(&masm, ind_d, index_d);
5098
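  // Tbl replaces each index lane with the table element it selects; an index
  // greater than or equal to the number of lanes in the vector yields zero.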
5099 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
5100
5101 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
5102
5103 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
5104
5105 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
5106
5107 END();
5108
5109 if (CAN_RUN()) {
5110 RUN();
5111
5112 // clang-format off
5113 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
5114 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
5115
5116 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
5117 0x5544, 0x7766, 0xddcc, 0x9988};
5118
5119 unsigned z28_expected[] =
5120 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
5121
5122 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
5123 // clang-format on
5124
5125 unsigned vl = config->sve_vl_in_bits();
5126 for (size_t i = 0; i < ArrayLength(index_b); i++) {
5127 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
5128 if (!core.HasSVELane(z26.VnB(), lane)) break;
5129 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
5130 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
5131 }
5132
5133 for (size_t i = 0; i < ArrayLength(index_h); i++) {
5134 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
5135 if (!core.HasSVELane(z27.VnH(), lane)) break;
5136 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
5137 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
5138 }
5139
5140 for (size_t i = 0; i < ArrayLength(index_s); i++) {
5141 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
5142 if (!core.HasSVELane(z28.VnS(), lane)) break;
5143 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
5144 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
5145 }
5146
5147 for (size_t i = 0; i < ArrayLength(index_d); i++) {
5148 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
5149 if (!core.HasSVELane(z29.VnD(), lane)) break;
5150 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
5151 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
5152 }
5153 }
5154}
5155
5156TEST_SVE(ldr_str_z_bi) {
5157 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5158 START();
5159
5160 int vl = config->sve_vl_in_bytes();
5161
5162 // The immediate can address [-256, 255] times the VL, so allocate enough
5163 // space to exceed that in both directions.
5164 int data_size = vl * 1024;
5165
5166 uint8_t* data = new uint8_t[data_size];
5167 memset(data, 0, data_size);
5168
5169 // Set the base half-way through the buffer so we can use negative indices.
5170 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5171
5172 __ Index(z1.VnB(), 1, 3);
5173 __ Index(z2.VnB(), 2, 5);
5174 __ Index(z3.VnB(), 3, 7);
5175 __ Index(z4.VnB(), 4, 11);
5176 __ Index(z5.VnB(), 5, 13);
5177 __ Index(z6.VnB(), 6, 2);
5178 __ Index(z7.VnB(), 7, 3);
5179 __ Index(z8.VnB(), 8, 5);
5180 __ Index(z9.VnB(), 9, 7);
5181
5182 // Encodable cases.
5183 __ Str(z1, SVEMemOperand(x0));
5184 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
5185 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
5186 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
5187 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
5188
5189 // Cases that fall back on `Adr`.
5190 __ Str(z6, SVEMemOperand(x0, 6 * vl));
5191 __ Str(z7, SVEMemOperand(x0, -7 * vl));
5192 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
5193 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
5194
5195 // Corresponding loads.
5196 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
5197 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
5198 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
5199 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
5200 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
5201
5202 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
5203 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
5204 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
5205 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
5206
5207 END();
5208
5209 if (CAN_RUN()) {
5210 RUN();
5211
5212 uint8_t* expected = new uint8_t[data_size];
5213 memset(expected, 0, data_size);
5214 uint8_t* middle = &expected[data_size / 2];
5215
5216 for (int i = 0; i < vl; i++) {
5217 middle[i] = (1 + (3 * i)) & 0xff; // z1
5218 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
5219 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
5220 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
5221 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
5222 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
5223 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
5224 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
5225 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
5226 }
5227
5228 ASSERT_EQUAL_MEMORY(expected, data, data_size);
5229
5230 ASSERT_EQUAL_SVE(z1, z11);
5231 ASSERT_EQUAL_SVE(z2, z12);
5232 ASSERT_EQUAL_SVE(z3, z13);
5233 ASSERT_EQUAL_SVE(z4, z14);
5234 ASSERT_EQUAL_SVE(z5, z15);
5235 ASSERT_EQUAL_SVE(z6, z16);
5236 ASSERT_EQUAL_SVE(z7, z17);
5237 ASSERT_EQUAL_SVE(z8, z18);
5238 ASSERT_EQUAL_SVE(z9, z19);
5239
5240 delete[] expected;
5241 }
5242 delete[] data;
5243}
5244
5245TEST_SVE(ldr_str_p_bi) {
5246 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5247 START();
5248
5249 int vl = config->sve_vl_in_bytes();
5250 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5251 int pl = vl / kZRegBitsPerPRegBit;
5252
5253 // The immediate can address [-256, 255] times the PL, so allocate enough
5254 // space to exceed that in both directions.
5255 int data_size = pl * 1024;
5256
5257 uint8_t* data = new uint8_t[data_size];
5258 memset(data, 0, data_size);
5259
5260 // Set the base half-way through the buffer so we can use negative indices.
5261 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5262
5263 uint64_t pattern[4] = {0x1010101011101111,
5264 0x0010111011000101,
5265 0x1001101110010110,
5266 0x1010110101100011};
5267 for (int i = 8; i <= 15; i++) {
5268 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
5269 Initialise(&masm,
5270 PRegister(i),
5271 pattern[3] * i,
5272 pattern[2] * i,
5273 pattern[1] * i,
5274 pattern[0] * i);
5275 }
5276
5277 // Encodable cases.
5278 __ Str(p8, SVEMemOperand(x0));
5279 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
5280 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
5281 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
5282
5283 // Cases that fall back on `Adr`.
5284 __ Str(p12, SVEMemOperand(x0, 6 * pl));
5285 __ Str(p13, SVEMemOperand(x0, -7 * pl));
5286 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
5287 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
5288
5289 // Corresponding loads.
5290 __ Ldr(p0, SVEMemOperand(x0));
5291 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
5292 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
5293 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
5294
5295 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
5296 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
5297 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
5298 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
5299
5300 END();
5301
5302 if (CAN_RUN()) {
5303 RUN();
5304
5305 uint8_t* expected = new uint8_t[data_size];
5306 memset(expected, 0, data_size);
5307 uint8_t* middle = &expected[data_size / 2];
5308
5309 for (int i = 0; i < pl; i++) {
5310 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
5311 size_t index = i / sizeof(pattern[0]);
5312 VIXL_ASSERT(index < ArrayLength(pattern));
5313 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
5314 // Each byte of `pattern` can be multiplied by 15 without carry.
5315 VIXL_ASSERT((byte * 15) <= 0xff);
5316
5317 middle[i] = byte * 8; // p8
5318 middle[(2 * pl) + i] = byte * 9; // p9
5319 middle[(-3 * pl) + i] = byte * 10; // p10
5320 middle[(255 * pl) + i] = byte * 11; // p11
5321 middle[(6 * pl) + i] = byte * 12; // p12
5322 middle[(-7 * pl) + i] = byte * 13; // p13
5323 middle[(314 * pl) + i] = byte * 14; // p14
5324 middle[(-314 * pl) + i] = byte * 15; // p15
5325 }
5326
5327 ASSERT_EQUAL_MEMORY(expected, data, data_size);
5328
5329 ASSERT_EQUAL_SVE(p0, p8);
5330 ASSERT_EQUAL_SVE(p1, p9);
5331 ASSERT_EQUAL_SVE(p2, p10);
5332 ASSERT_EQUAL_SVE(p3, p11);
5333 ASSERT_EQUAL_SVE(p4, p12);
5334 ASSERT_EQUAL_SVE(p5, p13);
5335 ASSERT_EQUAL_SVE(p6, p14);
5336 ASSERT_EQUAL_SVE(p7, p15);
5337
5338 delete[] expected;
5339 }
5340 delete[] data;
5341}
5342
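// Write `data` to `base + offset + (index * sizeof(data))`, emulating the
// memory effect of storing a single active lane.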
5343template <typename T>
5344static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
5345 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
5346}
5347
5348TEST_SVE(sve_ld1_st1_contiguous) {
5349 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5350 START();
5351
5352 int vl = config->sve_vl_in_bytes();
5353
5354 // The immediate can address [-8, 7] times the VL, so allocate enough space to
5355 // exceed that in both directions.
5356 int data_size = vl * 128;
5357
5358 uint8_t* data = new uint8_t[data_size];
5359 memset(data, 0, data_size);
5360
5361 // Set the base half-way through the buffer so we can use negative indices.
5362 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5363
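  // The Ptrue constraint (SVE_ALL, SVE_POW2, SVE_MUL3, SVE_VLn, ...) limits
  // how many leading lanes are active, so each store below writes a
  // predictable number of elements.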
5364 // Encodable scalar-plus-immediate cases.
5365 __ Index(z1.VnB(), 1, -3);
5366 __ Ptrue(p1.VnB());
5367 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
5368
5369 __ Index(z2.VnH(), -2, 5);
5370 __ Ptrue(p2.VnH(), SVE_MUL3);
5371 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
5372
5373 __ Index(z3.VnS(), 3, -7);
5374 __ Ptrue(p3.VnS(), SVE_POW2);
5375 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
5376
5377 // Encodable scalar-plus-scalar cases.
5378 __ Index(z4.VnD(), -4, 11);
5379 __ Ptrue(p4.VnD(), SVE_VL3);
5380 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
5381 __ Mov(x2, 17);
5382 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
5383
5384 __ Index(z5.VnD(), 6, -2);
5385 __ Ptrue(p5.VnD(), SVE_VL16);
5386 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
5387 __ Mov(x4, 6);
5388 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
5389
5390 // Unencodable cases fall back on `Adr`.
5391 __ Index(z6.VnS(), -7, 3);
5392 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
5393 // predicate bits when handling larger lanes.
5394 __ Ptrue(p6.VnB(), SVE_ALL);
5395 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
5396
5397 __ Index(z7.VnD(), 32, -11);
5398 __ Ptrue(p7.VnD(), SVE_MUL4);
5399 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
5400
5401 // Corresponding loads.
5402 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
5403 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5404 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5405 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5406 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
5407 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
5408
5409 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5410 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5411 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5412 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
5413
5414 // We can test ld1 by comparing the value loaded with the value stored. In
5415 // most cases, there are two complications:
5416 // - Loads have zeroing predication, so we have to clear the inactive
5417 // elements on our reference.
5418 // - We have to replicate any sign- or zero-extension.
5419
5420 // Ld1b(z8.VnB(), ...)
5421 __ Dup(z18.VnB(), 0);
5422 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
5423
5424 // Ld1b(z9.VnH(), ...)
5425 __ Dup(z19.VnH(), 0);
5426 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
5427
5428 // Ld1h(z10.VnS(), ...)
5429 __ Dup(z20.VnS(), 0);
5430 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
5431
5432 // Ld1b(z11.VnD(), ...)
5433 __ Dup(z21.VnD(), 0);
5434 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
5435
5436 // Ld1d(z12.VnD(), ...)
5437 __ Dup(z22.VnD(), 0);
5438 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
5439
5440 // Ld1w(z13.VnS(), ...)
5441 __ Dup(z23.VnS(), 0);
5442 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
5443
5444 // Ld1sb(z14.VnH(), ...)
5445 __ Dup(z24.VnH(), 0);
5446 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
5447
5448 // Ld1sh(z15.VnS(), ...)
5449 __ Dup(z25.VnS(), 0);
5450 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
5451
5452 // Ld1sb(z16.VnD(), ...)
5453 __ Dup(z26.VnD(), 0);
5454 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
5455
5456 // Ld1sw(z17.VnD(), ...)
5457 __ Dup(z27.VnD(), 0);
5458 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
5459
5460 END();
5461
5462 if (CAN_RUN()) {
5463 RUN();
5464
5465 uint8_t* expected = new uint8_t[data_size];
5466 memset(expected, 0, data_size);
5467 uint8_t* middle = &expected[data_size / 2];
5468
5469 int vl_b = vl / kBRegSizeInBytes;
5470 int vl_h = vl / kHRegSizeInBytes;
5471 int vl_s = vl / kSRegSizeInBytes;
5472 int vl_d = vl / kDRegSizeInBytes;
5473
5474 // Encodable cases.
5475
5476 // st1b { z1.b }, SVE_ALL
5477 for (int i = 0; i < vl_b; i++) {
5478 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
5479 }
5480
5481 // st1b { z2.h }, SVE_MUL3
5482 int vl_h_mul3 = vl_h - (vl_h % 3);
5483 for (int i = 0; i < vl_h_mul3; i++) {
5484 MemoryWrite(middle, 7 * vl, i, static_cast<uint8_t>(-2 + (5 * i)));
5485 }
5486
5487 // st1h { z3.s }, SVE_POW2
5488 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
5489 for (int i = 0; i < vl_s_pow2; i++) {
5490 MemoryWrite(middle, -8 * vl, i, static_cast<uint16_t>(3 - (7 * i)));
5491 }
5492
5493 // st1b { z4.d }, SVE_VL3
5494 if (vl_d >= 3) {
5495 for (int i = 0; i < 3; i++) {
5496 MemoryWrite(middle,
5497 (8 * vl) + 17,
5498 i,
5499 static_cast<uint8_t>(-4 + (11 * i)));
5500 }
5501 }
5502
5503 // st1d { z5.d }, SVE_VL16
5504 if (vl_d >= 16) {
5505 for (int i = 0; i < 16; i++) {
5506 MemoryWrite(middle,
5507 (10 * vl) + (6 * kDRegSizeInBytes),
5508 i,
5509 static_cast<uint64_t>(6 - (2 * i)));
5510 }
5511 }
5512
5513 // Unencodable cases.
5514
5515 // st1w { z6.s }, SVE_ALL
5516 for (int i = 0; i < vl_s; i++) {
5517 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
5518 }
5519
5520 // st1w { z7.d }, SVE_MUL4
5521 int vl_d_mul4 = vl_d - (vl_d % 4);
5522 for (int i = 0; i < vl_d_mul4; i++) {
5523 MemoryWrite(middle, 22 * vl, i, static_cast<uint32_t>(32 + (-11 * i)));
5524 }
5525
5526 ASSERT_EQUAL_MEMORY(expected, data, data_size);
5527
5528 // Check that we loaded back the expected values.
5529
5530 ASSERT_EQUAL_SVE(z18, z8);
5531 ASSERT_EQUAL_SVE(z19, z9);
5532 ASSERT_EQUAL_SVE(z20, z10);
5533 ASSERT_EQUAL_SVE(z21, z11);
5534 ASSERT_EQUAL_SVE(z22, z12);
5535 ASSERT_EQUAL_SVE(z23, z13);
5536 ASSERT_EQUAL_SVE(z24, z14);
5537 ASSERT_EQUAL_SVE(z25, z15);
5538 ASSERT_EQUAL_SVE(z26, z16);
5539 ASSERT_EQUAL_SVE(z27, z17);
5540
5541 delete[] expected;
5542 }
5543 delete[] data;
5544}
5545
5546typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
5547 const ZRegister& zn,
5548 const IntegerOperand imm);
5549
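// Apply `macro` twice: once destructively (zd aliases zn) and once with a
// separate source register (the movprfx case), then check that both forms
// produce `zd_expected` and that the constructive form preserves its input.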
5550template <typename F, typename Td, typename Tn>
5551static void IntWideImmHelper(Test* config,
5552 F macro,
5553 unsigned lane_size_in_bits,
5554 const Tn& zn_inputs,
5555 IntegerOperand imm,
5556 const Td& zd_expected) {
5557 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5558 START();
5559
5560 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
5561 InsrHelper(&masm, zd1, zn_inputs);
5562
5563 // Also test with a different zn, to test the movprfx case.
5564 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
5565 InsrHelper(&masm, zn, zn_inputs);
5566 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
5567 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
5568
5569 // Make a copy so we can check that constructive operations preserve zn.
5570 __ Mov(zn_copy, zn);
5571
5572 {
5573 UseScratchRegisterScope temps(&masm);
5574 // The MacroAssembler needs a P scratch register for some of these macros,
5575 // and it doesn't have one by default.
5576 temps.Include(p3);
5577
5578 (masm.*macro)(zd1, zd1, imm);
5579 (masm.*macro)(zd2, zn, imm);
5580 }
5581
5582 END();
5583
5584 if (CAN_RUN()) {
5585 RUN();
5586
5587 ASSERT_EQUAL_SVE(zd_expected, zd1);
5588
5589 // Check that the form that needs movprfx (a distinct destination and
5590 // source register) produces the same result as the destructive form.
5591 ASSERT_EQUAL_SVE(zd_expected, zd2);
5592
5593 ASSERT_EQUAL_SVE(zn_copy, zn);
5594 }
5595}
5596
5597TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
5598 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
5599 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
5600 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
5601 int64_t in_d[] = {1, 10, 10000, 1000000};
5602
5603 IntWideImmFn fn = &MacroAssembler::Smax;
5604
5605 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
5606 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
5607 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
5608 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
5609
5610 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
5611 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
5612 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
5613 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
5614
5615 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
5616 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
5617 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
5618
5619 // The immediate is in the range [-128, 127], but the macro is able to
5620 // synthesise unencodable immediates.
5621 // B-sized lanes cannot take an immediate out of the range [-128, 127].
5622 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
5623 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5624 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
5625}
5626
5627TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
5628 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
5629 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
5630 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
5631 int64_t in_d[] = {1, 10, 10000, 1000000};
5632
5633 IntWideImmFn fn = &MacroAssembler::Smin;
5634
5635 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
5636 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
5637 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
5638 int64_t exp_d_1[] = {1, 10, 99, 99};
5639
5640 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
5641 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
5642 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
5643 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
5644
5645 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
5646 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
5647 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
5648
5649 // The immediate is in the range [-128, 127], but the macro is able to
5650 // synthesise unencodable immediates.
5651 // B-sized lanes cannot take an immediate out of the range [-128, 127].
5652 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
5653 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5654 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
5655}
5656
5657TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
5658 int in_b[] = {0, 255, 127, 0x80, 1, 55};
5659 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
5660 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
5661 int64_t in_d[] = {1, 10, 10000, 1000000};
5662
5663 IntWideImmFn fn = &MacroAssembler::Umax;
5664
5665 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
5666 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
5667 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
5668 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
5669
5670 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
5671 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
5672 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
5673 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
5674
5675 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
5676 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
5677 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
5678
5679 // The immediate is in the range [0, 255], but the macro is able to
5680 // synthesise unencodable immediates.
5681 // B-sized lanes cannot take an immediate out of the range [0, 255].
5682 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
5683 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5684 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
5685}
5686
5687TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
5688 int in_b[] = {0, 255, 127, 0x80, 1, 55};
5689 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
5690 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
5691 int64_t in_d[] = {1, 10, 10000, 1000000};
5692
5693 IntWideImmFn fn = &MacroAssembler::Umin;
5694
5695 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
5696 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
5697 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
5698 int64_t exp_d_1[] = {1, 10, 99, 99};
5699
5700 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
5701 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
5702 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
5703 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
5704
5705 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
5706 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
5707 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
5708
5709 // The immediate is in the range [0, 255], but the macro is able to
5710 // synthesise unencodable immediates.
5711 // B-sized lanes cannot take an immediate out of the range [0, 255].
5712 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
5713 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5714 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
5715}
5716
5717TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
5718 int in_b[] = {11, -1, 7, -3};
5719 int in_h[] = {111, -1, 17, -123};
5720 int in_s[] = {11111, -1, 117, -12345};
5721 int64_t in_d[] = {0x7fffffff, 0x80000000};
5722
5723 IntWideImmFn fn = &MacroAssembler::Mul;
5724
5725 int exp_b_1[] = {66, -6, 42, -18};
5726 int exp_h_1[] = {-14208, 128, -2176, 15744};
5727 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
5728 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
5729
5730 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
5731 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
5732 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5733 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
5734
5735 int exp_h_2[] = {-28305, 255, -4335, 31365};
5736 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
5737 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
5738
5739 // The immediate is in the range [-128, 127], but the macro is able to
5740 // synthesise unencodable immediates.
5741 // B-sized lanes cannot take an immediate out of the range [-128, 127].
5742 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
5743 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5744 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
5745
5746 // Integer overflow on multiplication.
5747 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
5748
5749 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
5750}
5751
5752TEST_SVE(sve_int_wide_imm_unpredicated_add) {
5753 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5754 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5755 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5756 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5757
5758 IntWideImmFn fn = &MacroAssembler::Add;
5759
5760 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
5761 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
5762 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
5763 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
5764
5765 // Encodable with `add` (shift 0).
5766 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
5767 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5768 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5769 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
5770
5771 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
5772 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
5773 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
5774
5775 // Encodable with `add` (shift 8).
5776 // B-sized lanes cannot take a shift of 8.
5777 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5778 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5779 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
5780
5781 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
5782
5783 // The macro is able to synthesise unencodable immediates.
5784 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
5785}
5786
5787TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
5788 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5789 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5790 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5791 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5792
5793 IntWideImmFn fn = &MacroAssembler::Sqadd;
5794
5795 unsigned exp_b_1[] = {0x80, 0x00, 0x91, 0x80};
5796 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
5797 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
5798 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
5799
5800 // Encodable with `sqadd` (shift 0).
5801 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
5802 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5803 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5804 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
5805
5806 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
5807 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
5808 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
5809
5810 // Encodable with `sqadd` (shift 8).
5811 // B-sized lanes cannot take a shift of 8.
5812 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5813 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5814 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
5815
5816 unsigned exp_s_3[] = {0x80808181, 0x7fffffff, 0xab29aaaa, 0xf07ff0f0};
5817
5818 // The macro is able to synthesise unencodable immediates.
5819 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
5820}
5821
5822TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
5823 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5824 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5825 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5826 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5827
5828 IntWideImmFn fn = &MacroAssembler::Uqadd;
5829
5830 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
5831 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
5832 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
5833 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
5834
5835 // Encodable with `uqadd` (shift 0).
5836 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
5837 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5838 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5839 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
5840
5841 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
5842 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
5843 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
5844
5845 // Encodable with `uqadd` (shift 8).
5846 // B-sized lanes cannot take a shift of 8.
5847 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5848 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5849 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
5850
5851 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
5852
5853 // The macro is able to synthesise unencodable immediates.
5854 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
5855}
5856
5857TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
5858 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5859 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5860 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5861 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5862
5863 IntWideImmFn fn = &MacroAssembler::Sub;
5864
5865 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
5866 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
5867 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
5868 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
5869
5870 // Encodable with `sub` (shift 0).
5871 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
5872 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5873 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5874 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
5875
5876 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
5877 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
5878 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
5879
5880 // Encodable with `sub` (shift 8).
5881 // B-sized lanes cannot take a shift of 8.
5882 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5883 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5884 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
5885
5886 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
5887
5888 // The macro is able to synthesise unencodable immediates.
5889 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
5890}
5891
5892TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
5893 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5894 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5895 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5896 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5897
5898 IntWideImmFn fn = &MacroAssembler::Sqsub;
5899
5900 unsigned exp_b_1[] = {0x00, 0x7f, 0x7f, 0x7e};
5901 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
5902 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
5903 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
5904
5905 // Encodable with `sqsub` (shift 0).
5906 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
5907 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5908 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5909 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
5910
5911 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
5912 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
5913 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
5914
5915 // Encodable with `sqsub` (shift 8).
5916 // B-sized lanes cannot take a shift of 8.
5917 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5918 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5919 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
5920
5921 unsigned exp_s_3[] = {0x80000000, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
5922
5923 // The macro is able to synthesise unencodable immediates.
5924 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
5925}
5926
5927TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
5928 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5929 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5930 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5931 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5932
5933 IntWideImmFn fn = &MacroAssembler::Uqsub;
5934
5935 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
5936 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
5937 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
5938 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
5939
5940 // Encodable with `uqsub` (shift 0).
5941 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
5942 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5943 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5944 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
5945
5946 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
5947 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
5948 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
5949
5950 // Encodable with `uqsub` (shift 8).
5951 // B-sized lanes cannot take a shift of 8.
5952 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5953 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5954 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
5955
5956 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
5957
5958 // The macro is able to synthesise unencodable immediates.
5959 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
5960}
5961
5962TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
5963 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5964 START();
5965
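  // With an immediate first operand, `Sub(zd, imm, zn)` computes
  // (imm - zn[i]) for each lane, so the macro selects the reversed-subtract
  // (`subr`) encoding whenever the immediate is encodable.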
5966 // Encodable with `subr` (shift 0).
5967 __ Index(z0.VnD(), 1, 1);
5968 __ Sub(z0.VnD(), 100, z0.VnD());
5969 __ Index(z1.VnS(), 0x7f, 1);
5970 __ Sub(z1.VnS(), 0xf7, z1.VnS());
5971 __ Index(z2.VnH(), 0xaaaa, 0x2222);
5972 __ Sub(z2.VnH(), 0x80, z2.VnH());
5973 __ Index(z3.VnB(), 133, 1);
5974 __ Sub(z3.VnB(), 255, z3.VnB());
5975
5976 // Encodable with `subr` (shift 8).
5977 __ Index(z4.VnD(), 256, -1);
5978 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
5979 __ Index(z5.VnS(), 0x7878, 1);
5980 __ Sub(z5.VnS(), 0x8000, z5.VnS());
5981 __ Index(z6.VnH(), 0x30f0, -1);
5982 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
5983 // B-sized lanes cannot take a shift of 8.
5984
5985 // Select with movprfx.
5986 __ Index(z31.VnD(), 256, 4001);
5987 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
5988
5989 // Out of immediate encodable range of `sub`.
  __ Index(z30.VnS(), 0x11223344, 1);
  __ Sub(z8.VnS(), 0x88776655, z30.VnS());

  END();

  if (CAN_RUN()) {
    RUN();

    int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
    ASSERT_EQUAL_SVE(expected_z0, z0.VnD());

    int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
    ASSERT_EQUAL_SVE(expected_z1, z1.VnS());

    int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
    ASSERT_EQUAL_SVE(expected_z2, z2.VnH());

    int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
    ASSERT_EQUAL_SVE(expected_z3, z3.VnB());

    int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
    ASSERT_EQUAL_SVE(expected_z4, z4.VnD());

    int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
    ASSERT_EQUAL_SVE(expected_z5, z5.VnS());

    int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
    ASSERT_EQUAL_SVE(expected_z6, z6.VnH());

    int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
    ASSERT_EQUAL_SVE(expected_z7, z7.VnD());

    int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
    ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
  }
}

TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Immediates which can be encoded in the instructions.
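  // SVE `fdup` takes an 8-bit floating-point immediate which, like FMOV's
  // imm8, can represent values of the form +/-(n / 16) * 2^r with n in
  // [16, 31] and r in [-3, 4]; every constant below fits that pattern.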
  __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
  __ Fdup(z1.VnS(), Float16(2.0));
  __ Fdup(z2.VnD(), Float16(3.875));
  __ Fdup(z3.VnH(), 8.0f);
  __ Fdup(z4.VnS(), -4.75f);
  __ Fdup(z5.VnD(), 0.5f);
  __ Fdup(z6.VnH(), 1.0);
  __ Fdup(z7.VnS(), 2.125);
  __ Fdup(z8.VnD(), -13.0);

  // Immediates which cannot be encoded in the instructions.
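  // Zero, the infinities, 255.0 and 12.3456 fall outside the imm8 set, so the
  // macro presumably synthesises them another way (for example by moving the
  // raw bit pattern through a scratch register and broadcasting it).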
  __ Fdup(z10.VnH(), Float16(0.0));
  __ Fdup(z11.VnH(), kFP16PositiveInfinity);
  __ Fdup(z12.VnS(), 255.0f);
  __ Fdup(z13.VnS(), kFP32NegativeInfinity);
  __ Fdup(z14.VnD(), 12.3456);
  __ Fdup(z15.VnD(), kFP64PositiveInfinity);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(0xc500, z0.VnH());
    ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
    ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
    ASSERT_EQUAL_SVE(0x4800, z3.VnH());
    ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
    ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
    ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
    ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
    ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());

    ASSERT_EQUAL_SVE(0x0000, z10.VnH());
    ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
    ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
    ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
    ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
    ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
  }
}

TEST_SVE(sve_andv_eorv_orv) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
  InsrHelper(&masm, z31.VnD(), in);

  // For simplicity, we re-use the same pg for various lane sizes.
  // For D lanes: 1, 1, 0
  // For S lanes: 1, 1, 1, 0, 0
  // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
  int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
  Initialise(&masm, p0.VnB(), pg_in);

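  // Each reduction combines (AND, EOR or ORR) the active lanes of zn and
  // writes the scalar result to the destination V register; the checks at the
  // end of the test confirm that the bits above the result are left clear.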
  // Make a copy so we can check that constructive operations preserve zn.
  __ Mov(z0, z31);
  __ Andv(b0, p0, z0.VnB());  // destructive
  __ Andv(h1, p0, z31.VnH());
  __ Mov(z2, z31);
  __ Andv(s2, p0, z2.VnS());  // destructive
  __ Andv(d3, p0, z31.VnD());

  __ Eorv(b4, p0, z31.VnB());
  __ Mov(z5, z31);
  __ Eorv(h5, p0, z5.VnH());  // destructive
  __ Eorv(s6, p0, z31.VnS());
  __ Mov(z7, z31);
  __ Eorv(d7, p0, z7.VnD());  // destructive

  __ Mov(z8, z31);
  __ Orv(b8, p0, z8.VnB());  // destructive
  __ Orv(h9, p0, z31.VnH());
  __ Mov(z10, z31);
  __ Orv(s10, p0, z10.VnS());  // destructive
  __ Orv(d11, p0, z31.VnD());

  END();

  if (CAN_RUN()) {
    RUN();

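    // pg_in only describes 20 byte lanes, so the first set of expectations
    // applies only when the vector is no longer than 160 bits (such as the
    // vl128 simulator configuration); longer vectors take the `else` branch.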
    if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
      ASSERT_EQUAL_64(0x10, d0);
      ASSERT_EQUAL_64(0x1010, d1);
      ASSERT_EQUAL_64(0x33331111, d2);
      ASSERT_EQUAL_64(0x7777555533331111, d3);
      ASSERT_EQUAL_64(0xbf, d4);
      ASSERT_EQUAL_64(0xedcb, d5);
      ASSERT_EQUAL_64(0x44444444, d6);
      ASSERT_EQUAL_64(0x7777555533331111, d7);
      ASSERT_EQUAL_64(0xff, d8);
      ASSERT_EQUAL_64(0xffff, d9);
      ASSERT_EQUAL_64(0x77775555, d10);
      ASSERT_EQUAL_64(0x7777555533331111, d11);
    } else {
      ASSERT_EQUAL_64(0, d0);
      ASSERT_EQUAL_64(0x0010, d1);
      ASSERT_EQUAL_64(0x00110011, d2);
      ASSERT_EQUAL_64(0x0011001100110011, d3);
      ASSERT_EQUAL_64(0x62, d4);
      ASSERT_EQUAL_64(0x0334, d5);
      ASSERT_EQUAL_64(0x8899aabb, d6);
      ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
      ASSERT_EQUAL_64(0xff, d8);
      ASSERT_EQUAL_64(0xffff, d9);
      ASSERT_EQUAL_64(0xffffffff, d10);
      ASSERT_EQUAL_64(0xffffffffffffffff, d11);
    }

    // Check the upper lanes above the top of the V register are all clear.
    for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
      ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
    }
  }
}

}  // namespace aarch64
}  // namespace vixl