// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>
#include <unistd.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
  // We never free this memory, but we need it to live for as long as the
  // static linked list of tests, and this is the easiest way to do it.
  Test* test = new Test(name, fn);
  test->set_sve_vl_in_bits(vl);
  return test;
}

// The TEST_SVE macro works just like the usual TEST macro, but the resulting
// function receives a `Test* config` argument, to allow it to query the
// vector length.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// On the Simulator, run SVE tests with several vector lengths, including the
// extreme values and an intermediate value that isn't a power of two.

#define TEST_SVE(name)                                                  \
  void Test##name(Test* config);                                        \
  Test* test_##name##_list[] =                                          \
      {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name),    \
       MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name),    \
       MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  simulator.SetVectorLengthInBits(config->sve_vl_in_bits())

#else
// Otherwise, just use whatever the hardware provides.
static const int kSVEVectorLengthInBits =
    CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
        ? CPU::ReadSVEVectorLengthInBits()
        : 0;

#define TEST_SVE(name)                                                     \
  void Test##name(Test* config);                                           \
  Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits,         \
                                           "AARCH64_ASM_" #name "_vlauto", \
                                           &Test##name);                   \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  USE(config)

#endif
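
// For example (the test name below is illustrative only), a test defined as:
//
//   TEST_SVE(sve_example) {
//     SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
//     ...
//   }
//
// receives `config` as a `Test*`, so the vector length selected for the run is
// available through `config->sve_vl_in_bits()`.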

// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
// is optimised for call-site clarity, not generated code quality, so it
// doesn't exist in the MacroAssembler itself.
//
// Usage:
//
//   int values[] = { 42, 43, 44 };
//   InsrHelper(&masm, z0.VnS(), values);  // Sets z0.S = { ..., 42, 43, 44 }
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane.
template <typename T, size_t N>
void InsrHelper(MacroAssembler* masm,
                const ZRegister& zdn,
                const T (&values)[N]) {
  for (size_t i = 0; i < N; i++) {
    masm->Insr(zdn, values[i]);
  }
}

// Conveniently initialise P registers with scalar bit patterns. The
// destination lane size is ignored. This is optimised for call-site clarity,
// not generated code quality.
//
// Usage:
//
//   Initialise(&masm, p0, 0x1234);  // Sets p0 = 0b'0001'0010'0011'0100
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value3,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  // Generate a literal pool, as in the array form.
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  masm->Adr(temp, &data);
  masm->Ldr(pd, SVEMemOperand(temp));
  masm->B(&done);
  {
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    masm->dc64(value0);
    masm->dc64(value1);
    masm->dc64(value2);
    masm->dc64(value3);
  }
  masm->Bind(&done);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, value2, value1, value0);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, 0, value1, value0);
}
void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
  Initialise(masm, pd, 0, 0, 0, value0);
}

// Conveniently initialise P registers by lane. This is optimised for call-site
// clarity, not generated code quality.
//
// Usage:
//
//   int values[] = { 0x0, 0x1, 0x2 };
//   Initialise(&masm, p0.VnS(), values);  // Sets p0 = 0b'0000'0001'0010
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane. Unspecified lanes are set to 0 (inactive).
//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lower bit, and writes zero to the upper bits,
// but other (encodable) values can be specified if required by the test.
template <typename T, size_t N>
void Initialise(MacroAssembler* masm,
                const PRegisterWithLaneSize& pd,
                const T (&values)[N]) {
  // Turn the array into 64-bit chunks.
  uint64_t chunks[4] = {0, 0, 0, 0};
  VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);

  int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT((64 % p_bits_per_lane) == 0);
  VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);

  uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);

  VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
  size_t bit = 0;
  for (int n = static_cast<int>(N - 1); n >= 0; n--) {
    VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
    uint64_t value = values[n] & p_lane_mask;
    chunks[bit / 64] |= value << (bit % 64);
    bit += p_bits_per_lane;
  }

  Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  __ Mov(x0, 0x0123456789abcdef);

  // Test basic `Insr` behaviour.
  __ Insr(z0.VnB(), 1);
  __ Insr(z0.VnB(), 2);
  __ Insr(z0.VnB(), x0);
  __ Insr(z0.VnB(), -42);
  __ Insr(z0.VnB(), 0);

  // Test array inputs.
  int z1_inputs[] = {3, 4, 5, -42, 0};
  InsrHelper(&masm, z1.VnH(), z1_inputs);

  // Test that sign-extension works as intended for various lane sizes.
  __ Dup(z2.VnD(), 0);            // Clear the register first.
  __ Insr(z2.VnB(), -42);         // 0xd6
  __ Insr(z2.VnB(), 0xfe);        // 0xfe
  __ Insr(z2.VnH(), -42);         // 0xffd6
  __ Insr(z2.VnH(), 0xfedc);      // 0xfedc
  __ Insr(z2.VnS(), -42);         // 0xffffffd6
  __ Insr(z2.VnS(), 0xfedcba98);  // 0xfedcba98
  // Use another register for VnD(), so we can support 128-bit Z registers.
  __ Insr(z3.VnD(), -42);                 // 0xffffffffffffffd6
  __ Insr(z3.VnD(), 0xfedcba9876543210);  // 0xfedcba9876543210

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that array checks work properly on a register initialised
    // lane-by-lane.
    int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
    ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());

    // Test that lane-by-lane checks work properly on a register initialised
    // by array.
    for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
      // The rightmost (highest-indexed) array element maps to the
      // lowest-numbered lane.
      int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
    }

    uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
    ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
    uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
    ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
  }
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_p) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Simple cases: move boolean (0 or 1) values.

  int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
  Initialise(&masm, p1.VnH(), p1_inputs);

  int p2_inputs[] = {1, 1, 0, 1};
  Initialise(&masm, p2.VnS(), p2_inputs);

  int p3_inputs[] = {0, 1};
  Initialise(&masm, p3.VnD(), p3_inputs);

  // Advanced cases: move numeric value into architecturally-ignored bits.

  // B-sized lanes get one bit in a P register, so there are no ignored bits.

  // H-sized lanes get two bits in a P register.
  int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
  Initialise(&masm, p4.VnH(), p4_inputs);

  // S-sized lanes get four bits in a P register.
  int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
  Initialise(&masm, p5.VnS(), p5_inputs);

  // D-sized lanes get eight bits in a P register.
  int p6_inputs[] = {0x81, 0xcc, 0x55};
  Initialise(&masm, p6.VnD(), p6_inputs);

  // The largest possible P register has 32 bytes.
  int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
                     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                     0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
  Initialise(&masm, p7.VnD(), p7_inputs);

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that lane-by-lane checks work properly. The rightmost
    // (highest-indexed) array element maps to the lowest-numbered lane.
    for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
    }

    // Test that array checks work properly on predicates initialised with a
    // possibly-different lane size.
    // 0b...11'10'01'00'01'10'11
    int p4_expected[] = {0x39, 0x1b};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnD());

    ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());

    // 0b...10000001'11001100'01010101
    int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnH());

    // 0b...10011100'10011101'10011110'10011111
    int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
                         1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
  }
}

// Test that writes to V registers clear the high bits of the corresponding Z
// register.
TEST_SVE(sve_v_write_clear) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                          CPUFeatures::kFP,
                          CPUFeatures::kSVE);
  START();

  // The Simulator has two mechanisms for writing V registers:
  //  - Write*Register, calling through to SimRegisterBase::Write.
  //  - LogicVRegister::ClearForWrite followed by one or more lane updates.
  // Try to cover both variants.

  // Prepare some known inputs.
  uint8_t data[kQRegSizeInBytes];
  for (size_t i = 0; i < kQRegSizeInBytes; i++) {
    data[i] = 42 + i;
  }
  __ Mov(x10, reinterpret_cast<uintptr_t>(data));
  __ Fmov(d30, 42.0);

  // Use Index to label the lane indices, so failures are easy to detect and
  // diagnose.
  __ Index(z0.VnB(), 0, 1);
  __ Index(z1.VnB(), 0, 1);
  __ Index(z2.VnB(), 0, 1);
  __ Index(z3.VnB(), 0, 1);
  __ Index(z4.VnB(), 0, 1);

  __ Index(z10.VnB(), 0, -1);
  __ Index(z11.VnB(), 0, -1);
  __ Index(z12.VnB(), 0, -1);
  __ Index(z13.VnB(), 0, -1);
  __ Index(z14.VnB(), 0, -1);

  // Instructions using Write*Register (and SimRegisterBase::Write).
  __ Ldr(b0, MemOperand(x10));
  __ Fcvt(h1, d30);
  __ Fmov(s2, 1.5f);
  __ Fmov(d3, d30);
  __ Ldr(q4, MemOperand(x10));

  // Instructions using LogicVRegister::ClearForWrite.
  // These also (incidentally) test that across-lane instructions correctly
  // ignore the high-order Z register lanes.
  __ Sminv(b10, v10.V16B());
  __ Addv(h11, v11.V4H());
  __ Saddlv(s12, v12.V8H());
  __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
  __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Check the Q part first.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1);  // 42.0 (f16)
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2);  // 1.5 (f32)
    ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3);  // 42.0 (f64)
    ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10);  // -15
    //  0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
    //  0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13);  // [-8] x 8
    //    [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //  + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    // -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
    ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);

    // Check that the upper lanes are all clear.
    for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
      ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
    }
  }
}

static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
  int za_inputs[] = {-39, 1, -3, 2};
  int zn_inputs[] = {-5, -20, 9, 8};
  int zm_inputs[] = {9, -5, 4, 5};

  ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
  ZRegister za = z1.WithLaneSize(lane_size_in_bits);
  ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
  ZRegister zm = z3.WithLaneSize(lane_size_in_bits);

  // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
  InsrHelper(&masm, zd, zd_inputs);
  InsrHelper(&masm, za, za_inputs);
  InsrHelper(&masm, zn, zn_inputs);
  InsrHelper(&masm, zm, zm_inputs);

  int p0_inputs[] = {1, 1, 0, 1};
  int p1_inputs[] = {1, 0, 1, 1};
  int p2_inputs[] = {0, 1, 1, 1};
  int p3_inputs[] = {1, 1, 1, 0};

  Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
  Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
  Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
  Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);

  // The Mla macro automatically selects between mla, mad and movprfx + mla
  // based on what registers are aliased.
  ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
  ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);

  __ Mov(mla_da_result, za);
  __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);

  __ Mov(mla_dn_result, zn);
  __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);

  __ Mov(mla_dm_result, zm);
  __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);

  __ Mov(mla_d_result, zd);
  __ Mla(mla_d_result, p3.Merging(), za, zn, zm);

  // The Mls macro automatically selects between mls, msb and movprfx + mls
  // based on what registers are aliased.
  ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
  ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);

  __ Mov(mls_da_result, za);
  __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);

  __ Mov(mls_dn_result, zn);
  __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);

  __ Mov(mls_dm_result, zm);
  __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);

  __ Mov(mls_d_result, zd);
  __ Mls(mls_d_result, p3.Merging(), za, zn, zm);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));

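    // Expected results, computed lane by lane: mla = za + (zn * zm) and
    // mls = za - (zn * zm). Lanes that are inactive in the governing predicate
    // keep the value that was moved into the destination register.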
    int mla[] = {-84, 101, 33, 42};
    int mls[] = {6, -99, -39, -38};

    int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);

    int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);

    int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);

    int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);

    int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);

    int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);

    int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);

    int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
  }
}

TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }

TEST_SVE(sve_bitwise_unpredicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  InsrHelper(&masm, z8.VnD(), z8_inputs);
  uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
  InsrHelper(&masm, z15.VnD(), z15_inputs);

  __ And(z1.VnD(), z8.VnD(), z15.VnD());
  __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
  __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
  __ Orr(z4.VnD(), z8.VnD(), z15.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
    uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
    uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
    uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
    uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
  }
}

TEST_SVE(sve_last_r) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
  START();

  __ Pfalse(p1.VnB());
  int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
  int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
  Initialise(&masm, p2.VnB(), p2_inputs);
  Initialise(&masm, p3.VnB(), p3_inputs);
  __ Ptrue(p4.VnB());

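  // `Lastb` extracts the last active element of the vector; `Lasta` extracts
  // the element immediately after it, wrapping to lane 0. When no lanes are
  // active, `Lastb` takes the highest-numbered lane and `Lasta` takes lane 0,
  // which is what the all-false `p1` cases below rely on.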
  __ Index(z0.VnB(), 0x10, 1);
  __ Lasta(x1, p1, z0.VnB());
  __ Lastb(x2, p1, z0.VnB());
  __ Lasta(x3, p2, z0.VnB());
  __ Lastb(x4, p2, z0.VnB());
  __ Lasta(x5, p3, z0.VnB());
  __ Lastb(x6, p3, z0.VnB());
  __ Lasta(x7, p4, z0.VnB());

  __ Punpklo(p3.VnH(), p3.VnB());
  __ Index(z0.VnH(), 0x1110, 1);
  __ Lasta(x9, p1, z0.VnH());
  __ Lastb(x10, p3, z0.VnH());
  __ Lasta(x12, p4, z0.VnH());

  __ Index(z0.VnS(), 0x11111110, 1);
  __ Lastb(x13, p1, z0.VnS());
  __ Lasta(x14, p2, z0.VnS());
  __ Lastb(x18, p4, z0.VnS());

  __ Index(z0.VnD(), 0x1111111111111110, 1);
  __ Lasta(x19, p1, z0.VnD());
  __ Lastb(x20, p3, z0.VnD());
  __ Lasta(x21, p3, z0.VnD());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_64(0x0000000000000010, x1);
    ASSERT_EQUAL_64(0x0000000000000011, x3);
    ASSERT_EQUAL_64(0x0000000000000010, x4);
    ASSERT_EQUAL_64(0x0000000000000019, x5);
    ASSERT_EQUAL_64(0x0000000000000018, x6);
    ASSERT_EQUAL_64(0x0000000000000010, x7);
    ASSERT_EQUAL_64(0x0000000000001110, x9);
    ASSERT_EQUAL_64(0x0000000000001110, x12);
    ASSERT_EQUAL_64(0x0000000011111111, x14);
    ASSERT_EQUAL_64(0x1111111111111110, x19);

    int vl = core.GetSVELaneCount(kBRegSize) * 8;
    switch (vl) {
      case 128:
        ASSERT_EQUAL_64(0x000000000000001f, x2);
        ASSERT_EQUAL_64(0x0000000000001116, x10);
        ASSERT_EQUAL_64(0x0000000011111113, x13);
        ASSERT_EQUAL_64(0x0000000011111113, x18);
        ASSERT_EQUAL_64(0x1111111111111111, x20);
        ASSERT_EQUAL_64(0x1111111111111110, x21);
        break;
      case 384:
        ASSERT_EQUAL_64(0x000000000000003f, x2);
        ASSERT_EQUAL_64(0x0000000000001118, x10);
        ASSERT_EQUAL_64(0x000000001111111b, x13);
        ASSERT_EQUAL_64(0x000000001111111b, x18);
        ASSERT_EQUAL_64(0x1111111111111112, x20);
        ASSERT_EQUAL_64(0x1111111111111113, x21);
        break;
      case 2048:
        ASSERT_EQUAL_64(0x000000000000000f, x2);
        ASSERT_EQUAL_64(0x0000000000001118, x10);
        ASSERT_EQUAL_64(0x000000001111114f, x13);
        ASSERT_EQUAL_64(0x000000001111114f, x18);
        ASSERT_EQUAL_64(0x1111111111111112, x20);
        ASSERT_EQUAL_64(0x1111111111111113, x21);
        break;
      default:
        printf("WARNING: Some tests skipped due to unexpected VL.\n");
        break;
    }
  }
}

TEST_SVE(sve_last_v) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
  START();

  __ Pfalse(p1.VnB());
  int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
  int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
  Initialise(&masm, p2.VnB(), p2_inputs);
  Initialise(&masm, p3.VnB(), p3_inputs);
  __ Ptrue(p4.VnB());

  __ Index(z0.VnB(), 0x10, 1);
  __ Lasta(b1, p1, z0.VnB());
  __ Lastb(b2, p1, z0.VnB());
  __ Lasta(b3, p2, z0.VnB());
  __ Lastb(b4, p2, z0.VnB());
  __ Lasta(b5, p3, z0.VnB());
  __ Lastb(b6, p3, z0.VnB());
  __ Lasta(b7, p4, z0.VnB());

  __ Punpklo(p3.VnH(), p3.VnB());
  __ Index(z0.VnH(), 0x1110, 1);
  __ Lasta(h9, p1, z0.VnH());
  __ Lastb(h10, p3, z0.VnH());
  __ Lasta(h12, p4, z0.VnH());

  __ Index(z0.VnS(), 0x11111110, 1);
  __ Lastb(s13, p1, z0.VnS());
  __ Lasta(s14, p2, z0.VnS());
  __ Lastb(s18, p4, z0.VnS());

  __ Index(z0.VnD(), 0x1111111111111110, 1);
  __ Lasta(d19, p1, z0.VnD());
  __ Lastb(d20, p3, z0.VnD());
  __ Lasta(d21, p3, z0.VnD());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0000000000000010, q1);
    ASSERT_EQUAL_128(0, 0x0000000000000011, q3);
    ASSERT_EQUAL_128(0, 0x0000000000000010, q4);
    ASSERT_EQUAL_128(0, 0x0000000000000019, q5);
    ASSERT_EQUAL_128(0, 0x0000000000000018, q6);
    ASSERT_EQUAL_128(0, 0x0000000000000010, q7);
    ASSERT_EQUAL_128(0, 0x0000000000001110, q9);
    ASSERT_EQUAL_128(0, 0x0000000000001110, q12);
    ASSERT_EQUAL_128(0, 0x0000000011111111, q14);
    ASSERT_EQUAL_128(0, 0x1111111111111110, q19);

    int vl = core.GetSVELaneCount(kBRegSize) * 8;
    switch (vl) {
      case 128:
        ASSERT_EQUAL_128(0, 0x000000000000001f, q2);
        ASSERT_EQUAL_128(0, 0x0000000000001116, q10);
        ASSERT_EQUAL_128(0, 0x0000000011111113, q13);
        ASSERT_EQUAL_128(0, 0x0000000011111113, q18);
        ASSERT_EQUAL_128(0, 0x1111111111111111, q20);
        ASSERT_EQUAL_128(0, 0x1111111111111110, q21);
        break;
      case 384:
        ASSERT_EQUAL_128(0, 0x000000000000003f, q2);
        ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
        ASSERT_EQUAL_128(0, 0x000000001111111b, q13);
        ASSERT_EQUAL_128(0, 0x000000001111111b, q18);
        ASSERT_EQUAL_128(0, 0x1111111111111112, q20);
        ASSERT_EQUAL_128(0, 0x1111111111111113, q21);
        break;
      case 2048:
        ASSERT_EQUAL_128(0, 0x000000000000000f, q2);
        ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
        ASSERT_EQUAL_128(0, 0x000000001111114f, q13);
        ASSERT_EQUAL_128(0, 0x000000001111114f, q18);
        ASSERT_EQUAL_128(0, 0x1111111111111112, q20);
        ASSERT_EQUAL_128(0, 0x1111111111111113, q21);
        break;
      default:
        printf("WARNING: Some tests skipped due to unexpected VL.\n");
        break;
    }
  }
}

TEST_SVE(sve_clast_r) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
  START();

  __ Pfalse(p1.VnB());
  int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
  int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
  Initialise(&masm, p2.VnB(), p2_inputs);
  Initialise(&masm, p3.VnB(), p3_inputs);
  __ Ptrue(p4.VnB());

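  // `Clasta` and `Clastb` are the conditional forms of `Lasta` and `Lastb`:
  // when no lanes are active, the existing scalar operand is returned instead
  // (zero-extended from the element size), as the all-false `p1` cases below
  // expect.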
  __ Index(z0.VnB(), 0x10, 1);
  __ Mov(x1, -1);
  __ Mov(x2, -1);
  __ Clasta(x1, p1, x1, z0.VnB());
  __ Clastb(x2, p1, x2, z0.VnB());
  __ Clasta(x3, p2, x3, z0.VnB());
  __ Clastb(x4, p2, x4, z0.VnB());
  __ Clasta(x5, p3, x5, z0.VnB());
  __ Clastb(x6, p3, x6, z0.VnB());
  __ Clasta(x7, p4, x7, z0.VnB());

  __ Punpklo(p3.VnH(), p3.VnB());
  __ Index(z0.VnH(), 0x1110, 1);
  __ Mov(x9, -1);
  __ Clasta(x9, p1, x9, z0.VnH());
  __ Clastb(x10, p3, x10, z0.VnH());
  __ Clasta(x12, p4, x12, z0.VnH());

  __ Index(z0.VnS(), 0x11111110, 1);
  __ Mov(x13, -1);
  __ Clasta(x13, p1, x13, z0.VnS());
  __ Clastb(x14, p2, x14, z0.VnS());
  __ Clasta(x18, p4, x18, z0.VnS());

  __ Index(z0.VnD(), 0x1111111111111110, 1);
  __ Mov(x19, -1);
  __ Clasta(x19, p1, x19, z0.VnD());
  __ Clastb(x20, p2, x20, z0.VnD());
  __ Clasta(x21, p4, x21, z0.VnD());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_64(0x00000000000000ff, x1);
    ASSERT_EQUAL_64(0x00000000000000ff, x2);
    ASSERT_EQUAL_64(0x0000000000000011, x3);
    ASSERT_EQUAL_64(0x0000000000000010, x4);
    ASSERT_EQUAL_64(0x0000000000000019, x5);
    ASSERT_EQUAL_64(0x0000000000000018, x6);
    ASSERT_EQUAL_64(0x0000000000000010, x7);
    ASSERT_EQUAL_64(0x000000000000ffff, x9);
    ASSERT_EQUAL_64(0x0000000000001110, x12);
    ASSERT_EQUAL_64(0x00000000ffffffff, x13);
    ASSERT_EQUAL_64(0x0000000011111110, x14);
    ASSERT_EQUAL_64(0x0000000011111110, x18);
    ASSERT_EQUAL_64(0xffffffffffffffff, x19);
    ASSERT_EQUAL_64(0x1111111111111110, x20);
    ASSERT_EQUAL_64(0x1111111111111110, x21);

    int vl = core.GetSVELaneCount(kBRegSize) * 8;
    switch (vl) {
      case 128:
        ASSERT_EQUAL_64(0x0000000000001116, x10);
        break;
      case 384:
        ASSERT_EQUAL_64(0x0000000000001118, x10);
        break;
      case 2048:
        ASSERT_EQUAL_64(0x0000000000001118, x10);
        break;
      default:
        printf("WARNING: Some tests skipped due to unexpected VL.\n");
        break;
    }
  }
}

TEST_SVE(sve_clast_v) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
  START();

  __ Pfalse(p1.VnB());
  int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
  int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
  Initialise(&masm, p2.VnB(), p2_inputs);
  Initialise(&masm, p3.VnB(), p3_inputs);
  __ Ptrue(p4.VnB());

  __ Index(z0.VnB(), 0x10, 1);
  __ Dup(z1.VnB(), -1);
  __ Dup(z2.VnB(), -1);
  __ Clasta(b1, p1, b1, z0.VnB());
  __ Clastb(b2, p1, b2, z0.VnB());
  __ Clasta(b3, p2, b3, z0.VnB());
  __ Clastb(b4, p2, b4, z0.VnB());
  __ Clasta(b5, p3, b5, z0.VnB());
  __ Clastb(b6, p3, b6, z0.VnB());
  __ Clasta(b7, p4, b7, z0.VnB());

  __ Punpklo(p3.VnH(), p3.VnB());
  __ Index(z0.VnH(), 0x1110, 1);
  __ Dup(z9.VnB(), -1);
  __ Clasta(h9, p1, h9, z0.VnH());
  __ Clastb(h10, p3, h10, z0.VnH());
  __ Clasta(h12, p4, h12, z0.VnH());

  __ Index(z0.VnS(), 0x11111110, 1);
  __ Dup(z13.VnB(), -1);
  __ Clasta(s13, p1, s13, z0.VnS());
  __ Clastb(s14, p2, s14, z0.VnS());
  __ Clasta(s18, p4, s18, z0.VnS());

  __ Index(z0.VnD(), 0x1111111111111110, 1);
  __ Dup(z19.VnB(), -1);
  __ Clasta(d19, p1, d19, z0.VnD());
  __ Clastb(d20, p2, d20, z0.VnD());
  __ Clasta(d21, p4, d21, z0.VnD());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x00000000000000ff, q1);
    ASSERT_EQUAL_128(0, 0x00000000000000ff, q2);
    ASSERT_EQUAL_128(0, 0x0000000000000011, q3);
    ASSERT_EQUAL_128(0, 0x0000000000000010, q4);
    ASSERT_EQUAL_128(0, 0x0000000000000019, q5);
    ASSERT_EQUAL_128(0, 0x0000000000000018, q6);
    ASSERT_EQUAL_128(0, 0x0000000000000010, q7);
    ASSERT_EQUAL_128(0, 0x000000000000ffff, q9);
    ASSERT_EQUAL_128(0, 0x0000000000001110, q12);
    ASSERT_EQUAL_128(0, 0x00000000ffffffff, q13);
    ASSERT_EQUAL_128(0, 0x0000000011111110, q14);
    ASSERT_EQUAL_128(0, 0x0000000011111110, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x1111111111111110, q20);
    ASSERT_EQUAL_128(0, 0x1111111111111110, q21);

    int vl = core.GetSVELaneCount(kBRegSize) * 8;
    switch (vl) {
      case 128:
        ASSERT_EQUAL_128(0, 0x0000000000001116, q10);
        break;
      case 384:
        ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
        break;
      case 2048:
        ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
        break;
      default:
        printf("WARNING: Some tests skipped due to unexpected VL.\n");
        break;
    }
  }
}

TEST_SVE(sve_clast_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
  START();

  __ Pfalse(p1.VnB());
  int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
  int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
  Initialise(&masm, p2.VnB(), p2_inputs);
  Initialise(&masm, p3.VnB(), p3_inputs);
  __ Ptrue(p4.VnB());

  __ Index(z0.VnB(), 0x10, 1);
  __ Dup(z1.VnB(), 0xff);
  __ Dup(z2.VnB(), 0xff);
  __ Clasta(z1.VnB(), p1, z1.VnB(), z0.VnB());
  __ Clastb(z2.VnB(), p1, z2.VnB(), z0.VnB());
  __ Clasta(z3.VnB(), p2, z3.VnB(), z0.VnB());
  __ Clastb(z4.VnB(), p2, z4.VnB(), z0.VnB());
  __ Clasta(z5.VnB(), p3, z5.VnB(), z0.VnB());
  __ Clastb(z6.VnB(), p3, z6.VnB(), z0.VnB());
  __ Clasta(z7.VnB(), p4, z7.VnB(), z0.VnB());

  __ Punpklo(p3.VnH(), p3.VnB());
  __ Index(z0.VnH(), 0x1110, 1);
  __ Dup(z9.VnB(), 0xff);
  __ Clasta(z9.VnH(), p1, z9.VnH(), z0.VnH());
  __ Clastb(z10.VnH(), p3, z10.VnH(), z0.VnH());
  __ Clasta(z12.VnH(), p4, z12.VnH(), z0.VnH());

  __ Index(z0.VnS(), 0x11111110, 1);
  __ Dup(z13.VnB(), 0xff);
  __ Clasta(z13.VnS(), p1, z13.VnS(), z0.VnS());
  __ Clastb(z14.VnS(), p2, z14.VnS(), z0.VnS());
  __ Clasta(z16.VnS(), p4, z16.VnS(), z0.VnS());

  __ Index(z0.VnD(), 0x1111111111111110, 1);
  __ Dup(z17.VnB(), 0xff);
  __ Clasta(z17.VnD(), p1, z17.VnD(), z0.VnD());
  __ Clastb(z18.VnD(), p2, z18.VnD(), z0.VnD());
  __ Clasta(z20.VnD(), p4, z20.VnD(), z0.VnD());
  END();

  if (CAN_RUN()) {
    RUN();
    uint64_t z1_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
    uint64_t z2_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
    uint64_t z3_expected[] = {0x1111111111111111, 0x1111111111111111};
    uint64_t z4_expected[] = {0x1010101010101010, 0x1010101010101010};
    uint64_t z5_expected[] = {0x1919191919191919, 0x1919191919191919};
    uint64_t z6_expected[] = {0x1818181818181818, 0x1818181818181818};
    uint64_t z7_expected[] = {0x1010101010101010, 0x1010101010101010};
    uint64_t z9_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
    uint64_t z12_expected[] = {0x1110111011101110, 0x1110111011101110};
    uint64_t z13_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
    uint64_t z14_expected[] = {0x1111111011111110, 0x1111111011111110};
    uint64_t z16_expected[] = {0x1111111011111110, 0x1111111011111110};
    uint64_t z17_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
    uint64_t z18_expected[] = {0x1111111111111110, 0x1111111111111110};
    uint64_t z20_expected[] = {0x1111111111111110, 0x1111111111111110};

    uint64_t z10_expected_vl128[] = {0x1116111611161116, 0x1116111611161116};
    uint64_t z10_expected_vl_long[] = {0x1118111811181118, 0x1118111811181118};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
    ASSERT_EQUAL_SVE(z7_expected, z7.VnD());
    ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
    ASSERT_EQUAL_SVE(z12_expected, z12.VnD());
    ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
    ASSERT_EQUAL_SVE(z14_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z16_expected, z16.VnD());
    ASSERT_EQUAL_SVE(z17_expected, z17.VnD());
    ASSERT_EQUAL_SVE(z18_expected, z18.VnD());
    ASSERT_EQUAL_SVE(z20_expected, z20.VnD());

    int vl = core.GetSVELaneCount(kBRegSize) * 8;
    switch (vl) {
      case 128:
        ASSERT_EQUAL_SVE(z10_expected_vl128, z10.VnD());
        break;
      case 384:
      case 2048:
        ASSERT_EQUAL_SVE(z10_expected_vl_long, z10.VnD());
        break;
      default:
        printf("WARNING: Some tests skipped due to unexpected VL.\n");
        break;
    }
  }
}

TEST_SVE(sve_compact) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
  START();

  __ Ptrue(p0.VnB());
  __ Pfalse(p1.VnB());
  __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
  __ Zip1(p3.VnS(), p1.VnS(), p0.VnS());
  __ Zip1(p4.VnD(), p0.VnD(), p1.VnD());

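  // `Compact` copies the active elements of the source to the lowest-numbered
  // lanes of the destination and zeroes the remaining lanes. The `Mov(q0, q0)`
  // and `Mov(q3, q3)` copies below clear the Z register bits above 128, so the
  // expected values do not depend on the vector length.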
  __ Index(z0.VnS(), 0x11111111, 0x11111111);
  __ Mov(q0, q0);
  __ Compact(z1.VnS(), p0, z0.VnS());
  __ Compact(z2.VnS(), p2, z0.VnS());
  __ Compact(z0.VnS(), p3, z0.VnS());

  __ Index(z3.VnD(), 0x1111111111111111, 0x1111111111111111);
  __ Mov(q3, q3);
  __ Compact(z4.VnD(), p0, z3.VnD());
  __ Compact(z5.VnD(), p1, z3.VnD());
  __ Compact(z6.VnD(), p4, z3.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
    uint64_t z1_expected[] = {0x4444444433333333, 0x2222222211111111};
    uint64_t z2_expected[] = {0x0000000000000000, 0x3333333311111111};
    uint64_t z0_expected[] = {0x0000000000000000, 0x4444444422222222};
    uint64_t z4_expected[] = {0x2222222222222222, 0x1111111111111111};
    uint64_t z5_expected[] = {0x0000000000000000, 0x0000000000000000};
    uint64_t z6_expected[] = {0x0000000000000000, 0x1111111111111111};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
  }
}

TEST_SVE(sve_splice) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  __ Ptrue(p0.VnB());
  __ Pfalse(p1.VnB());
  int p2b_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
  int p3b_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0};
  int p4b_inputs[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  int p5b_inputs[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0};
  int p6b_inputs[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0};
  Initialise(&masm, p2.VnB(), p2b_inputs);
  Initialise(&masm, p3.VnB(), p3b_inputs);
  Initialise(&masm, p4.VnB(), p4b_inputs);
  Initialise(&masm, p5.VnB(), p5b_inputs);
  Initialise(&masm, p6.VnB(), p6b_inputs);

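  // `Splice` copies the segment of the first source vector from its first
  // active lane to its last active lane to the bottom of the destination, then
  // fills the remaining lanes from the lowest lanes of the second source.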
  __ Index(z30.VnB(), 1, 1);

  __ Index(z0.VnB(), -1, -1);
  __ Splice(z0.VnB(), p0, z0.VnB(), z30.VnB());
  __ Index(z1.VnB(), -1, -1);
  __ Splice(z1.VnB(), p1, z1.VnB(), z30.VnB());
  __ Index(z2.VnB(), -1, -1);
  __ Splice(z2.VnB(), p2, z2.VnB(), z30.VnB());
  __ Index(z3.VnB(), -1, -1);
  __ Splice(z3.VnB(), p3, z3.VnB(), z30.VnB());
  __ Index(z4.VnB(), -1, -1);
  __ Splice(z4.VnB(), p4, z4.VnB(), z30.VnB());
  __ Index(z5.VnB(), -1, -1);
  __ Splice(z5.VnB(), p5, z5.VnB(), z30.VnB());
  __ Index(z6.VnB(), -1, -1);
  __ Splice(z6.VnB(), p6, z6.VnB(), z30.VnB());

  int p2h_inputs[] = {0, 0, 0, 0, 0, 0, 1, 0};
  int p3h_inputs[] = {0, 0, 1, 0, 0, 0, 1, 0};
  Initialise(&masm, p2.VnH(), p2h_inputs);
  Initialise(&masm, p3.VnH(), p3h_inputs);

  __ Index(z30.VnH(), 1, 1);
  __ Index(z29.VnH(), -1, -1);
  __ Splice(z7.VnH(), p2, z29.VnH(), z30.VnH());
  __ Splice(z8.VnH(), p3, z29.VnH(), z30.VnH());

  int p2s_inputs[] = {0, 0, 1, 0};
  int p3s_inputs[] = {1, 0, 1, 0};
  Initialise(&masm, p2.VnS(), p2s_inputs);
  Initialise(&masm, p3.VnS(), p3s_inputs);

  __ Index(z30.VnS(), 1, 1);
  __ Index(z29.VnS(), -1, -1);
  __ Splice(z9.VnS(), p2, z29.VnS(), z30.VnS());
  __ Splice(z10.VnS(), p3, z29.VnS(), z30.VnS());

  int p2d_inputs[] = {0, 1};
  int p3d_inputs[] = {1, 0};
  Initialise(&masm, p2.VnD(), p2d_inputs);
  Initialise(&masm, p3.VnD(), p3d_inputs);

  __ Index(z30.VnD(), 1, 1);
  __ Index(z29.VnD(), -1, -1);
  __ Splice(z11.VnD(), p2, z29.VnD(), z30.VnD());
  __ Splice(z30.VnD(), p3, z29.VnD(), z30.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
    uint64_t z0_expected[] = {0xf0f1f2f3f4f5f6f7, 0xf8f9fafbfcfdfeff};
    uint64_t z1_expected[] = {0x100f0e0d0c0b0a09, 0x0807060504030201};
    uint64_t z2_expected[] = {0x0f0e0d0c0b0a0908, 0x07060504030201ff};
    uint64_t z3_expected[] = {0x0f0e0d0c0b0a0908, 0x07060504030201fe};
    uint64_t z4_expected[] = {0x0f0e0d0c0b0a0908, 0x07060504030201f0};
    uint64_t z5_expected[] = {0x0c0b0a0908070605, 0x04030201f6f7f8f9};
    uint64_t z6_expected[] = {0x01f0f1f2f3f4f5f6, 0xf7f8f9fafbfcfdfe};
    uint64_t z7_expected[] = {0x0007000600050004, 0x000300020001fffe};
    uint64_t z8_expected[] = {0x000300020001fffa, 0xfffbfffcfffdfffe};
    uint64_t z9_expected[] = {0x0000000300000002, 0x00000001fffffffe};
    uint64_t z10_expected[] = {0x00000001fffffffc, 0xfffffffdfffffffe};
    uint64_t z11_expected[] = {0x0000000000000001, 0xffffffffffffffff};
    uint64_t z30_expected[] = {0x0000000000000001, 0xfffffffffffffffe};

    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
    ASSERT_EQUAL_SVE(z7_expected, z7.VnD());
    ASSERT_EQUAL_SVE(z8_expected, z8.VnD());
    ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
    ASSERT_EQUAL_SVE(z10_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z11_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z30_expected, z30.VnD());
  }
}

TEST_SVE(sve_predicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // 0b...01011010'10110111
  int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1};  // Pm
  // 0b...11011001'01010010
  int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0};  // Pn
  // 0b...01010101'10110010
  int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};  // pg

  Initialise(&masm, p10.VnB(), p10_inputs);
  Initialise(&masm, p11.VnB(), p11_inputs);
  Initialise(&masm, p12.VnB(), p12_inputs);

  __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x0, NZCV);
  __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x1, NZCV);
  __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0b...01010000'00010010
    int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...00000001'00000000
    int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...00000001'10100000
    int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000101'10100000
    int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000100'00000000
    int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...01010101'00010010
    int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...01010001'10110010
    int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
    // 0b...01011011'00010111
    int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};

    ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
    ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
    ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
    ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
    ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());

    ASSERT_EQUAL_32(SVEFirstFlag, w0);
    ASSERT_EQUAL_32(SVENotLastFlag, w1);
  }
}

TEST_SVE(sve_int_compare_vectors) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
  int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
  int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z10.VnB(), z10_inputs);
  InsrHelper(&masm, z11.VnB(), z11_inputs);
  Initialise(&masm, p0.VnB(), p0_inputs);

  __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
  __ Mrs(x6, NZCV);

  uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
  uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
  int p1_inputs[] = {1, 1};
  InsrHelper(&masm, z12.VnD(), z12_inputs);
  InsrHelper(&masm, z13.VnD(), z13_inputs);
  Initialise(&masm, p1.VnD(), p1_inputs);

  __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
  __ Mrs(x7, NZCV);

  int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
  int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};

  int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z14.VnH(), z14_inputs);
  InsrHelper(&masm, z15.VnH(), z15_inputs);
  Initialise(&masm, p2.VnH(), p2_inputs);

  __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x8, NZCV);

  __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x9, NZCV);

  int z16_inputs[] = {0, -1, 0, 0};
  int z17_inputs[] = {0, 0, 2147483647, -2147483648};
  int p3_inputs[] = {1, 1, 1, 1};
  InsrHelper(&masm, z16.VnS(), z16_inputs);
  InsrHelper(&masm, z17.VnS(), z17_inputs);
  Initialise(&masm, p3.VnS(), p3_inputs);

  __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x10, NZCV);

  __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x11, NZCV);

  // Architectural aliases testing.
  __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB());  // HS
  __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD());  // HI
  __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH());  // GE
  __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS());  // GT

  END();

  if (CAN_RUN()) {
    RUN();

    int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
    for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
      int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
    }

    int p7_expected[] = {1, 0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnD());

    int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnH());

    int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnH());

    int p10_expected[] = {0, 0, 0, 1};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0, 1, 1, 1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    // Reuse the expected results to verify the architectural aliases.
    ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
    ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
    ASSERT_EQUAL_SVE(p10_expected, p15.VnS());

    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(NoFlag, w7);
    ASSERT_EQUAL_32(NoFlag, w8);
    ASSERT_EQUAL_32(NoFlag, w9);
    ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
  }
}

TEST_SVE(sve_int_compare_vectors_wide_elements) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

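  // In the "wide elements" forms, the second source operand is read as 64-bit
  // lanes, and every lane of the first source is compared against the 64-bit
  // lane of the second source that covers the same portion of the vector.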
1298 int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
1299 int src2_inputs_1[] = {0, -1};
1300 int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
1301 InsrHelper(&masm, z13.VnB(), src1_inputs_1);
1302 InsrHelper(&masm, z19.VnD(), src2_inputs_1);
1303 Initialise(&masm, p0.VnB(), mask_inputs_1);
1304
1305 __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
1306 __ Mrs(x2, NZCV);
1307 __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
1308 __ Mrs(x3, NZCV);
1309
1310 int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
1311 int src2_inputs_2[] = {0, -32767};
1312 int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
1313 InsrHelper(&masm, z13.VnH(), src1_inputs_2);
1314 InsrHelper(&masm, z19.VnD(), src2_inputs_2);
1315 Initialise(&masm, p0.VnH(), mask_inputs_2);
1316
1317 __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
1318 __ Mrs(x4, NZCV);
1319 __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
1320 __ Mrs(x5, NZCV);
1321
1322 int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
1323 int src2_inputs_3[] = {0, -2147483648};
1324 int mask_inputs_3[] = {1, 1, 1, 1};
1325 InsrHelper(&masm, z13.VnS(), src1_inputs_3);
1326 InsrHelper(&masm, z19.VnD(), src2_inputs_3);
1327 Initialise(&masm, p0.VnS(), mask_inputs_3);
1328
1329 __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
1330 __ Mrs(x6, NZCV);
1331 __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
1332 __ Mrs(x7, NZCV);
1333
1334 int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
1335 int src2_inputs_4[] = {0x00, 0x7f};
1336 int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
1337 InsrHelper(&masm, z13.VnB(), src1_inputs_4);
1338 InsrHelper(&masm, z19.VnD(), src2_inputs_4);
1339 Initialise(&masm, p0.VnB(), mask_inputs_4);
1340
1341 __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
1342 __ Mrs(x8, NZCV);
1343 __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
1344 __ Mrs(x9, NZCV);
1345
1346 int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
1347 int src2_inputs_5[] = {0x8000, 0xffff};
1348 int mask_inputs_5[] = {1, 1, 1, 1};
1349 InsrHelper(&masm, z13.VnS(), src1_inputs_5);
1350 InsrHelper(&masm, z19.VnD(), src2_inputs_5);
1351 Initialise(&masm, p0.VnS(), mask_inputs_5);
1352
1353 __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
1354 __ Mrs(x10, NZCV);
1355 __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
1356 __ Mrs(x11, NZCV);
1357
1358 END();
1359
1360 if (CAN_RUN()) {
1361 RUN();
1362 int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
1363 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
1364
1365 int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
1366 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
1367
1368 int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
1369 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
1370
1371 int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
1372 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1373
1374 int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
1375 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1376
1377 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
1378 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
1379
1380 int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
1381 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1382
1383 int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
1384 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
1385
1386 int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
1387 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1388
1389 int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
1390 ASSERT_EQUAL_SVE(p11_expected, p11.VnS());
1391
1392 ASSERT_EQUAL_32(NoFlag, w2);
1393 ASSERT_EQUAL_32(NoFlag, w3);
1394 ASSERT_EQUAL_32(NoFlag, w4);
1395 ASSERT_EQUAL_32(SVENotLastFlag, w5);
1396 ASSERT_EQUAL_32(SVEFirstFlag, w6);
1397 ASSERT_EQUAL_32(SVENotLastFlag, w7);
1398 ASSERT_EQUAL_32(SVEFirstFlag, w8);
1399 ASSERT_EQUAL_32(SVEFirstFlag, w9);
1400 ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
1401 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
TatWai Chongf4fa8222019-06-17 12:08:14 -07001402 }
TatWai Chongf4fa8222019-06-17 12:08:14 -07001403}
1404
Jacob Bramleye8289202019-07-31 11:25:23 +01001405TEST_SVE(sve_bitwise_imm) {
1406 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chonga1885a52019-04-15 17:19:14 -07001407 START();
1408
1409 // clang-format off
1410 uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
1411 uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
1412 uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
1413 0x0123, 0x4567, 0x89ab, 0xcdef};
1414 uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
1415 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
1416 // clang-format on
1417
1418 InsrHelper(&masm, z1.VnD(), z21_inputs);
1419 InsrHelper(&masm, z2.VnS(), z22_inputs);
1420 InsrHelper(&masm, z3.VnH(), z23_inputs);
1421 InsrHelper(&masm, z4.VnB(), z24_inputs);
1422
1423 __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
1424 __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
1425 __ And(z3.VnH(), z3.VnH(), 0x0ff0);
1426 __ And(z4.VnB(), z4.VnB(), 0x3f);
1427
1428 InsrHelper(&masm, z5.VnD(), z21_inputs);
1429 InsrHelper(&masm, z6.VnS(), z22_inputs);
1430 InsrHelper(&masm, z7.VnH(), z23_inputs);
1431 InsrHelper(&masm, z8.VnB(), z24_inputs);
1432
1433 __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
1434 __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
1435 __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
1436 __ Eor(z8.VnB(), z8.VnB(), 0x3f);
1437
1438 InsrHelper(&masm, z9.VnD(), z21_inputs);
1439 InsrHelper(&masm, z10.VnS(), z22_inputs);
1440 InsrHelper(&masm, z11.VnH(), z23_inputs);
1441 InsrHelper(&masm, z12.VnB(), z24_inputs);
1442
1443 __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
1444 __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
1445 __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
1446 __ Orr(z12.VnB(), z12.VnB(), 0x3f);
1447
Jacob Bramley6069fd42019-06-24 10:20:45 +01001448 {
1449 // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
1450 // so here we test `dupm` directly.
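    // The ExactAssemblyScope checks that exactly four instructions are
    // emitted here, so the MacroAssembler cannot expand or replace the raw
    // `dupm` encodings below.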
1451 ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
1452 __ dupm(z13.VnD(), 0x7ffffff800000000);
1453 __ dupm(z14.VnS(), 0x7ffc7ffc);
1454 __ dupm(z15.VnH(), 0x3ffc);
1455 __ dupm(z16.VnB(), 0xc3);
1456 }
TatWai Chonga1885a52019-04-15 17:19:14 -07001457
1458 END();
1459
1460 if (CAN_RUN()) {
1461 RUN();
1462
1463 // clang-format off
1464 uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
1465 uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
1466 uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
1467 0x0120, 0x0560, 0x09a0, 0x0de0};
1468 uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
1469 0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};
1470
1471 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1472 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1473 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1474 ASSERT_EQUAL_SVE(z4_expected, z4.VnB());
1475
1476 uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
1477 uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
1478 uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
1479 0x0ed3, 0x4a97, 0x865b, 0xc21f};
1480 uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
1481 0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};
1482
1483 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1484 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1485 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1486 ASSERT_EQUAL_SVE(z8_expected, z8.VnB());
1487
1488 uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
1489 uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
1490 uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
1491 0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
1492 uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
1493 0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};
1494
1495 ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
1496 ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
1497 ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
1498 ASSERT_EQUAL_SVE(z12_expected, z12.VnB());
1499
1500 uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
1501 uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
1502 uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
1503                                  0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc};
1504 ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
1505 ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
1506 ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
1507 // clang-format on
1508 }
TatWai Chonga1885a52019-04-15 17:19:14 -07001509}
1510
Jacob Bramleye8289202019-07-31 11:25:23 +01001511TEST_SVE(sve_dup_imm) {
Jacob Bramley6069fd42019-06-24 10:20:45 +01001512 // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
1513 // unencodable immediates.
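  // For reference: `dup` encodes a signed 8-bit immediate, optionally shifted
  // left by 8, and `dupm` encodes bitmask immediates (as for the logical
  // instructions); anything else has to be synthesised.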
1514
Jacob Bramleye8289202019-07-31 11:25:23 +01001515 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6069fd42019-06-24 10:20:45 +01001516 START();
1517
1518 // Encodable with `dup` (shift 0).
1519 __ Dup(z0.VnD(), -1);
1520 __ Dup(z1.VnS(), 0x7f);
1521 __ Dup(z2.VnH(), -0x80);
1522 __ Dup(z3.VnB(), 42);
1523
1524 // Encodable with `dup` (shift 8).
TatWai Chong6995bfd2019-09-26 10:48:05 +01001525 __ Dup(z4.VnD(), -42 * 256);
1526 __ Dup(z5.VnS(), -0x8000);
1527 __ Dup(z6.VnH(), 0x7f00);
Jacob Bramley6069fd42019-06-24 10:20:45 +01001528 // B-sized lanes cannot take a shift of 8.
1529
1530 // Encodable with `dupm` (but not `dup`).
1531 __ Dup(z10.VnD(), 0x3fc);
1532 __ Dup(z11.VnS(), -516097); // 0xfff81fff, as a signed int.
1533 __ Dup(z12.VnH(), 0x0001);
1534 // All values that fit B-sized lanes are encodable with `dup`.
1535
1536 // Cases that require immediate synthesis.
1537 __ Dup(z20.VnD(), 0x1234);
1538 __ Dup(z21.VnD(), -4242);
1539 __ Dup(z22.VnD(), 0xfedcba9876543210);
1540 __ Dup(z23.VnS(), 0x01020304);
1541 __ Dup(z24.VnS(), -0x01020304);
1542 __ Dup(z25.VnH(), 0x3c38);
1543 // All values that fit B-sized lanes are directly encodable.
1544
1545 END();
1546
1547 if (CAN_RUN()) {
1548 RUN();
1549
1550 ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
1551 ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
1552 ASSERT_EQUAL_SVE(0xff80, z2.VnH());
1553 ASSERT_EQUAL_SVE(0x2a, z3.VnB());
1554
TatWai Chong6995bfd2019-09-26 10:48:05 +01001555 ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
1556 ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
1557 ASSERT_EQUAL_SVE(0x7f00, z6.VnH());
Jacob Bramley6069fd42019-06-24 10:20:45 +01001558
1559 ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
1560 ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
1561 ASSERT_EQUAL_SVE(0x0001, z12.VnH());
1562
1563 ASSERT_EQUAL_SVE(0x1234, z20.VnD());
1564 ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
1565 ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
1566 ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
1567 ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
1568 ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
1569 }
1570}
1571
Jacob Bramleye8289202019-07-31 11:25:23 +01001572TEST_SVE(sve_inc_dec_p_scalar) {
1573 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001574 START();
1575
1576 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1577 Initialise(&masm, p0.VnB(), p0_inputs);
1578
1579 int p0_b_count = 9;
1580 int p0_h_count = 5;
1581 int p0_s_count = 3;
1582 int p0_d_count = 2;
1583
1584 // 64-bit operations preserve their high bits.
1585 __ Mov(x0, 0x123456780000002a);
1586 __ Decp(x0, p0.VnB());
1587
1588 __ Mov(x1, 0x123456780000002a);
1589 __ Incp(x1, p0.VnH());
1590
1591 // Check that saturation does not occur.
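  // `Incp` and `Decp` on general-purpose registers use modular arithmetic, so
  // the extreme values below wrap around instead of saturating.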
1592 __ Mov(x10, 1);
1593 __ Decp(x10, p0.VnS());
1594
1595 __ Mov(x11, UINT64_MAX);
1596 __ Incp(x11, p0.VnD());
1597
1598 __ Mov(x12, INT64_MAX);
1599 __ Incp(x12, p0.VnB());
1600
1601 // With an all-true predicate, these instructions increment or decrement by
1602 // the vector length.
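  // For example, with a 384-bit vector length there are 48 B lanes and 24 H
  // lanes, so the `Decp` below subtracts 48 and the `Incp` adds 24.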
Jacob Bramley0ce75842019-07-17 18:12:50 +01001603 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001604
1605 __ Mov(x20, 0x4000000000000000);
1606 __ Decp(x20, p15.VnB());
1607
1608 __ Mov(x21, 0x4000000000000000);
1609 __ Incp(x21, p15.VnH());
1610
1611 END();
1612 if (CAN_RUN()) {
1613 RUN();
1614
1615 ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
1616 ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);
1617
1618 ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
1619 ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
1620 ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);
1621
1622 ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
1623 ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
1624 }
1625}
1626
Jacob Bramleye8289202019-07-31 11:25:23 +01001627TEST_SVE(sve_sqinc_sqdec_p_scalar) {
1628 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001629 START();
1630
1631 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1632 Initialise(&masm, p0.VnB(), p0_inputs);
1633
1634 int p0_b_count = 9;
1635 int p0_h_count = 5;
1636 int p0_s_count = 3;
1637 int p0_d_count = 2;
1638
1639 uint64_t dummy_high = 0x1234567800000000;
1640
1641 // 64-bit operations preserve their high bits.
1642 __ Mov(x0, dummy_high + 42);
1643 __ Sqdecp(x0, p0.VnB());
1644
1645 __ Mov(x1, dummy_high + 42);
1646 __ Sqincp(x1, p0.VnH());
1647
1648 // 32-bit operations sign-extend into their high bits.
1649 __ Mov(x2, dummy_high + 42);
1650 __ Sqdecp(x2, p0.VnS(), w2);
1651
1652 __ Mov(x3, dummy_high + 42);
1653 __ Sqincp(x3, p0.VnD(), w3);
1654
1655 __ Mov(x4, dummy_high + 1);
1656 __ Sqdecp(x4, p0.VnS(), w4);
1657
1658 __ Mov(x5, dummy_high - 1);
1659 __ Sqincp(x5, p0.VnD(), w5);
1660
1661 // Check that saturation behaves correctly.
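  // Sqincp and Sqdecp saturate to the signed range of the operation's data
  // size: INT64_MIN/INT64_MAX for the X form, INT32_MIN/INT32_MAX for the W
  // form.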
1662 __ Mov(x10, 0x8000000000000001); // INT64_MIN + 1
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001663 __ Sqdecp(x10, p0.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001664
1665 __ Mov(x11, dummy_high + 0x80000001); // INT32_MIN + 1
1666 __ Sqdecp(x11, p0.VnH(), w11);
1667
1668 __ Mov(x12, 1);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001669 __ Sqdecp(x12, p0.VnS());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001670
1671 __ Mov(x13, dummy_high + 1);
1672 __ Sqdecp(x13, p0.VnD(), w13);
1673
1674 __ Mov(x14, 0x7ffffffffffffffe); // INT64_MAX - 1
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001675 __ Sqincp(x14, p0.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001676
1677 __ Mov(x15, dummy_high + 0x7ffffffe); // INT32_MAX - 1
1678 __ Sqincp(x15, p0.VnH(), w15);
1679
1680 // Don't use x16 and x17 since they are scratch registers by default.
1681
1682 __ Mov(x18, 0xffffffffffffffff);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001683 __ Sqincp(x18, p0.VnS());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001684
1685 __ Mov(x19, dummy_high + 0xffffffff);
1686 __ Sqincp(x19, p0.VnD(), w19);
1687
1688 __ Mov(x20, dummy_high + 0xffffffff);
1689 __ Sqdecp(x20, p0.VnB(), w20);
1690
1691 // With an all-true predicate, these instructions increment or decrement by
1692 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001693 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001694
1695 __ Mov(x21, 0);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001696 __ Sqdecp(x21, p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001697
1698 __ Mov(x22, 0);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001699 __ Sqincp(x22, p15.VnH());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001700
1701 __ Mov(x23, dummy_high);
1702 __ Sqdecp(x23, p15.VnS(), w23);
1703
1704 __ Mov(x24, dummy_high);
1705 __ Sqincp(x24, p15.VnD(), w24);
1706
1707 END();
1708 if (CAN_RUN()) {
1709 RUN();
1710
1711 // 64-bit operations preserve their high bits.
1712 ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
1713 ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);
1714
1715 // 32-bit operations sign-extend into their high bits.
1716 ASSERT_EQUAL_64(42 - p0_s_count, x2);
1717 ASSERT_EQUAL_64(42 + p0_d_count, x3);
1718 ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
1719 ASSERT_EQUAL_64(p0_d_count - 1, x5);
1720
1721 // Check that saturation behaves correctly.
1722 ASSERT_EQUAL_64(INT64_MIN, x10);
1723 ASSERT_EQUAL_64(INT32_MIN, x11);
1724 ASSERT_EQUAL_64(1 - p0_s_count, x12);
1725 ASSERT_EQUAL_64(1 - p0_d_count, x13);
1726 ASSERT_EQUAL_64(INT64_MAX, x14);
1727 ASSERT_EQUAL_64(INT32_MAX, x15);
1728 ASSERT_EQUAL_64(p0_s_count - 1, x18);
1729 ASSERT_EQUAL_64(p0_d_count - 1, x19);
1730 ASSERT_EQUAL_64(-1 - p0_b_count, x20);
1731
1732 // Check all-true predicates.
1733 ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
1734 ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
1735 ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
1736 ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
1737 }
1738}
1739
Jacob Bramleye8289202019-07-31 11:25:23 +01001740TEST_SVE(sve_uqinc_uqdec_p_scalar) {
1741 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001742 START();
1743
1744 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1745 Initialise(&masm, p0.VnB(), p0_inputs);
1746
1747 int p0_b_count = 9;
1748 int p0_h_count = 5;
1749 int p0_s_count = 3;
1750 int p0_d_count = 2;
1751
1752 uint64_t dummy_high = 0x1234567800000000;
1753
1754 // 64-bit operations preserve their high bits.
1755 __ Mov(x0, dummy_high + 42);
1756 __ Uqdecp(x0, p0.VnB());
1757
1758 __ Mov(x1, dummy_high + 42);
1759 __ Uqincp(x1, p0.VnH());
1760
1761 // 32-bit operations zero-extend into their high bits.
1762 __ Mov(x2, dummy_high + 42);
1763 __ Uqdecp(x2, p0.VnS(), w2);
1764
1765 __ Mov(x3, dummy_high + 42);
1766 __ Uqincp(x3, p0.VnD(), w3);
1767
1768 __ Mov(x4, dummy_high + 0x80000001);
1769 __ Uqdecp(x4, p0.VnS(), w4);
1770
1771 __ Mov(x5, dummy_high + 0x7fffffff);
1772 __ Uqincp(x5, p0.VnD(), w5);
1773
1774 // Check that saturation behaves correctly.
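  // Uqincp and Uqdecp saturate to the unsigned range: decrements clamp at
  // zero, and increments clamp at UINT32_MAX (W form) or UINT64_MAX (X form).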
1775 __ Mov(x10, 1);
1776 __ Uqdecp(x10, p0.VnB(), x10);
1777
1778 __ Mov(x11, dummy_high + 1);
1779 __ Uqdecp(x11, p0.VnH(), w11);
1780
1781 __ Mov(x12, 0x8000000000000000); // INT64_MAX + 1
1782 __ Uqdecp(x12, p0.VnS(), x12);
1783
1784 __ Mov(x13, dummy_high + 0x80000000); // INT32_MAX + 1
1785 __ Uqdecp(x13, p0.VnD(), w13);
1786
1787 __ Mov(x14, 0xfffffffffffffffe); // UINT64_MAX - 1
1788 __ Uqincp(x14, p0.VnB(), x14);
1789
1790 __ Mov(x15, dummy_high + 0xfffffffe); // UINT32_MAX - 1
1791 __ Uqincp(x15, p0.VnH(), w15);
1792
1793 // Don't use x16 and x17 since they are scratch registers by default.
1794
1795 __ Mov(x18, 0x7ffffffffffffffe); // INT64_MAX - 1
1796 __ Uqincp(x18, p0.VnS(), x18);
1797
1798 __ Mov(x19, dummy_high + 0x7ffffffe); // INT32_MAX - 1
1799 __ Uqincp(x19, p0.VnD(), w19);
1800
1801 // With an all-true predicate, these instructions increment or decrement by
1802 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001803 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001804
1805 __ Mov(x20, 0x4000000000000000);
1806 __ Uqdecp(x20, p15.VnB(), x20);
1807
1808 __ Mov(x21, 0x4000000000000000);
1809 __ Uqincp(x21, p15.VnH(), x21);
1810
1811 __ Mov(x22, dummy_high + 0x40000000);
1812 __ Uqdecp(x22, p15.VnS(), w22);
1813
1814 __ Mov(x23, dummy_high + 0x40000000);
1815 __ Uqincp(x23, p15.VnD(), w23);
1816
1817 END();
1818 if (CAN_RUN()) {
1819 RUN();
1820
1821 // 64-bit operations preserve their high bits.
1822 ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
1823 ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);
1824
1825 // 32-bit operations zero-extend into their high bits.
1826 ASSERT_EQUAL_64(42 - p0_s_count, x2);
1827 ASSERT_EQUAL_64(42 + p0_d_count, x3);
1828 ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
1829 ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);
1830
1831 // Check that saturation behaves correctly.
1832 ASSERT_EQUAL_64(0, x10);
1833 ASSERT_EQUAL_64(0, x11);
1834 ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
1835 ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
1836 ASSERT_EQUAL_64(UINT64_MAX, x14);
1837 ASSERT_EQUAL_64(UINT32_MAX, x15);
1838 ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
1839 ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);
1840
1841 // Check all-true predicates.
1842 ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
1843 ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
1844 ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
1845 ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
1846 }
1847}
1848
Jacob Bramleye8289202019-07-31 11:25:23 +01001849TEST_SVE(sve_inc_dec_p_vector) {
1850 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001851 START();
1852
1853 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
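  // (The vector forms of `Incp` and `Decp` have no B-sized variant, so only
  // H, S and D lanes are exercised here.)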
1854 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1855 Initialise(&masm, p0.VnB(), p0_inputs);
1856
1857 // Check that saturation does not occur.
1858
1859 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1860 InsrHelper(&masm, z0.VnD(), z0_inputs);
1861
1862 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1863 InsrHelper(&masm, z1.VnD(), z1_inputs);
1864
1865 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1866 InsrHelper(&masm, z2.VnS(), z2_inputs);
1867
1868 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1869 InsrHelper(&masm, z3.VnH(), z3_inputs);
1870
1871 // The MacroAssembler implements non-destructive operations using movprfx.
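  // For example, `Decp(z10.VnD(), p0, z0.VnD())` is expected to expand to a
  // `movprfx` copying z0 into z10, followed by the destructive
  // `decp z10.d, p0`.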
1872 __ Decp(z10.VnD(), p0, z0.VnD());
1873 __ Decp(z11.VnD(), p0, z1.VnD());
1874 __ Decp(z12.VnS(), p0, z2.VnS());
1875 __ Decp(z13.VnH(), p0, z3.VnH());
1876
1877 __ Incp(z14.VnD(), p0, z0.VnD());
1878 __ Incp(z15.VnD(), p0, z1.VnD());
1879 __ Incp(z16.VnS(), p0, z2.VnS());
1880 __ Incp(z17.VnH(), p0, z3.VnH());
1881
1882 // Also test destructive forms.
1883 __ Mov(z4, z0);
1884 __ Mov(z5, z1);
1885 __ Mov(z6, z2);
1886 __ Mov(z7, z3);
1887
1888 __ Decp(z0.VnD(), p0);
1889 __ Decp(z1.VnD(), p0);
1890 __ Decp(z2.VnS(), p0);
1891 __ Decp(z3.VnH(), p0);
1892
1893 __ Incp(z4.VnD(), p0);
1894 __ Incp(z5.VnD(), p0);
1895 __ Incp(z6.VnS(), p0);
1896 __ Incp(z7.VnH(), p0);
1897
1898 END();
1899 if (CAN_RUN()) {
1900 RUN();
1901
1902 // z0_inputs[...] - number of active D lanes (2)
1903 int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
1904 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1905
1906 // z1_inputs[...] - number of active D lanes (2)
1907 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1908 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1909
1910 // z2_inputs[...] - number of active S lanes (3)
1911 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
1912 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1913
1914 // z3_inputs[...] - number of active H lanes (5)
1915 int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
1916 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1917
1918 // z0_inputs[...] + number of active D lanes (2)
1919 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1920 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1921
1922 // z1_inputs[...] + number of active D lanes (2)
1923 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
1924 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1925
1926 // z2_inputs[...] + number of active S lanes (3)
1927 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
1928 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1929
1930 // z3_inputs[...] + number of active H lanes (5)
1931 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
1932 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1933
1934 // Check that the non-destructive macros produced the same results.
1935 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1936 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1937 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1938 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1939 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1940 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1941 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1942 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1943 }
1944}
1945
Jacob Bramleye8289202019-07-31 11:25:23 +01001946TEST_SVE(sve_inc_dec_ptrue_vector) {
1947 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001948 START();
1949
1950 // With an all-true predicate, these instructions increment or decrement by
1951 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001952 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001953
1954 __ Dup(z0.VnD(), 0);
1955 __ Decp(z0.VnD(), p15);
1956
1957 __ Dup(z1.VnS(), 0);
1958 __ Decp(z1.VnS(), p15);
1959
1960 __ Dup(z2.VnH(), 0);
1961 __ Decp(z2.VnH(), p15);
1962
1963 __ Dup(z3.VnD(), 0);
1964 __ Incp(z3.VnD(), p15);
1965
1966 __ Dup(z4.VnS(), 0);
1967 __ Incp(z4.VnS(), p15);
1968
1969 __ Dup(z5.VnH(), 0);
1970 __ Incp(z5.VnH(), p15);
1971
1972 END();
1973 if (CAN_RUN()) {
1974 RUN();
1975
1976 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1977 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1978 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1979
1980 for (int i = 0; i < d_lane_count; i++) {
1981 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1982 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1983 }
1984
1985 for (int i = 0; i < s_lane_count; i++) {
1986 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1987 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1988 }
1989
1990 for (int i = 0; i < h_lane_count; i++) {
1991 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1992 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1993 }
1994 }
1995}
1996
Jacob Bramleye8289202019-07-31 11:25:23 +01001997TEST_SVE(sve_sqinc_sqdec_p_vector) {
1998 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001999 START();
2000
2001 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
2002 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
2003 Initialise(&masm, p0.VnB(), p0_inputs);
2004
2005 // Check that saturation behaves correctly.
2006
2007 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
2008 InsrHelper(&masm, z0.VnD(), z0_inputs);
2009
2010 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
2011 InsrHelper(&masm, z1.VnD(), z1_inputs);
2012
2013 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
2014 InsrHelper(&masm, z2.VnS(), z2_inputs);
2015
2016 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
2017 InsrHelper(&masm, z3.VnH(), z3_inputs);
2018
2019 // The MacroAssembler implements non-destructive operations using movprfx.
2020 __ Sqdecp(z10.VnD(), p0, z0.VnD());
2021 __ Sqdecp(z11.VnD(), p0, z1.VnD());
2022 __ Sqdecp(z12.VnS(), p0, z2.VnS());
2023 __ Sqdecp(z13.VnH(), p0, z3.VnH());
2024
2025 __ Sqincp(z14.VnD(), p0, z0.VnD());
2026 __ Sqincp(z15.VnD(), p0, z1.VnD());
2027 __ Sqincp(z16.VnS(), p0, z2.VnS());
2028 __ Sqincp(z17.VnH(), p0, z3.VnH());
2029
2030 // Also test destructive forms.
2031 __ Mov(z4, z0);
2032 __ Mov(z5, z1);
2033 __ Mov(z6, z2);
2034 __ Mov(z7, z3);
2035
2036 __ Sqdecp(z0.VnD(), p0);
2037 __ Sqdecp(z1.VnD(), p0);
2038 __ Sqdecp(z2.VnS(), p0);
2039 __ Sqdecp(z3.VnH(), p0);
2040
2041 __ Sqincp(z4.VnD(), p0);
2042 __ Sqincp(z5.VnD(), p0);
2043 __ Sqincp(z6.VnS(), p0);
2044 __ Sqincp(z7.VnH(), p0);
2045
2046 END();
2047 if (CAN_RUN()) {
2048 RUN();
2049
2050 // z0_inputs[...] - number of active D lanes (2)
2051 int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
2052 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
2053
2054 // z1_inputs[...] - number of active D lanes (2)
2055 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
2056 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
2057
2058 // z2_inputs[...] - number of active S lanes (3)
2059 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
2060 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
2061
2062 // z3_inputs[...] - number of active H lanes (5)
2063 int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
2064 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
2065
2066 // z0_inputs[...] + number of active D lanes (2)
2067 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
2068 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
2069
2070 // z1_inputs[...] + number of active D lanes (2)
2071 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
2072 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
2073
2074 // z2_inputs[...] + number of active S lanes (3)
2075 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
2076 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
2077
2078 // z3_inputs[...] + number of active H lanes (5)
2079 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
2080 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
2081
2082 // Check that the non-destructive macros produced the same results.
2083 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
2084 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
2085 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
2086 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
2087 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
2088 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
2089 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
2090 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
2091 }
2092}
2093
Jacob Bramleye8289202019-07-31 11:25:23 +01002094TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
2095 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002096 START();
2097
2098 // With an all-true predicate, these instructions increment or decrement by
2099 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01002100 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002101
2102 __ Dup(z0.VnD(), 0);
2103 __ Sqdecp(z0.VnD(), p15);
2104
2105 __ Dup(z1.VnS(), 0);
2106 __ Sqdecp(z1.VnS(), p15);
2107
2108 __ Dup(z2.VnH(), 0);
2109 __ Sqdecp(z2.VnH(), p15);
2110
2111 __ Dup(z3.VnD(), 0);
2112 __ Sqincp(z3.VnD(), p15);
2113
2114 __ Dup(z4.VnS(), 0);
2115 __ Sqincp(z4.VnS(), p15);
2116
2117 __ Dup(z5.VnH(), 0);
2118 __ Sqincp(z5.VnH(), p15);
2119
2120 END();
2121 if (CAN_RUN()) {
2122 RUN();
2123
2124 int d_lane_count = core.GetSVELaneCount(kDRegSize);
2125 int s_lane_count = core.GetSVELaneCount(kSRegSize);
2126 int h_lane_count = core.GetSVELaneCount(kHRegSize);
2127
2128 for (int i = 0; i < d_lane_count; i++) {
2129 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
2130 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
2131 }
2132
2133 for (int i = 0; i < s_lane_count; i++) {
2134 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
2135 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
2136 }
2137
2138 for (int i = 0; i < h_lane_count; i++) {
2139 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
2140 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
2141 }
2142 }
2143}
2144
Jacob Bramleye8289202019-07-31 11:25:23 +01002145TEST_SVE(sve_uqinc_uqdec_p_vector) {
2146 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002147 START();
2148
2149 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
2150 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
2151 Initialise(&masm, p0.VnB(), p0_inputs);
2152
2153 // Check that saturation behaves correctly.
2154
2155 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
2156 InsrHelper(&masm, z0.VnD(), z0_inputs);
2157
2158 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
2159 InsrHelper(&masm, z1.VnD(), z1_inputs);
2160
2161 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
2162 InsrHelper(&masm, z2.VnS(), z2_inputs);
2163
2164 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
2165 InsrHelper(&masm, z3.VnH(), z3_inputs);
2166
2167 // The MacroAssembler implements non-destructive operations using movprfx.
2168 __ Uqdecp(z10.VnD(), p0, z0.VnD());
2169 __ Uqdecp(z11.VnD(), p0, z1.VnD());
2170 __ Uqdecp(z12.VnS(), p0, z2.VnS());
2171 __ Uqdecp(z13.VnH(), p0, z3.VnH());
2172
2173 __ Uqincp(z14.VnD(), p0, z0.VnD());
2174 __ Uqincp(z15.VnD(), p0, z1.VnD());
2175 __ Uqincp(z16.VnS(), p0, z2.VnS());
2176 __ Uqincp(z17.VnH(), p0, z3.VnH());
2177
2178 // Also test destructive forms.
2179 __ Mov(z4, z0);
2180 __ Mov(z5, z1);
2181 __ Mov(z6, z2);
2182 __ Mov(z7, z3);
2183
2184 __ Uqdecp(z0.VnD(), p0);
2185 __ Uqdecp(z1.VnD(), p0);
2186 __ Uqdecp(z2.VnS(), p0);
2187 __ Uqdecp(z3.VnH(), p0);
2188
2189 __ Uqincp(z4.VnD(), p0);
2190 __ Uqincp(z5.VnD(), p0);
2191 __ Uqincp(z6.VnS(), p0);
2192 __ Uqincp(z7.VnH(), p0);
2193
2194 END();
2195 if (CAN_RUN()) {
2196 RUN();
2197
2198 // z0_inputs[...] - number of active D lanes (2)
2199 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
2200 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
2201
2202 // z1_inputs[...] - number of active D lanes (2)
2203 uint64_t z1_expected[] = {0x12345678ffffff28,
2204 0,
2205 0xfffffffffffffffd,
2206 0x7ffffffffffffffd};
2207 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
2208
2209 // z2_inputs[...] - number of active S lanes (3)
2210 uint32_t z2_expected[] =
2211 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
2212 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
2213
2214 // z3_inputs[...] - number of active H lanes (5)
2215 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
2216 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
2217
2218 // z0_inputs[...] + number of active D lanes (2)
2219 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
2220 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
2221
2222 // z1_inputs[...] + number of active D lanes (2)
2223 uint64_t z5_expected[] = {0x12345678ffffff2c,
2224 2,
2225 UINT64_MAX,
2226 0x8000000000000001};
2227 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
2228
2229 // z2_inputs[...] + number of active S lanes (3)
2230 uint32_t z6_expected[] =
2231 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
2232 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
2233
2234 // z3_inputs[...] + number of active H lanes (5)
2235 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
2236 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
2237
2238 // Check that the non-destructive macros produced the same results.
2239 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
2240 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
2241 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
2242 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
2243 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
2244 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
2245 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
2246 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
2247 }
2248}
2249
Jacob Bramleye8289202019-07-31 11:25:23 +01002250TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
2251 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002252 START();
2253
2254 // With an all-true predicate, these instructions increment or decrement by
2255 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01002256 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002257
2258 __ Mov(x0, 0x1234567800000000);
2259 __ Mov(x1, 0x12340000);
2260 __ Mov(x2, 0x1200);
2261
2262 __ Dup(z0.VnD(), x0);
2263 __ Uqdecp(z0.VnD(), p15);
2264
2265 __ Dup(z1.VnS(), x1);
2266 __ Uqdecp(z1.VnS(), p15);
2267
2268 __ Dup(z2.VnH(), x2);
2269 __ Uqdecp(z2.VnH(), p15);
2270
2271 __ Dup(z3.VnD(), x0);
2272 __ Uqincp(z3.VnD(), p15);
2273
2274 __ Dup(z4.VnS(), x1);
2275 __ Uqincp(z4.VnS(), p15);
2276
2277 __ Dup(z5.VnH(), x2);
2278 __ Uqincp(z5.VnH(), p15);
2279
2280 END();
2281 if (CAN_RUN()) {
2282 RUN();
2283
2284 int d_lane_count = core.GetSVELaneCount(kDRegSize);
2285 int s_lane_count = core.GetSVELaneCount(kSRegSize);
2286 int h_lane_count = core.GetSVELaneCount(kHRegSize);
2287
2288 for (int i = 0; i < d_lane_count; i++) {
2289 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
2290 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
2291 }
2292
2293 for (int i = 0; i < s_lane_count; i++) {
2294 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
2295 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
2296 }
2297
2298 for (int i = 0; i < h_lane_count; i++) {
2299 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
2300 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
2301 }
2302 }
2303}
2304
Jacob Bramleye8289202019-07-31 11:25:23 +01002305TEST_SVE(sve_index) {
2306 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleycd8148c2019-07-11 18:43:20 +01002307 START();
2308
2309 // Simple cases.
2310 __ Index(z0.VnB(), 0, 1);
2311 __ Index(z1.VnH(), 1, 1);
2312 __ Index(z2.VnS(), 2, 1);
2313 __ Index(z3.VnD(), 3, 1);
2314
2315 // Synthesised immediates.
2316 __ Index(z4.VnB(), 42, -1);
2317 __ Index(z5.VnH(), -1, 42);
2318 __ Index(z6.VnS(), 42, 42);
2319
2320 // Register arguments.
2321 __ Mov(x0, 42);
2322 __ Mov(x1, -3);
2323 __ Index(z10.VnD(), x0, x1);
2324 __ Index(z11.VnB(), w0, w1);
2325 // The register size should correspond to the lane size, but VIXL allows any
2326 // register at least as big as the lane size.
2327 __ Index(z12.VnB(), x0, x1);
2328 __ Index(z13.VnH(), w0, x1);
2329 __ Index(z14.VnS(), x0, w1);
2330
2331 // Integer overflow.
2332 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
2333 __ Index(z21.VnH(), 7, -3);
2334 __ Index(z22.VnS(), INT32_MAX - 2, 1);
2335 __ Index(z23.VnD(), INT64_MIN + 6, -7);
2336
2337 END();
2338
2339 if (CAN_RUN()) {
2340 RUN();
2341
2342 int b_lane_count = core.GetSVELaneCount(kBRegSize);
2343 int h_lane_count = core.GetSVELaneCount(kHRegSize);
2344 int s_lane_count = core.GetSVELaneCount(kSRegSize);
2345 int d_lane_count = core.GetSVELaneCount(kDRegSize);
2346
2347 uint64_t b_mask = GetUintMask(kBRegSize);
2348 uint64_t h_mask = GetUintMask(kHRegSize);
2349 uint64_t s_mask = GetUintMask(kSRegSize);
2350 uint64_t d_mask = GetUintMask(kDRegSize);
2351
2352 // Simple cases.
2353 for (int i = 0; i < b_lane_count; i++) {
2354 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
2355 }
2356 for (int i = 0; i < h_lane_count; i++) {
2357 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
2358 }
2359 for (int i = 0; i < s_lane_count; i++) {
2360 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
2361 }
2362 for (int i = 0; i < d_lane_count; i++) {
2363 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
2364 }
2365
2366 // Synthesised immediates.
2367 for (int i = 0; i < b_lane_count; i++) {
2368 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
2369 }
2370 for (int i = 0; i < h_lane_count; i++) {
2371 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
2372 }
2373 for (int i = 0; i < s_lane_count; i++) {
2374 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
2375 }
2376
2377 // Register arguments.
2378 for (int i = 0; i < d_lane_count; i++) {
2379 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
2380 }
2381 for (int i = 0; i < b_lane_count; i++) {
2382 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
2383 }
2384 for (int i = 0; i < b_lane_count; i++) {
2385 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
2386 }
2387 for (int i = 0; i < h_lane_count; i++) {
2388 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
2389 }
2390 for (int i = 0; i < s_lane_count; i++) {
2391 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
2392 }
2393
2394 // Integer overflow.
2395 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
2396 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
2397 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
2398 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
2399 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
2400 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
2401 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
2402 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
2403 }
2404}
2405
TatWai Chongc844bb22019-06-10 15:32:53 -07002406TEST(sve_int_compare_count_and_limit_scalars) {
2407 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2408 START();
2409
2410 __ Mov(w20, 0xfffffffd);
2411 __ Mov(w21, 0xffffffff);
2412
2413 __ Whilele(p0.VnB(), w20, w21);
2414 __ Mrs(x0, NZCV);
2415 __ Whilele(p1.VnH(), w20, w21);
2416 __ Mrs(x1, NZCV);
2417
2418 __ Mov(w20, 0xffffffff);
2419 __ Mov(w21, 0x00000000);
2420
2421 __ Whilelt(p2.VnS(), w20, w21);
2422 __ Mrs(x2, NZCV);
2423 __ Whilelt(p3.VnD(), w20, w21);
2424 __ Mrs(x3, NZCV);
2425
2426 __ Mov(w20, 0xfffffffd);
2427 __ Mov(w21, 0xffffffff);
2428
2429 __ Whilels(p4.VnB(), w20, w21);
2430 __ Mrs(x4, NZCV);
2431 __ Whilels(p5.VnH(), w20, w21);
2432 __ Mrs(x5, NZCV);
2433
2434 __ Mov(w20, 0xffffffff);
2435 __ Mov(w21, 0x00000000);
2436
2437 __ Whilelo(p6.VnS(), w20, w21);
2438 __ Mrs(x6, NZCV);
2439 __ Whilelo(p7.VnD(), w20, w21);
2440 __ Mrs(x7, NZCV);
2441
2442 __ Mov(x20, 0xfffffffffffffffd);
2443 __ Mov(x21, 0xffffffffffffffff);
2444
2445 __ Whilele(p8.VnB(), x20, x21);
2446 __ Mrs(x8, NZCV);
2447 __ Whilele(p9.VnH(), x20, x21);
2448 __ Mrs(x9, NZCV);
2449
2450 __ Mov(x20, 0xffffffffffffffff);
2451 __ Mov(x21, 0x0000000000000000);
2452
2453 __ Whilelt(p10.VnS(), x20, x21);
2454 __ Mrs(x10, NZCV);
2455 __ Whilelt(p11.VnD(), x20, x21);
2456 __ Mrs(x11, NZCV);
2457
2458 __ Mov(x20, 0xfffffffffffffffd);
2459 __ Mov(x21, 0xffffffffffffffff);
2460
2461 __ Whilels(p12.VnB(), x20, x21);
2462 __ Mrs(x12, NZCV);
2463 __ Whilels(p13.VnH(), x20, x21);
2464 __ Mrs(x13, NZCV);
2465
2466 __ Mov(x20, 0xffffffffffffffff);
2467 __ Mov(x21, 0x0000000000000000);
2468
2469 __ Whilelo(p14.VnS(), x20, x21);
2470 __ Mrs(x14, NZCV);
2471 __ Whilelo(p15.VnD(), x20, x21);
2472 __ Mrs(x15, NZCV);
2473
2474 END();
2475
2476 if (CAN_RUN()) {
2477 RUN();
2478
2479 // 0b...00000000'00000111
2480 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
2481 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
2482
2483 // 0b...00000000'00010101
2484 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
2485 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
2486
2487 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
2488 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
2489
2490 int p3_expected[] = {0x00, 0x01};
2491 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
2492
2493 // 0b...11111111'11111111
2494 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
2495 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
2496
2497 // 0b...01010101'01010101
2498 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
2499 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2500
2501 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
2502 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2503
2504 int p7_expected[] = {0x00, 0x00};
2505 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
2506
2507 // 0b...00000000'00000111
2508 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
2509 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
2510
2511 // 0b...00000000'00010101
2512 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
2513 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
2514
2515 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
2516 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
2517
2518 int p11_expected[] = {0x00, 0x01};
2519 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2520
2521 // 0b...11111111'11111111
2522 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
2523 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
2524
2525 // 0b...01010101'01010101
2526 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
2527 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
2528
2529 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
2530 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
2531
2532 int p15_expected[] = {0x00, 0x00};
2533 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
2534
2535 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
2536 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
2537 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
2538 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
2539 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2540 ASSERT_EQUAL_32(SVEFirstFlag, w5);
2541 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
2542 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
2543 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
2544 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
2545 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
2546 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
2547 ASSERT_EQUAL_32(SVEFirstFlag, w12);
2548 ASSERT_EQUAL_32(SVEFirstFlag, w13);
2549 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
2550 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
2551 }
2552}
2553
TatWai Chong302729c2019-06-14 16:18:51 -07002554TEST(sve_int_compare_vectors_signed_imm) {
2555 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2556 START();
2557
2558 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
2559 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2560 InsrHelper(&masm, z13.VnB(), z13_inputs);
2561 Initialise(&masm, p0.VnB(), mask_inputs1);
2562
2563 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2564 __ Mrs(x2, NZCV);
2565 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2566
2567 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2568 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2569 InsrHelper(&masm, z14.VnH(), z14_inputs);
2570 Initialise(&masm, p0.VnH(), mask_inputs2);
2571
2572 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2573 __ Mrs(x4, NZCV);
2574 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2575
2576 int z15_inputs[] = {0, 1, -1, INT_MIN};
2577 int mask_inputs3[] = {0, 1, 1, 1};
2578 InsrHelper(&masm, z15.VnS(), z15_inputs);
2579 Initialise(&masm, p0.VnS(), mask_inputs3);
2580
2581 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2582 __ Mrs(x6, NZCV);
2583 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2584
2585 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2586 __ Mrs(x8, NZCV);
2587 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2588
2589 int64_t z16_inputs[] = {0, -1};
2590 int mask_inputs4[] = {1, 1};
2591 InsrHelper(&masm, z16.VnD(), z16_inputs);
2592 Initialise(&masm, p0.VnD(), mask_inputs4);
2593
2594 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2595 __ Mrs(x10, NZCV);
2596 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2597
2598 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2599 __ Mrs(x12, NZCV);
2600 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2601
2602 END();
2603
2604 if (CAN_RUN()) {
2605 RUN();
2606
2607 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2608 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2609
2610 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2611 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2612
2613 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2614 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2615
2616 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2617 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2618
2619 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2620 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2621
2622 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2623 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2624
2625 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2626 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2627
2628 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2629 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2630
2631 int p10_expected[] = {0x00, 0x01};
2632 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2633
2634 int p11_expected[] = {0x00, 0x00};
2635 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2636
2637 int p12_expected[] = {0x01, 0x00};
2638 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2639
2640 int p13_expected[] = {0x01, 0x01};
2641 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2642
2643 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2644 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2645 ASSERT_EQUAL_32(NoFlag, w6);
2646 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2647 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2648 ASSERT_EQUAL_32(NoFlag, w12);
2649 }
2650}
2651
2652TEST(sve_int_compare_vectors_unsigned_imm) {
2653 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2654 START();
2655
2656 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2657 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2658 InsrHelper(&masm, z13.VnB(), src1_inputs);
2659 Initialise(&masm, p0.VnB(), mask_inputs1);
2660
2661 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2662 __ Mrs(x2, NZCV);
2663 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2664
2665 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2666 int mask_inputs2[] = {1, 1, 1, 1, 0};
2667 InsrHelper(&masm, z13.VnH(), src2_inputs);
2668 Initialise(&masm, p0.VnH(), mask_inputs2);
2669
2670 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2671 __ Mrs(x4, NZCV);
2672 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2673
2674 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2675 int mask_inputs3[] = {1, 1, 1, 1};
2676 InsrHelper(&masm, z13.VnS(), src3_inputs);
2677 Initialise(&masm, p0.VnS(), mask_inputs3);
2678
2679 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2680 __ Mrs(x6, NZCV);
2681 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2682
2683 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2684 int mask_inputs4[] = {1, 1};
2685 InsrHelper(&masm, z13.VnD(), src4_inputs);
2686 Initialise(&masm, p0.VnD(), mask_inputs4);
2687
2688 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2689 __ Mrs(x8, NZCV);
2690 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2691
2692 END();
2693
2694 if (CAN_RUN()) {
2695 RUN();
2696
2697 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2698 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2699
2700 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2701 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2702
2703 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2704 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2705
2706 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2707 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2708
2709 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2710 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2711
2712 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2713 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2714
2715 int p8_expected[] = {0x00, 0x01};
2716 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2717
2718 int p9_expected[] = {0x00, 0x01};
2719 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2720
2721 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2722 ASSERT_EQUAL_32(NoFlag, w4);
2723 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2724 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2725 }
2726}
2727
TatWai Chongc844bb22019-06-10 15:32:53 -07002728TEST(sve_int_compare_conditionally_terminate_scalars) {
2729 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2730 START();
2731
2732 __ Mov(x0, 0xfedcba9887654321);
2733 __ Mov(x1, 0x1000100010001000);
2734
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002735 // Initialise Z and C. These are preserved by cterm*, and the V flag is set to
2736 // !C if the condition does not hold.
2737 __ Mov(x10, NoFlag);
2738 __ Msr(NZCV, x10);
2739
TatWai Chongc844bb22019-06-10 15:32:53 -07002740 __ Ctermeq(w0, w0);
2741 __ Mrs(x2, NZCV);
2742 __ Ctermeq(x0, x1);
2743 __ Mrs(x3, NZCV);
2744 __ Ctermne(x0, x0);
2745 __ Mrs(x4, NZCV);
2746 __ Ctermne(w0, w1);
2747 __ Mrs(x5, NZCV);
2748
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002749 // As above, but with all flags initially set.
2750 __ Mov(x10, NZCVFlag);
2751 __ Msr(NZCV, x10);
2752
2753 __ Ctermeq(w0, w0);
2754 __ Mrs(x6, NZCV);
2755 __ Ctermeq(x0, x1);
2756 __ Mrs(x7, NZCV);
2757 __ Ctermne(x0, x0);
2758 __ Mrs(x8, NZCV);
2759 __ Ctermne(w0, w1);
2760 __ Mrs(x9, NZCV);
2761
TatWai Chongc844bb22019-06-10 15:32:53 -07002762 END();
2763
2764 if (CAN_RUN()) {
2765 RUN();
2766
2767 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2768 ASSERT_EQUAL_32(VFlag, w3);
2769 ASSERT_EQUAL_32(VFlag, w4);
2770 ASSERT_EQUAL_32(SVEFirstFlag, w5);
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002771
2772 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w6);
2773 ASSERT_EQUAL_32(ZCFlag, w7);
2774 ASSERT_EQUAL_32(ZCFlag, w8);
2775 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w9);
TatWai Chongc844bb22019-06-10 15:32:53 -07002776 }
2777}
2778
Jacob Bramley0ce75842019-07-17 18:12:50 +01002779// Work out what the architectural `PredTest` pseudocode should produce for the
2780// given result and governing predicate.
2781template <typename Tg, typename Td, int N>
2782static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2783 const Tg (&pg)[N],
2784 int vl) {
2785 int first = -1;
2786 int last = -1;
2787 bool any_active = false;
2788
2789 // Only consider potentially-active lanes.
2790 int start = (N > vl) ? (N - vl) : 0;
2791 for (int i = start; i < N; i++) {
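  // Input arrays are written with the highest-numbered lane first, so when the
  // array is longer than the vector, its leading entries describe lanes beyond
  // the vector length and must be skipped.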
2792 if ((pg[i] & 1) == 1) {
2793 // Look for the first and last active lanes.
2794 // Note that the 'first' lane is the one with the highest index.
2795 if (last < 0) last = i;
2796 first = i;
2797 // Look for any active lanes that are also active in pd.
2798 if ((pd[i] & 1) == 1) any_active = true;
2799 }
2800 }
2801
2802 uint32_t flags = 0;
2803 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2804 if (!any_active) flags |= SVENoneFlag;
2805 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2806 return static_cast<StatusFlags>(flags);
2807}
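// Note that SVEFirstFlag, SVENoneFlag and SVENotLastFlag alias the N, Z and C
// condition flags respectively, so the StatusFlags value built here can be
// compared directly against an NZCV value read with `Mrs`.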
2808
2809typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2810 const PRegister& pg,
2811 const PRegisterWithLaneSize& pn);
2812template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002813static void PfirstPnextHelper(Test* config,
2814 PfirstPnextFn macro,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002815 unsigned lane_size_in_bits,
2816 const Tg& pg_inputs,
2817 const Tn& pn_inputs,
2818 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002819 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002820 START();
2821
2822 PRegister pg = p15;
2823 PRegister pn = p14;
2824 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2825 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2826
2827 // Initialise NZCV to an impossible value, to check that we actually write it.
2828 __ Mov(x10, NZCVFlag);
2829
2830 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2831 // the Assembler.
2832 __ Msr(NZCV, x10);
2833 __ Mov(p0, pn);
2834 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2835 pg,
2836 p0.WithLaneSize(lane_size_in_bits));
2837 __ Mrs(x0, NZCV);
2838
2839 // The MacroAssembler supports non-destructive use.
2840 __ Msr(NZCV, x10);
2841 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2842 pg,
2843 pn.WithLaneSize(lane_size_in_bits));
2844 __ Mrs(x1, NZCV);
2845
2846 // If pd.Aliases(pg) the macro requires a scratch register.
2847 {
2848 UseScratchRegisterScope temps(&masm);
2849 temps.Include(p13);
2850 __ Msr(NZCV, x10);
2851 __ Mov(p2, p15);
2852 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2853 p2,
2854 pn.WithLaneSize(lane_size_in_bits));
2855 __ Mrs(x2, NZCV);
2856 }
2857
2858 END();
2859
2860 if (CAN_RUN()) {
2861 RUN();
2862
2863 // Check that the inputs weren't modified.
2864 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2865 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2866
2867 // Check the primary operation.
2868 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2869 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2870 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2871
2872 // Check that the flags were properly set.
2873 StatusFlags nzcv_expected =
2874 GetPredTestFlags(pd_expected,
2875 pg_inputs,
2876 core.GetSVELaneCount(kBRegSize));
2877 ASSERT_EQUAL_64(nzcv_expected, x0);
2878 ASSERT_EQUAL_64(nzcv_expected, x1);
2879 ASSERT_EQUAL_64(nzcv_expected, x2);
2880 }
2881}
2882
2883template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002884static void PfirstHelper(Test* config,
2885 const Tg& pg_inputs,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002886 const Tn& pn_inputs,
2887 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002888 PfirstPnextHelper(config,
2889 &MacroAssembler::Pfirst,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002890 kBRegSize, // pfirst only accepts B-sized lanes.
2891 pg_inputs,
2892 pn_inputs,
2893 pd_expected);
2894}
2895
2896template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002897static void PnextHelper(Test* config,
2898 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002899 const Tg& pg_inputs,
2900 const Tn& pn_inputs,
2901 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002902 PfirstPnextHelper(config,
2903 &MacroAssembler::Pnext,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002904 lane_size_in_bits,
2905 pg_inputs,
2906 pn_inputs,
2907 pd_expected);
2908}
2909
Jacob Bramleye8289202019-07-31 11:25:23 +01002910TEST_SVE(sve_pfirst) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002911 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2912 // large VL), but few enough to make the test easy to read.
2913 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2914 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2915 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2916 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2917 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2918 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2919
2920 // Pfirst finds the first active lane in pg, and activates the corresponding
2921 // lane in pn (if it isn't already active).
2922
2923 // The first active lane in in1 is here. |
2924 // v
2925 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2926 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2927 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2928 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002929 PfirstHelper(config, in1, in0, exp10);
2930 PfirstHelper(config, in1, in2, exp12);
2931 PfirstHelper(config, in1, in3, exp13);
2932 PfirstHelper(config, in1, in4, exp14);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002933
2934 // The first active lane in in2 is here. |
2935 // v
2936 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2937 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2938 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2939 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002940 PfirstHelper(config, in2, in0, exp20);
2941 PfirstHelper(config, in2, in1, exp21);
2942 PfirstHelper(config, in2, in3, exp23);
2943 PfirstHelper(config, in2, in4, exp24);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002944
2945 // The first active lane in in3 is here. |
2946 // v
2947 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2948 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2949 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2950 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002951 PfirstHelper(config, in3, in0, exp30);
2952 PfirstHelper(config, in3, in1, exp31);
2953 PfirstHelper(config, in3, in2, exp32);
2954 PfirstHelper(config, in3, in4, exp34);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002955
2956 // | The first active lane in in4 is here.
2957 // v
2958 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2959 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2960 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2961 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002962 PfirstHelper(config, in4, in0, exp40);
2963 PfirstHelper(config, in4, in1, exp41);
2964 PfirstHelper(config, in4, in2, exp42);
2965 PfirstHelper(config, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002966
2967 // If pg is all inactive, the input is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002968 PfirstHelper(config, in0, in0, in0);
2969 PfirstHelper(config, in0, in1, in1);
2970 PfirstHelper(config, in0, in2, in2);
2971 PfirstHelper(config, in0, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002972
2973 // If the values of pg and pn match, the value is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002974 PfirstHelper(config, in0, in0, in0);
2975 PfirstHelper(config, in1, in1, in1);
2976 PfirstHelper(config, in2, in2, in2);
2977 PfirstHelper(config, in3, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002978}
2979
Jacob Bramleye8289202019-07-31 11:25:23 +01002980TEST_SVE(sve_pfirst_alias) {
2981 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002982 START();
2983
2984 // Check that the Simulator behaves correctly when all arguments are aliased.
2985 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2986 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2987 int in_s[] = {0, 1, 1, 0};
2988 int in_d[] = {1, 1};
2989
2990 Initialise(&masm, p0.VnB(), in_b);
2991 Initialise(&masm, p1.VnH(), in_h);
2992 Initialise(&masm, p2.VnS(), in_s);
2993 Initialise(&masm, p3.VnD(), in_d);
2994
2995 // Initialise NZCV to an impossible value, to check that we actually write it.
2996 __ Mov(x10, NZCVFlag);
2997
2998 __ Msr(NZCV, x10);
2999 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
3000 __ Mrs(x0, NZCV);
3001
3002 __ Msr(NZCV, x10);
3003 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
3004 __ Mrs(x1, NZCV);
3005
3006 __ Msr(NZCV, x10);
3007 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
3008 __ Mrs(x2, NZCV);
3009
3010 __ Msr(NZCV, x10);
3011 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
3012 __ Mrs(x3, NZCV);
3013
3014 END();
3015
3016 if (CAN_RUN()) {
3017 RUN();
3018
3019 // The first lane from pg is already active in pdn, so the P register should
3020 // be unchanged.
3021 ASSERT_EQUAL_SVE(in_b, p0.VnB());
3022 ASSERT_EQUAL_SVE(in_h, p1.VnH());
3023 ASSERT_EQUAL_SVE(in_s, p2.VnS());
3024 ASSERT_EQUAL_SVE(in_d, p3.VnD());
3025
3026 ASSERT_EQUAL_64(SVEFirstFlag, x0);
3027 ASSERT_EQUAL_64(SVEFirstFlag, x1);
3028 ASSERT_EQUAL_64(SVEFirstFlag, x2);
3029 ASSERT_EQUAL_64(SVEFirstFlag, x3);
3030 }
3031}
3032
Jacob Bramleye8289202019-07-31 11:25:23 +01003033TEST_SVE(sve_pnext_b) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01003034 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
3035 // (to check propagation if we have a large VL), but few enough to make the
3036 // test easy to read.
3037 // For now, we just use kPRegMinSize so that the test works anywhere.
3038 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3039 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
3040 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
3041 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
3042 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3043
3044 // Pnext activates the next element that is true in pg, after the last-active
3045 // element in pn. If all pn elements are false (as in in0), it starts looking
3046 // at element 0.
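  // For example, taking pg = in2 and pn = in1 below: the last active lane of
  // in1 is its leftmost 1, and the next lane beyond it (towards the left, i.e.
  // towards higher lane numbers) that is active in in2 is the only lane set in
  // exp21.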
3047
3048 // There are no active lanes in in0, so the result is simply the first active
3049 // lane from pg.
3050 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3051 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
3052 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
3053 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
3054 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3055
3056 // The last active lane in in1 is here. |
3057 // v
3058 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3059 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3060 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3061 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3062 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3063
3064 // | The last active lane in in2 is here.
3065 // v
3066 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3067 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3068 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3069 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3070 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3071
3072 // | The last active lane in in3 is here.
3073 // v
3074 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3075 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3076 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3077 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3078 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3079
3080 // | The last active lane in in4 is here.
3081 // v
3082 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3083 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3084 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3085 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3086 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3087
Jacob Bramleye8289202019-07-31 11:25:23 +01003088 PnextHelper(config, kBRegSize, in0, in0, exp00);
3089 PnextHelper(config, kBRegSize, in1, in0, exp10);
3090 PnextHelper(config, kBRegSize, in2, in0, exp20);
3091 PnextHelper(config, kBRegSize, in3, in0, exp30);
3092 PnextHelper(config, kBRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003093
Jacob Bramleye8289202019-07-31 11:25:23 +01003094 PnextHelper(config, kBRegSize, in0, in1, exp01);
3095 PnextHelper(config, kBRegSize, in1, in1, exp11);
3096 PnextHelper(config, kBRegSize, in2, in1, exp21);
3097 PnextHelper(config, kBRegSize, in3, in1, exp31);
3098 PnextHelper(config, kBRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003099
Jacob Bramleye8289202019-07-31 11:25:23 +01003100 PnextHelper(config, kBRegSize, in0, in2, exp02);
3101 PnextHelper(config, kBRegSize, in1, in2, exp12);
3102 PnextHelper(config, kBRegSize, in2, in2, exp22);
3103 PnextHelper(config, kBRegSize, in3, in2, exp32);
3104 PnextHelper(config, kBRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003105
Jacob Bramleye8289202019-07-31 11:25:23 +01003106 PnextHelper(config, kBRegSize, in0, in3, exp03);
3107 PnextHelper(config, kBRegSize, in1, in3, exp13);
3108 PnextHelper(config, kBRegSize, in2, in3, exp23);
3109 PnextHelper(config, kBRegSize, in3, in3, exp33);
3110 PnextHelper(config, kBRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003111
Jacob Bramleye8289202019-07-31 11:25:23 +01003112 PnextHelper(config, kBRegSize, in0, in4, exp04);
3113 PnextHelper(config, kBRegSize, in1, in4, exp14);
3114 PnextHelper(config, kBRegSize, in2, in4, exp24);
3115 PnextHelper(config, kBRegSize, in3, in4, exp34);
3116 PnextHelper(config, kBRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003117}
3118
Jacob Bramleye8289202019-07-31 11:25:23 +01003119TEST_SVE(sve_pnext_h) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01003120 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
3121 // (to check propagation if we have a large VL), but few enough to make the
3122 // test easy to read.
3123 // For now, we just use kPRegMinSize so that the test works anywhere.
3124 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
3125 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
3126 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
3127 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
3128 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
3129
3130 // Pnext activates the next element that is true in pg, after the last-active
3131 // element in pn. If all pn elements are false (as in in0), it starts looking
3132 // at element 0.
3133 //
3134 // As for other SVE instructions, elements are only considered to be active if
3135 // the _first_ bit in each field is one. Other bits are ignored.
3136
3137 // There are no active lanes in in0, so the result is simply the first active
3138 // lane from pg.
3139 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
3140 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
3141 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
3142 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
3143 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
3144
3145 // | The last active lane in in1 is here.
3146 // v
3147 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
3148 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
3149 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
3150 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
3151 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
3152
3153 // | The last active lane in in2 is here.
3154 // v
3155 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
3156 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
3157 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
3158 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
3159 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
3160
3161 // | The last active lane in in3 is here.
3162 // v
3163 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
3164 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
3165 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
3166 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
3167 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
3168
3169 // | The last active lane in in4 is here.
3170 // v
3171 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
3172 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
3173 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
3174 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
3175 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
3176
Jacob Bramleye8289202019-07-31 11:25:23 +01003177 PnextHelper(config, kHRegSize, in0, in0, exp00);
3178 PnextHelper(config, kHRegSize, in1, in0, exp10);
3179 PnextHelper(config, kHRegSize, in2, in0, exp20);
3180 PnextHelper(config, kHRegSize, in3, in0, exp30);
3181 PnextHelper(config, kHRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003182
Jacob Bramleye8289202019-07-31 11:25:23 +01003183 PnextHelper(config, kHRegSize, in0, in1, exp01);
3184 PnextHelper(config, kHRegSize, in1, in1, exp11);
3185 PnextHelper(config, kHRegSize, in2, in1, exp21);
3186 PnextHelper(config, kHRegSize, in3, in1, exp31);
3187 PnextHelper(config, kHRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003188
Jacob Bramleye8289202019-07-31 11:25:23 +01003189 PnextHelper(config, kHRegSize, in0, in2, exp02);
3190 PnextHelper(config, kHRegSize, in1, in2, exp12);
3191 PnextHelper(config, kHRegSize, in2, in2, exp22);
3192 PnextHelper(config, kHRegSize, in3, in2, exp32);
3193 PnextHelper(config, kHRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003194
Jacob Bramleye8289202019-07-31 11:25:23 +01003195 PnextHelper(config, kHRegSize, in0, in3, exp03);
3196 PnextHelper(config, kHRegSize, in1, in3, exp13);
3197 PnextHelper(config, kHRegSize, in2, in3, exp23);
3198 PnextHelper(config, kHRegSize, in3, in3, exp33);
3199 PnextHelper(config, kHRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003200
Jacob Bramleye8289202019-07-31 11:25:23 +01003201 PnextHelper(config, kHRegSize, in0, in4, exp04);
3202 PnextHelper(config, kHRegSize, in1, in4, exp14);
3203 PnextHelper(config, kHRegSize, in2, in4, exp24);
3204 PnextHelper(config, kHRegSize, in3, in4, exp34);
3205 PnextHelper(config, kHRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003206}
3207
Jacob Bramleye8289202019-07-31 11:25:23 +01003208TEST_SVE(sve_pnext_s) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01003209 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
3210 // (to check propagation if we have a large VL), but few enough to make the
3211 // test easy to read.
3212 // For now, we just use kPRegMinSize so that the test works anywhere.
3213 int in0[] = {0xe, 0xc, 0x8, 0x0};
3214 int in1[] = {0x0, 0x2, 0x0, 0x1};
3215 int in2[] = {0x0, 0x1, 0xf, 0x0};
3216 int in3[] = {0xf, 0x0, 0x0, 0x0};
3217
3218 // Pnext activates the next element that is true in pg, after the last-active
3219 // element in pn. If all pn elements are false (as in in0), it starts looking
3220 // at element 0.
3221 //
3222 // As for other SVE instructions, elements are only considered to be active if
3223 // the _first_ bit in each field is one. Other bits are ignored.
3224
3225 // There are no active lanes in in0, so the result is simply the first active
3226 // lane from pg.
3227 int exp00[] = {0, 0, 0, 0};
3228 int exp10[] = {0, 0, 0, 1};
3229 int exp20[] = {0, 0, 1, 0};
3230 int exp30[] = {1, 0, 0, 0};
3231
3232 // | The last active lane in in1 is here.
3233 // v
3234 int exp01[] = {0, 0, 0, 0};
3235 int exp11[] = {0, 0, 0, 0};
3236 int exp21[] = {0, 0, 1, 0};
3237 int exp31[] = {1, 0, 0, 0};
3238
3239 // | The last active lane in in2 is here.
3240 // v
3241 int exp02[] = {0, 0, 0, 0};
3242 int exp12[] = {0, 0, 0, 0};
3243 int exp22[] = {0, 0, 0, 0};
3244 int exp32[] = {1, 0, 0, 0};
3245
3246 // | The last active lane in in3 is here.
3247 // v
3248 int exp03[] = {0, 0, 0, 0};
3249 int exp13[] = {0, 0, 0, 0};
3250 int exp23[] = {0, 0, 0, 0};
3251 int exp33[] = {0, 0, 0, 0};
3252
Jacob Bramleye8289202019-07-31 11:25:23 +01003253 PnextHelper(config, kSRegSize, in0, in0, exp00);
3254 PnextHelper(config, kSRegSize, in1, in0, exp10);
3255 PnextHelper(config, kSRegSize, in2, in0, exp20);
3256 PnextHelper(config, kSRegSize, in3, in0, exp30);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003257
Jacob Bramleye8289202019-07-31 11:25:23 +01003258 PnextHelper(config, kSRegSize, in0, in1, exp01);
3259 PnextHelper(config, kSRegSize, in1, in1, exp11);
3260 PnextHelper(config, kSRegSize, in2, in1, exp21);
3261 PnextHelper(config, kSRegSize, in3, in1, exp31);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003262
Jacob Bramleye8289202019-07-31 11:25:23 +01003263 PnextHelper(config, kSRegSize, in0, in2, exp02);
3264 PnextHelper(config, kSRegSize, in1, in2, exp12);
3265 PnextHelper(config, kSRegSize, in2, in2, exp22);
3266 PnextHelper(config, kSRegSize, in3, in2, exp32);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003267
Jacob Bramleye8289202019-07-31 11:25:23 +01003268 PnextHelper(config, kSRegSize, in0, in3, exp03);
3269 PnextHelper(config, kSRegSize, in1, in3, exp13);
3270 PnextHelper(config, kSRegSize, in2, in3, exp23);
3271 PnextHelper(config, kSRegSize, in3, in3, exp33);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003272}
3273
Jacob Bramleye8289202019-07-31 11:25:23 +01003274TEST_SVE(sve_pnext_d) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01003275 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
3276 // (to check propagation if we have a large VL), but few enough to make the
3277 // test easy to read.
3278 // For now, we just use kPRegMinSize so that the test works anywhere.
3279 int in0[] = {0xfe, 0xf0};
3280 int in1[] = {0x00, 0x55};
3281 int in2[] = {0x33, 0xff};
3282
3283 // Pnext activates the next element that is true in pg, after the last-active
3284 // element in pn. If all pn elements are false (as in in0), it starts looking
3285 // at element 0.
3286 //
3287 // As for other SVE instructions, elements are only considered to be active if
3288 // the _first_ bit in each field is one. Other bits are ignored.
3289
3290 // There are no active lanes in in0, so the result is simply the first active
3291 // lane from pg.
3292 int exp00[] = {0, 0};
3293 int exp10[] = {0, 1};
3294 int exp20[] = {0, 1};
3295
3296 // | The last active lane in in1 is here.
3297 // v
3298 int exp01[] = {0, 0};
3299 int exp11[] = {0, 0};
3300 int exp21[] = {1, 0};
3301
3302 // | The last active lane in in2 is here.
3303 // v
3304 int exp02[] = {0, 0};
3305 int exp12[] = {0, 0};
3306 int exp22[] = {0, 0};
3307
Jacob Bramleye8289202019-07-31 11:25:23 +01003308 PnextHelper(config, kDRegSize, in0, in0, exp00);
3309 PnextHelper(config, kDRegSize, in1, in0, exp10);
3310 PnextHelper(config, kDRegSize, in2, in0, exp20);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003311
Jacob Bramleye8289202019-07-31 11:25:23 +01003312 PnextHelper(config, kDRegSize, in0, in1, exp01);
3313 PnextHelper(config, kDRegSize, in1, in1, exp11);
3314 PnextHelper(config, kDRegSize, in2, in1, exp21);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003315
Jacob Bramleye8289202019-07-31 11:25:23 +01003316 PnextHelper(config, kDRegSize, in0, in2, exp02);
3317 PnextHelper(config, kDRegSize, in1, in2, exp12);
3318 PnextHelper(config, kDRegSize, in2, in2, exp22);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003319}
3320
Jacob Bramleye8289202019-07-31 11:25:23 +01003321TEST_SVE(sve_pnext_alias) {
3322 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003323 START();
3324
3325 // Check that the Simulator behaves correctly when all arguments are aliased.
3326 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
3327 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
3328 int in_s[] = {0, 1, 1, 0};
3329 int in_d[] = {1, 1};
3330
3331 Initialise(&masm, p0.VnB(), in_b);
3332 Initialise(&masm, p1.VnH(), in_h);
3333 Initialise(&masm, p2.VnS(), in_s);
3334 Initialise(&masm, p3.VnD(), in_d);
3335
3336 // Initialise NZCV to an impossible value, to check that we actually write it.
3337 __ Mov(x10, NZCVFlag);
3338
3339 __ Msr(NZCV, x10);
3340 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
3341 __ Mrs(x0, NZCV);
3342
3343 __ Msr(NZCV, x10);
3344 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
3345 __ Mrs(x1, NZCV);
3346
3347 __ Msr(NZCV, x10);
3348 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
3349 __ Mrs(x2, NZCV);
3350
3351 __ Msr(NZCV, x10);
3352 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
3353 __ Mrs(x3, NZCV);
3354
3355 END();
3356
3357 if (CAN_RUN()) {
3358 RUN();
3359
3360 // Since pg.Is(pdn), there can be no active lanes in pg above the last
3361 // active lane in pdn, so the result should always be zero.
3362 ASSERT_EQUAL_SVE(0, p0.VnB());
3363 ASSERT_EQUAL_SVE(0, p1.VnH());
3364 ASSERT_EQUAL_SVE(0, p2.VnS());
3365 ASSERT_EQUAL_SVE(0, p3.VnD());
3366
3367 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
3368 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
3369 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
3370 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
3371 }
3372}
3373
Jacob Bramleye8289202019-07-31 11:25:23 +01003374static void PtrueHelper(Test* config,
3375 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01003376 FlagsUpdate s = LeaveFlags) {
Jacob Bramleye8289202019-07-31 11:25:23 +01003377 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003378 START();
3379
3380 PRegisterWithLaneSize p[kNumberOfPRegisters];
3381 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
3382 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
3383 }
3384
3385 // Initialise NZCV to an impossible value, to check that we actually write it.
3386 StatusFlags nzcv_unmodified = NZCVFlag;
3387 __ Mov(x20, nzcv_unmodified);
3388
3389 // We don't have enough registers to conveniently test every pattern, so take
3390 // samples from each group.
3391 __ Msr(NZCV, x20);
3392 __ Ptrue(p[0], SVE_POW2, s);
3393 __ Mrs(x0, NZCV);
3394
3395 __ Msr(NZCV, x20);
3396 __ Ptrue(p[1], SVE_VL1, s);
3397 __ Mrs(x1, NZCV);
3398
3399 __ Msr(NZCV, x20);
3400 __ Ptrue(p[2], SVE_VL2, s);
3401 __ Mrs(x2, NZCV);
3402
3403 __ Msr(NZCV, x20);
3404 __ Ptrue(p[3], SVE_VL5, s);
3405 __ Mrs(x3, NZCV);
3406
3407 __ Msr(NZCV, x20);
3408 __ Ptrue(p[4], SVE_VL6, s);
3409 __ Mrs(x4, NZCV);
3410
3411 __ Msr(NZCV, x20);
3412 __ Ptrue(p[5], SVE_VL8, s);
3413 __ Mrs(x5, NZCV);
3414
3415 __ Msr(NZCV, x20);
3416 __ Ptrue(p[6], SVE_VL16, s);
3417 __ Mrs(x6, NZCV);
3418
3419 __ Msr(NZCV, x20);
3420 __ Ptrue(p[7], SVE_VL64, s);
3421 __ Mrs(x7, NZCV);
3422
3423 __ Msr(NZCV, x20);
3424 __ Ptrue(p[8], SVE_VL256, s);
3425 __ Mrs(x8, NZCV);
3426
3427 {
 3428    // We have to use the Assembler directly to encode pattern values that
 3429    // are not defined by SVEPredicateConstraint, so call `ptrue`/`ptrues` here.
3430 typedef void (
3431 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
3432 int pattern);
3433 AssemblePtrueFn assemble =
3434 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
3435
3436 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
3437 __ msr(NZCV, x20);
3438 (masm.*assemble)(p[9], 0xe);
3439 __ mrs(x9, NZCV);
3440
3441 __ msr(NZCV, x20);
3442 (masm.*assemble)(p[10], 0x16);
3443 __ mrs(x10, NZCV);
3444
3445 __ msr(NZCV, x20);
3446 (masm.*assemble)(p[11], 0x1a);
3447 __ mrs(x11, NZCV);
3448
3449 __ msr(NZCV, x20);
3450 (masm.*assemble)(p[12], 0x1c);
3451 __ mrs(x12, NZCV);
3452 }
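  // 0xe, 0x16, 0x1a and 0x1c are pattern encodings with no named constraint,
  // so `ptrue` with these patterns sets no elements.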
3453
3454 __ Msr(NZCV, x20);
3455 __ Ptrue(p[13], SVE_MUL4, s);
3456 __ Mrs(x13, NZCV);
3457
3458 __ Msr(NZCV, x20);
3459 __ Ptrue(p[14], SVE_MUL3, s);
3460 __ Mrs(x14, NZCV);
3461
3462 __ Msr(NZCV, x20);
3463 __ Ptrue(p[15], SVE_ALL, s);
3464 __ Mrs(x15, NZCV);
3465
3466 END();
3467
3468 if (CAN_RUN()) {
3469 RUN();
3470
3471 int all = core.GetSVELaneCount(lane_size_in_bits);
3472 int pow2 = 1 << HighestSetBitPosition(all);
3473 int mul4 = all - (all % 4);
3474 int mul3 = all - (all % 3);
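    // SVE_POW2 selects the largest power of two that fits in the vector, and
    // SVE_MUL4/SVE_MUL3 select the largest multiples of four and three that
    // fit; `pow2`, `mul4` and `mul3` model those counts.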
3475
3476 // Check P register results.
3477 for (int i = 0; i < all; i++) {
3478 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
3479 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
3480 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
3481 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
3482 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
3483 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
3484 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
3485 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
3486 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
3487 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
3488 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
3489 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
3490 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
3491 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
3492 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
3493 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
3494 }
3495
3496 // Check NZCV results.
3497 if (s == LeaveFlags) {
3498 // No flags should have been updated.
3499 for (int i = 0; i <= 15; i++) {
3500 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
3501 }
3502 } else {
3503 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
3504 StatusFlags nonzero = SVEFirstFlag;
3505
3506 // POW2
3507 ASSERT_EQUAL_64(nonzero, x0);
3508 // VL*
3509 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
3510 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
3511 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
3512 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
3513 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
3514 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
3515 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
3516 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
3517 // #uimm5
3518 ASSERT_EQUAL_64(zero, x9);
3519 ASSERT_EQUAL_64(zero, x10);
3520 ASSERT_EQUAL_64(zero, x11);
3521 ASSERT_EQUAL_64(zero, x12);
3522 // MUL*
3523 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
3524 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
3525 // ALL
3526 ASSERT_EQUAL_64(nonzero, x15);
3527 }
3528 }
3529}
3530
Jacob Bramleye8289202019-07-31 11:25:23 +01003531TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
3532TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
3533TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
3534TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01003535
Jacob Bramleye8289202019-07-31 11:25:23 +01003536TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
3537TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
3538TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
3539TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01003540
Jacob Bramleye8289202019-07-31 11:25:23 +01003541TEST_SVE(sve_pfalse) {
3542 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003543 START();
3544
3545 // Initialise non-zero inputs.
3546 __ Ptrue(p0.VnB());
3547 __ Ptrue(p1.VnH());
3548 __ Ptrue(p2.VnS());
3549 __ Ptrue(p3.VnD());
3550
3551 // The instruction only supports B-sized lanes, but the lane size has no
3552 // logical effect, so the MacroAssembler accepts anything.
3553 __ Pfalse(p0.VnB());
3554 __ Pfalse(p1.VnH());
3555 __ Pfalse(p2.VnS());
3556 __ Pfalse(p3.VnD());
3557
3558 END();
3559
3560 if (CAN_RUN()) {
3561 RUN();
3562
3563 ASSERT_EQUAL_SVE(0, p0.VnB());
3564 ASSERT_EQUAL_SVE(0, p1.VnB());
3565 ASSERT_EQUAL_SVE(0, p2.VnB());
3566 ASSERT_EQUAL_SVE(0, p3.VnB());
3567 }
3568}
3569
Jacob Bramleye8289202019-07-31 11:25:23 +01003570TEST_SVE(sve_ptest) {
3571 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003572 START();
3573
3574 // Initialise NZCV to a known (impossible) value.
3575 StatusFlags nzcv_unmodified = NZCVFlag;
3576 __ Mov(x0, nzcv_unmodified);
3577 __ Msr(NZCV, x0);
3578
3579 // Construct some test inputs.
3580 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3581 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3582 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3583 __ Pfalse(p0.VnB());
3584 __ Ptrue(p1.VnB());
3585 Initialise(&masm, p2.VnB(), in2);
3586 Initialise(&masm, p3.VnB(), in3);
3587 Initialise(&masm, p4.VnB(), in4);
3588
3589 // All-inactive pg.
3590 __ Ptest(p0, p0.VnB());
3591 __ Mrs(x0, NZCV);
3592 __ Ptest(p0, p1.VnB());
3593 __ Mrs(x1, NZCV);
3594 __ Ptest(p0, p2.VnB());
3595 __ Mrs(x2, NZCV);
3596 __ Ptest(p0, p3.VnB());
3597 __ Mrs(x3, NZCV);
3598 __ Ptest(p0, p4.VnB());
3599 __ Mrs(x4, NZCV);
3600
3601 // All-active pg.
3602 __ Ptest(p1, p0.VnB());
3603 __ Mrs(x5, NZCV);
3604 __ Ptest(p1, p1.VnB());
3605 __ Mrs(x6, NZCV);
3606 __ Ptest(p1, p2.VnB());
3607 __ Mrs(x7, NZCV);
3608 __ Ptest(p1, p3.VnB());
3609 __ Mrs(x8, NZCV);
3610 __ Ptest(p1, p4.VnB());
3611 __ Mrs(x9, NZCV);
3612
3613 // Combinations of other inputs.
3614 __ Ptest(p2, p2.VnB());
3615 __ Mrs(x20, NZCV);
3616 __ Ptest(p2, p3.VnB());
3617 __ Mrs(x21, NZCV);
3618 __ Ptest(p2, p4.VnB());
3619 __ Mrs(x22, NZCV);
3620 __ Ptest(p3, p2.VnB());
3621 __ Mrs(x23, NZCV);
3622 __ Ptest(p3, p3.VnB());
3623 __ Mrs(x24, NZCV);
3624 __ Ptest(p3, p4.VnB());
3625 __ Mrs(x25, NZCV);
3626 __ Ptest(p4, p2.VnB());
3627 __ Mrs(x26, NZCV);
3628 __ Ptest(p4, p3.VnB());
3629 __ Mrs(x27, NZCV);
3630 __ Ptest(p4, p4.VnB());
3631 __ Mrs(x28, NZCV);
3632
3633 END();
3634
3635 if (CAN_RUN()) {
3636 RUN();
3637
3638 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
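    // After Ptest, SVEFirstFlag (N) means the first active lane of pg was set
    // in pn, SVENoneFlag (Z) means no active lane of pg was set in pn, and
    // SVENotLastFlag (C) means the last active lane of pg was not set in pn.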
3639
3640 // If pg is all inactive, the value of pn is irrelevant.
3641 ASSERT_EQUAL_64(zero, x0);
3642 ASSERT_EQUAL_64(zero, x1);
3643 ASSERT_EQUAL_64(zero, x2);
3644 ASSERT_EQUAL_64(zero, x3);
3645 ASSERT_EQUAL_64(zero, x4);
3646
3647 // All-active pg.
3648 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3649 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3650 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3651 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3652 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3653 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3654
3655 // Other inputs.
3656 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3657 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3658 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3659 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3660 x23); // pg: in3, pn: in2
3661 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3662 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3663 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3664 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3665 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3666 }
3667}
3668
Jacob Bramleye8289202019-07-31 11:25:23 +01003669TEST_SVE(sve_cntp) {
3670 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01003671 START();
3672
3673 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3674 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
3675 Initialise(&masm, p0.VnB(), p0_inputs);
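  // For the wider lane sizes, only the lowest byte of each field determines
  // whether a lane is active, which is where the {5, 2, 1} counts come from.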
3676
3677 // With an all-true predicate, these instructions measure the vector length.
3678 __ Ptrue(p10.VnB());
3679 __ Ptrue(p11.VnH());
3680 __ Ptrue(p12.VnS());
3681 __ Ptrue(p13.VnD());
3682
3683 // `ptrue p10.b` provides an all-active pg.
3684 __ Cntp(x10, p10, p10.VnB());
3685 __ Cntp(x11, p10, p11.VnH());
3686 __ Cntp(x12, p10, p12.VnS());
3687 __ Cntp(x13, p10, p13.VnD());
3688
3689 // Check that the predicate mask is applied properly.
3690 __ Cntp(x14, p10, p10.VnB());
3691 __ Cntp(x15, p11, p10.VnB());
3692 __ Cntp(x16, p12, p10.VnB());
3693 __ Cntp(x17, p13, p10.VnB());
3694
3695 // Check other patterns (including some ignored bits).
3696 __ Cntp(x0, p10, p0.VnB());
3697 __ Cntp(x1, p10, p0.VnH());
3698 __ Cntp(x2, p10, p0.VnS());
3699 __ Cntp(x3, p10, p0.VnD());
3700 __ Cntp(x4, p0, p10.VnB());
3701 __ Cntp(x5, p0, p10.VnH());
3702 __ Cntp(x6, p0, p10.VnS());
3703 __ Cntp(x7, p0, p10.VnD());
3704
3705 END();
3706
3707 if (CAN_RUN()) {
3708 RUN();
3709
3710 int vl_b = core.GetSVELaneCount(kBRegSize);
3711 int vl_h = core.GetSVELaneCount(kHRegSize);
3712 int vl_s = core.GetSVELaneCount(kSRegSize);
3713 int vl_d = core.GetSVELaneCount(kDRegSize);
3714
3715 // Check all-active predicates in various combinations.
3716 ASSERT_EQUAL_64(vl_b, x10);
3717 ASSERT_EQUAL_64(vl_h, x11);
3718 ASSERT_EQUAL_64(vl_s, x12);
3719 ASSERT_EQUAL_64(vl_d, x13);
3720
3721 ASSERT_EQUAL_64(vl_b, x14);
3722 ASSERT_EQUAL_64(vl_h, x15);
3723 ASSERT_EQUAL_64(vl_s, x16);
3724 ASSERT_EQUAL_64(vl_d, x17);
3725
3726 // Check that irrelevant bits are properly ignored.
3727 ASSERT_EQUAL_64(7, x0);
3728 ASSERT_EQUAL_64(5, x1);
3729 ASSERT_EQUAL_64(2, x2);
3730 ASSERT_EQUAL_64(1, x3);
3731
3732 ASSERT_EQUAL_64(7, x4);
3733 ASSERT_EQUAL_64(5, x5);
3734 ASSERT_EQUAL_64(2, x6);
3735 ASSERT_EQUAL_64(1, x7);
3736 }
3737}
3738
Martyn Capewell74f84f62019-10-30 15:30:44 +00003739typedef void (MacroAssembler::*CntFn)(const Register& dst,
3740 int pattern,
3741 int multiplier);
3742
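// Set each of x0-x15 and x18-x21 to `acc_value`, then apply `cnt` once per
// pattern: POW2, the fixed VL* patterns, three unallocated pattern encodings
// (16, 23 and 28, which select no elements), MUL4, MUL3 and ALL.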
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003743template <typename T>
3744void GenerateCntSequence(MacroAssembler* masm,
3745 CntFn cnt,
3746 T acc_value,
3747 int multiplier) {
3748 // Initialise accumulators.
3749 masm->Mov(x0, acc_value);
3750 masm->Mov(x1, acc_value);
3751 masm->Mov(x2, acc_value);
3752 masm->Mov(x3, acc_value);
3753 masm->Mov(x4, acc_value);
3754 masm->Mov(x5, acc_value);
3755 masm->Mov(x6, acc_value);
3756 masm->Mov(x7, acc_value);
3757 masm->Mov(x8, acc_value);
3758 masm->Mov(x9, acc_value);
3759 masm->Mov(x10, acc_value);
3760 masm->Mov(x11, acc_value);
3761 masm->Mov(x12, acc_value);
3762 masm->Mov(x13, acc_value);
3763 masm->Mov(x14, acc_value);
3764 masm->Mov(x15, acc_value);
3765 masm->Mov(x18, acc_value);
3766 masm->Mov(x19, acc_value);
3767 masm->Mov(x20, acc_value);
3768 masm->Mov(x21, acc_value);
3769
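  // Register(code, sizeof(T) * kBitsPerByte) selects the W destination for
  // 32-bit accumulators and the X destination for 64-bit accumulators.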
3770 (masm->*cnt)(Register(0, sizeof(T) * kBitsPerByte), SVE_POW2, multiplier);
3771 (masm->*cnt)(Register(1, sizeof(T) * kBitsPerByte), SVE_VL1, multiplier);
3772 (masm->*cnt)(Register(2, sizeof(T) * kBitsPerByte), SVE_VL2, multiplier);
3773 (masm->*cnt)(Register(3, sizeof(T) * kBitsPerByte), SVE_VL3, multiplier);
3774 (masm->*cnt)(Register(4, sizeof(T) * kBitsPerByte), SVE_VL4, multiplier);
3775 (masm->*cnt)(Register(5, sizeof(T) * kBitsPerByte), SVE_VL5, multiplier);
3776 (masm->*cnt)(Register(6, sizeof(T) * kBitsPerByte), SVE_VL6, multiplier);
3777 (masm->*cnt)(Register(7, sizeof(T) * kBitsPerByte), SVE_VL7, multiplier);
3778 (masm->*cnt)(Register(8, sizeof(T) * kBitsPerByte), SVE_VL8, multiplier);
3779 (masm->*cnt)(Register(9, sizeof(T) * kBitsPerByte), SVE_VL16, multiplier);
3780 (masm->*cnt)(Register(10, sizeof(T) * kBitsPerByte), SVE_VL32, multiplier);
3781 (masm->*cnt)(Register(11, sizeof(T) * kBitsPerByte), SVE_VL64, multiplier);
3782 (masm->*cnt)(Register(12, sizeof(T) * kBitsPerByte), SVE_VL128, multiplier);
3783 (masm->*cnt)(Register(13, sizeof(T) * kBitsPerByte), SVE_VL256, multiplier);
3784 (masm->*cnt)(Register(14, sizeof(T) * kBitsPerByte), 16, multiplier);
3785 (masm->*cnt)(Register(15, sizeof(T) * kBitsPerByte), 23, multiplier);
3786 (masm->*cnt)(Register(18, sizeof(T) * kBitsPerByte), 28, multiplier);
3787 (masm->*cnt)(Register(19, sizeof(T) * kBitsPerByte), SVE_MUL4, multiplier);
3788 (masm->*cnt)(Register(20, sizeof(T) * kBitsPerByte), SVE_MUL3, multiplier);
3789 (masm->*cnt)(Register(21, sizeof(T) * kBitsPerByte), SVE_ALL, multiplier);
3790}
3791
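// Return `fixed` if the vector provides at least that many lanes, and zero
// otherwise, matching the behaviour of the fixed SVE_VL* patterns.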
3792int FixedVL(int fixed, int length) {
3793 VIXL_ASSERT(((fixed >= 1) && (fixed <= 8)) || (fixed == 16) ||
3794 (fixed == 32) || (fixed == 64) || (fixed == 128) ||
 3795 (fixed == 256));
3796 return (length >= fixed) ? fixed : 0;
3797}
3798
Martyn Capewell74f84f62019-10-30 15:30:44 +00003799static void CntHelper(Test* config,
3800 CntFn cnt,
3801 int multiplier,
Martyn Capewell579c92d2019-10-30 17:48:52 +00003802 int lane_size_in_bits,
3803 int64_t acc_value = 0,
3804 bool is_increment = true) {
Martyn Capewell74f84f62019-10-30 15:30:44 +00003805 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3806 START();
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003807 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003808 END();
3809
3810 if (CAN_RUN()) {
3811 RUN();
3812
3813 int all = core.GetSVELaneCount(lane_size_in_bits);
3814 int pow2 = 1 << HighestSetBitPosition(all);
3815 int mul4 = all - (all % 4);
3816 int mul3 = all - (all % 3);
3817
Martyn Capewell579c92d2019-10-30 17:48:52 +00003818 multiplier = is_increment ? multiplier : -multiplier;
3819
3820 ASSERT_EQUAL_64(acc_value + (multiplier * pow2), x0);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003821 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(1, all)), x1);
3822 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(2, all)), x2);
3823 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(3, all)), x3);
3824 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(4, all)), x4);
3825 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(5, all)), x5);
3826 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(6, all)), x6);
3827 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(7, all)), x7);
3828 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(8, all)), x8);
3829 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(16, all)), x9);
3830 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(32, all)), x10);
3831 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(64, all)), x11);
3832 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(128, all)), x12);
3833 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(256, all)), x13);
Martyn Capewell579c92d2019-10-30 17:48:52 +00003834 ASSERT_EQUAL_64(acc_value, x14);
3835 ASSERT_EQUAL_64(acc_value, x15);
3836 ASSERT_EQUAL_64(acc_value, x18);
3837 ASSERT_EQUAL_64(acc_value + (multiplier * mul4), x19);
3838 ASSERT_EQUAL_64(acc_value + (multiplier * mul3), x20);
3839 ASSERT_EQUAL_64(acc_value + (multiplier * all), x21);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003840 }
3841}
3842
Martyn Capewell579c92d2019-10-30 17:48:52 +00003843static void IncHelper(Test* config,
3844 CntFn cnt,
3845 int multiplier,
3846 int lane_size_in_bits,
3847 int64_t acc_value) {
3848 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3849}
3850
3851static void DecHelper(Test* config,
3852 CntFn cnt,
3853 int multiplier,
3854 int lane_size_in_bits,
3855 int64_t acc_value) {
3856 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3857}
3858
Martyn Capewell74f84f62019-10-30 15:30:44 +00003859TEST_SVE(sve_cntb) {
3860 CntHelper(config, &MacroAssembler::Cntb, 1, kBRegSize);
3861 CntHelper(config, &MacroAssembler::Cntb, 2, kBRegSize);
3862 CntHelper(config, &MacroAssembler::Cntb, 15, kBRegSize);
3863 CntHelper(config, &MacroAssembler::Cntb, 16, kBRegSize);
3864}
3865
3866TEST_SVE(sve_cnth) {
3867 CntHelper(config, &MacroAssembler::Cnth, 1, kHRegSize);
3868 CntHelper(config, &MacroAssembler::Cnth, 2, kHRegSize);
3869 CntHelper(config, &MacroAssembler::Cnth, 15, kHRegSize);
3870 CntHelper(config, &MacroAssembler::Cnth, 16, kHRegSize);
3871}
3872
3873TEST_SVE(sve_cntw) {
3874 CntHelper(config, &MacroAssembler::Cntw, 1, kWRegSize);
3875 CntHelper(config, &MacroAssembler::Cntw, 2, kWRegSize);
3876 CntHelper(config, &MacroAssembler::Cntw, 15, kWRegSize);
3877 CntHelper(config, &MacroAssembler::Cntw, 16, kWRegSize);
3878}
3879
3880TEST_SVE(sve_cntd) {
3881 CntHelper(config, &MacroAssembler::Cntd, 1, kDRegSize);
3882 CntHelper(config, &MacroAssembler::Cntd, 2, kDRegSize);
3883 CntHelper(config, &MacroAssembler::Cntd, 15, kDRegSize);
3884 CntHelper(config, &MacroAssembler::Cntd, 16, kDRegSize);
3885}
3886
Martyn Capewell579c92d2019-10-30 17:48:52 +00003887TEST_SVE(sve_decb) {
3888 DecHelper(config, &MacroAssembler::Decb, 1, kBRegSize, 42);
3889 DecHelper(config, &MacroAssembler::Decb, 2, kBRegSize, -1);
3890 DecHelper(config, &MacroAssembler::Decb, 15, kBRegSize, INT64_MIN);
3891 DecHelper(config, &MacroAssembler::Decb, 16, kBRegSize, -42);
3892}
3893
3894TEST_SVE(sve_dech) {
3895 DecHelper(config, &MacroAssembler::Dech, 1, kHRegSize, 42);
3896 DecHelper(config, &MacroAssembler::Dech, 2, kHRegSize, -1);
3897 DecHelper(config, &MacroAssembler::Dech, 15, kHRegSize, INT64_MIN);
3898 DecHelper(config, &MacroAssembler::Dech, 16, kHRegSize, -42);
3899}
3900
3901TEST_SVE(sve_decw) {
3902 DecHelper(config, &MacroAssembler::Decw, 1, kWRegSize, 42);
3903 DecHelper(config, &MacroAssembler::Decw, 2, kWRegSize, -1);
3904 DecHelper(config, &MacroAssembler::Decw, 15, kWRegSize, INT64_MIN);
3905 DecHelper(config, &MacroAssembler::Decw, 16, kWRegSize, -42);
3906}
3907
3908TEST_SVE(sve_decd) {
3909 DecHelper(config, &MacroAssembler::Decd, 1, kDRegSize, 42);
3910 DecHelper(config, &MacroAssembler::Decd, 2, kDRegSize, -1);
3911 DecHelper(config, &MacroAssembler::Decd, 15, kDRegSize, INT64_MIN);
3912 DecHelper(config, &MacroAssembler::Decd, 16, kDRegSize, -42);
3913}
3914
3915TEST_SVE(sve_incb) {
3916 IncHelper(config, &MacroAssembler::Incb, 1, kBRegSize, 42);
3917 IncHelper(config, &MacroAssembler::Incb, 2, kBRegSize, -1);
3918 IncHelper(config, &MacroAssembler::Incb, 15, kBRegSize, INT64_MAX);
3919 IncHelper(config, &MacroAssembler::Incb, 16, kBRegSize, -42);
3920}
3921
3922TEST_SVE(sve_inch) {
3923 IncHelper(config, &MacroAssembler::Inch, 1, kHRegSize, 42);
3924 IncHelper(config, &MacroAssembler::Inch, 2, kHRegSize, -1);
3925 IncHelper(config, &MacroAssembler::Inch, 15, kHRegSize, INT64_MAX);
3926 IncHelper(config, &MacroAssembler::Inch, 16, kHRegSize, -42);
3927}
3928
3929TEST_SVE(sve_incw) {
3930 IncHelper(config, &MacroAssembler::Incw, 1, kWRegSize, 42);
3931 IncHelper(config, &MacroAssembler::Incw, 2, kWRegSize, -1);
3932 IncHelper(config, &MacroAssembler::Incw, 15, kWRegSize, INT64_MAX);
3933 IncHelper(config, &MacroAssembler::Incw, 16, kWRegSize, -42);
3934}
3935
3936TEST_SVE(sve_incd) {
3937 IncHelper(config, &MacroAssembler::Incd, 1, kDRegSize, 42);
3938 IncHelper(config, &MacroAssembler::Incd, 2, kDRegSize, -1);
3939 IncHelper(config, &MacroAssembler::Incd, 15, kDRegSize, INT64_MAX);
3940 IncHelper(config, &MacroAssembler::Incd, 16, kDRegSize, -42);
3941}
3942
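// Reference model of saturating addition: clamp x + y to the representable
// range of T. For example, QAdd<uint8_t>(250, 10) gives 255 and
// QAdd<int8_t>(-120, -20) gives -128.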
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003943template <typename T>
3944static T QAdd(T x, int y) {
3945 VIXL_ASSERT(y > INT_MIN);
3946 T result;
3947 T min = std::numeric_limits<T>::min();
3948 T max = std::numeric_limits<T>::max();
3949 if ((x >= 0) && (y >= 0)) {
 3950 // For positive x and y, saturate at max.
3951 result = (max - x) < static_cast<T>(y) ? max : x + y;
3952 } else if ((y < 0) && ((x < 0) || (min == 0))) {
 3953 // For negative y, where either x is negative or T is unsigned, saturate at min.
3954 result = (x - min) < static_cast<T>(-y) ? min : x + y;
3955 } else {
3956 result = x + y;
3957 }
3958 return result;
3959}
3960
3961template <typename T>
3962static void QIncDecHelper(Test* config,
3963 CntFn cnt,
3964 int multiplier,
3965 int lane_size_in_bits,
3966 T acc_value,
3967 bool is_increment) {
3968 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3969 START();
3970 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
3971 END();
3972
3973 if (CAN_RUN()) {
3974 RUN();
3975
3976 int all = core.GetSVELaneCount(lane_size_in_bits);
3977 int pow2 = 1 << HighestSetBitPosition(all);
3978 int mul4 = all - (all % 4);
3979 int mul3 = all - (all % 3);
3980
3981 multiplier = is_increment ? multiplier : -multiplier;
3982
3983 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
3984 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
3985 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
3986 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
3987 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
3988 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
3989 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
3990 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
3991 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
3992 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
3993 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
3994 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
3995 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
3996 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
3997 ASSERT_EQUAL_64(acc_value, x14);
3998 ASSERT_EQUAL_64(acc_value, x15);
3999 ASSERT_EQUAL_64(acc_value, x18);
4000 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
4001 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
4002 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
4003 }
4004}
4005
4006template <typename T>
4007static void QIncHelper(Test* config,
4008 CntFn cnt,
4009 int multiplier,
4010 int lane_size_in_bits,
4011 T acc_value) {
4012 QIncDecHelper<T>(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
4013}
4014
4015template <typename T>
4016static void QDecHelper(Test* config,
4017 CntFn cnt,
4018 int multiplier,
4019 int lane_size_in_bits,
4020 T acc_value) {
4021 QIncDecHelper<T>(config,
4022 cnt,
4023 multiplier,
4024 lane_size_in_bits,
4025 acc_value,
4026 false);
4027}
4028
4029TEST_SVE(sve_sqdecb) {
4030 int64_t bigneg = INT64_MIN + 42;
4031 int64_t bigpos = INT64_MAX - 42;
4032 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
4033 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 2, kBRegSize, bigneg);
4034 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
4035 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 16, kBRegSize, bigpos);
4036}
4037
4038TEST_SVE(sve_sqdech) {
4039 int64_t bigneg = INT64_MIN + 42;
4040 int64_t bigpos = INT64_MAX - 42;
4041 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
4042 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 2, kHRegSize, bigneg);
4043 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
4044 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 16, kHRegSize, bigpos);
4045}
4046
4047TEST_SVE(sve_sqdecw) {
4048 int64_t bigneg = INT64_MIN + 42;
4049 int64_t bigpos = INT64_MAX - 42;
4050 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
4051 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 2, kWRegSize, bigneg);
4052 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
4053 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 16, kWRegSize, bigpos);
4054}
4055
4056TEST_SVE(sve_sqdecd) {
4057 int64_t bigneg = INT64_MIN + 42;
4058 int64_t bigpos = INT64_MAX - 42;
4059 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
4060 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 2, kDRegSize, bigneg);
4061 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
4062 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 16, kDRegSize, bigpos);
4063}
4064
4065TEST_SVE(sve_sqincb) {
4066 int64_t bigneg = INT64_MIN + 42;
4067 int64_t bigpos = INT64_MAX - 42;
4068 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
4069 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 2, kBRegSize, bigneg);
4070 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
4071 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 16, kBRegSize, bigpos);
4072}
4073
4074TEST_SVE(sve_sqinch) {
4075 int64_t bigneg = INT64_MIN + 42;
4076 int64_t bigpos = INT64_MAX - 42;
4077 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
4078 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 2, kHRegSize, bigneg);
4079 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
4080 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 16, kHRegSize, bigpos);
4081}
4082
4083TEST_SVE(sve_sqincw) {
4084 int64_t bigneg = INT64_MIN + 42;
4085 int64_t bigpos = INT64_MAX - 42;
4086 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
4087 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 2, kWRegSize, bigneg);
4088 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
4089 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 16, kWRegSize, bigpos);
4090}
4091
4092TEST_SVE(sve_sqincd) {
4093 int64_t bigneg = INT64_MIN + 42;
4094 int64_t bigpos = INT64_MAX - 42;
4095 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
4096 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 2, kDRegSize, bigneg);
4097 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
4098 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 16, kDRegSize, bigpos);
4099}
4100
4101TEST_SVE(sve_uqdecb) {
4102 int32_t big32 = UINT32_MAX - 42;
4103 int64_t big64 = UINT64_MAX - 42;
4104 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
4105 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
4106 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
4107 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big32);
4108 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
4109 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
4110 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
4111 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big64);
4112}
4113
4114TEST_SVE(sve_uqdech) {
4115 int32_t big32 = UINT32_MAX - 42;
4116 int64_t big64 = UINT64_MAX - 42;
4117 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
4118 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
4119 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
4120 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big32);
4121 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
4122 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
4123 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
4124 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big64);
4125}
4126
4127TEST_SVE(sve_uqdecw) {
4128 int32_t big32 = UINT32_MAX - 42;
4129 int64_t big64 = UINT64_MAX - 42;
4130 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
4131 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
4132 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
4133 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big32);
4134 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
4135 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
4136 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
4137 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big64);
4138}
4139
4140TEST_SVE(sve_uqdecd) {
4141 int32_t big32 = UINT32_MAX - 42;
4142 int64_t big64 = UINT64_MAX - 42;
4143 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
4144 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
4145 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
4146 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big32);
4147 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
4148 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
4149 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
4150 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big64);
4151}
4152
4153TEST_SVE(sve_uqincb) {
4154 int32_t big32 = UINT32_MAX - 42;
4155 int64_t big64 = UINT64_MAX - 42;
4156 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
4157 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
4158 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
4159 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big32);
4160 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
4161 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
4162 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
4163 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big64);
4164}
4165
4166TEST_SVE(sve_uqinch) {
4167 int32_t big32 = UINT32_MAX - 42;
4168 int64_t big64 = UINT64_MAX - 42;
4169 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
4170 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
4171 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
4172 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big32);
4173 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
4174 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
4175 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
4176 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big64);
4177}
4178
4179TEST_SVE(sve_uqincw) {
4180 int32_t big32 = UINT32_MAX - 42;
4181 int64_t big64 = UINT64_MAX - 42;
4182 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
4183 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
4184 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
4185 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big32);
4186 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
4187 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
4188 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
4189 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big64);
4190}
4191
4192TEST_SVE(sve_uqincd) {
4193 int32_t big32 = UINT32_MAX - 42;
4194 int64_t big64 = UINT64_MAX - 42;
4195 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
4196 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
4197 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
4198 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big32);
4199 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
4200 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
4201 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
4202 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big64);
4203}
4204
4205typedef void (MacroAssembler::*QIncDecXWFn)(const Register& dst,
4206 const Register& src,
4207 int pattern,
4208 int multiplier);
4209
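// Test the <Xdn>, <Wdn> forms: the 32-bit accumulator in the W register is
// incremented or decremented with saturation, and the result is then
// sign-extended into the full X destination (only the signed Sqinc*/Sqdec*
// forms are exercised through this helper).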
4210static void QIncDecXWHelper(Test* config,
4211 QIncDecXWFn cnt,
4212 int multiplier,
4213 int lane_size_in_bits,
4214 int32_t acc_value,
4215 bool is_increment) {
4216 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4217 START();
4218
4219 // Initialise accumulators.
4220 __ Mov(x0, acc_value);
4221 __ Mov(x1, acc_value);
4222 __ Mov(x2, acc_value);
4223 __ Mov(x3, acc_value);
4224 __ Mov(x4, acc_value);
4225 __ Mov(x5, acc_value);
4226 __ Mov(x6, acc_value);
4227 __ Mov(x7, acc_value);
4228 __ Mov(x8, acc_value);
4229 __ Mov(x9, acc_value);
4230 __ Mov(x10, acc_value);
4231 __ Mov(x11, acc_value);
4232 __ Mov(x12, acc_value);
4233 __ Mov(x13, acc_value);
4234 __ Mov(x14, acc_value);
4235 __ Mov(x15, acc_value);
4236 __ Mov(x18, acc_value);
4237 __ Mov(x19, acc_value);
4238 __ Mov(x20, acc_value);
4239 __ Mov(x21, acc_value);
4240
4241 (masm.*cnt)(x0, w0, SVE_POW2, multiplier);
4242 (masm.*cnt)(x1, w1, SVE_VL1, multiplier);
4243 (masm.*cnt)(x2, w2, SVE_VL2, multiplier);
4244 (masm.*cnt)(x3, w3, SVE_VL3, multiplier);
4245 (masm.*cnt)(x4, w4, SVE_VL4, multiplier);
4246 (masm.*cnt)(x5, w5, SVE_VL5, multiplier);
4247 (masm.*cnt)(x6, w6, SVE_VL6, multiplier);
4248 (masm.*cnt)(x7, w7, SVE_VL7, multiplier);
4249 (masm.*cnt)(x8, w8, SVE_VL8, multiplier);
4250 (masm.*cnt)(x9, w9, SVE_VL16, multiplier);
4251 (masm.*cnt)(x10, w10, SVE_VL32, multiplier);
4252 (masm.*cnt)(x11, w11, SVE_VL64, multiplier);
4253 (masm.*cnt)(x12, w12, SVE_VL128, multiplier);
4254 (masm.*cnt)(x13, w13, SVE_VL256, multiplier);
4255 (masm.*cnt)(x14, w14, 16, multiplier);
4256 (masm.*cnt)(x15, w15, 23, multiplier);
4257 (masm.*cnt)(x18, w18, 28, multiplier);
4258 (masm.*cnt)(x19, w19, SVE_MUL4, multiplier);
4259 (masm.*cnt)(x20, w20, SVE_MUL3, multiplier);
4260 (masm.*cnt)(x21, w21, SVE_ALL, multiplier);
4261
4262 END();
4263
4264 if (CAN_RUN()) {
4265 RUN();
4266
4267 int all = core.GetSVELaneCount(lane_size_in_bits);
4268 int pow2 = 1 << HighestSetBitPosition(all);
4269 int mul4 = all - (all % 4);
4270 int mul3 = all - (all % 3);
4271
4272 multiplier = is_increment ? multiplier : -multiplier;
4273
4274 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
4275 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
4276 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
4277 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
4278 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
4279 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
4280 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
4281 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
4282 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
4283 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
4284 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
4285 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
4286 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
4287 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
4288 ASSERT_EQUAL_64(acc_value, x14);
4289 ASSERT_EQUAL_64(acc_value, x15);
4290 ASSERT_EQUAL_64(acc_value, x18);
4291 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
4292 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
4293 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
4294 }
4295}
4296
4297static void QIncXWHelper(Test* config,
4298 QIncDecXWFn cnt,
4299 int multiplier,
4300 int lane_size_in_bits,
4301 int32_t acc_value) {
4302 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
4303}
4304
4305static void QDecXWHelper(Test* config,
4306 QIncDecXWFn cnt,
4307 int multiplier,
4308 int lane_size_in_bits,
4309 int32_t acc_value) {
4310 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
4311}
4312
4313TEST_SVE(sve_sqdecb_xw) {
4314 QDecXWHelper(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
4315 QDecXWHelper(config, &MacroAssembler::Sqdecb, 2, kBRegSize, INT32_MIN + 42);
4316 QDecXWHelper(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
4317 QDecXWHelper(config, &MacroAssembler::Sqdecb, 16, kBRegSize, INT32_MAX - 42);
4318}
4319
4320TEST_SVE(sve_sqdech_xw) {
4321 QDecXWHelper(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
4322 QDecXWHelper(config, &MacroAssembler::Sqdech, 2, kHRegSize, INT32_MIN + 42);
4323 QDecXWHelper(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
4324 QDecXWHelper(config, &MacroAssembler::Sqdech, 16, kHRegSize, INT32_MAX - 42);
4325}
4326
4327TEST_SVE(sve_sqdecw_xw) {
4328 QDecXWHelper(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
4329 QDecXWHelper(config, &MacroAssembler::Sqdecw, 2, kWRegSize, INT32_MIN + 42);
4330 QDecXWHelper(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
4331 QDecXWHelper(config, &MacroAssembler::Sqdecw, 16, kWRegSize, INT32_MAX - 42);
4332}
4333
4334TEST_SVE(sve_sqdecd_xw) {
4335 QDecXWHelper(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
4336 QDecXWHelper(config, &MacroAssembler::Sqdecd, 2, kDRegSize, INT32_MIN + 42);
4337 QDecXWHelper(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
4338 QDecXWHelper(config, &MacroAssembler::Sqdecd, 16, kDRegSize, INT32_MAX - 42);
4339}
4340
4341TEST_SVE(sve_sqincb_xw) {
4342 QIncXWHelper(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
4343 QIncXWHelper(config, &MacroAssembler::Sqincb, 2, kBRegSize, INT32_MIN + 42);
4344 QIncXWHelper(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
4345 QIncXWHelper(config, &MacroAssembler::Sqincb, 16, kBRegSize, INT32_MAX - 42);
4346}
4347
4348TEST_SVE(sve_sqinch_xw) {
4349 QIncXWHelper(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
4350 QIncXWHelper(config, &MacroAssembler::Sqinch, 2, kHRegSize, INT32_MIN + 42);
4351 QIncXWHelper(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
4352 QIncXWHelper(config, &MacroAssembler::Sqinch, 16, kHRegSize, INT32_MAX - 42);
4353}
4354
4355TEST_SVE(sve_sqincw_xw) {
4356 QIncXWHelper(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
4357 QIncXWHelper(config, &MacroAssembler::Sqincw, 2, kWRegSize, INT32_MIN + 42);
4358 QIncXWHelper(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
4359 QIncXWHelper(config, &MacroAssembler::Sqincw, 16, kWRegSize, INT32_MAX - 42);
4360}
4361
4362TEST_SVE(sve_sqincd_xw) {
4363 QIncXWHelper(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
4364 QIncXWHelper(config, &MacroAssembler::Sqincd, 2, kDRegSize, INT32_MIN + 42);
4365 QIncXWHelper(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
4366 QIncXWHelper(config, &MacroAssembler::Sqincd, 16, kDRegSize, INT32_MAX - 42);
4367}
4368
Martyn Capewell8188ddf2019-11-21 17:09:34 +00004369typedef void (MacroAssembler::*IncDecZFn)(const ZRegister& dst,
4370 int pattern,
4371 int multiplier);
4372typedef void (MacroAssembler::*AddSubFn)(const ZRegister& dst,
4373 const ZRegister& src1,
4374 const ZRegister& src2);
4375
4376static void IncDecZHelper(Test* config,
4377 IncDecZFn fn,
4378 CntFn cnt,
4379 AddSubFn addsub,
4380 int multiplier,
4381 int lane_size_in_bits) {
4382 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4383 START();
4384
4385 uint64_t acc_inputs[] = {0x7766554433221100,
4386 0xffffffffffffffff,
4387 0x0000000000000000,
4388 0xffffffff0000ffff,
4389 0x7fffffffffffffff,
4390 0x8000000000000000,
4391 0x7fffffff7fff7fff,
4392 0x8000000080008000};
4393
4394 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
4395 for (int j = 0; j < 4; j++) {
4396 InsrHelper(&masm, ZRegister(i, kDRegSize), acc_inputs);
4397 }
4398 }
4399 for (unsigned i = 0; i < 15; i++) {
4400 __ Mov(XRegister(i), 0);
4401 }
4402
4403 (masm.*fn)(z16.WithLaneSize(lane_size_in_bits), SVE_POW2, multiplier);
4404 (masm.*fn)(z17.WithLaneSize(lane_size_in_bits), SVE_VL1, multiplier);
4405 (masm.*fn)(z18.WithLaneSize(lane_size_in_bits), SVE_VL2, multiplier);
4406 (masm.*fn)(z19.WithLaneSize(lane_size_in_bits), SVE_VL3, multiplier);
4407 (masm.*fn)(z20.WithLaneSize(lane_size_in_bits), SVE_VL4, multiplier);
4408 (masm.*fn)(z21.WithLaneSize(lane_size_in_bits), SVE_VL7, multiplier);
4409 (masm.*fn)(z22.WithLaneSize(lane_size_in_bits), SVE_VL8, multiplier);
4410 (masm.*fn)(z23.WithLaneSize(lane_size_in_bits), SVE_VL16, multiplier);
4411 (masm.*fn)(z24.WithLaneSize(lane_size_in_bits), SVE_VL64, multiplier);
4412 (masm.*fn)(z25.WithLaneSize(lane_size_in_bits), SVE_VL256, multiplier);
4413 (masm.*fn)(z26.WithLaneSize(lane_size_in_bits), 16, multiplier);
4414 (masm.*fn)(z27.WithLaneSize(lane_size_in_bits), 28, multiplier);
4415 (masm.*fn)(z28.WithLaneSize(lane_size_in_bits), SVE_MUL3, multiplier);
4416 (masm.*fn)(z29.WithLaneSize(lane_size_in_bits), SVE_MUL4, multiplier);
4417 (masm.*fn)(z30.WithLaneSize(lane_size_in_bits), SVE_ALL, multiplier);
4418
4419 // Perform computation using alternative instructions.
4420 (masm.*cnt)(x0, SVE_POW2, multiplier);
4421 (masm.*cnt)(x1, SVE_VL1, multiplier);
4422 (masm.*cnt)(x2, SVE_VL2, multiplier);
4423 (masm.*cnt)(x3, SVE_VL3, multiplier);
4424 (masm.*cnt)(x4, SVE_VL4, multiplier);
4425 (masm.*cnt)(x5, SVE_VL7, multiplier);
4426 (masm.*cnt)(x6, SVE_VL8, multiplier);
4427 (masm.*cnt)(x7, SVE_VL16, multiplier);
4428 (masm.*cnt)(x8, SVE_VL64, multiplier);
4429 (masm.*cnt)(x9, SVE_VL256, multiplier);
4430 (masm.*cnt)(x10, 16, multiplier);
4431 (masm.*cnt)(x11, 28, multiplier);
4432 (masm.*cnt)(x12, SVE_MUL3, multiplier);
4433 (masm.*cnt)(x13, SVE_MUL4, multiplier);
4434 (masm.*cnt)(x14, SVE_ALL, multiplier);
4435
4436 ZRegister zscratch = z15.WithLaneSize(lane_size_in_bits);
4437 for (unsigned i = 0; i < 15; i++) {
4438 ZRegister zsrcdst = ZRegister(i, lane_size_in_bits);
4439 Register x = Register(i, kXRegSize);
4440 __ Dup(zscratch, x);
4441 (masm.*addsub)(zsrcdst, zsrcdst, zscratch);
4442 }
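  // At this point, z0-z14 still hold the original accumulator values, each
  // adjusted by the broadcast count in the corresponding X register, so they
  // should match z16-z30, which were adjusted directly by the inc/dec form
  // under test using the same pattern and multiplier.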
4443
4444 END();
4445
4446 if (CAN_RUN()) {
4447 RUN();
4448
4449 ASSERT_EQUAL_SVE(z0, z16);
4450 ASSERT_EQUAL_SVE(z1, z17);
4451 ASSERT_EQUAL_SVE(z2, z18);
4452 ASSERT_EQUAL_SVE(z3, z19);
4453 ASSERT_EQUAL_SVE(z4, z20);
4454 ASSERT_EQUAL_SVE(z5, z21);
4455 ASSERT_EQUAL_SVE(z6, z22);
4456 ASSERT_EQUAL_SVE(z7, z23);
4457 ASSERT_EQUAL_SVE(z8, z24);
4458 ASSERT_EQUAL_SVE(z9, z25);
4459 ASSERT_EQUAL_SVE(z10, z26);
4460 ASSERT_EQUAL_SVE(z11, z27);
4461 ASSERT_EQUAL_SVE(z12, z28);
4462 ASSERT_EQUAL_SVE(z13, z29);
4463 ASSERT_EQUAL_SVE(z14, z30);
4464 }
4465}
4466
4467TEST_SVE(sve_inc_dec_vec) {
4468 CntFn cnth = &MacroAssembler::Cnth;
4469 CntFn cntw = &MacroAssembler::Cntw;
4470 CntFn cntd = &MacroAssembler::Cntd;
4471 AddSubFn sub = &MacroAssembler::Sub;
4472 AddSubFn add = &MacroAssembler::Add;
4473 for (int mult = 1; mult <= 16; mult += 5) {
4474 IncDecZHelper(config, &MacroAssembler::Dech, cnth, sub, mult, kHRegSize);
4475 IncDecZHelper(config, &MacroAssembler::Decw, cntw, sub, mult, kSRegSize);
4476 IncDecZHelper(config, &MacroAssembler::Decd, cntd, sub, mult, kDRegSize);
4477 IncDecZHelper(config, &MacroAssembler::Inch, cnth, add, mult, kHRegSize);
4478 IncDecZHelper(config, &MacroAssembler::Incw, cntw, add, mult, kSRegSize);
4479 IncDecZHelper(config, &MacroAssembler::Incd, cntd, add, mult, kDRegSize);
4480 }
4481}
4482
4483TEST_SVE(sve_unsigned_sat_inc_dec_vec) {
4484 CntFn cnth = &MacroAssembler::Cnth;
4485 CntFn cntw = &MacroAssembler::Cntw;
4486 CntFn cntd = &MacroAssembler::Cntd;
4487 AddSubFn sub = &MacroAssembler::Uqsub;
4488 AddSubFn add = &MacroAssembler::Uqadd;
4489 for (int mult = 1; mult <= 16; mult += 5) {
4490 IncDecZHelper(config, &MacroAssembler::Uqdech, cnth, sub, mult, kHRegSize);
4491 IncDecZHelper(config, &MacroAssembler::Uqdecw, cntw, sub, mult, kSRegSize);
4492 IncDecZHelper(config, &MacroAssembler::Uqdecd, cntd, sub, mult, kDRegSize);
4493 IncDecZHelper(config, &MacroAssembler::Uqinch, cnth, add, mult, kHRegSize);
4494 IncDecZHelper(config, &MacroAssembler::Uqincw, cntw, add, mult, kSRegSize);
4495 IncDecZHelper(config, &MacroAssembler::Uqincd, cntd, add, mult, kDRegSize);
4496 }
4497}
4498
4499TEST_SVE(sve_signed_sat_inc_dec_vec) {
4500 CntFn cnth = &MacroAssembler::Cnth;
4501 CntFn cntw = &MacroAssembler::Cntw;
4502 CntFn cntd = &MacroAssembler::Cntd;
4503 AddSubFn sub = &MacroAssembler::Sqsub;
4504 AddSubFn add = &MacroAssembler::Sqadd;
4505 for (int mult = 1; mult <= 16; mult += 5) {
4506 IncDecZHelper(config, &MacroAssembler::Sqdech, cnth, sub, mult, kHRegSize);
4507 IncDecZHelper(config, &MacroAssembler::Sqdecw, cntw, sub, mult, kSRegSize);
4508 IncDecZHelper(config, &MacroAssembler::Sqdecd, cntd, sub, mult, kDRegSize);
4509 IncDecZHelper(config, &MacroAssembler::Sqinch, cnth, add, mult, kHRegSize);
4510 IncDecZHelper(config, &MacroAssembler::Sqincw, cntw, add, mult, kSRegSize);
4511 IncDecZHelper(config, &MacroAssembler::Sqincd, cntd, add, mult, kDRegSize);
4512 }
4513}
4514
4515typedef void (MacroAssembler::*ArithPredicatedFn)(const ZRegister& zd,
4516 const PRegisterM& pg,
4517 const ZRegister& zn,
4518 const ZRegister& zm);
4519
4520template <typename Td, typename Tg, typename Tn>
4521static void IntBinArithHelper(Test* config,
4522 ArithPredicatedFn macro,
4523 unsigned lane_size_in_bits,
4524 const Tg& pg_inputs,
4525 const Tn& zn_inputs,
4526 const Tn& zm_inputs,
4527 const Td& zd_expected) {
4528 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4529 START();
4530
4531 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
4532 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
4533 InsrHelper(&masm, src_a, zn_inputs);
4534 InsrHelper(&masm, src_b, zm_inputs);
4535
4536 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
4537
4538 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
4539 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
4540 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
4541
4542 // `instr` zd(dst), zd(src_a), zn(src_b)
4543 __ Mov(zd_1, src_a);
4544 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
4545
4546 // `instr` zd(dst), zm(src_a), zd(src_b)
4547 // Based on whether the zd and zm registers are aliased, the instruction
4548 // macro (`Instr`) swaps the order of the operands if the operation is
4549 // commutative; otherwise it uses the reversed form, such as subr or divr.
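  // For example (an illustrative expansion only, not necessarily the exact
  // code the macro emits), a non-commutative `Sub(zd, pg, zn, zd)`, where zd
  // aliases the second source, could be emitted as:
  //   subr zd.t, pg/m, zd.t, zn.t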
4550 __ Mov(zd_2, src_b);
4551 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
4552
4553 // `instr` zd(dst), zm(src_a), zn(src_b)
4554 // The instruction macro (`Instr`) automatically selects between `instr`
4555 // and movprfx + `instr`, based on whether the zd and zn registers are
4556 // aliased. Any generated movprfx instruction is predicated, using the
4557 // same governing predicate register. To keep the result deterministic,
4558 // initialise the destination register first.
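  // For example (a plausible expansion; the macro may choose differently),
  // with zd_3 distinct from both sources, `Add(zd_3, p0.Merging(), zn, zm)`
  // could become:
  //   movprfx zd_3.t, p0/m, zn.t
  //   add zd_3.t, p0/m, zd_3.t, zm.t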
4559 __ Mov(zd_3, src_a);
4560 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
4561
4562 END();
4563
4564 if (CAN_RUN()) {
4565 RUN();
4566 ASSERT_EQUAL_SVE(zd_expected, zd_1);
4567
4568 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
4569 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
4570 if (!core.HasSVELane(zd_1, lane)) break;
4571 if ((pg_inputs[i] & 1) != 0) {
4572 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_1, lane);
4573 } else {
4574 ASSERT_EQUAL_SVE_LANE(zn_inputs[i], zd_1, lane);
4575 }
4576 }
4577
4578 ASSERT_EQUAL_SVE(zd_expected, zd_3);
4579 }
4580}
4581
4582TEST_SVE(sve_binary_arithmetic_predicated_add) {
4583 // clang-format off
4584 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
4585
4586 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
4587
4588 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
4589
4590 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
4591
4592 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
4593 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
4594
4595 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
4596 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
4597
4598 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
4599 0x1010101010101010, 0x8181818181818181,
4600 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
4601 0x0101010101010101, 0x7f7f7f7fffffffff};
4602
4603 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
4604 0x1010101010101010, 0x0000000000000000,
4605 0x8181818181818181, 0x8080808080808080,
4606 0xffffffffffffffff, 0xffffffffffffffff};
4607
4608 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4609 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4610 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4611 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4612
4613 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
4614
4615 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
4616 0x8180, 0x8f8f, 0x0101, 0x7f7e};
4617
4618 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
4619 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
4620
4621 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
4622 0x2020202020202020, 0x8181818181818181,
4623 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
4624 0x0101010101010100, 0x7f7f7f7ffffffffe};
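  // Reading the expected values above: lanes whose governing predicate bit is
  // clear keep the corresponding zn value (e.g. 0x81 and 0x7f in add_exp_b),
  // while active lanes wrap modulo the lane size (e.g. 0xff + 0x81 = 0x80 in
  // a B-sized lane).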
4625
4626 ArithPredicatedFn fn = &MacroAssembler::Add;
4627 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
4628 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
4629 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
4630 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
4631
4632 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
4633
4634 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
4635 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
4636
4637 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
4638 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
4639
4640 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
4641 0x0000000000000000, 0x8181818181818181,
4642 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
4643 0x0101010101010102, 0x7f7f7f8000000000};
4644
4645 fn = &MacroAssembler::Sub;
4646 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
4647 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
4648 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
4649 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
4650 // clang-format on
4651}
4652
4653TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
4654 // clang-format off
4655 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
4656
4657 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
4658
4659 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
4660 0xff00, 0xba98, 0x5555, 0x4567};
4661
4662 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
4663 0xfe00, 0xabab, 0xcdcd, 0x5678};
4664
4665 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
4666 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
4667
4668 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
4669 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
4670
4671 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
4672 0x5555555555555555, 0x0000000001234567};
4673
4674 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
4675 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4676
4677 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4678 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4679 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4680 int pg_d[] = {1, 0, 1, 1};
4681
4682 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
4683
4684 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
4685 0xff00, 0xba98, 0x5555, 0x5678};
4686
4687 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
4688 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
4689
4690 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4691 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4692
4693 ArithPredicatedFn fn = &MacroAssembler::Umax;
4694 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
4695 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
4696 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
4697 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
4698
4699 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
4700
4701 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
4702 0xfe00, 0xabab, 0x5555, 0x4567};
4703
4704 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
4705 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
4706
4707 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
4708 0x5555555555555555, 0x0000000001234567};
4709 fn = &MacroAssembler::Umin;
4710 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
4711 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
4712 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
4713 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
4714
4715 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
4716
4717 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
4718 0x0100, 0x0eed, 0x5555, 0x1111};
4719
4720 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
4721 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
4722
4723 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4724 0x7878787878787878, 0x0000000011111111};
4725
4726 fn = &MacroAssembler::Uabd;
4727 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
4728 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
4729 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
4730 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
4731 // clang-format on
4732}
4733
4734TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
4735 // clang-format off
4736 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
4737
4738 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
4739
4740 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
4741 INT16_MIN, INT16_MAX, INT16_MAX, 1};
4742
4743 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
4744 INT16_MAX, INT16_MAX - 1, -1, 0};
4745
4746 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
4747 INT32_MIN, INT32_MAX, INT32_MAX, 1};
4748
4749 int zm_s[] = {-1, 0, -1, -INT32_MAX,
4750 INT32_MAX, INT32_MAX - 1, -1, 0};
4751
4752 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4753 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4754
4755 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
4756 INT64_MAX, INT64_MAX - 1, -1, 0};
4757
4758 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4759 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4760 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4761 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4762
4763 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
4764
4765 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
4766 INT16_MAX, INT16_MAX, INT16_MAX, 1};
4767
4768 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
4769 INT32_MAX, INT32_MAX, INT32_MAX, 1};
4770
4771 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
4772 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4773
4774 ArithPredicatedFn fn = &MacroAssembler::Smax;
4775 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
4776 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
4777 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
4778 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
4779
4780 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
4781
4782 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
4783 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
4784
4785 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
4786 INT32_MIN, INT32_MAX, -1, 0};
4787
4788 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4789 INT64_MIN, INT64_MAX - 1, -1, 0};
4790
4791 fn = &MacroAssembler::Smin;
4792 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
4793 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
4794 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
4795 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
4796
4797 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
4798
4799 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
4800
4801 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
4802 0xffffffff, 0x7fffffff, 0x80000000, 1};
4803
4804 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
4805 0x8000000000000000, 1, 0x8000000000000000, 1};
4806
4807 fn = &MacroAssembler::Sabd;
4808 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
4809 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
4810 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
4811 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
4812 // clang-format on
4813}
4814
4815TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
4816 // clang-format off
4817 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4818
4819 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4820
4821 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
4822 0x8000, 0xff00, 0x5555, 0xaaaa};
4823
4824 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
4825 0x5555, 0xaaaa, 0x0001, 0x1234};
4826
4827 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4828 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
4829
4830 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4831 0x12345678, 0x22223333, 0x55556666, 0x77778888};
4832
4833 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
4834 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
4835
4836 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
4837 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
4838
4839 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4840 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4841 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4842 int pg_d[] = {1, 1, 0, 1};
4843
4844 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
4845
4846 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
4847 0x8000, 0xff00, 0x5555, 0x9e88};
4848
4849 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
4850 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
4851
4852 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
4853 0xffffffffffffffff, 0x38e38e38e38e38e4};
4854
4855 ArithPredicatedFn fn = &MacroAssembler::Mul;
4856 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
4857 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
4858 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
4859 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
4860
4861 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
4862
4863 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
4864 0x2aaa, 0xff00, 0x0000, 0x0c22};
4865
4866 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
4867 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
4868
4869 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
4870 0xffffffffffffffff, 0x71c71c71c71c71c6};
4871
4872 fn = &MacroAssembler::Umulh;
4873 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
4874 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
4875 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
4876 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
4877 // clang-format on
4878}
4879
4880TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
4881 // clang-format off
4882 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
4883
4884 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
4885
4886 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
4887
4888 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
4889
4890 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
4891
4892 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
4893
4894 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
4895
4896 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
4897
4898 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4899 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4900 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4901 int pg_d[] = {1, 1, 0, 1};
4902
4903 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
4904
4905 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
4906
4907 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
4908
4909 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
4910
4911 ArithPredicatedFn fn = &MacroAssembler::Smulh;
4912 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
4913 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
4914 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4915 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4916 // clang-format on
4917}
4918
4919TEST_SVE(sve_binary_arithmetic_predicated_logical) {
4920 // clang-format off
4921 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4922 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4923
4924 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
4925 0x8000, 0xffff, 0x5555, 0xaaaa};
4926 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
4927 0x5555, 0xaaaa, 0x0000, 0x0800};
4928
4929 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
4930 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
4931
4932 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
4933 0x0001200880ff55aa, 0x0022446688aaccee};
4934 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
4935 0x7fcd80ff55aa0008, 0x1133557799bbddff};
4936
4937 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4938 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4939 int pg_s[] = {1, 1, 1, 0};
4940 int pg_d[] = {1, 1, 0, 1};
4941
4942 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
4943
4944 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
4945 0x0000, 0xffff, 0x0000, 0x0800};
4946
4947 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
4948
4949 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
4950 0x0001200880ff55aa, 0x0022446688aaccee};
4951
4952 ArithPredicatedFn fn = &MacroAssembler::And;
4953 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
4954 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
4955 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
4956 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
4957
4958 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
4959
4960 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
4961 0x8000, 0xffff, 0x5555, 0xa2aa};
4962
4963 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
4964
4965 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
4966 0x0001200880ff55aa, 0x0000000000000000};
4967
4968 fn = &MacroAssembler::Bic;
4969 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
4970 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
4971 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
4972 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
4973
4974 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
4975
4976 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
4977 0xd555, 0xffff, 0x5555, 0xa2aa};
4978
4979 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
4980
4981 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
4982 0x0001200880ff55aa, 0x1111111111111111};
4983
4984 fn = &MacroAssembler::Eor;
4985 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
4986 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
4987 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
4988 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
4989
4990 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
4991
4992 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
4993 0xd555, 0xffff, 0x5555, 0xaaaa};
4994
4995 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
4996
4997 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
4998 0x0001200880ff55aa, 0x1133557799bbddff};
4999
5000 fn = &MacroAssembler::Orr;
5001 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
5002 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
5003 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
5004 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
5005 // clang-format on
5006}
5007
5008TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
5009 // clang-format off
5010 int zn_s[] = {0, 1, -1, 2468,
5011 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
5012 -11111111, 87654321, 0, 0};
5013
5014 int zm_s[] = {1, -1, 1, 1234,
5015 -1, INT32_MIN, 1, -1,
5016 22222222, 80000000, -1, 0};
5017
5018 int64_t zn_d[] = {0, 1, -1, 2468,
5019 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
5020 -11111111, 87654321, 0, 0};
5021
5022 int64_t zm_d[] = {1, -1, 1, 1234,
5023 -1, INT64_MIN, 1, -1,
5024 22222222, 80000000, -1, 0};
5025
5026 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
5027 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
5028
5029 int exp_s[] = {0, 1, -1, 2,
5030 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
5031 0, 1, 0, 0};
5032
5033 int64_t exp_d[] = {0, -1, -1, 2,
5034 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
5035 0, 1, 0, 0};
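  // The expected values above encode the usual AArch64 division edge cases:
  // division by zero produces zero, and INT_MIN / -1 overflows back to
  // INT_MIN.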
5036
5037 ArithPredicatedFn fn = &MacroAssembler::Sdiv;
5038 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
5039 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
5040 // clang-format on
5041}
5042
5043TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
5044 // clang-format off
5045 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
5046 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
5047
5048 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
5049 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
5050
5051 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
5052 0xffffffffffffffff, 0x8000000000000000,
5053 0xffffffffffffffff, 0x8000000000000000,
5054 0xffffffffffffffff, 0xf0000000f0000000};
5055
5056 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
5057 0x8000000000000000, 0x0000000000000002,
5058 0x8888888888888888, 0x0000000000000001,
5059 0x0000000080000000, 0x00000000f0000000};
5060
5061 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
5062 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
5063
5064 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
5065 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
5066
5067 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
5068 0x0000000000000001, 0x4000000000000000,
5069 0x0000000000000001, 0x8000000000000000,
5070 0xffffffffffffffff, 0x0000000100000001};
5071
5072 ArithPredicatedFn fn = &MacroAssembler::Udiv;
5073 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
5074 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
5075 // clang-format on
5076}
5077
5078typedef void (MacroAssembler::*ArithFn)(const ZRegister& zd,
5079 const ZRegister& zn,
5080 const ZRegister& zm);
5081
5082template <typename T>
5083static void IntArithHelper(Test* config,
5084 ArithFn macro,
5085 unsigned lane_size_in_bits,
5086 const T& zn_inputs,
5087 const T& zm_inputs,
5088 const T& zd_expected) {
5089 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5090 START();
5091
5092 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
5093 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
5094 InsrHelper(&masm, zn, zn_inputs);
5095 InsrHelper(&masm, zm, zm_inputs);
5096
5097 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
5098 (masm.*macro)(zd, zn, zm);
5099
5100 END();
5101
5102 if (CAN_RUN()) {
5103 RUN();
5104 ASSERT_EQUAL_SVE(zd_expected, zd);
5105 }
5106}
5107
5108TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
5109 // clang-format off
5110 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
5111 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
5112 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
5113 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
5114 0x1000000010001010, 0xf0000000f000f0f0};
5115
5116 ArithFn fn = &MacroAssembler::Add;
5117
5118 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
5119 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
5120 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
5121 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
5122 0x2000000020002020, 0xe0000001e001e1e0};
5123
5124 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
5125 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
5126 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
5127 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
5128
5129 fn = &MacroAssembler::Sqadd;
5130
5131 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
5132 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
5133 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
5134 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
5135 0x2000000020002020, 0xe0000001e001e1e0};
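  // For example, in the B-sized lanes: 0x81 + 0x81 is -127 + -127 = -254,
  // which saturates to INT8_MIN (0x80); 0x55 + 0x55 = 170, which saturates to
  // INT8_MAX (0x7f); and 0x10 + 0x10 = 0x20 does not saturate.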
5136
5137 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
5138 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
5139 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
5140 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
5141
5142 fn = &MacroAssembler::Uqadd;
5143
5144 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
5145 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
5146 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
5147 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
5148 0x2000000020002020, 0xffffffffffffffff};
5149
5150 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
5151 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
5152 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
5153 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
5154 // clang-format on
5155}
5156
5157TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
5158 // clang-format off
5159
5160 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
5161 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
5162
5163 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
5164 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
5165
5166 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
5167 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
5168
5169 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
5170 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
5171 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
5172 0xf0000000f000f0f0, 0x5555555555555555};
5173
5174 ArithFn fn = &MacroAssembler::Sub;
5175
5176 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
5177 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
5178 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
5179 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
5180 0x8eeeeeed8eed8d8e, 0x5555555555555555};
5181
5182 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
5183 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
5184 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
5185 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
5186
5187 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
5188 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
5189 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
5190 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
5191 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
5192
5193 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
5194 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
5195 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
5196 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
5197
5198 fn = &MacroAssembler::Sqsub;
5199
5200 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
5201 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
5202 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
5203 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
5204 0x7fffffffffffffff, 0x8000000000000000};
5205
5206 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
5207 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
5208 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
5209 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
5210
5211 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
5212 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
5213 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
5214 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
5215 0x8000000000000000, 0x7fffffffffffffff};
5216
5217 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
5218 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
5219 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
5220 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
5221
5222 fn = &MacroAssembler::Uqsub;
5223
5224 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
5225 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
5226 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
5227 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
5228 0x0000000000000000, 0x5555555555555555};
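  // For example, in the B-sized lanes: 0x81 - 0x10 = 0x71 does not saturate,
  // while 0x7f - 0xf0 underflows as an unsigned subtraction and saturates to
  // zero.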
5229
5230 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
5231 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
5232 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
5233 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
5234
5235 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
5236 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
5237 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
5238 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
5239 0x7111111271127272, 0x0000000000000000};
5240
5241 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
5242 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
5243 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
5244 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
5245 // clang-format on
5246}
5247
5248TEST_SVE(sve_rdvl) {
5249 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5250 START();
5251
5252 // Encodable multipliers.
5253 __ Rdvl(x0, 0);
5254 __ Rdvl(x1, 1);
5255 __ Rdvl(x2, 2);
5256 __ Rdvl(x3, 31);
5257 __ Rdvl(x4, -1);
5258 __ Rdvl(x5, -2);
5259 __ Rdvl(x6, -32);
5260
5261 // For unencodable multipliers, the MacroAssembler uses a sequence of
5262 // instructions.
5263 __ Rdvl(x10, 32);
5264 __ Rdvl(x11, -33);
5265 __ Rdvl(x12, 42);
5266 __ Rdvl(x13, -42);
5267
5268 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
5269 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
5270 // occurs in the macro.
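  // For example, with the architectural maximum VL of 256 bytes,
  // 0x007fffffffffffff * 256 = 0x7fffffffffffff00, which still fits in a
  // signed 64-bit register.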
5271 __ Rdvl(x14, 0x007fffffffffffff);
5272 __ Rdvl(x15, -0x0080000000000000);
5273
5274 END();
5275
5276 if (CAN_RUN()) {
5277 RUN();
5278
5279 uint64_t vl = config->sve_vl_in_bytes();
5280
5281 ASSERT_EQUAL_64(vl * 0, x0);
5282 ASSERT_EQUAL_64(vl * 1, x1);
5283 ASSERT_EQUAL_64(vl * 2, x2);
5284 ASSERT_EQUAL_64(vl * 31, x3);
5285 ASSERT_EQUAL_64(vl * -1, x4);
5286 ASSERT_EQUAL_64(vl * -2, x5);
5287 ASSERT_EQUAL_64(vl * -32, x6);
5288
5289 ASSERT_EQUAL_64(vl * 32, x10);
5290 ASSERT_EQUAL_64(vl * -33, x11);
5291 ASSERT_EQUAL_64(vl * 42, x12);
5292 ASSERT_EQUAL_64(vl * -42, x13);
5293
5294 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
5295 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
5296 }
5297}
5298
5299TEST_SVE(sve_rdpl) {
5300 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5301 START();
5302
5303 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
5304 // Addpl(xd, xzr, ...).
5305
5306 // Encodable multipliers (as `addvl`).
5307 __ Rdpl(x0, 0);
5308 __ Rdpl(x1, 8);
5309 __ Rdpl(x2, 248);
5310 __ Rdpl(x3, -8);
5311 __ Rdpl(x4, -256);
5312
5313 // Encodable multipliers (as `movz` + `addpl`).
5314 __ Rdpl(x7, 31);
5315 __ Rdpl(x8, -31);
5316
5317 // For unencodable multipliers, the MacroAssembler uses a sequence of
5318 // instructions.
5319 __ Rdpl(x10, 42);
5320 __ Rdpl(x11, -42);
5321
5322 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
5323 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
5324 // occurs in the macro.
5325 __ Rdpl(x12, 0x007fffffffffffff);
5326 __ Rdpl(x13, -0x0080000000000000);
5327
5328 END();
5329
5330 if (CAN_RUN()) {
5331 RUN();
5332
5333 uint64_t vl = config->sve_vl_in_bytes();
5334 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5335 uint64_t pl = vl / kZRegBitsPerPRegBit;
5336
5337 ASSERT_EQUAL_64(pl * 0, x0);
5338 ASSERT_EQUAL_64(pl * 8, x1);
5339 ASSERT_EQUAL_64(pl * 248, x2);
5340 ASSERT_EQUAL_64(pl * -8, x3);
5341 ASSERT_EQUAL_64(pl * -256, x4);
5342
5343 ASSERT_EQUAL_64(pl * 31, x7);
5344 ASSERT_EQUAL_64(pl * -31, x8);
5345
5346 ASSERT_EQUAL_64(pl * 42, x10);
5347 ASSERT_EQUAL_64(pl * -42, x11);
5348
5349 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
5350 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
5351 }
5352}
5353
5354TEST_SVE(sve_addvl) {
5355 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5356 START();
5357
5358 uint64_t base = 0x1234567800000000;
5359 __ Mov(x30, base);
5360
5361 // Encodable multipliers.
5362 __ Addvl(x0, x30, 0);
5363 __ Addvl(x1, x30, 1);
5364 __ Addvl(x2, x30, 31);
5365 __ Addvl(x3, x30, -1);
5366 __ Addvl(x4, x30, -32);
5367
5368 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
5369 __ Addvl(x5, x30, 32);
5370 __ Addvl(x6, x30, -33);
5371
5372 // Test the limits of the multiplier supported by the `Rdvl` macro.
5373 __ Addvl(x7, x30, 0x007fffffffffffff);
5374 __ Addvl(x8, x30, -0x0080000000000000);
5375
5376 // Check that xzr behaves correctly.
5377 __ Addvl(x9, xzr, 8);
5378 __ Addvl(x10, xzr, 42);
5379
5380 // Check that sp behaves correctly with encodable and unencodable multipliers.
5381 __ Addvl(sp, sp, -5);
5382 __ Addvl(sp, sp, -37);
5383 __ Addvl(x11, sp, -2);
5384 __ Addvl(sp, x11, 2);
5385 __ Addvl(x12, sp, -42);
5386
5387 // Restore the value of sp.
5388 __ Addvl(sp, x11, 39);
5389 __ Addvl(sp, sp, 5);
5390
5391 // Adjust x11 and x12 to make the test sp-agnostic.
5392 __ Sub(x11, sp, x11);
5393 __ Sub(x12, sp, x12);
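  // Tracing the adjustments above: sp is moved down by (5 + 37) = 42 VL,
  // x11 is taken 2 VL below that (44 VL below the original sp), and x12 is
  // taken 42 VL below (x11 + 2 VL), i.e. 84 VL below the original sp. The
  // subtractions convert these into the offsets checked below.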
5394
5395 // Check cases where xd.Is(xn). This stresses scratch register allocation.
5396 __ Mov(x20, x30);
5397 __ Mov(x21, x30);
5398 __ Mov(x22, x30);
5399 __ Addvl(x20, x20, 4);
5400 __ Addvl(x21, x21, 42);
5401 __ Addvl(x22, x22, -0x0080000000000000);
5402
5403 END();
5404
5405 if (CAN_RUN()) {
5406 RUN();
5407
5408 uint64_t vl = config->sve_vl_in_bytes();
5409
5410 ASSERT_EQUAL_64(base + (vl * 0), x0);
5411 ASSERT_EQUAL_64(base + (vl * 1), x1);
5412 ASSERT_EQUAL_64(base + (vl * 31), x2);
5413 ASSERT_EQUAL_64(base + (vl * -1), x3);
5414 ASSERT_EQUAL_64(base + (vl * -32), x4);
5415
5416 ASSERT_EQUAL_64(base + (vl * 32), x5);
5417 ASSERT_EQUAL_64(base + (vl * -33), x6);
5418
5419 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
5420 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
5421
5422 ASSERT_EQUAL_64(vl * 8, x9);
5423 ASSERT_EQUAL_64(vl * 42, x10);
5424
5425 ASSERT_EQUAL_64(vl * 44, x11);
5426 ASSERT_EQUAL_64(vl * 84, x12);
5427
5428 ASSERT_EQUAL_64(base + (vl * 4), x20);
5429 ASSERT_EQUAL_64(base + (vl * 42), x21);
5430 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
5431
5432 ASSERT_EQUAL_64(base, x30);
5433 }
5434}
5435
5436TEST_SVE(sve_addpl) {
5437 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5438 START();
5439
5440 uint64_t base = 0x1234567800000000;
5441 __ Mov(x30, base);
5442
5443 // Encodable multipliers.
5444 __ Addpl(x0, x30, 0);
5445 __ Addpl(x1, x30, 1);
5446 __ Addpl(x2, x30, 31);
5447 __ Addpl(x3, x30, -1);
5448 __ Addpl(x4, x30, -32);
5449
5450 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
5451 // it falls back to `Rdvl` and `Add`.
5452 __ Addpl(x5, x30, 32);
5453 __ Addpl(x6, x30, -33);
5454
5455 // Test the limits of the multiplier supported by the `Rdvl` macro.
5456 __ Addpl(x7, x30, 0x007fffffffffffff);
5457 __ Addpl(x8, x30, -0x0080000000000000);
5458
5459 // Check that xzr behaves correctly.
5460 __ Addpl(x9, xzr, 8);
5461 __ Addpl(x10, xzr, 42);
5462
5463 // Check that sp behaves correctly with encodable and unencodable multipliers.
5464 __ Addpl(sp, sp, -5);
5465 __ Addpl(sp, sp, -37);
5466 __ Addpl(x11, sp, -2);
5467 __ Addpl(sp, x11, 2);
5468 __ Addpl(x12, sp, -42);
5469
5470 // Restore the value of sp.
5471 __ Addpl(sp, x11, 39);
5472 __ Addpl(sp, sp, 5);
5473
5474 // Adjust x11 and x12 to make the test sp-agnostic.
5475 __ Sub(x11, sp, x11);
5476 __ Sub(x12, sp, x12);
5477
5478 // Check cases where xd.Is(xn). This stresses scratch register allocation.
5479 __ Mov(x20, x30);
5480 __ Mov(x21, x30);
5481 __ Mov(x22, x30);
5482 __ Addpl(x20, x20, 4);
5483 __ Addpl(x21, x21, 42);
5484 __ Addpl(x22, x22, -0x0080000000000000);
5485
5486 END();
5487
5488 if (CAN_RUN()) {
5489 RUN();
5490
5491 uint64_t vl = config->sve_vl_in_bytes();
5492 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5493 uint64_t pl = vl / kZRegBitsPerPRegBit;
5494
5495 ASSERT_EQUAL_64(base + (pl * 0), x0);
5496 ASSERT_EQUAL_64(base + (pl * 1), x1);
5497 ASSERT_EQUAL_64(base + (pl * 31), x2);
5498 ASSERT_EQUAL_64(base + (pl * -1), x3);
5499 ASSERT_EQUAL_64(base + (pl * -32), x4);
5500
5501 ASSERT_EQUAL_64(base + (pl * 32), x5);
5502 ASSERT_EQUAL_64(base + (pl * -33), x6);
5503
5504 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
5505 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
5506
5507 ASSERT_EQUAL_64(pl * 8, x9);
5508 ASSERT_EQUAL_64(pl * 42, x10);
5509
5510 ASSERT_EQUAL_64(pl * 44, x11);
5511 ASSERT_EQUAL_64(pl * 84, x12);
5512
5513 ASSERT_EQUAL_64(base + (pl * 4), x20);
5514 ASSERT_EQUAL_64(base + (pl * 42), x21);
5515 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
5516
5517 ASSERT_EQUAL_64(base, x30);
5518 }
5519}
5520
5521TEST_SVE(sve_calculate_sve_address) {
5522 // Shadow the `MacroAssembler` type so that the test macros work without
5523 // modification.
5524 typedef CalculateSVEAddressMacroAssembler MacroAssembler;
5525
5526 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5527 START(); // NOLINT(clang-diagnostic-local-type-template-args)
5528
5529 uint64_t base = 0x1234567800000000;
5530 __ Mov(x28, base);
5531 __ Mov(x29, 48);
5532 __ Mov(x30, -48);
5533
5534 // Simple scalar (or equivalent) cases.
5535
5536 __ CalculateSVEAddress(x0, SVEMemOperand(x28));
5537 __ CalculateSVEAddress(x1, SVEMemOperand(x28, 0));
5538 __ CalculateSVEAddress(x2, SVEMemOperand(x28, 0, SVE_MUL_VL));
5539 __ CalculateSVEAddress(x3, SVEMemOperand(x28, 0, SVE_MUL_VL), 3);
5540 __ CalculateSVEAddress(x4, SVEMemOperand(x28, xzr));
5541 __ CalculateSVEAddress(x5, SVEMemOperand(x28, xzr, LSL, 42));
5542
5543 // scalar-plus-immediate
5544
5545 // Unscaled immediates, handled with `Add`.
5546 __ CalculateSVEAddress(x6, SVEMemOperand(x28, 42));
5547 __ CalculateSVEAddress(x7, SVEMemOperand(x28, -42));
5548 // Scaled immediates, handled with `Addvl` or `Addpl`.
5549 __ CalculateSVEAddress(x8, SVEMemOperand(x28, 31, SVE_MUL_VL), 0);
5550 __ CalculateSVEAddress(x9, SVEMemOperand(x28, -32, SVE_MUL_VL), 0);
5551 // Out of `addvl` or `addpl` range.
5552 __ CalculateSVEAddress(x10, SVEMemOperand(x28, 42, SVE_MUL_VL), 0);
5553 __ CalculateSVEAddress(x11, SVEMemOperand(x28, -42, SVE_MUL_VL), 0);
5554 // As above, for VL-based accesses smaller than a Z register.
5555 VIXL_STATIC_ASSERT(kZRegBitsPerPRegBitLog2 == 3);
5556 __ CalculateSVEAddress(x12, SVEMemOperand(x28, -32 * 8, SVE_MUL_VL), 3);
5557 __ CalculateSVEAddress(x13, SVEMemOperand(x28, -42 * 8, SVE_MUL_VL), 3);
5558 __ CalculateSVEAddress(x14, SVEMemOperand(x28, -32 * 4, SVE_MUL_VL), 2);
5559 __ CalculateSVEAddress(x15, SVEMemOperand(x28, -42 * 4, SVE_MUL_VL), 2);
5560 __ CalculateSVEAddress(x18, SVEMemOperand(x28, -32 * 2, SVE_MUL_VL), 1);
5561 __ CalculateSVEAddress(x19, SVEMemOperand(x28, -42 * 2, SVE_MUL_VL), 1);
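  // Note: the trailing argument appears to be the base-2 log of the VL
  // divisor for the access, so an immediate of -42 * 8 with a divisor of 8
  // still corresponds to -42 * VL, as checked below.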
5562
5563 // scalar-plus-scalar
5564
5565 __ CalculateSVEAddress(x20, SVEMemOperand(x28, x29));
5566 __ CalculateSVEAddress(x21, SVEMemOperand(x28, x30));
5567 __ CalculateSVEAddress(x22, SVEMemOperand(x28, x29, LSL, 8));
5568 __ CalculateSVEAddress(x23, SVEMemOperand(x28, x30, LSL, 8));
5569
5570 // In-place updates, to stress scratch register allocation.
5571
5572 __ Mov(x24, 0xabcd000000000000);
5573 __ Mov(x25, 0xabcd101100000000);
5574 __ Mov(x26, 0xabcd202200000000);
5575 __ Mov(x27, 0xabcd303300000000);
5576 __ Mov(x28, 0xabcd404400000000);
5577 __ Mov(x29, 0xabcd505500000000);
5578
5579 __ CalculateSVEAddress(x24, SVEMemOperand(x24));
5580 __ CalculateSVEAddress(x25, SVEMemOperand(x25, 0x42));
5581 __ CalculateSVEAddress(x26, SVEMemOperand(x26, 3, SVE_MUL_VL), 0);
5582 __ CalculateSVEAddress(x27, SVEMemOperand(x27, 0x42, SVE_MUL_VL), 3);
5583 __ CalculateSVEAddress(x28, SVEMemOperand(x28, x30));
5584 __ CalculateSVEAddress(x29, SVEMemOperand(x29, x30, LSL, 4));
5585
5586 END();
5587
5588 if (CAN_RUN()) {
5589 RUN();
5590
5591 uint64_t vl = config->sve_vl_in_bytes();
5592 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5593 uint64_t pl = vl / kZRegBitsPerPRegBit;
5594
5595 // Simple scalar (or equivalent) cases.
5596 ASSERT_EQUAL_64(base, x0);
5597 ASSERT_EQUAL_64(base, x1);
5598 ASSERT_EQUAL_64(base, x2);
5599 ASSERT_EQUAL_64(base, x3);
5600 ASSERT_EQUAL_64(base, x4);
5601 ASSERT_EQUAL_64(base, x5);
5602
5603 // scalar-plus-immediate
5604 ASSERT_EQUAL_64(base + 42, x6);
5605 ASSERT_EQUAL_64(base - 42, x7);
5606 ASSERT_EQUAL_64(base + (31 * vl), x8);
5607 ASSERT_EQUAL_64(base - (32 * vl), x9);
5608 ASSERT_EQUAL_64(base + (42 * vl), x10);
5609 ASSERT_EQUAL_64(base - (42 * vl), x11);
5610 ASSERT_EQUAL_64(base - (32 * vl), x12);
5611 ASSERT_EQUAL_64(base - (42 * vl), x13);
5612 ASSERT_EQUAL_64(base - (32 * vl), x14);
5613 ASSERT_EQUAL_64(base - (42 * vl), x15);
5614 ASSERT_EQUAL_64(base - (32 * vl), x18);
5615 ASSERT_EQUAL_64(base - (42 * vl), x19);
5616
5617 // scalar-plus-scalar
5618 ASSERT_EQUAL_64(base + 48, x20);
5619 ASSERT_EQUAL_64(base - 48, x21);
5620 ASSERT_EQUAL_64(base + (48 << 8), x22);
5621 ASSERT_EQUAL_64(base - (48 << 8), x23);
5622
5623 // In-place updates.
5624 ASSERT_EQUAL_64(0xabcd000000000000, x24);
5625 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x25);
5626 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x26);
5627 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x27);
5628 ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28);
5629 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29);
5630 }
5631}
5632
5633TEST_SVE(sve_permute_vector_unpredicated) {
5634 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5635 START();
5636
5637 // Initialise registers with known values first.
5638 __ Dup(z1.VnB(), 0x11);
5639 __ Dup(z2.VnB(), 0x22);
5640 __ Dup(z3.VnB(), 0x33);
5641 __ Dup(z4.VnB(), 0x44);
5642
5643 __ Mov(x0, 0x0123456789abcdef);
5644 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
5645 __ Insr(z1.VnS(), w0);
5646 __ Insr(z2.VnD(), x0);
5647 __ Insr(z3.VnH(), h0);
5648 __ Insr(z4.VnD(), d0);
5649
5650 uint64_t inputs[] = {0xfedcba9876543210,
5651 0x0123456789abcdef,
5652 0x8f8e8d8c8b8a8988,
5653 0x8786858483828180};
5654
5655 // Initialize a distinguishable value throughout the register first.
5656 __ Dup(z9.VnB(), 0xff);
5657 InsrHelper(&masm, z9.VnD(), inputs);
5658
5659 __ Rev(z5.VnB(), z9.VnB());
5660 __ Rev(z6.VnH(), z9.VnH());
5661 __ Rev(z7.VnS(), z9.VnS());
5662 __ Rev(z8.VnD(), z9.VnD());
5663
5664 int index[7] = {22, 7, 7, 3, 1, 1, 63};
5665 // Broadcast lanes from within the input array.
5666 __ Dup(z10.VnB(), z9.VnB(), index[0]);
5667 __ Dup(z11.VnH(), z9.VnH(), index[1]);
5668 __ Dup(z12.VnS(), z9.VnS(), index[2]);
5669 __ Dup(z13.VnD(), z9.VnD(), index[3]);
5670 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
5671 // Test dst == src
5672 __ Mov(z15, z9);
5673 __ Dup(z15.VnS(), z15.VnS(), index[5]);
5674 // Select a lane beyond the values inserted from the input array.
5675 __ Dup(z16.VnB(), z9.VnB(), index[6]);
5676
5677 END();
5678
5679 if (CAN_RUN()) {
5680 RUN();
5681
5682 // Insr
5683 uint64_t z1_expected[] = {0x1111111111111111, 0x1111111189abcdef};
5684 uint64_t z2_expected[] = {0x2222222222222222, 0x0123456789abcdef};
5685 uint64_t z3_expected[] = {0x3333333333333333, 0x3333333333333456};
5686 uint64_t z4_expected[] = {0x4444444444444444, 0x7ffaaaaa22223456};
5687 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
5688 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
5689 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
5690 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
5691
5692 // Rev
5693 int lane_count = core.GetSVELaneCount(kBRegSize);
5694 for (int i = 0; i < lane_count; i++) {
5695 uint64_t expected =
5696 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
5697 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
5698 ASSERT_EQUAL_64(expected, input);
5699 }
5700
5701 lane_count = core.GetSVELaneCount(kHRegSize);
5702 for (int i = 0; i < lane_count; i++) {
5703 uint64_t expected =
5704 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
5705 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
5706 ASSERT_EQUAL_64(expected, input);
5707 }
5708
5709 lane_count = core.GetSVELaneCount(kSRegSize);
5710 for (int i = 0; i < lane_count; i++) {
5711 uint64_t expected =
5712 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
5713 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
5714 ASSERT_EQUAL_64(expected, input);
5715 }
5716
5717 lane_count = core.GetSVELaneCount(kDRegSize);
5718 for (int i = 0; i < lane_count; i++) {
5719 uint64_t expected =
5720 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
5721 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
5722 ASSERT_EQUAL_64(expected, input);
5723 }
5724
5725 // Dup
5726 unsigned vl = config->sve_vl_in_bits();
5727 lane_count = core.GetSVELaneCount(kBRegSize);
5728 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
5729 for (int i = 0; i < lane_count; i++) {
5730 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
5731 }
5732
5733 lane_count = core.GetSVELaneCount(kHRegSize);
5734 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
5735 for (int i = 0; i < lane_count; i++) {
5736 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
5737 }
5738
5739 lane_count = core.GetSVELaneCount(kSRegSize);
5740 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
5741 for (int i = 0; i < lane_count; i++) {
5742 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
5743 }
5744
5745 lane_count = core.GetSVELaneCount(kDRegSize);
5746 uint64_t expected_z13 =
5747 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
5748 for (int i = 0; i < lane_count; i++) {
5749 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
5750 }
5751
5752 lane_count = core.GetSVELaneCount(kDRegSize);
5753 uint64_t expected_z14_lo = 0;
5754 uint64_t expected_z14_hi = 0;
5755 if (vl > (index[4] * kQRegSize)) {
5756 expected_z14_lo = 0x0123456789abcdef;
5757 expected_z14_hi = 0xfedcba9876543210;
5758 }
5759 for (int i = 0; i < lane_count; i += 2) {
5760 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
5761 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
5762 }
5763
5764 lane_count = core.GetSVELaneCount(kSRegSize);
5765 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
5766 for (int i = 0; i < lane_count; i++) {
5767 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
5768 }
5769
5770 lane_count = core.GetSVELaneCount(kBRegSize);
5771 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
5772 for (int i = 0; i < lane_count; i++) {
5773 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
5774 }
5775 }
5776}
5777
Martyn Capewell2e954292020-01-14 14:56:42 +00005778TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
TatWai Chong4f28df72019-08-14 17:50:30 -07005779 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5780 START();
5781
5782 uint64_t z9_inputs[] = {0xfedcba9876543210,
5783 0x0123456789abcdef,
5784 0x8f8e8d8c8b8a8988,
5785 0x8786858483828180};
5786 InsrHelper(&masm, z9.VnD(), z9_inputs);
5787
5788 __ Sunpkhi(z10.VnH(), z9.VnB());
5789 __ Sunpkhi(z11.VnS(), z9.VnH());
5790 __ Sunpkhi(z12.VnD(), z9.VnS());
5791
5792 __ Sunpklo(z13.VnH(), z9.VnB());
5793 __ Sunpklo(z14.VnS(), z9.VnH());
5794 __ Sunpklo(z15.VnD(), z9.VnS());
5795
5796 __ Uunpkhi(z16.VnH(), z9.VnB());
5797 __ Uunpkhi(z17.VnS(), z9.VnH());
5798 __ Uunpkhi(z18.VnD(), z9.VnS());
5799
5800 __ Uunpklo(z19.VnH(), z9.VnB());
5801 __ Uunpklo(z20.VnS(), z9.VnH());
5802 __ Uunpklo(z21.VnD(), z9.VnS());
5803
Martyn Capewell2e954292020-01-14 14:56:42 +00005804 // Test unpacking with same source and destination.
5805 __ Mov(z22, z9);
5806 __ Sunpklo(z22.VnH(), z22.VnB());
5807 __ Mov(z23, z9);
5808 __ Uunpklo(z23.VnH(), z23.VnB());
5809
TatWai Chong4f28df72019-08-14 17:50:30 -07005810 END();
5811
5812 if (CAN_RUN()) {
5813 RUN();
5814
5815 // Sunpkhi
5816 int lane_count = core.GetSVELaneCount(kHRegSize);
5817 for (int i = lane_count - 1; i >= 0; i--) {
5818 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
5819 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5820 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5821 ASSERT_EQUAL_64(expected, input);
5822 }
5823
5824 lane_count = core.GetSVELaneCount(kSRegSize);
5825 for (int i = lane_count - 1; i >= 0; i--) {
5826 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
5827 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5828 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5829 ASSERT_EQUAL_64(expected, input);
5830 }
5831
5832 lane_count = core.GetSVELaneCount(kDRegSize);
5833 for (int i = lane_count - 1; i >= 0; i--) {
5834 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
5835 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5836 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5837 ASSERT_EQUAL_64(expected, input);
5838 }
5839
5840 // Sunpklo
5841 lane_count = core.GetSVELaneCount(kHRegSize);
5842 for (int i = lane_count - 1; i >= 0; i--) {
5843 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
5844 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5845 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5846 ASSERT_EQUAL_64(expected, input);
5847 }
5848
5849 lane_count = core.GetSVELaneCount(kSRegSize);
5850 for (int i = lane_count - 1; i >= 0; i--) {
5851 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
5852 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5853 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5854 ASSERT_EQUAL_64(expected, input);
5855 }
5856
5857 lane_count = core.GetSVELaneCount(kDRegSize);
5858 for (int i = lane_count - 1; i >= 0; i--) {
5859 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
5860 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5861 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5862 ASSERT_EQUAL_64(expected, input);
5863 }
5864
5865 // Uunpkhi
5866 lane_count = core.GetSVELaneCount(kHRegSize);
5867 for (int i = lane_count - 1; i >= 0; i--) {
5868 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
5869 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5870 ASSERT_EQUAL_64(expected, input);
5871 }
5872
5873 lane_count = core.GetSVELaneCount(kSRegSize);
5874 for (int i = lane_count - 1; i >= 0; i--) {
5875 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
5876 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5877 ASSERT_EQUAL_64(expected, input);
5878 }
5879
5880 lane_count = core.GetSVELaneCount(kDRegSize);
5881 for (int i = lane_count - 1; i >= 0; i--) {
5882 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
5883 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5884 ASSERT_EQUAL_64(expected, input);
5885 }
5886
5887 // Uunpklo
5888 lane_count = core.GetSVELaneCount(kHRegSize);
5889 for (int i = lane_count - 1; i >= 0; i--) {
5890 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
5891 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5892 ASSERT_EQUAL_64(expected, input);
5893 }
5894
5895 lane_count = core.GetSVELaneCount(kSRegSize);
5896 for (int i = lane_count - 1; i >= 0; i--) {
5897 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
5898 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5899 ASSERT_EQUAL_64(expected, input);
5900 }
5901
5902 lane_count = core.GetSVELaneCount(kDRegSize);
5903 for (int i = lane_count - 1; i >= 0; i--) {
5904 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
5905 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5906 ASSERT_EQUAL_64(expected, input);
5907 }
Martyn Capewell2e954292020-01-14 14:56:42 +00005908
5909 ASSERT_EQUAL_SVE(z13, z22);
5910 ASSERT_EQUAL_SVE(z19, z23);
TatWai Chong4f28df72019-08-14 17:50:30 -07005911 }
5912}
5913
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005914TEST_SVE(sve_cnot_not) {
5915 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5916 START();
5917
5918 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
5919
5920 // For simplicity, we re-use the same pg for various lane sizes.
5921 // For D lanes: 1, 1, 0
5922 // For S lanes: 1, 1, 1, 0, 0
5923 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5924 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
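  // For elements wider than a byte, only the lowest-numbered predicate bit of
  // each element is significant, so the H/S/D patterns above are every
  // 2nd/4th/8th value of pg_in, reading from the right.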
5925 Initialise(&masm, p0.VnB(), pg_in);
5926 PRegisterM pg = p0.Merging();
5927
5928 // These are merging operations, so we have to initialise the result register.
5929 // We use a mixture of constructive and destructive operations.
5930
5931 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005932 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005933 __ Mov(z30, z31);
5934
5935 // For constructive operations, use a different initial result value.
5936 __ Index(z29.VnB(), 0, -1);
5937
5938 __ Mov(z0, z31);
5939 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
5940 __ Mov(z1, z29);
5941 __ Cnot(z1.VnH(), pg, z31.VnH());
5942 __ Mov(z2, z31);
5943 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
5944 __ Mov(z3, z29);
5945 __ Cnot(z3.VnD(), pg, z31.VnD());
5946
5947 __ Mov(z4, z29);
5948 __ Not(z4.VnB(), pg, z31.VnB());
5949 __ Mov(z5, z31);
5950 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
5951 __ Mov(z6, z29);
5952 __ Not(z6.VnS(), pg, z31.VnS());
5953 __ Mov(z7, z31);
5954 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
5955
5956 END();
5957
5958 if (CAN_RUN()) {
5959 RUN();
5960
5961 // Check that constructive operations preserve their inputs.
5962 ASSERT_EQUAL_SVE(z30, z31);
5963
5964 // clang-format off
5965
5966 // Cnot (B) destructive
5967 uint64_t expected_z0[] =
5968 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5969 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
5970 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5971
5972 // Cnot (H)
5973 uint64_t expected_z1[] =
5974 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5975 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
5976 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5977
5978 // Cnot (S) destructive
5979 uint64_t expected_z2[] =
5980 // pg: 0 1 1 1 0 0
5981 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
5982 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5983
5984 // Cnot (D)
5985 uint64_t expected_z3[] =
5986 // pg: 1 1 0
5987 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
5988 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5989
5990 // Not (B)
5991 uint64_t expected_z4[] =
5992 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5993 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
5994 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5995
5996 // Not (H) destructive
5997 uint64_t expected_z5[] =
5998 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5999 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
6000 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6001
6002 // Not (S)
6003 uint64_t expected_z6[] =
6004 // pg: 0 1 1 1 0 0
6005 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
6006 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6007
6008 // Not (D) destructive
6009 uint64_t expected_z7[] =
6010 // pg: 1 1 0
6011 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
6012 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6013
6014 // clang-format on
6015 }
6016}
6017
6018TEST_SVE(sve_fabs_fneg) {
6019 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6020 START();
6021
6022 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
6023 // NaNs, but fabs and fneg do not.
6024 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
6025 0xfff00000ff80fc01, // Signalling NaNs.
6026 0x123456789abcdef0};
6027
6028 // For simplicity, we re-use the same pg for various lane sizes.
6029 // For D lanes: 1, 1, 0
6030 // For S lanes: 1, 1, 1, 0, 0
6031 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6032 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6033 Initialise(&masm, p0.VnB(), pg_in);
6034 PRegisterM pg = p0.Merging();
6035
6036 // These are merging operations, so we have to initialise the result register.
6037 // We use a mixture of constructive and destructive operations.
6038
6039 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006040 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006041 __ Mov(z30, z31);
6042
6043 // For constructive operations, use a different initial result value.
6044 __ Index(z29.VnB(), 0, -1);
6045
6046 __ Mov(z0, z29);
6047 __ Fabs(z0.VnH(), pg, z31.VnH());
6048 __ Mov(z1, z31);
6049 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
6050 __ Mov(z2, z29);
6051 __ Fabs(z2.VnD(), pg, z31.VnD());
6052
6053 __ Mov(z3, z31);
6054 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
6055 __ Mov(z4, z29);
6056 __ Fneg(z4.VnS(), pg, z31.VnS());
6057 __ Mov(z5, z31);
6058 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
6059
6060 END();
6061
6062 if (CAN_RUN()) {
6063 RUN();
6064
6065 // Check that constructive operations preserve their inputs.
6066 ASSERT_EQUAL_SVE(z30, z31);
6067
6068 // clang-format off
6069
6070 // Fabs (H)
6071 uint64_t expected_z0[] =
6072 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6073 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
6074 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6075
6076 // Fabs (S) destructive
6077 uint64_t expected_z1[] =
6078 // pg: 0 1 1 1 0 0
6079 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
6080 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6081
6082 // Fabs (D)
6083 uint64_t expected_z2[] =
6084 // pg: 1 1 0
6085 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
6086 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6087
6088 // Fneg (H) destructive
6089 uint64_t expected_z3[] =
6090 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6091 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
6092 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6093
6094 // Fneg (S)
6095 uint64_t expected_z4[] =
6096 // pg: 0 1 1 1 0 0
6097 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
6098 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6099
6100 // Fneg (D) destructive
6101 uint64_t expected_z5[] =
6102 // pg: 1 1 0
6103 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
6104 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6105
6106 // clang-format on
6107 }
6108}
6109
6110TEST_SVE(sve_cls_clz_cnt) {
6111 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6112 START();
6113
6114 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
6115
6116 // For simplicity, we re-use the same pg for various lane sizes.
6117 // For D lanes: 1, 1, 0
6118 // For S lanes: 1, 1, 1, 0, 0
6119 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6120 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6121 Initialise(&masm, p0.VnB(), pg_in);
6122 PRegisterM pg = p0.Merging();
6123
6124 // These are merging operations, so we have to initialise the result register.
6125 // We use a mixture of constructive and destructive operations.
6126
6127 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006128 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006129 __ Mov(z30, z31);
6130
6131 // For constructive operations, use a different initial result value.
6132 __ Index(z29.VnB(), 0, -1);
6133
6134 __ Mov(z0, z29);
6135 __ Cls(z0.VnB(), pg, z31.VnB());
6136 __ Mov(z1, z31);
6137 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
6138 __ Mov(z2, z29);
6139 __ Cnt(z2.VnS(), pg, z31.VnS());
6140 __ Mov(z3, z31);
6141 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
6142
6143 END();
6144
6145 if (CAN_RUN()) {
6146 RUN();
6147 // Check that non-destructive operations preserve their inputs.
6148 ASSERT_EQUAL_SVE(z30, z31);
6149
6150 // clang-format off
6151
6152 // cls (B)
6153 uint8_t expected_z0[] =
6154 // pg: 0 0 0 0 1 0 1 1
6155 // pg: 1 0 0 1 0 1 1 1
6156 // pg: 0 0 1 0 1 1 1 0
6157 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
6158 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
6159 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
6160 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
6161
6162 // clz (H) destructive
6163 uint16_t expected_z1[] =
6164 // pg: 0 0 0 1
6165 // pg: 0 1 1 1
6166 // pg: 0 0 1 0
6167 {0x0000, 0x0000, 0x0000, 16,
6168 0xfefc, 0, 0, 0,
6169 0x1234, 0x5678, 0, 0xdef0};
6170 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
6171
6172 // cnt (S)
6173 uint32_t expected_z2[] =
6174 // pg: 0 1
6175 // pg: 1 1
6176 // pg: 0 0
6177 {0xe9eaebec, 0,
6178 22, 16,
6179 0xf9fafbfc, 0xfdfeff00};
6180 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
6181
6182 // cnt (D) destructive
6183 uint64_t expected_z3[] =
6184 // pg: 1 1 0
6185 { 0, 38, 0x123456789abcdef0};
6186 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6187
6188 // clang-format on
6189 }
6190}
6191
6192TEST_SVE(sve_sxt) {
6193 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6194 START();
6195
6196 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
6197
6198 // For simplicity, we re-use the same pg for various lane sizes.
6199 // For D lanes: 1, 1, 0
6200 // For S lanes: 1, 1, 1, 0, 0
6201 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6202 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6203 Initialise(&masm, p0.VnB(), pg_in);
6204 PRegisterM pg = p0.Merging();
6205
6206 // These are merging operations, so we have to initialise the result register.
6207 // We use a mixture of constructive and destructive operations.
6208
6209 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006210 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006211 __ Mov(z30, z31);
6212
6213 // For constructive operations, use a different initial result value.
6214 __ Index(z29.VnB(), 0, -1);
6215
6216 __ Mov(z0, z31);
6217 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
6218 __ Mov(z1, z29);
6219 __ Sxtb(z1.VnS(), pg, z31.VnS());
6220 __ Mov(z2, z31);
6221 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
6222 __ Mov(z3, z29);
6223 __ Sxth(z3.VnS(), pg, z31.VnS());
6224 __ Mov(z4, z31);
6225 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
6226 __ Mov(z5, z29);
6227 __ Sxtw(z5.VnD(), pg, z31.VnD());
6228
6229 END();
6230
6231 if (CAN_RUN()) {
6232 RUN();
6233 // Check that constructive operations preserve their inputs.
6234 ASSERT_EQUAL_SVE(z30, z31);
6235
6236 // clang-format off
6237
6238 // Sxtb (H) destructive
6239 uint64_t expected_z0[] =
6240 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6241 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
6242 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6243
6244 // Sxtb (S)
6245 uint64_t expected_z1[] =
6246 // pg: 0 1 1 1 0 0
6247 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
6248 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6249
6250 // Sxtb (D) destructive
6251 uint64_t expected_z2[] =
6252 // pg: 1 1 0
6253 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
6254 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6255
6256 // Sxth (S)
6257 uint64_t expected_z3[] =
6258 // pg: 0 1 1 1 0 0
6259 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
6260 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6261
6262 // Sxth (D) destructive
6263 uint64_t expected_z4[] =
6264 // pg: 1 1 0
6265 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
6266 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6267
6268 // Sxtw (D)
6269 uint64_t expected_z5[] =
6270 // pg: 1 1 0
6271 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
6272 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6273
6274 // clang-format on
6275 }
6276}
6277
6278TEST_SVE(sve_uxt) {
6279 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6280 START();
6281
6282 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
6283
6284 // For simplicity, we re-use the same pg for various lane sizes.
6285 // For D lanes: 1, 1, 0
6286 // For S lanes: 1, 1, 1, 0, 0
6287 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6288 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6289 Initialise(&masm, p0.VnB(), pg_in);
6290 PRegisterM pg = p0.Merging();
6291
6292 // These are merging operations, so we have to initialise the result register.
6293 // We use a mixture of constructive and destructive operations.
6294
6295 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006296 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006297 __ Mov(z30, z31);
6298
6299 // For constructive operations, use a different initial result value.
6300 __ Index(z29.VnB(), 0, -1);
6301
6302 __ Mov(z0, z29);
6303 __ Uxtb(z0.VnH(), pg, z31.VnH());
6304 __ Mov(z1, z31);
6305 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
6306 __ Mov(z2, z29);
6307 __ Uxtb(z2.VnD(), pg, z31.VnD());
6308 __ Mov(z3, z31);
6309 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
6310 __ Mov(z4, z29);
6311 __ Uxth(z4.VnD(), pg, z31.VnD());
6312 __ Mov(z5, z31);
6313 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
6314
6315 END();
6316
6317 if (CAN_RUN()) {
6318 RUN();
6319 // clang-format off
6320
6321 // Uxtb (H)
6322 uint64_t expected_z0[] =
6323 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6324 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
6325 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6326
6327 // Uxtb (S) destructive
6328 uint64_t expected_z1[] =
6329 // pg: 0 1 1 1 0 0
6330 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
6331 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6332
6333 // Uxtb (D)
6334 uint64_t expected_z2[] =
6335 // pg: 1 1 0
6336 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
6337 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6338
6339 // Uxth (S) destructive
6340 uint64_t expected_z3[] =
6341 // pg: 0 1 1 1 0 0
6342 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
6343 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6344
6345 // Uxth (D)
6346 uint64_t expected_z4[] =
6347 // pg: 1 1 0
6348 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
6349 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6350
6351 // Uxtw (D) destructive
6352 uint64_t expected_z5[] =
6353 // pg: 1 1 0
6354 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
6355 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6356
6357 // clang-format on
6358 }
6359}
6360
6361TEST_SVE(sve_abs_neg) {
6362 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6363 START();
6364
6365 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
6366
6367 // For simplicity, we re-use the same pg for various lane sizes.
6368 // For D lanes: 1, 1, 0
6369 // For S lanes: 1, 1, 1, 0, 0
6370 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6371 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6372 Initialise(&masm, p0.VnB(), pg_in);
6373 PRegisterM pg = p0.Merging();
6374
6375 InsrHelper(&masm, z31.VnD(), in);
6376
6377 // These are merging operations, so we have to initialise the result register.
6378 // We use a mixture of constructive and destructive operations.
6379
6380 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006381 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006382 __ Mov(z30, z31);
6383
6384 // For constructive operations, use a different initial result value.
6385 __ Index(z29.VnB(), 0, -1);
6386
6387 __ Mov(z0, z31);
6388 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
6389 __ Mov(z1, z29);
6390 __ Abs(z1.VnB(), pg, z31.VnB());
6391
6392 __ Mov(z2, z31);
6393 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
6394 __ Mov(z3, z29);
6395 __ Neg(z3.VnS(), pg, z31.VnS());
6396
Jacob Bramleyc0066272019-09-30 16:30:47 +01006397 // The unpredicated form of `Neg` is implemented using `subr`.
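  // (Each lane of the result is computed as 0 - zn.)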
6398 __ Mov(z4, z31);
6399 __ Neg(z4.VnB(), z4.VnB()); // destructive
6400 __ Mov(z5, z29);
6401 __ Neg(z5.VnD(), z31.VnD());
6402
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006403 END();
6404
6405 if (CAN_RUN()) {
6406 RUN();
Jacob Bramleyc0066272019-09-30 16:30:47 +01006407
6408 ASSERT_EQUAL_SVE(z30, z31);
6409
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006410 // clang-format off
6411
6412 // Abs (D) destructive
6413 uint64_t expected_z0[] =
6414 // pg: 1 1 0
6415 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
6416 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6417
6418 // Abs (B)
6419 uint64_t expected_z1[] =
6420 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
6421 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
6422 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6423
6424 // Neg (H) destructive
6425 uint64_t expected_z2[] =
6426 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6427 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
6428 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6429
6430 // Neg (S)
6431 uint64_t expected_z3[] =
6432 // pg: 0 1 1 1 0 0
6433 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
6434 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6435
Jacob Bramleyc0066272019-09-30 16:30:47 +01006436 // Neg (B) destructive, unpredicated
6437 uint64_t expected_z4[] =
6438 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
6439 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6440
6441 // Neg (D) unpredicated
6442 uint64_t expected_z5[] =
6443 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
6444 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6445
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006446 // clang-format on
6447 }
6448}
6449
Jacob Bramley0093bb92019-10-04 15:54:10 +01006450TEST_SVE(sve_cpy) {
6451 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
6452 START();
6453
6454 // For simplicity, we re-use the same pg for various lane sizes.
6455 // For D lanes: 0, 1, 1
6456 // For S lanes: 0, 1, 1, 0, 1
6457 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6458 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6459
6460 PRegisterM pg = p7.Merging();
6461 Initialise(&masm, pg.VnB(), pg_in);
6462
6463 // These are merging operations, so we have to initialise the result registers
6464 // for each operation.
6465 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6466 __ Index(ZRegister(i, kBRegSize), 0, -1);
6467 }
6468
6469 // Recognisable values to copy.
6470 __ Mov(x0, 0xdeadbeefdeadbe42);
6471 __ Mov(x1, 0xdeadbeefdead8421);
6472 __ Mov(x2, 0xdeadbeef80042001);
6473 __ Mov(x3, 0x8000000420000001);
6474
6475 // Use NEON moves, to avoid testing SVE `cpy` against itself.
6476 __ Dup(v28.V2D(), x0);
6477 __ Dup(v29.V2D(), x1);
6478 __ Dup(v30.V2D(), x2);
6479 __ Dup(v31.V2D(), x3);
6480
6481 // Register forms (CPY_z_p_r)
6482 __ Cpy(z0.VnB(), pg, w0);
6483 __ Cpy(z1.VnH(), pg, x1); // X registers are accepted for small lanes.
6484 __ Cpy(z2.VnS(), pg, w2);
6485 __ Cpy(z3.VnD(), pg, x3);
6486
6487 // VRegister forms (CPY_z_p_v)
6488 __ Cpy(z4.VnB(), pg, b28);
6489 __ Cpy(z5.VnH(), pg, h29);
6490 __ Cpy(z6.VnS(), pg, s30);
6491 __ Cpy(z7.VnD(), pg, d31);
6492
6493 // Check that we can copy the stack pointer.
6494 __ Mov(x10, sp);
6495 __ Mov(sp, 0xabcabcabcabcabca); // Set sp to a known value.
6496 __ Cpy(z16.VnB(), pg, sp);
6497 __ Cpy(z17.VnH(), pg, wsp);
6498 __ Cpy(z18.VnS(), pg, wsp);
6499 __ Cpy(z19.VnD(), pg, sp);
6500 __ Mov(sp, x10); // Restore sp.
6501
6502 END();
6503
6504 if (CAN_RUN()) {
6505 RUN();
6506 // clang-format off
6507
6508 uint64_t expected_b[] =
6509 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6510 {0xe9eaebec424242f0, 0x42f2f34242f64242, 0xf942fbfcfdfeff42};
6511 ASSERT_EQUAL_SVE(expected_b, z0.VnD());
6512 ASSERT_EQUAL_SVE(expected_b, z4.VnD());
6513
6514 uint64_t expected_h[] =
6515 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6516 {0xe9eaebec8421eff0, 0xf1f28421f5f68421, 0x8421fbfcfdfe8421};
6517 ASSERT_EQUAL_SVE(expected_h, z1.VnD());
6518 ASSERT_EQUAL_SVE(expected_h, z5.VnD());
6519
6520 uint64_t expected_s[] =
6521 // pg: 0 0 1 1 0 1
6522 {0xe9eaebecedeeeff0, 0x8004200180042001, 0xf9fafbfc80042001};
6523 ASSERT_EQUAL_SVE(expected_s, z2.VnD());
6524 ASSERT_EQUAL_SVE(expected_s, z6.VnD());
6525
6526 uint64_t expected_d[] =
6527 // pg: 0 1 1
6528 {0xe9eaebecedeeeff0, 0x8000000420000001, 0x8000000420000001};
6529 ASSERT_EQUAL_SVE(expected_d, z3.VnD());
6530 ASSERT_EQUAL_SVE(expected_d, z7.VnD());
6531
6532
6533 uint64_t expected_b_sp[] =
6534 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6535 {0xe9eaebeccacacaf0, 0xcaf2f3cacaf6caca, 0xf9cafbfcfdfeffca};
6536 ASSERT_EQUAL_SVE(expected_b_sp, z16.VnD());
6537
6538 uint64_t expected_h_sp[] =
6539 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6540 {0xe9eaebecabcaeff0, 0xf1f2abcaf5f6abca, 0xabcafbfcfdfeabca};
6541 ASSERT_EQUAL_SVE(expected_h_sp, z17.VnD());
6542
6543 uint64_t expected_s_sp[] =
6544 // pg: 0 0 1 1 0 1
6545 {0xe9eaebecedeeeff0, 0xcabcabcacabcabca, 0xf9fafbfccabcabca};
6546 ASSERT_EQUAL_SVE(expected_s_sp, z18.VnD());
6547
6548 uint64_t expected_d_sp[] =
6549 // pg: 0 1 1
6550 {0xe9eaebecedeeeff0, 0xabcabcabcabcabca, 0xabcabcabcabcabca};
6551 ASSERT_EQUAL_SVE(expected_d_sp, z19.VnD());
6552
6553 // clang-format on
6554 }
6555}
6556
Jacob Bramley0f62eab2019-10-23 17:07:47 +01006557TEST_SVE(sve_cpy_imm) {
6558 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6559 START();
6560
6561 // For simplicity, we re-use the same pg for various lane sizes.
6562 // For D lanes: 0, 1, 1
6563 // For S lanes: 0, 1, 1, 0, 1
6564 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6565 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6566
6567 PRegister pg = p7;
6568 Initialise(&masm, pg.VnB(), pg_in);
6569
6570 // These are (mostly) merging operations, so we have to initialise the result
6571 // registers for each operation.
6572 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6573 __ Index(ZRegister(i, kBRegSize), 0, -1);
6574 }
6575
6576 // Encodable integer forms (CPY_z_p_i)
6577 __ Cpy(z0.VnB(), pg.Merging(), 0);
6578 __ Cpy(z1.VnB(), pg.Zeroing(), 42);
6579 __ Cpy(z2.VnB(), pg.Merging(), -42);
6580 __ Cpy(z3.VnB(), pg.Zeroing(), 0xff);
6581 __ Cpy(z4.VnH(), pg.Merging(), 127);
6582 __ Cpy(z5.VnS(), pg.Zeroing(), -128);
6583 __ Cpy(z6.VnD(), pg.Merging(), -1);
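  // CPY (immediate) encodes a signed 8-bit immediate, optionally shifted left
  // by 8 bits, so all of the values above should be encodable directly.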
6584
6585 // Forms encodable using fcpy.
6586 __ Cpy(z7.VnH(), pg.Merging(), Float16ToRawbits(Float16(-31.0)));
6587 __ Cpy(z8.VnS(), pg.Zeroing(), FloatToRawbits(2.0f));
6588 __ Cpy(z9.VnD(), pg.Merging(), DoubleToRawbits(-4.0));
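  // These bit patterns are FP values with an 8-bit (FMOV-style) immediate
  // encoding, so the MacroAssembler can implement them using `fcpy` without a
  // scratch register.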
6589
6590 // Other forms use a scratch register.
6591 __ Cpy(z10.VnH(), pg.Merging(), 0xff);
6592 __ Cpy(z11.VnD(), pg.Zeroing(), 0x0123456789abcdef);
6593
6594 END();
6595
6596 if (CAN_RUN()) {
6597 RUN();
6598 // clang-format off
6599
6600 uint64_t expected_z0[] =
6601 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6602 {0xe9eaebec000000f0, 0x00f2f30000f60000, 0xf900fbfcfdfeff00};
6603 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6604
6605 uint64_t expected_z1[] =
6606 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6607 {0x000000002a2a2a00, 0x2a00002a2a002a2a, 0x002a00000000002a};
6608 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6609
6610 uint64_t expected_z2[] =
6611 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6612 {0xe9eaebecd6d6d6f0, 0xd6f2f3d6d6f6d6d6, 0xf9d6fbfcfdfeffd6};
6613 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6614
6615 uint64_t expected_z3[] =
6616 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6617 {0x00000000ffffff00, 0xff0000ffff00ffff, 0x00ff0000000000ff};
6618 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6619
6620 uint64_t expected_z4[] =
6621 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6622 {0xe9eaebec007feff0, 0xf1f2007ff5f6007f, 0x007ffbfcfdfe007f};
6623 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6624
6625 uint64_t expected_z5[] =
6626 // pg: 0 0 1 1 0 1
6627 {0x0000000000000000, 0xffffff80ffffff80, 0x00000000ffffff80};
6628 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6629
6630 uint64_t expected_z6[] =
6631 // pg: 0 1 1
6632 {0xe9eaebecedeeeff0, 0xffffffffffffffff, 0xffffffffffffffff};
6633 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6634
6635 uint64_t expected_z7[] =
6636 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6637 {0xe9eaebeccfc0eff0, 0xf1f2cfc0f5f6cfc0, 0xcfc0fbfcfdfecfc0};
6638 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6639
6640 uint64_t expected_z8[] =
6641 // pg: 0 0 1 1 0 1
6642 {0x0000000000000000, 0x4000000040000000, 0x0000000040000000};
6643 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6644
6645 uint64_t expected_z9[] =
6646 // pg: 0 1 1
6647 {0xe9eaebecedeeeff0, 0xc010000000000000, 0xc010000000000000};
6648 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6649
6650 uint64_t expected_z10[] =
6651 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6652 {0xe9eaebec00ffeff0, 0xf1f200fff5f600ff, 0x00fffbfcfdfe00ff};
6653 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6654
6655 uint64_t expected_z11[] =
6656 // pg: 0 1 1
6657 {0x0000000000000000, 0x0123456789abcdef, 0x0123456789abcdef};
6658 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6659
6660 // clang-format on
6661 }
6662}
6663
6664TEST_SVE(sve_fcpy_imm) {
6665 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6666 START();
6667
6668 // For simplicity, we re-use the same pg for various lane sizes.
6669 // For D lanes: 0, 1, 1
6670 // For S lanes: 0, 1, 1, 0, 1
6671 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6672 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6673
6674 PRegister pg = p7;
6675 Initialise(&masm, pg.VnB(), pg_in);
6676
6677 // These are (mostly) merging operations, so we have to initialise the result
6678 // registers for each operation.
6679 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6680 __ Index(ZRegister(i, kBRegSize), 0, -1);
6681 }
6682
6683 // Encodable floating-point forms (FCPY_z_p_i)
6684 __ Fcpy(z1.VnH(), pg.Merging(), Float16(1.0));
6685 __ Fcpy(z2.VnH(), pg.Merging(), -2.0f);
6686 __ Fcpy(z3.VnH(), pg.Merging(), 3.0);
6687 __ Fcpy(z4.VnS(), pg.Merging(), Float16(-4.0));
6688 __ Fcpy(z5.VnS(), pg.Merging(), 5.0f);
6689 __ Fcpy(z6.VnS(), pg.Merging(), 6.0);
6690 __ Fcpy(z7.VnD(), pg.Merging(), Float16(7.0));
6691 __ Fcpy(z8.VnD(), pg.Merging(), 8.0f);
6692 __ Fcpy(z9.VnD(), pg.Merging(), -9.0);
6693
6694 // Unencodable immediates.
6695 __ Fcpy(z10.VnS(), pg.Merging(), 0.0);
6696 __ Fcpy(z11.VnH(), pg.Merging(), Float16(42.0));
6697 __ Fcpy(z12.VnD(), pg.Merging(), RawbitsToDouble(0x7ff0000012340000)); // NaN
6698 __ Fcpy(z13.VnH(), pg.Merging(), kFP64NegativeInfinity);
6699
6700 END();
6701
6702 if (CAN_RUN()) {
6703 RUN();
6704 // clang-format off
6705
6706 // 1.0 as FP16: 0x3c00
6707 uint64_t expected_z1[] =
6708 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6709 {0xe9eaebec3c00eff0, 0xf1f23c00f5f63c00, 0x3c00fbfcfdfe3c00};
6710 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6711
6712 // -2.0 as FP16: 0xc000
6713 uint64_t expected_z2[] =
6714 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6715 {0xe9eaebecc000eff0, 0xf1f2c000f5f6c000, 0xc000fbfcfdfec000};
6716 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6717
6718 // 3.0 as FP16: 0x4200
6719 uint64_t expected_z3[] =
6720 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6721 {0xe9eaebec4200eff0, 0xf1f24200f5f64200, 0x4200fbfcfdfe4200};
6722 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6723
6724 // -4.0 as FP32: 0xc0800000
6725 uint64_t expected_z4[] =
6726 // pg: 0 0 1 1 0 1
6727 {0xe9eaebecedeeeff0, 0xc0800000c0800000, 0xf9fafbfcc0800000};
6728 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6729
6730 // 5.0 as FP32: 0x40a00000
6731 uint64_t expected_z5[] =
6732 // pg: 0 0 1 1 0 1
6733 {0xe9eaebecedeeeff0, 0x40a0000040a00000, 0xf9fafbfc40a00000};
6734 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6735
6736 // 6.0 as FP32: 0x40c00000
6737 uint64_t expected_z6[] =
6738 // pg: 0 0 1 1 0 1
6739 {0xe9eaebecedeeeff0, 0x40c0000040c00000, 0xf9fafbfc40c00000};
6740 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6741
6742 // 7.0 as FP64: 0x401c000000000000
6743 uint64_t expected_z7[] =
6744 // pg: 0 1 1
6745 {0xe9eaebecedeeeff0, 0x401c000000000000, 0x401c000000000000};
6746 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6747
6748 // 8.0 as FP64: 0x4020000000000000
6749 uint64_t expected_z8[] =
6750 // pg: 0 1 1
6751 {0xe9eaebecedeeeff0, 0x4020000000000000, 0x4020000000000000};
6752 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6753
6754 // -9.0 as FP64: 0xc022000000000000
6755 uint64_t expected_z9[] =
6756 // pg: 0 1 1
6757 {0xe9eaebecedeeeff0, 0xc022000000000000, 0xc022000000000000};
6758 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6759
6760 // 0.0 as FP32: 0x00000000
6761 uint64_t expected_z10[] =
6762 // pg: 0 0 1 1 0 1
6763 {0xe9eaebecedeeeff0, 0x0000000000000000, 0xf9fafbfc00000000};
6764 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6765
6766 // 42.0 as FP16: 0x5140
6767 uint64_t expected_z11[] =
6768 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6769 {0xe9eaebec5140eff0, 0xf1f25140f5f65140, 0x5140fbfcfdfe5140};
6770 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6771
6772 // Signalling NaN (with payload): 0x7ff0000012340000
6773 uint64_t expected_z12[] =
6774 // pg: 0 1 1
6775 {0xe9eaebecedeeeff0, 0x7ff0000012340000, 0x7ff0000012340000};
6776 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
6777
6778 // -infinity as FP16: 0xfc00
6779 uint64_t expected_z13[] =
6780 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6781 {0xe9eaebecfc00eff0, 0xf1f2fc00f5f6fc00, 0xfc00fbfcfdfefc00};
6782 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
6783
6784 // clang-format on
6785 }
6786}
6787
TatWai Chong4f28df72019-08-14 17:50:30 -07006788TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
6789 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6790 START();
6791
6792 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
6793
6794 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
6795
6796 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
6797
6798 int index_s[] = {1, 3, 2, 31, -1};
6799
6800 int index_d[] = {31, 1};
6801
6802 // Initialize the register with a value that doesn't exist in the table.
6803 __ Dup(z9.VnB(), 0x1f);
6804 InsrHelper(&masm, z9.VnD(), table_inputs);
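  // `Tbl` zeroes result elements whose index is out of range for the current
  // vector length; in-range indices beyond the two table_inputs lanes read the
  // 0x1f filler instead.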
6805
6806 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
6807 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
6808 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
6809 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
6810
6811 InsrHelper(&masm, ind_b, index_b);
6812 InsrHelper(&masm, ind_h, index_h);
6813 InsrHelper(&masm, ind_s, index_s);
6814 InsrHelper(&masm, ind_d, index_d);
6815
6816 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
6817
6818 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
6819
6820 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
6821
6822 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
6823
6824 END();
6825
6826 if (CAN_RUN()) {
6827 RUN();
6828
6829 // clang-format off
6830 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
6831 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
6832
6833 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
6834 0x5544, 0x7766, 0xddcc, 0x9988};
6835
6836 unsigned z28_expected[] =
6837 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
6838
6839 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
6840 // clang-format on
6841
6842 unsigned vl = config->sve_vl_in_bits();
6843 for (size_t i = 0; i < ArrayLength(index_b); i++) {
6844 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
6845 if (!core.HasSVELane(z26.VnB(), lane)) break;
6846 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
6847 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
6848 }
6849
6850 for (size_t i = 0; i < ArrayLength(index_h); i++) {
6851 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
6852 if (!core.HasSVELane(z27.VnH(), lane)) break;
6853 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
6854 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
6855 }
6856
6857 for (size_t i = 0; i < ArrayLength(index_s); i++) {
6858 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
6859 if (!core.HasSVELane(z28.VnS(), lane)) break;
6860 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
6861 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
6862 }
6863
6864 for (size_t i = 0; i < ArrayLength(index_d); i++) {
6865 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
6866 if (!core.HasSVELane(z29.VnD(), lane)) break;
6867 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
6868 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
6869 }
6870 }
6871}
6872
Jacob Bramley199339d2019-08-05 18:49:13 +01006873TEST_SVE(ldr_str_z_bi) {
6874 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6875 START();
6876
6877 int vl = config->sve_vl_in_bytes();
6878
6879 // The immediate can address [-256, 255] times the VL, so allocate enough
6880 // space to exceed that in both directions.
6881 int data_size = vl * 1024;
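  // With the base in the middle of the buffer, +/-512 VL is addressable. This
  // covers the encodable [-256, 255] range as well as the +/-314 VL fallback
  // cases used below.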
6882
6883 uint8_t* data = new uint8_t[data_size];
6884 memset(data, 0, data_size);
6885
6886 // Set the base half-way through the buffer so we can use negative indices.
6887 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6888
6889 __ Index(z1.VnB(), 1, 3);
6890 __ Index(z2.VnB(), 2, 5);
6891 __ Index(z3.VnB(), 3, 7);
6892 __ Index(z4.VnB(), 4, 11);
6893 __ Index(z5.VnB(), 5, 13);
6894 __ Index(z6.VnB(), 6, 2);
6895 __ Index(z7.VnB(), 7, 3);
6896 __ Index(z8.VnB(), 8, 5);
6897 __ Index(z9.VnB(), 9, 7);
6898
6899 // Encodable cases.
6900 __ Str(z1, SVEMemOperand(x0));
6901 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
6902 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
6903 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
6904 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
6905
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006906 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01006907 __ Str(z6, SVEMemOperand(x0, 6 * vl));
6908 __ Str(z7, SVEMemOperand(x0, -7 * vl));
6909 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
6910 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
6911
6912 // Corresponding loads.
6913 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
6914 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
6915 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
6916 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
6917 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
6918
6919 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
6920 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
6921 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
6922 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
6923
6924 END();
6925
6926 if (CAN_RUN()) {
6927 RUN();
6928
6929 uint8_t* expected = new uint8_t[data_size];
6930 memset(expected, 0, data_size);
6931 uint8_t* middle = &expected[data_size / 2];
6932
6933 for (int i = 0; i < vl; i++) {
6934 middle[i] = (1 + (3 * i)) & 0xff; // z1
6935 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
6936 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
6937 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
6938 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
6939 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
6940 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
6941 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
6942 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
6943 }
6944
Jacob Bramley33c99f92019-10-08 15:24:12 +01006945 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01006946
6947 ASSERT_EQUAL_SVE(z1, z11);
6948 ASSERT_EQUAL_SVE(z2, z12);
6949 ASSERT_EQUAL_SVE(z3, z13);
6950 ASSERT_EQUAL_SVE(z4, z14);
6951 ASSERT_EQUAL_SVE(z5, z15);
6952 ASSERT_EQUAL_SVE(z6, z16);
6953 ASSERT_EQUAL_SVE(z7, z17);
6954 ASSERT_EQUAL_SVE(z8, z18);
6955 ASSERT_EQUAL_SVE(z9, z19);
6956
6957 delete[] expected;
6958 }
6959 delete[] data;
6960}
6961
6962TEST_SVE(ldr_str_p_bi) {
6963 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6964 START();
6965
6966 int vl = config->sve_vl_in_bytes();
6967 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
6968 int pl = vl / kZRegBitsPerPRegBit;
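  // A predicate register has one bit for each Z-register byte, so the
  // predicate length (PL) in bytes is VL / 8.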
6969
6970 // The immediate can address [-256, 255] times the PL, so allocate enough
6971 // space to exceed that in both directions.
6972 int data_size = pl * 1024;
6973
6974 uint8_t* data = new uint8_t[data_size];
6975 memset(data, 0, data_size);
6976
6977 // Set the base half-way through the buffer so we can use negative indices.
6978 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6979
6980 uint64_t pattern[4] = {0x1010101011101111,
6981 0x0010111011000101,
6982 0x1001101110010110,
6983 0x1010110101100011};
6984 for (int i = 8; i <= 15; i++) {
6985 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
6986 Initialise(&masm,
6987 PRegister(i),
6988 pattern[3] * i,
6989 pattern[2] * i,
6990 pattern[1] * i,
6991 pattern[0] * i);
6992 }
6993
6994 // Encodable cases.
6995 __ Str(p8, SVEMemOperand(x0));
6996 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
6997 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
6998 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
6999
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007000 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01007001 __ Str(p12, SVEMemOperand(x0, 6 * pl));
7002 __ Str(p13, SVEMemOperand(x0, -7 * pl));
7003 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
7004 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
7005
7006 // Corresponding loads.
7007 __ Ldr(p0, SVEMemOperand(x0));
7008 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
7009 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
7010 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
7011
7012 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
7013 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
7014 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
7015 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
7016
7017 END();
7018
7019 if (CAN_RUN()) {
7020 RUN();
7021
7022 uint8_t* expected = new uint8_t[data_size];
7023 memset(expected, 0, data_size);
7024 uint8_t* middle = &expected[data_size / 2];
7025
7026 for (int i = 0; i < pl; i++) {
7027 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
7028 size_t index = i / sizeof(pattern[0]);
7029 VIXL_ASSERT(index < ArrayLength(pattern));
7030 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
7031 // Each byte of `pattern` can be multiplied by 15 without carry.
7032 VIXL_ASSERT((byte * 15) <= 0xff);
7033
7034 middle[i] = byte * 8; // p8
7035 middle[(2 * pl) + i] = byte * 9; // p9
7036 middle[(-3 * pl) + i] = byte * 10; // p10
7037 middle[(255 * pl) + i] = byte * 11; // p11
7038 middle[(6 * pl) + i] = byte * 12; // p12
7039 middle[(-7 * pl) + i] = byte * 13; // p13
7040 middle[(314 * pl) + i] = byte * 14; // p14
7041 middle[(-314 * pl) + i] = byte * 15; // p15
7042 }
7043
Jacob Bramley33c99f92019-10-08 15:24:12 +01007044 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01007045
7046 ASSERT_EQUAL_SVE(p0, p8);
7047 ASSERT_EQUAL_SVE(p1, p9);
7048 ASSERT_EQUAL_SVE(p2, p10);
7049 ASSERT_EQUAL_SVE(p3, p11);
7050 ASSERT_EQUAL_SVE(p4, p12);
7051 ASSERT_EQUAL_SVE(p5, p13);
7052 ASSERT_EQUAL_SVE(p6, p14);
7053 ASSERT_EQUAL_SVE(p7, p15);
7054
7055 delete[] expected;
7056 }
7057 delete[] data;
7058}
7059
Jacob Bramleye668b202019-08-14 17:57:34 +01007060template <typename T>
7061static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
7062 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
7063}
7064
7065TEST_SVE(sve_ld1_st1_contiguous) {
7066 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7067 START();
7068
7069 int vl = config->sve_vl_in_bytes();
7070
7071 // The immediate can address [-8, 7] times the VL, so allocate enough space to
7072 // exceed that in both directions.
7073 int data_size = vl * 128;
7074
7075 uint8_t* data = new uint8_t[data_size];
7076 memset(data, 0, data_size);
7077
Martyn Capewell452ad8b2020-03-19 15:49:57 +00007078 // Set the base half-way through the buffer so we can use negative indices.
Jacob Bramleye668b202019-08-14 17:57:34 +01007079 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7080
Jacob Bramleye668b202019-08-14 17:57:34 +01007081 // Encodable scalar-plus-immediate cases.
7082 __ Index(z1.VnB(), 1, -3);
7083 __ Ptrue(p1.VnB());
7084 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
7085
7086 __ Index(z2.VnH(), -2, 5);
7087 __ Ptrue(p2.VnH(), SVE_MUL3);
7088 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
7089
7090 __ Index(z3.VnS(), 3, -7);
7091 __ Ptrue(p3.VnS(), SVE_POW2);
7092 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
7093
7094 // Encodable scalar-plus-scalar cases.
7095 __ Index(z4.VnD(), -4, 11);
7096 __ Ptrue(p4.VnD(), SVE_VL3);
7097 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
7098 __ Mov(x2, 17);
7099 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
7100
7101 __ Index(z5.VnD(), 6, -2);
7102 __ Ptrue(p5.VnD(), SVE_VL16);
TatWai Chong6205eb42019-09-24 10:07:20 +01007103 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
7104 __ Mov(x4, 6);
7105 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
Jacob Bramleye668b202019-08-14 17:57:34 +01007106
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007107 // Unencodable cases fall back on `CalculateSVEAddress`.
Jacob Bramleye668b202019-08-14 17:57:34 +01007108 __ Index(z6.VnS(), -7, 3);
7109 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
7110 // predicate bits when handling larger lanes.
7111 __ Ptrue(p6.VnB(), SVE_ALL);
7112 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
7113
TatWai Chong6205eb42019-09-24 10:07:20 +01007114 __ Index(z7.VnD(), 32, -11);
7115 __ Ptrue(p7.VnD(), SVE_MUL4);
7116 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
Jacob Bramleye668b202019-08-14 17:57:34 +01007117
TatWai Chong6205eb42019-09-24 10:07:20 +01007118 // Corresponding loads.
7119 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
7120 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
7121 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
7122 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
7123 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
7124 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
7125
7126 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
7127 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
7128 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
7129 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
7130
7131 // We can test ld1 by comparing the value loaded with the value stored. In
7132 // most cases, there are two complications:
7133 // - Loads have zeroing predication, so we have to clear the inactive
7134 // elements on our reference.
7135 // - We have to replicate any sign- or zero-extension.
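  // For example, Ld1b into H lanes zero-extends each loaded byte, so the
  // reference applies Uxtb to the stored values; the Ld1s* forms use Sxt*
  // instead.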
7136
7137 // Ld1b(z8.VnB(), ...)
7138 __ Dup(z18.VnB(), 0);
7139 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
7140
7141 // Ld1b(z9.VnH(), ...)
7142 __ Dup(z19.VnH(), 0);
7143 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
7144
7145 // Ld1h(z10.VnS(), ...)
7146 __ Dup(z20.VnS(), 0);
7147 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
7148
7149 // Ld1b(z11.VnD(), ...)
7150 __ Dup(z21.VnD(), 0);
7151 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
7152
7153 // Ld1d(z12.VnD(), ...)
7154 __ Dup(z22.VnD(), 0);
7155 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
7156
7157 // Ld1w(z13.VnS(), ...)
7158 __ Dup(z23.VnS(), 0);
7159 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
7160
7161 // Ld1sb(z14.VnH(), ...)
7162 __ Dup(z24.VnH(), 0);
7163 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
7164
7165 // Ld1sh(z15.VnS(), ...)
7166 __ Dup(z25.VnS(), 0);
7167 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
7168
7169 // Ld1sb(z16.VnD(), ...)
7170 __ Dup(z26.VnD(), 0);
7171 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
7172
7173 // Ld1sw(z17.VnD(), ...)
7174 __ Dup(z27.VnD(), 0);
7175 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
Jacob Bramleye668b202019-08-14 17:57:34 +01007176
7177 END();
7178
7179 if (CAN_RUN()) {
7180 RUN();
7181
7182 uint8_t* expected = new uint8_t[data_size];
7183 memset(expected, 0, data_size);
7184 uint8_t* middle = &expected[data_size / 2];
7185
7186 int vl_b = vl / kBRegSizeInBytes;
7187 int vl_h = vl / kHRegSizeInBytes;
7188 int vl_s = vl / kSRegSizeInBytes;
7189 int vl_d = vl / kDRegSizeInBytes;
7190
7191 // Encodable cases.
7192
7193 // st1b { z1.b }, SVE_ALL
7194 for (int i = 0; i < vl_b; i++) {
7195 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
7196 }
7197
7198 // st1b { z2.h }, SVE_MUL3
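    // st1b with H-sized lanes stores (vl / 2) bytes per register, so an
    // SVE_MUL_VL immediate of 7 corresponds to a byte offset of 7 * (vl / 2).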
7199 int vl_h_mul3 = vl_h - (vl_h % 3);
7200 for (int i = 0; i < vl_h_mul3; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007201 int64_t offset = 7 * static_cast<int>(vl / (kHRegSize / kBRegSize));
7202 MemoryWrite(middle, offset, i, static_cast<uint8_t>(-2 + (5 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01007203 }
7204
7205 // st1h { z3.s }, SVE_POW2
7206 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7207 for (int i = 0; i < vl_s_pow2; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007208 int64_t offset = -8 * static_cast<int>(vl / (kSRegSize / kHRegSize));
7209 MemoryWrite(middle, offset, i, static_cast<uint16_t>(3 - (7 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01007210 }
7211
7212 // st1b { z4.d }, SVE_VL3
7213 if (vl_d >= 3) {
7214 for (int i = 0; i < 3; i++) {
7215 MemoryWrite(middle,
7216 (8 * vl) + 17,
7217 i,
7218 static_cast<uint8_t>(-4 + (11 * i)));
7219 }
7220 }
7221
7222 // st1d { z5.d }, SVE_VL16
7223 if (vl_d >= 16) {
7224 for (int i = 0; i < 16; i++) {
7225 MemoryWrite(middle,
7226 (10 * vl) + (6 * kDRegSizeInBytes),
7227 i,
7228 static_cast<uint64_t>(6 - (2 * i)));
7229 }
7230 }
7231
7232 // Unencodable cases.
7233
7234 // st1w { z6.s }, SVE_ALL
7235 for (int i = 0; i < vl_s; i++) {
7236 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
7237 }
7238
TatWai Chong6205eb42019-09-24 10:07:20 +01007239 // st1w { z7.d }, SVE_MUL4
7240 int vl_d_mul4 = vl_d - (vl_d % 4);
7241 for (int i = 0; i < vl_d_mul4; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007242 int64_t offset = 22 * static_cast<int>(vl / (kDRegSize / kWRegSize));
7243 MemoryWrite(middle, offset, i, static_cast<uint32_t>(32 + (-11 * i)));
TatWai Chong6205eb42019-09-24 10:07:20 +01007244 }
7245
Jacob Bramley33c99f92019-10-08 15:24:12 +01007246 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramleye668b202019-08-14 17:57:34 +01007247
TatWai Chong6205eb42019-09-24 10:07:20 +01007248 // Check that we loaded back the expected values.
7249
7250 ASSERT_EQUAL_SVE(z18, z8);
7251 ASSERT_EQUAL_SVE(z19, z9);
7252 ASSERT_EQUAL_SVE(z20, z10);
7253 ASSERT_EQUAL_SVE(z21, z11);
7254 ASSERT_EQUAL_SVE(z22, z12);
7255 ASSERT_EQUAL_SVE(z23, z13);
7256 ASSERT_EQUAL_SVE(z24, z14);
7257 ASSERT_EQUAL_SVE(z25, z15);
7258 ASSERT_EQUAL_SVE(z26, z16);
7259 ASSERT_EQUAL_SVE(z27, z17);
7260
Jacob Bramleye668b202019-08-14 17:57:34 +01007261 delete[] expected;
7262 }
7263 delete[] data;
7264}
7265
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007266TEST_SVE(sve_ld2_st2_scalar_plus_imm) {
7267 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7268 START();
7269
7270 int vl = config->sve_vl_in_bytes();
7271
7272 // The immediate can address [-16, 14] times the VL, so allocate enough space
7273 // to exceed that in both directions.
7274 int data_size = vl * 128;
7275
7276 uint8_t* data = new uint8_t[data_size];
7277 memset(data, 0, data_size);
7278
7279 // Set the base half-way through the buffer so we can use negative indices.
7280 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7281
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007282 __ Index(z14.VnB(), 1, -3);
7283 __ Index(z15.VnB(), 2, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007284 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007285 __ St2b(z14.VnB(), z15.VnB(), p0, SVEMemOperand(x0));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007286
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007287 __ Index(z16.VnH(), -2, 5);
7288 __ Index(z17.VnH(), -3, 5);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007289 __ Ptrue(p1.VnH(), SVE_MUL3);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007290 __ St2h(z16.VnH(), z17.VnH(), p1, SVEMemOperand(x0, 8, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007291
7292 // Wrap around from z31 to z0.
7293 __ Index(z31.VnS(), 3, -7);
7294 __ Index(z0.VnS(), 4, -7);
7295 __ Ptrue(p2.VnS(), SVE_POW2);
7296 __ St2w(z31.VnS(), z0.VnS(), p2, SVEMemOperand(x0, -12, SVE_MUL_VL));
7297
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007298 __ Index(z18.VnD(), -7, 3);
7299 __ Index(z19.VnD(), -8, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007300 // Sparse predication, including some irrelevant bits (0xe). To make the
7301 // results easy to check, activate each lane <n> where n is a multiple of 5.
7302 Initialise(&masm,
7303 p3,
7304 0xeee10000000001ee,
7305 0xeeeeeee100000000,
7306 0x01eeeeeeeee10000,
7307 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007308 __ St2d(z18.VnD(), z19.VnD(), p3, SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007309
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007310 // We can test ld2 by comparing the values loaded with the values stored.
7311 // There are two complications:
7312 // - Loads have zeroing predication, so we have to clear the inactive
7313 // elements on our reference.
7314 // - We want to test both loads and stores that span { z31, z0 }, so we have
7315 // to move some values around.
7316 //
7317 // Registers z4-z11 will hold as-stored values (with inactive elements
7318 // cleared). Registers z20-z27 will hold the values that were loaded.
7319
7320 // Ld2b(z14.VnB(), z15.VnB(), ...)
7321 __ Dup(z4.VnB(), 0);
7322 __ Dup(z5.VnB(), 0);
7323 __ Mov(z4.VnB(), p0.Merging(), z14.VnB());
7324 __ Mov(z5.VnB(), p0.Merging(), z15.VnB());
7325
7326 // Ld2h(z16.VnH(), z17.VnH(), ...)
7327 __ Dup(z6.VnH(), 0);
7328 __ Dup(z7.VnH(), 0);
7329 __ Mov(z6.VnH(), p1.Merging(), z16.VnH());
7330 __ Mov(z7.VnH(), p1.Merging(), z17.VnH());
7331
7332 // Ld2w(z31.VnS(), z0.VnS(), ...)
7333 __ Dup(z8.VnS(), 0);
7334 __ Dup(z9.VnS(), 0);
7335 __ Mov(z8.VnS(), p2.Merging(), z31.VnS());
7336 __ Mov(z9.VnS(), p2.Merging(), z0.VnS());
7337
7338 // Ld2d(z18.VnD(), z19.VnD(), ...)
7339 __ Dup(z10.VnD(), 0);
7340 __ Dup(z11.VnD(), 0);
7341 __ Mov(z10.VnD(), p3.Merging(), z18.VnD());
7342 __ Mov(z11.VnD(), p3.Merging(), z19.VnD());
7343
7344 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7345 __ Ld2b(z31.VnB(), z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));
7346 __ Mov(z20, z31);
7347 __ Mov(z21, z0);
7348
7349 __ Ld2h(z22.VnH(), z23.VnH(), p1.Zeroing(), SVEMemOperand(x0, 8, SVE_MUL_VL));
7350 __ Ld2w(z24.VnS(),
7351 z25.VnS(),
7352 p2.Zeroing(),
7353 SVEMemOperand(x0, -12, SVE_MUL_VL));
7354 __ Ld2d(z26.VnD(),
7355 z27.VnD(),
7356 p3.Zeroing(),
7357 SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007358
7359 END();
7360
7361 if (CAN_RUN()) {
7362 RUN();
7363
7364 uint8_t* expected = new uint8_t[data_size];
7365 memset(expected, 0, data_size);
7366 uint8_t* middle = &expected[data_size / 2];
7367
7368 int vl_b = vl / kBRegSizeInBytes;
7369 int vl_h = vl / kHRegSizeInBytes;
7370 int vl_s = vl / kSRegSizeInBytes;
7371 int vl_d = vl / kDRegSizeInBytes;
7372
7373 int reg_count = 2;
7374
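// St2 interleaves its two source registers element by element, so each active
// lane i of register r ends up at element index (i * 2) + r from the base of
// the store. The loops below rebuild that layout; MemoryWrite() (defined
// earlier in this file) is assumed to write one lane-sized value at
// `middle + offset + (index * sizeof(value))`.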
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007375 // st2b { z14.b, z15.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007376 for (int i = 0; i < vl_b; i++) {
7377 uint8_t lane0 = 1 - (3 * i);
7378 uint8_t lane1 = 2 - (3 * i);
7379 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7380 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7381 }
7382
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007383 // st2h { z16.h, z17.h }, SVE_MUL3
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007384 int vl_h_mul3 = vl_h - (vl_h % 3);
7385 for (int i = 0; i < vl_h_mul3; i++) {
7386 int64_t offset = 8 * vl;
7387 uint16_t lane0 = -2 + (5 * i);
7388 uint16_t lane1 = -3 + (5 * i);
7389 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7390 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7391 }
7392
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007393 // st2w { z31.s, z0.s }, SVE_POW2
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007394 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7395 for (int i = 0; i < vl_s_pow2; i++) {
7396 int64_t offset = -12 * vl;
7397 uint32_t lane0 = 3 - (7 * i);
7398 uint32_t lane1 = 4 - (7 * i);
7399 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7400 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7401 }
7402
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007403 // st2d { z18.d, z19.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007404 for (int i = 0; i < vl_d; i++) {
7405 if ((i % 5) == 0) {
7406 int64_t offset = 14 * vl;
7407 uint64_t lane0 = -7 + (3 * i);
7408 uint64_t lane1 = -8 + (3 * i);
7409 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7410 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7411 }
7412 }
7413
7414 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7415
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007416 // Check that we loaded back the expected values.
7417
7418 // st2b/ld2b
7419 ASSERT_EQUAL_SVE(z4, z20);
7420 ASSERT_EQUAL_SVE(z5, z21);
7421
7422 // st2h/ld2h
7423 ASSERT_EQUAL_SVE(z6, z22);
7424 ASSERT_EQUAL_SVE(z7, z23);
7425
7426 // st2w/ld2w
7427 ASSERT_EQUAL_SVE(z8, z24);
7428 ASSERT_EQUAL_SVE(z9, z25);
7429
7430 // st2d/ld2d
7431 ASSERT_EQUAL_SVE(z10, z26);
7432 ASSERT_EQUAL_SVE(z11, z27);
7433
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007434 delete[] expected;
7435 }
7436 delete[] data;
7437}
7438
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007439TEST_SVE(sve_ld2_st2_scalar_plus_scalar) {
7440 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7441 START();
7442
7443 int vl = config->sve_vl_in_bytes();
7444
7445 // Allocate plenty of space to enable indexing in both directions.
7446 int data_size = vl * 128;
7447
7448 uint8_t* data = new uint8_t[data_size];
7449 memset(data, 0, data_size);
7450
7451 // Set the base half-way through the buffer so we can use negative indices.
7452 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7453
Jacob Bramleye483ce52019-11-05 16:52:29 +00007454 __ Index(z10.VnB(), -4, 11);
7455 __ Index(z11.VnB(), -5, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007456 __ Ptrue(p7.VnB(), SVE_MUL4);
7457 __ Mov(x1, 0);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007458 __ St2b(z10.VnB(), z11.VnB(), p7, SVEMemOperand(x0, x1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007459
Jacob Bramleye483ce52019-11-05 16:52:29 +00007460 __ Index(z12.VnH(), 6, -2);
7461 __ Index(z13.VnH(), 7, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007462 __ Ptrue(p6.VnH(), SVE_VL16);
7463 __ Rdvl(x2, 3); // Make offsets VL-dependent so we can avoid overlap.
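// Rdvl(xd, n) sets xd to n * VL in bytes, so with the LSL #1 in the addressing
// mode below the effective byte offset is (3 * VL) << 1 = 6 * VL.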
Jacob Bramleye483ce52019-11-05 16:52:29 +00007464 __ St2h(z12.VnH(), z13.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007465
Jacob Bramleye483ce52019-11-05 16:52:29 +00007466 __ Index(z14.VnS(), -7, 3);
7467 __ Index(z15.VnS(), -8, 3);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007468 // Sparse predication, including some irrelevant bits (0xe). To make the
7469 // results easy to check, activate each lane <n> where n is a multiple of 5.
7470 Initialise(&masm,
7471 p5,
7472 0xeee1000010000100,
7473 0x001eeee100001000,
7474 0x0100001eeee10000,
7475 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007476 __ Rdvl(x3, -3);
7477 __ St2w(z14.VnS(), z15.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007478
7479 // Wrap around from z31 to z0.
7480 __ Index(z31.VnD(), 32, -11);
7481 __ Index(z0.VnD(), 33, -11);
7482 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007483 __ Rdvl(x4, 1);
7484 __ St2d(z31.VnD(), z0.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007485
Jacob Bramleye483ce52019-11-05 16:52:29 +00007486 // We can test ld2 by comparing the values loaded with the values stored.
7487 // There are two complications:
7488 // - Loads have zeroing predication, so we have to clear the inactive
7489 // elements on our reference.
7490 // - We want to test both loads and stores that span { z31, z0 }, so we have
7491 // to move some values around.
7492 //
7493 // Registers z4-z11 will hold as-stored values (with inactive elements
7494 // cleared). Registers z20-z27 will hold the values that were loaded.
7495
7496 // Ld2b(z20.VnB(), z21.VnB(), ...)
7497 __ Dup(z4.VnB(), 0);
7498 __ Dup(z5.VnB(), 0);
7499 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
7500 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
7501
7502 // Ld2h(z22.VnH(), z23.VnH(), ...)
7503 __ Dup(z6.VnH(), 0);
7504 __ Dup(z7.VnH(), 0);
7505 __ Mov(z6.VnH(), p6.Merging(), z12.VnH());
7506 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
7507
7508 // Ld2w(z24.VnS(), z25.VnS(), ...)
7509 __ Dup(z8.VnS(), 0);
7510 __ Dup(z9.VnS(), 0);
7511 __ Mov(z8.VnS(), p5.Merging(), z14.VnS());
7512 __ Mov(z9.VnS(), p5.Merging(), z15.VnS());
7513
7514 // Ld2d(z31.VnD(), z0.VnD(), ...)
7515 __ Dup(z10.VnD(), 0);
7516 __ Dup(z11.VnD(), 0);
7517 __ Mov(z10.VnD(), p4.Merging(), z31.VnD());
7518 __ Mov(z11.VnD(), p4.Merging(), z0.VnD());
7519
7520 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7521 __ Ld2b(z31.VnB(), z0.VnB(), p7.Zeroing(), SVEMemOperand(x0, x1));
7522 __ Mov(z20, z31);
7523 __ Mov(z21, z0);
7524
7525 __ Ld2h(z22.VnH(), z23.VnH(), p6.Zeroing(), SVEMemOperand(x0, x2, LSL, 1));
7526 __ Ld2w(z24.VnS(), z25.VnS(), p5.Zeroing(), SVEMemOperand(x0, x3, LSL, 2));
7527 __ Ld2d(z26.VnD(), z27.VnD(), p4.Zeroing(), SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007528
7529 END();
7530
7531 if (CAN_RUN()) {
7532 RUN();
7533
7534 uint8_t* expected = new uint8_t[data_size];
7535 memset(expected, 0, data_size);
7536 uint8_t* middle = &expected[data_size / 2];
7537
7538 int vl_b = vl / kBRegSizeInBytes;
7539 int vl_h = vl / kHRegSizeInBytes;
7540 int vl_s = vl / kSRegSizeInBytes;
7541 int vl_d = vl / kDRegSizeInBytes;
7542
7543 int reg_count = 2;
7544
Jacob Bramleye483ce52019-11-05 16:52:29 +00007545 // st2b { z10.b, z11.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007546 int vl_b_mul4 = vl_b - (vl_b % 4);
7547 for (int i = 0; i < vl_b_mul4; i++) {
7548 uint8_t lane0 = -4 + (11 * i);
7549 uint8_t lane1 = -5 + (11 * i);
7550 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7551 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7552 }
7553
Jacob Bramleye483ce52019-11-05 16:52:29 +00007554 // st2h { z12.h, z13.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007555 if (vl_h >= 16) {
7556 for (int i = 0; i < 16; i++) {
7557 int64_t offset = (3 << kHRegSizeInBytesLog2) * vl;
7558 uint16_t lane0 = 6 - (2 * i);
7559 uint16_t lane1 = 7 - (2 * i);
7560 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7561 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7562 }
7563 }
7564
Jacob Bramleye483ce52019-11-05 16:52:29 +00007565 // st2w { z14.s, z15.s }, ((i % 5) == 0)
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007566 for (int i = 0; i < vl_s; i++) {
7567 if ((i % 5) == 0) {
7568 int64_t offset = -(3 << kSRegSizeInBytesLog2) * vl;
7569 uint32_t lane0 = -7 + (3 * i);
7570 uint32_t lane1 = -8 + (3 * i);
7571 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7572 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7573 }
7574 }
7575
7576 // st2d { z31.d, z0.d }, SVE_MUL3
7577 int vl_d_mul3 = vl_d - (vl_d % 3);
7578 for (int i = 0; i < vl_d_mul3; i++) {
7579 int64_t offset = (1 << kDRegSizeInBytesLog2) * vl;
7580 uint64_t lane0 = 32 - (11 * i);
7581 uint64_t lane1 = 33 - (11 * i);
7582 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7583 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7584 }
7585
7586 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7587
Jacob Bramleye483ce52019-11-05 16:52:29 +00007588 // Check that we loaded back the expected values.
7589
7590 // st2b/ld2b
7591 ASSERT_EQUAL_SVE(z4, z20);
7592 ASSERT_EQUAL_SVE(z5, z21);
7593
7594 // st2h/ld2h
7595 ASSERT_EQUAL_SVE(z6, z22);
7596 ASSERT_EQUAL_SVE(z7, z23);
7597
7598 // st2w/ld2w
7599 ASSERT_EQUAL_SVE(z8, z24);
7600 ASSERT_EQUAL_SVE(z9, z25);
7601
7602 // st2d/ld2d
7603 ASSERT_EQUAL_SVE(z10, z26);
7604 ASSERT_EQUAL_SVE(z11, z27);
7605
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007606 delete[] expected;
7607 }
7608 delete[] data;
7609}
7610
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007611TEST_SVE(sve_ld3_st3_scalar_plus_imm) {
7612 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7613 START();
7614
7615 int vl = config->sve_vl_in_bytes();
7616
7617 // The immediate can address [-24, 21] times the VL, so allocate enough space
7618 // to exceed that in both directions.
7619 int data_size = vl * 128;
7620
7621 uint8_t* data = new uint8_t[data_size];
7622 memset(data, 0, data_size);
7623
7624 // Set the base half-way through the buffer so we can use negative indices.
7625 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7626
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007627 // We can test ld3 by comparing the values loaded with the values stored.
7628 // There are two complications:
7629 // - Loads have zeroing predication, so we have to clear the inactive
7630 // elements on our reference.
7631 // - We want to test both loads and stores that span { z31, z0 }, so we have
7632 // to move some values around.
7633 //
7634 // Registers z4-z15 will hold as-stored values (with inactive elements
7635 // cleared). Registers z16-z27 will hold the values that were loaded.
7636
7637 __ Index(z10.VnB(), 1, -3);
7638 __ Index(z11.VnB(), 2, -3);
7639 __ Index(z12.VnB(), 3, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007640 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007641 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p0, SVEMemOperand(x0));
7642 // Save the stored values for ld3 tests.
7643 __ Dup(z4.VnB(), 0);
7644 __ Dup(z5.VnB(), 0);
7645 __ Dup(z6.VnB(), 0);
7646 __ Mov(z4.VnB(), p0.Merging(), z10.VnB());
7647 __ Mov(z5.VnB(), p0.Merging(), z11.VnB());
7648 __ Mov(z6.VnB(), p0.Merging(), z12.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007649
7650 // Wrap around from z31 to z0.
7651 __ Index(z31.VnH(), -2, 5);
7652 __ Index(z0.VnH(), -3, 5);
7653 __ Index(z1.VnH(), -4, 5);
7654 __ Ptrue(p1.VnH(), SVE_MUL3);
7655 __ St3h(z31.VnH(), z0.VnH(), z1.VnH(), p1, SVEMemOperand(x0, 9, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007656 // Save the stored values for ld3 tests.
7657 __ Dup(z7.VnH(), 0);
7658 __ Dup(z8.VnH(), 0);
7659 __ Dup(z9.VnH(), 0);
7660 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
7661 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
7662 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007663
7664 __ Index(z30.VnS(), 3, -7);
7665 __ Index(z31.VnS(), 4, -7);
7666 __ Index(z0.VnS(), 5, -7);
7667 __ Ptrue(p2.VnS(), SVE_POW2);
7668 __ St3w(z30.VnS(),
7669 z31.VnS(),
7670 z0.VnS(),
7671 p2,
7672 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007673 // Save the stored values for ld3 tests.
7674 __ Dup(z10.VnS(), 0);
7675 __ Dup(z11.VnS(), 0);
7676 __ Dup(z12.VnS(), 0);
7677 __ Mov(z10.VnS(), p2.Merging(), z30.VnS());
7678 __ Mov(z11.VnS(), p2.Merging(), z31.VnS());
7679 __ Mov(z12.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007680
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007681 __ Index(z0.VnD(), -7, 3);
7682 __ Index(z1.VnD(), -8, 3);
7683 __ Index(z2.VnD(), -9, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007684 // Sparse predication, including some irrelevant bits (0xee). To make the
7685 // results easy to check, activate each lane <n> where n is a multiple of 5.
7686 Initialise(&masm,
7687 p3,
7688 0xeee10000000001ee,
7689 0xeeeeeee100000000,
7690 0x01eeeeeeeee10000,
7691 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007692 __ St3d(z0.VnD(), z1.VnD(), z2.VnD(), p3, SVEMemOperand(x0, 15, SVE_MUL_VL));
7693 // Save the stored values for ld3 tests.
7694 __ Dup(z13.VnD(), 0);
7695 __ Dup(z14.VnD(), 0);
7696 __ Dup(z15.VnD(), 0);
7697 __ Mov(z13.VnD(), p3.Merging(), z0.VnD());
7698 __ Mov(z14.VnD(), p3.Merging(), z1.VnD());
7699 __ Mov(z15.VnD(), p3.Merging(), z2.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007700
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007701 // Corresponding loads.
7702 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7703 __ Ld3b(z31.VnB(), z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(x0));
7704 __ Mov(z16, z31);
7705 __ Mov(z17, z0);
7706 __ Mov(z18, z1);
7707 __ Ld3h(z30.VnH(),
7708 z31.VnH(),
7709 z0.VnH(),
7710 p1.Zeroing(),
7711 SVEMemOperand(x0, 9, SVE_MUL_VL));
7712 __ Mov(z19, z30);
7713 __ Mov(z20, z31);
7714 __ Mov(z21, z0);
7715 __ Ld3w(z22.VnS(),
7716 z23.VnS(),
7717 z24.VnS(),
7718 p2.Zeroing(),
7719 SVEMemOperand(x0, -12, SVE_MUL_VL));
7720 __ Ld3d(z25.VnD(),
7721 z26.VnD(),
7722 z27.VnD(),
7723 p3.Zeroing(),
7724 SVEMemOperand(x0, 15, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007725
7726 END();
7727
7728 if (CAN_RUN()) {
7729 RUN();
7730
7731 uint8_t* expected = new uint8_t[data_size];
7732 memset(expected, 0, data_size);
7733 uint8_t* middle = &expected[data_size / 2];
7734
7735 int vl_b = vl / kBRegSizeInBytes;
7736 int vl_h = vl / kHRegSizeInBytes;
7737 int vl_s = vl / kSRegSizeInBytes;
7738 int vl_d = vl / kDRegSizeInBytes;
7739
7740 int reg_count = 3;
7741
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007742 // st3b { z10.b, z11.b, z12.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007743 for (int i = 0; i < vl_b; i++) {
7744 uint8_t lane0 = 1 - (3 * i);
7745 uint8_t lane1 = 2 - (3 * i);
7746 uint8_t lane2 = 3 - (3 * i);
7747 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7748 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7749 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
7750 }
7751
7752 // st3h { z31.h, z0.h, z1.h }, SVE_MUL3
7753 int vl_h_mul3 = vl_h - (vl_h % 3);
7754 for (int i = 0; i < vl_h_mul3; i++) {
7755 int64_t offset = 9 * vl;
7756 uint16_t lane0 = -2 + (5 * i);
7757 uint16_t lane1 = -3 + (5 * i);
7758 uint16_t lane2 = -4 + (5 * i);
7759 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7760 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7761 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7762 }
7763
7764 // st3w { z30.s, z31.s, z0.s }, SVE_POW2
7765 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7766 for (int i = 0; i < vl_s_pow2; i++) {
7767 int64_t offset = -12 * vl;
7768 uint32_t lane0 = 3 - (7 * i);
7769 uint32_t lane1 = 4 - (7 * i);
7770 uint32_t lane2 = 5 - (7 * i);
7771 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7772 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7773 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7774 }
7775
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007776 // st3d { z0.d, z1.d, z2.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007777 for (int i = 0; i < vl_d; i++) {
7778 if ((i % 5) == 0) {
7779 int64_t offset = 15 * vl;
7780 uint64_t lane0 = -7 + (3 * i);
7781 uint64_t lane1 = -8 + (3 * i);
7782 uint64_t lane2 = -9 + (3 * i);
7783 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7784 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7785 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7786 }
7787 }
7788
7789 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7790
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007791 // Check that we loaded back the expected values.
7792
7793 // st3b/ld3b
7794 ASSERT_EQUAL_SVE(z4, z16);
7795 ASSERT_EQUAL_SVE(z5, z17);
7796 ASSERT_EQUAL_SVE(z6, z18);
7797
7798 // st3h/ld3h
7799 ASSERT_EQUAL_SVE(z7, z19);
7800 ASSERT_EQUAL_SVE(z8, z20);
7801 ASSERT_EQUAL_SVE(z9, z21);
7802
7803 // st3w/ld3w
7804 ASSERT_EQUAL_SVE(z10, z22);
7805 ASSERT_EQUAL_SVE(z11, z23);
7806 ASSERT_EQUAL_SVE(z12, z24);
7807
7808 // st3d/ld3d
7809 ASSERT_EQUAL_SVE(z13, z25);
7810 ASSERT_EQUAL_SVE(z14, z26);
7811 ASSERT_EQUAL_SVE(z15, z27);
7812
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007813 delete[] expected;
7814 }
7815 delete[] data;
7816}
7817
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007818TEST_SVE(sve_ld3_st3_scalar_plus_scalar) {
7819 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7820 START();
7821
7822 int vl = config->sve_vl_in_bytes();
7823
7824 // Allocate plenty of space to enable indexing in both directions.
7825 int data_size = vl * 128;
7826
7827 uint8_t* data = new uint8_t[data_size];
7828 memset(data, 0, data_size);
7829
7830 // Set the base half-way through the buffer so we can use negative indices.
7831 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7832
Jacob Bramleye483ce52019-11-05 16:52:29 +00007833 // We can test ld3 by comparing the values loaded with the values stored.
7834 // There are two complications:
7835 // - Loads have zeroing predication, so we have to clear the inactive
7836 // elements on our reference.
7837 // - We want to test both loads and stores that span { z31, z0 }, so we have
7838 // to move some values around.
7839 //
7840 // Registers z4-z15 will hold as-stored values (with inactive elements
7841 // cleared). Registers z16-z27 will hold the values that were loaded.
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007842
Jacob Bramleye483ce52019-11-05 16:52:29 +00007843 __ Index(z10.VnB(), -4, 11);
7844 __ Index(z11.VnB(), -5, 11);
7845 __ Index(z12.VnB(), -6, 11);
7846 __ Ptrue(p7.VnB(), SVE_MUL4);
7847 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
7848 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p7, SVEMemOperand(x0, x1, LSL, 0));
7849 // Save the stored values for ld3 tests.
7850 __ Dup(z4.VnB(), 0);
7851 __ Dup(z5.VnB(), 0);
7852 __ Dup(z6.VnB(), 0);
7853 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
7854 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
7855 __ Mov(z6.VnB(), p7.Merging(), z12.VnB());
7856
7857 __ Index(z13.VnH(), 6, -2);
7858 __ Index(z14.VnH(), 7, -2);
7859 __ Index(z15.VnH(), 8, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007860 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007861 __ Rdvl(x2, 5); // (5 * vl) << 1 = 10 * vl
7862 __ St3h(z13.VnH(), z14.VnH(), z15.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
7863 // Save the stored values for ld3 tests.
7864 __ Dup(z7.VnH(), 0);
7865 __ Dup(z8.VnH(), 0);
7866 __ Dup(z9.VnH(), 0);
7867 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
7868 __ Mov(z8.VnH(), p6.Merging(), z14.VnH());
7869 __ Mov(z9.VnH(), p6.Merging(), z15.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007870
7871 // Wrap around from z31 to z0.
7872 __ Index(z30.VnS(), -7, 3);
7873 __ Index(z31.VnS(), -8, 3);
7874 __ Index(z0.VnS(), -9, 3);
7875 // Sparse predication, including some irrelevant bits (0xe). To make the
7876 // results easy to check, activate each lane <n> where n is a multiple of 5.
7877 Initialise(&masm,
7878 p5,
7879 0xeee1000010000100,
7880 0x001eeee100001000,
7881 0x0100001eeee10000,
7882 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007883 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
7884 __ St3w(z30.VnS(), z31.VnS(), z0.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
7885 // Save the stored values for ld3 tests.
7886 __ Dup(z10.VnS(), 0);
7887 __ Dup(z11.VnS(), 0);
7888 __ Dup(z12.VnS(), 0);
7889 __ Mov(z10.VnS(), p5.Merging(), z30.VnS());
7890 __ Mov(z11.VnS(), p5.Merging(), z31.VnS());
7891 __ Mov(z12.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007892
7893 __ Index(z31.VnD(), 32, -11);
7894 __ Index(z0.VnD(), 33, -11);
7895 __ Index(z1.VnD(), 34, -11);
7896 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007897 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
7898 __ St3d(z31.VnD(), z0.VnD(), z1.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
7899 // Save the stored values for ld3 tests.
7900 __ Dup(z13.VnD(), 0);
7901 __ Dup(z14.VnD(), 0);
7902 __ Dup(z15.VnD(), 0);
7903 __ Mov(z13.VnD(), p4.Merging(), z31.VnD());
7904 __ Mov(z14.VnD(), p4.Merging(), z0.VnD());
7905 __ Mov(z15.VnD(), p4.Merging(), z1.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007906
Jacob Bramleye483ce52019-11-05 16:52:29 +00007907 // Corresponding loads.
7908 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7909 __ Ld3b(z31.VnB(),
7910 z0.VnB(),
7911 z1.VnB(),
7912 p7.Zeroing(),
7913 SVEMemOperand(x0, x1, LSL, 0));
7914 __ Mov(z16, z31);
7915 __ Mov(z17, z0);
7916 __ Mov(z18, z1);
7917 __ Ld3h(z30.VnH(),
7918 z31.VnH(),
7919 z0.VnH(),
7920 p6.Zeroing(),
7921 SVEMemOperand(x0, x2, LSL, 1));
7922 __ Mov(z19, z30);
7923 __ Mov(z20, z31);
7924 __ Mov(z21, z0);
7925 __ Ld3w(z22.VnS(),
7926 z23.VnS(),
7927 z24.VnS(),
7928 p5.Zeroing(),
7929 SVEMemOperand(x0, x3, LSL, 2));
7930 __ Ld3d(z25.VnD(),
7931 z26.VnD(),
7932 z27.VnD(),
7933 p4.Zeroing(),
7934 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007935
7936 END();
7937
7938 if (CAN_RUN()) {
7939 RUN();
7940
7941 uint8_t* expected = new uint8_t[data_size];
7942 memset(expected, 0, data_size);
7943 uint8_t* middle = &expected[data_size / 2];
7944
7945 int vl_b = vl / kBRegSizeInBytes;
7946 int vl_h = vl / kHRegSizeInBytes;
7947 int vl_s = vl / kSRegSizeInBytes;
7948 int vl_d = vl / kDRegSizeInBytes;
7949
7950 int reg_count = 3;
7951
Jacob Bramleye483ce52019-11-05 16:52:29 +00007952 // st3b { z10.b, z11.b, z12.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007953 int vl_b_mul4 = vl_b - (vl_b % 4);
7954 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007955 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007956 uint8_t lane0 = -4 + (11 * i);
7957 uint8_t lane1 = -5 + (11 * i);
7958 uint8_t lane2 = -6 + (11 * i);
7959 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7960 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7961 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7962 }
7963
Jacob Bramleye483ce52019-11-05 16:52:29 +00007964 // st3h { z13.h, z14.h, z15.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007965 if (vl_h >= 16) {
7966 for (int i = 0; i < 16; i++) {
7967 int64_t offset = (5 << kHRegSizeInBytesLog2) * vl;
7968 uint16_t lane0 = 6 - (2 * i);
7969 uint16_t lane1 = 7 - (2 * i);
7970 uint16_t lane2 = 8 - (2 * i);
7971 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7972 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7973 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7974 }
7975 }
7976
7977 // st3w { z30.s, z31.s, z0.s }, ((i % 5) == 0)
7978 for (int i = 0; i < vl_s; i++) {
7979 if ((i % 5) == 0) {
7980 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
7981 uint32_t lane0 = -7 + (3 * i);
7982 uint32_t lane1 = -8 + (3 * i);
7983 uint32_t lane2 = -9 + (3 * i);
7984 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7985 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7986 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7987 }
7988 }
7989
7990 // st3d { z31.d, z0.d, z1.d }, SVE_MUL3
7991 int vl_d_mul3 = vl_d - (vl_d % 3);
7992 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007993 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007994 uint64_t lane0 = 32 - (11 * i);
7995 uint64_t lane1 = 33 - (11 * i);
7996 uint64_t lane2 = 34 - (11 * i);
7997 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7998 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7999 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8000 }
8001
8002 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
8003
Jacob Bramleye483ce52019-11-05 16:52:29 +00008004 // Check that we loaded back the expected values.
8005
8006 // st3b/ld3b
8007 ASSERT_EQUAL_SVE(z4, z16);
8008 ASSERT_EQUAL_SVE(z5, z17);
8009 ASSERT_EQUAL_SVE(z6, z18);
8010
8011 // st3h/ld3h
8012 ASSERT_EQUAL_SVE(z7, z19);
8013 ASSERT_EQUAL_SVE(z8, z20);
8014 ASSERT_EQUAL_SVE(z9, z21);
8015
8016 // st3w/ld3w
8017 ASSERT_EQUAL_SVE(z10, z22);
8018 ASSERT_EQUAL_SVE(z11, z23);
8019 ASSERT_EQUAL_SVE(z12, z24);
8020
8021 // st3d/ld3d
8022 ASSERT_EQUAL_SVE(z13, z25);
8023 ASSERT_EQUAL_SVE(z14, z26);
8024 ASSERT_EQUAL_SVE(z15, z27);
8025
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008026 delete[] expected;
8027 }
8028 delete[] data;
8029}
8030
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008031TEST_SVE(sve_ld4_st4_scalar_plus_imm) {
8032 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8033 START();
8034
8035 int vl = config->sve_vl_in_bytes();
8036
8037 // The immediate can address [-32, 28] times the VL, so allocate enough space
8038 // to exceed that in both directions.
8039 int data_size = vl * 128;
8040
8041 uint8_t* data = new uint8_t[data_size];
8042 memset(data, 0, data_size);
8043
8044 // Set the base half-way through the buffer so we can use negative indices.
8045 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
8046
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008047 // We can test ld4 by comparing the values loaded with the values stored.
8048 // There are two complications:
8049 // - Loads have zeroing predication, so we have to clear the inactive
8050 // elements on our reference.
8051 // - We want to test both loads and stores that span { z31, z0 }, so we have
8052 // to move some values around.
8053 //
8054 // Registers z3-z18 will hold as-stored values (with inactive elements
8055 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
8056 // loaded.
8057
8058 __ Index(z10.VnB(), 1, -7);
8059 __ Index(z11.VnB(), 2, -7);
8060 __ Index(z12.VnB(), 3, -7);
8061 __ Index(z13.VnB(), 4, -7);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008062 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008063 __ St4b(z10.VnB(), z11.VnB(), z12.VnB(), z13.VnB(), p0, SVEMemOperand(x0));
8064 // Save the stored values for ld4 tests.
8065 __ Dup(z3.VnB(), 0);
8066 __ Dup(z4.VnB(), 0);
8067 __ Dup(z5.VnB(), 0);
8068 __ Dup(z6.VnB(), 0);
8069 __ Mov(z3.VnB(), p0.Merging(), z10.VnB());
8070 __ Mov(z4.VnB(), p0.Merging(), z11.VnB());
8071 __ Mov(z5.VnB(), p0.Merging(), z12.VnB());
8072 __ Mov(z6.VnB(), p0.Merging(), z13.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008073
8074 // Wrap around from z31 to z0.
8075 __ Index(z31.VnH(), -2, 5);
8076 __ Index(z0.VnH(), -3, 5);
8077 __ Index(z1.VnH(), -4, 5);
8078 __ Index(z2.VnH(), -5, 5);
8079 __ Ptrue(p1.VnH(), SVE_MUL3);
8080 __ St4h(z31.VnH(),
8081 z0.VnH(),
8082 z1.VnH(),
8083 z2.VnH(),
8084 p1,
8085 SVEMemOperand(x0, 4, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008086 // Save the stored values for ld4 tests.
8087 __ Dup(z7.VnH(), 0);
8088 __ Dup(z8.VnH(), 0);
8089 __ Dup(z9.VnH(), 0);
8090 __ Dup(z10.VnH(), 0);
8091 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
8092 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
8093 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
8094 __ Mov(z10.VnH(), p1.Merging(), z2.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008095
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008096 // Wrap around from z31 to z0.
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008097 __ Index(z29.VnS(), 2, -7);
8098 __ Index(z30.VnS(), 3, -7);
8099 __ Index(z31.VnS(), 4, -7);
8100 __ Index(z0.VnS(), 5, -7);
8101 __ Ptrue(p2.VnS(), SVE_POW2);
8102 __ St4w(z29.VnS(),
8103 z30.VnS(),
8104 z31.VnS(),
8105 z0.VnS(),
8106 p2,
8107 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008108 // Save the stored values for ld4 tests.
8109 __ Dup(z11.VnS(), 0);
8110 __ Dup(z12.VnS(), 0);
8111 __ Dup(z13.VnS(), 0);
8112 __ Dup(z14.VnS(), 0);
8113 __ Mov(z11.VnS(), p2.Merging(), z29.VnS());
8114 __ Mov(z12.VnS(), p2.Merging(), z30.VnS());
8115 __ Mov(z13.VnS(), p2.Merging(), z31.VnS());
8116 __ Mov(z14.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008117
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008118 __ Index(z20.VnD(), -7, 8);
8119 __ Index(z21.VnD(), -8, 8);
8120 __ Index(z22.VnD(), -9, 8);
8121 __ Index(z23.VnD(), -10, 8);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008122 // Sparse predication, including some irrelevant bits (0xee). To make the
8123 // results easy to check, activate each lane <n> where n is a multiple of 5.
8124 Initialise(&masm,
8125 p3,
8126 0xeee10000000001ee,
8127 0xeeeeeee100000000,
8128 0x01eeeeeeeee10000,
8129 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008130 __ St4d(z20.VnD(),
8131 z21.VnD(),
8132 z22.VnD(),
8133 z23.VnD(),
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008134 p3,
8135 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008136 // Save the stored values for ld4 tests.
8137 __ Dup(z15.VnD(), 0);
8138 __ Dup(z16.VnD(), 0);
8139 __ Dup(z17.VnD(), 0);
8140 __ Dup(z18.VnD(), 0);
8141 __ Mov(z15.VnD(), p3.Merging(), z20.VnD());
8142 __ Mov(z16.VnD(), p3.Merging(), z21.VnD());
8143 __ Mov(z17.VnD(), p3.Merging(), z22.VnD());
8144 __ Mov(z18.VnD(), p3.Merging(), z23.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008145
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008146 // Corresponding loads.
8147 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
8148 __ Ld4b(z31.VnB(),
8149 z0.VnB(),
8150 z1.VnB(),
8151 z2.VnB(),
8152 p0.Zeroing(),
8153 SVEMemOperand(x0));
8154 __ Mov(z19, z31);
8155 __ Mov(z20, z0);
8156 __ Mov(z21, z1);
8157 __ Mov(z22, z2);
8158 __ Ld4h(z23.VnH(),
8159 z24.VnH(),
8160 z25.VnH(),
8161 z26.VnH(),
8162 p1.Zeroing(),
8163 SVEMemOperand(x0, 4, SVE_MUL_VL));
8164 __ Ld4w(z27.VnS(),
8165 z28.VnS(),
8166 z29.VnS(),
8167 z30.VnS(),
8168 p2.Zeroing(),
8169 SVEMemOperand(x0, -12, SVE_MUL_VL));
8170 // Wrap around from z31 to z0.
8171 __ Ld4d(z31.VnD(),
8172 z0.VnD(),
8173 z1.VnD(),
8174 z2.VnD(),
8175 p3.Zeroing(),
8176 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008177
8178 END();
8179
8180 if (CAN_RUN()) {
8181 RUN();
8182
8183 uint8_t* expected = new uint8_t[data_size];
8184 memset(expected, 0, data_size);
8185 uint8_t* middle = &expected[data_size / 2];
8186
8187 int vl_b = vl / kBRegSizeInBytes;
8188 int vl_h = vl / kHRegSizeInBytes;
8189 int vl_s = vl / kSRegSizeInBytes;
8190 int vl_d = vl / kDRegSizeInBytes;
8191
8192 int reg_count = 4;
8193
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008194 // st4b { z10.b, z11.b, z12.b, z13.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008195 for (int i = 0; i < vl_b; i++) {
8196 uint8_t lane0 = 1 - (7 * i);
8197 uint8_t lane1 = 2 - (7 * i);
8198 uint8_t lane2 = 3 - (7 * i);
8199 uint8_t lane3 = 4 - (7 * i);
8200 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
8201 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
8202 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
8203 MemoryWrite(middle, 0, (i * reg_count) + 3, lane3);
8204 }
8205
8206 // st4h { z31.h, z0.h, z1.h, z2.h }, SVE_MUL3
8207 int vl_h_mul3 = vl_h - (vl_h % 3);
8208 for (int i = 0; i < vl_h_mul3; i++) {
8209 int64_t offset = 4 * vl;
8210 uint16_t lane0 = -2 + (5 * i);
8211 uint16_t lane1 = -3 + (5 * i);
8212 uint16_t lane2 = -4 + (5 * i);
8213 uint16_t lane3 = -5 + (5 * i);
8214 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8215 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8216 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8217 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8218 }
8219
8220 // st4w { z29.s, z30.s, z31.s, z0.s }, SVE_POW2
8221 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
8222 for (int i = 0; i < vl_s_pow2; i++) {
8223 int64_t offset = -12 * vl;
8224 uint32_t lane0 = 2 - (7 * i);
8225 uint32_t lane1 = 3 - (7 * i);
8226 uint32_t lane2 = 4 - (7 * i);
8227 uint32_t lane3 = 5 - (7 * i);
8228 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8229 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8230 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8231 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8232 }
8233
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008234 // st4d { z20.d, z21.d, z22.d, z23.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008235 for (int i = 0; i < vl_d; i++) {
8236 if ((i % 5) == 0) {
8237 int64_t offset = 16 * vl;
8238 uint64_t lane0 = -7 + (8 * i);
8239 uint64_t lane1 = -8 + (8 * i);
8240 uint64_t lane2 = -9 + (8 * i);
8241 uint64_t lane3 = -10 + (8 * i);
8242 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8243 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8244 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8245 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8246 }
8247 }
8248
8249 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
8250
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008251 // Check that we loaded back the expected values.
8252
8253 // st4b/ld4b
8254 ASSERT_EQUAL_SVE(z3, z19);
8255 ASSERT_EQUAL_SVE(z4, z20);
8256 ASSERT_EQUAL_SVE(z5, z21);
8257 ASSERT_EQUAL_SVE(z6, z22);
8258
8259 // st4h/ld4h
8260 ASSERT_EQUAL_SVE(z7, z23);
8261 ASSERT_EQUAL_SVE(z8, z24);
8262 ASSERT_EQUAL_SVE(z9, z25);
8263 ASSERT_EQUAL_SVE(z10, z26);
8264
8265 // st4w/ld4w
8266 ASSERT_EQUAL_SVE(z11, z27);
8267 ASSERT_EQUAL_SVE(z12, z28);
8268 ASSERT_EQUAL_SVE(z13, z29);
8269 ASSERT_EQUAL_SVE(z14, z30);
8270
8271 // st4d/ld4d
8272 ASSERT_EQUAL_SVE(z15, z31);
8273 ASSERT_EQUAL_SVE(z16, z0);
8274 ASSERT_EQUAL_SVE(z17, z1);
8275 ASSERT_EQUAL_SVE(z18, z2);
8276
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008277 delete[] expected;
8278 }
8279 delete[] data;
8280}
8281
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008282TEST_SVE(sve_ld4_st4_scalar_plus_scalar) {
8283 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8284 START();
8285
8286 int vl = config->sve_vl_in_bytes();
8287
8288 // Allocate plenty of space to enable indexing in both directions.
8289 int data_size = vl * 128;
8290
8291 uint8_t* data = new uint8_t[data_size];
8292 memset(data, 0, data_size);
8293
8294 // Set the base half-way through the buffer so we can use negative indices.
8295 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
8296
Jacob Bramleye483ce52019-11-05 16:52:29 +00008297 // We can test ld4 by comparing the values loaded with the values stored.
8298 // There are two complications:
8299 // - Loads have zeroing predication, so we have to clear the inactive
8300 // elements on our reference.
8301 // - We want to test both loads and stores that span { z31, z0 }, so we have
8302 // to move some values around.
8303 //
8304 // Registers z3-z18 will hold as-stored values (with inactive elements
8305 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
8306 // loaded.
8307
8308 __ Index(z19.VnB(), -4, 11);
8309 __ Index(z20.VnB(), -5, 11);
8310 __ Index(z21.VnB(), -6, 11);
8311 __ Index(z22.VnB(), -7, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008312 __ Ptrue(p7.VnB(), SVE_MUL4);
Jacob Bramleye483ce52019-11-05 16:52:29 +00008313 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
8314 __ St4b(z19.VnB(),
8315 z20.VnB(),
8316 z21.VnB(),
8317 z22.VnB(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008318 p7,
8319 SVEMemOperand(x0, x1, LSL, 0));
Jacob Bramleye483ce52019-11-05 16:52:29 +00008320 // Save the stored values for ld4 tests.
8321 __ Dup(z3.VnB(), 0);
8322 __ Dup(z4.VnB(), 0);
8323 __ Dup(z5.VnB(), 0);
8324 __ Dup(z6.VnB(), 0);
8325 __ Mov(z3.VnB(), p7.Merging(), z19.VnB());
8326 __ Mov(z4.VnB(), p7.Merging(), z20.VnB());
8327 __ Mov(z5.VnB(), p7.Merging(), z21.VnB());
8328 __ Mov(z6.VnB(), p7.Merging(), z22.VnB());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008329
Jacob Bramleye483ce52019-11-05 16:52:29 +00008330 __ Index(z23.VnH(), 6, -2);
8331 __ Index(z24.VnH(), 7, -2);
8332 __ Index(z25.VnH(), 8, -2);
8333 __ Index(z26.VnH(), 9, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008334 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00008335 __ Rdvl(x2, 7); // (7 * vl) << 1 = 14 * vl
8336 __ St4h(z23.VnH(),
8337 z24.VnH(),
8338 z25.VnH(),
8339 z26.VnH(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008340 p6,
8341 SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleye483ce52019-11-05 16:52:29 +00008342 // Save the stored values for ld4 tests.
8343 __ Dup(z7.VnH(), 0);
8344 __ Dup(z8.VnH(), 0);
8345 __ Dup(z9.VnH(), 0);
8346 __ Dup(z10.VnH(), 0);
8347 __ Mov(z7.VnH(), p6.Merging(), z23.VnH());
8348 __ Mov(z8.VnH(), p6.Merging(), z24.VnH());
8349 __ Mov(z9.VnH(), p6.Merging(), z25.VnH());
8350 __ Mov(z10.VnH(), p6.Merging(), z26.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008351
8352 // Wrap around from z31 to z0.
8353 __ Index(z29.VnS(), -6, 7);
8354 __ Index(z30.VnS(), -7, 7);
8355 __ Index(z31.VnS(), -8, 7);
8356 __ Index(z0.VnS(), -9, 7);
8357 // Sparse predication, including some irrelevant bits (0xe). To make the
8358 // results easy to check, activate each lane <n> where n is a multiple of 5.
8359 Initialise(&masm,
8360 p5,
8361 0xeee1000010000100,
8362 0x001eeee100001000,
8363 0x0100001eeee10000,
8364 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00008365 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008366 __ St4w(z29.VnS(),
8367 z30.VnS(),
8368 z31.VnS(),
8369 z0.VnS(),
8370 p5,
Jacob Bramleye483ce52019-11-05 16:52:29 +00008371 SVEMemOperand(x0, x3, LSL, 2));
8372 // Save the stored values for ld4 tests.
8373 __ Dup(z11.VnS(), 0);
8374 __ Dup(z12.VnS(), 0);
8375 __ Dup(z13.VnS(), 0);
8376 __ Dup(z14.VnS(), 0);
8377 __ Mov(z11.VnS(), p5.Merging(), z29.VnS());
8378 __ Mov(z12.VnS(), p5.Merging(), z30.VnS());
8379 __ Mov(z13.VnS(), p5.Merging(), z31.VnS());
8380 __ Mov(z14.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008381
8382 __ Index(z31.VnD(), 32, -11);
8383 __ Index(z0.VnD(), 33, -11);
8384 __ Index(z1.VnD(), 34, -11);
8385 __ Index(z2.VnD(), 35, -11);
8386 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00008387 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008388 __ St4d(z31.VnD(),
8389 z0.VnD(),
8390 z1.VnD(),
8391 z2.VnD(),
8392 p4,
Jacob Bramleye483ce52019-11-05 16:52:29 +00008393 SVEMemOperand(x0, x4, LSL, 3));
8394 // Save the stored values for ld4 tests.
8395 __ Dup(z15.VnD(), 0);
8396 __ Dup(z16.VnD(), 0);
8397 __ Dup(z17.VnD(), 0);
8398 __ Dup(z18.VnD(), 0);
8399 __ Mov(z15.VnD(), p4.Merging(), z31.VnD());
8400 __ Mov(z16.VnD(), p4.Merging(), z0.VnD());
8401 __ Mov(z17.VnD(), p4.Merging(), z1.VnD());
8402 __ Mov(z18.VnD(), p4.Merging(), z2.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008403
Jacob Bramleye483ce52019-11-05 16:52:29 +00008404 // Corresponding loads.
8405 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
8406 __ Ld4b(z31.VnB(),
8407 z0.VnB(),
8408 z1.VnB(),
8409 z2.VnB(),
8410 p7.Zeroing(),
8411 SVEMemOperand(x0, x1, LSL, 0));
8412 __ Mov(z19, z31);
8413 __ Mov(z20, z0);
8414 __ Mov(z21, z1);
8415 __ Mov(z22, z2);
8416 __ Ld4h(z23.VnH(),
8417 z24.VnH(),
8418 z25.VnH(),
8419 z26.VnH(),
8420 p6.Zeroing(),
8421 SVEMemOperand(x0, x2, LSL, 1));
8422 __ Ld4w(z27.VnS(),
8423 z28.VnS(),
8424 z29.VnS(),
8425 z30.VnS(),
8426 p5.Zeroing(),
8427 SVEMemOperand(x0, x3, LSL, 2));
8428 // Wrap around from z31 to z0.
8429 __ Ld4d(z31.VnD(),
8430 z0.VnD(),
8431 z1.VnD(),
8432 z2.VnD(),
8433 p4.Zeroing(),
8434 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008435
8436 END();
8437
8438 if (CAN_RUN()) {
8439 RUN();
8440
8441 uint8_t* expected = new uint8_t[data_size];
8442 memset(expected, 0, data_size);
8443 uint8_t* middle = &expected[data_size / 2];
8444
8445 int vl_b = vl / kBRegSizeInBytes;
8446 int vl_h = vl / kHRegSizeInBytes;
8447 int vl_s = vl / kSRegSizeInBytes;
8448 int vl_d = vl / kDRegSizeInBytes;
8449
8450 int reg_count = 4;
8451
Jacob Bramleye483ce52019-11-05 16:52:29 +00008452 // st4b { z19.b, z20.b, z21.b, z22.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008453 int vl_b_mul4 = vl_b - (vl_b % 4);
8454 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00008455 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008456 uint8_t lane0 = -4 + (11 * i);
8457 uint8_t lane1 = -5 + (11 * i);
8458 uint8_t lane2 = -6 + (11 * i);
8459 uint8_t lane3 = -7 + (11 * i);
8460 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8461 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8462 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8463 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8464 }
8465
Jacob Bramleye483ce52019-11-05 16:52:29 +00008466 // st4h { z23.h, z24.h, z25.h, z26.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008467 if (vl_h >= 16) {
8468 for (int i = 0; i < 16; i++) {
8469 int64_t offset = (7 << kHRegSizeInBytesLog2) * vl;
8470 uint16_t lane0 = 6 - (2 * i);
8471 uint16_t lane1 = 7 - (2 * i);
8472 uint16_t lane2 = 8 - (2 * i);
8473 uint16_t lane3 = 9 - (2 * i);
8474 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8475 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8476 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8477 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8478 }
8479 }
8480
8481 // st4w { z29.s, z30.s, z31.s, z0.s }, ((i % 5) == 0)
8482 for (int i = 0; i < vl_s; i++) {
8483 if ((i % 5) == 0) {
8484 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
8485 uint32_t lane0 = -6 + (7 * i);
8486 uint32_t lane1 = -7 + (7 * i);
8487 uint32_t lane2 = -8 + (7 * i);
8488 uint32_t lane3 = -9 + (7 * i);
8489 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8490 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8491 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8492 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8493 }
8494 }
8495
8496 // st4d { z31.d, z0.d, z1.d, z2.d }, SVE_MUL3
8497 int vl_d_mul3 = vl_d - (vl_d % 3);
8498 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00008499 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008500 uint64_t lane0 = 32 - (11 * i);
8501 uint64_t lane1 = 33 - (11 * i);
8502 uint64_t lane2 = 34 - (11 * i);
8503 uint64_t lane3 = 35 - (11 * i);
8504 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8505 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8506 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8507 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8508 }
8509
8510 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
8511
Jacob Bramleye483ce52019-11-05 16:52:29 +00008512 // Check that we loaded back the expected values.
8513
8514 // st4b/ld4b
8515 ASSERT_EQUAL_SVE(z3, z19);
8516 ASSERT_EQUAL_SVE(z4, z20);
8517 ASSERT_EQUAL_SVE(z5, z21);
8518 ASSERT_EQUAL_SVE(z6, z22);
8519
8520 // st4h/ld4h
8521 ASSERT_EQUAL_SVE(z7, z23);
8522 ASSERT_EQUAL_SVE(z8, z24);
8523 ASSERT_EQUAL_SVE(z9, z25);
8524 ASSERT_EQUAL_SVE(z10, z26);
8525
8526 // st4w/ld4w
8527 ASSERT_EQUAL_SVE(z11, z27);
8528 ASSERT_EQUAL_SVE(z12, z28);
8529 ASSERT_EQUAL_SVE(z13, z29);
8530 ASSERT_EQUAL_SVE(z14, z30);
8531
8532 // st4d/ld4d
8533 ASSERT_EQUAL_SVE(z15, z31);
8534 ASSERT_EQUAL_SVE(z16, z0);
8535 ASSERT_EQUAL_SVE(z17, z1);
8536 ASSERT_EQUAL_SVE(z18, z2);
8537
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008538 delete[] expected;
8539 }
8540 delete[] data;
8541}
8542
8543TEST_SVE(sve_ld234_st234_scalar_plus_scalar_sp) {
8544 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8545 START();
8546
8547 // Check that the simulator correctly interprets rn == 31 as sp.
8548 // The indexing logic is the same regardless so we just check one load and
8549 // store of each type.
8550
8551 // There are no pre- or post-indexing modes, so reserve space first.
8552 __ ClaimVL(2 + 3 + 4);
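// ClaimVL(n) is expected to reserve n * VL bytes on the stack: enough for the
// two-, three- and four-register stores below, laid out back to back at
// VL-dependent offsets from sp.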
8553
8554 __ Index(z0.VnB(), 42, 2);
8555 __ Index(z1.VnB(), 43, 2);
8556 __ Ptrue(p0.VnB(), SVE_VL7);
8557 __ Rdvl(x0, 0);
8558 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, x0));
8559
8560 __ Index(z4.VnH(), 42, 3);
8561 __ Index(z5.VnH(), 43, 3);
8562 __ Index(z6.VnH(), 44, 3);
8563 __ Ptrue(p1.VnH(), SVE_POW2);
8564 __ Rdvl(x1, 2);
8565 __ Lsr(x1, x1, 1);
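// x1 = (2 * VL) >> 1, so with the LSL #1 in the addressing mode the st3h data
// starts at sp + (2 * VL), immediately after the space reserved for the st2b
// data.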
8566 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, x1, LSL, 1));
8567
8568 __ Index(z8.VnS(), 42, 4);
8569 __ Index(z9.VnS(), 43, 4);
8570 __ Index(z10.VnS(), 44, 4);
8571 __ Index(z11.VnS(), 45, 4);
8572 __ Ptrue(p2.VnS());
8573 __ Rdvl(x2, 2 + 3);
8574 __ Lsr(x2, x2, 2);
8575 __ St4w(z8.VnS(),
8576 z9.VnS(),
8577 z10.VnS(),
8578 z11.VnS(),
8579 p2,
8580 SVEMemOperand(sp, x2, LSL, 2));
8581
Jacob Bramleye483ce52019-11-05 16:52:29 +00008582 // Corresponding loads.
8583 // We have to explicitly zero inactive lanes in the reference values because
8584 // loads have zeroing predication.
8585 __ Dup(z12.VnB(), 0);
8586 __ Dup(z13.VnB(), 0);
8587 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8588 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8589 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, x0));
8590
8591 __ Dup(z16.VnH(), 0);
8592 __ Dup(z17.VnH(), 0);
8593 __ Dup(z18.VnH(), 0);
8594 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8595 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8596 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8597 __ Ld3h(z4.VnH(),
8598 z5.VnH(),
8599 z6.VnH(),
8600 p1.Zeroing(),
8601 SVEMemOperand(sp, x1, LSL, 1));
8602
8603 __ Dup(z20.VnS(), 0);
8604 __ Dup(z21.VnS(), 0);
8605 __ Dup(z22.VnS(), 0);
8606 __ Dup(z23.VnS(), 0);
8607 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8608 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8609 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8610 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8611 __ Ld4w(z8.VnS(),
8612 z9.VnS(),
8613 z10.VnS(),
8614 z11.VnS(),
8615 p2.Zeroing(),
8616 SVEMemOperand(sp, x2, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008617
8618 __ DropVL(2 + 3 + 4);
8619
8620 END();
8621
8622 if (CAN_RUN()) {
8623 RUN();
8624
8625 // The most likely failure mode is that the simulator reads sp as xzr and
8626 // crashes on execution. We already test the address calculations separately
8627 // and sp doesn't change this, so just test that we load the values we
8628 // stored.
Jacob Bramleye483ce52019-11-05 16:52:29 +00008629
8630 // st2b/ld2b
8631 ASSERT_EQUAL_SVE(z0, z12);
8632 ASSERT_EQUAL_SVE(z1, z13);
8633
8634 // st3h/ld3h
8635 ASSERT_EQUAL_SVE(z4, z16);
8636 ASSERT_EQUAL_SVE(z5, z17);
8637 ASSERT_EQUAL_SVE(z6, z18);
8638
8639 // st4w/ld4w
8640 ASSERT_EQUAL_SVE(z8, z20);
8641 ASSERT_EQUAL_SVE(z9, z21);
8642 ASSERT_EQUAL_SVE(z10, z22);
8643 ASSERT_EQUAL_SVE(z11, z23);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008644 }
8645}
8646
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008647TEST_SVE(sve_ld234_st234_scalar_plus_imm_sp) {
8648 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8649 START();
8650
8651 // Check that the simulator correctly interprets rn == 31 as sp.
8652 // The indexing logic is the same regardless so we just check one load and
8653 // store of each type.
8654
8655 // There are no pre- or post-indexing modes, so reserve space first.
8656 // Note that the stores fill in an order that allows each immediate to be a
8657 // multiple of the number of registers.
8658 __ ClaimVL(4 + 2 + 3);
8659
8660 __ Index(z0.VnB(), 42, 2);
8661 __ Index(z1.VnB(), 43, 2);
8662 __ Ptrue(p0.VnB(), SVE_POW2);
8663 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, 4, SVE_MUL_VL));
8664
8665 __ Index(z4.VnH(), 42, 3);
8666 __ Index(z5.VnH(), 43, 3);
8667 __ Index(z6.VnH(), 44, 3);
8668 __ Ptrue(p1.VnH(), SVE_VL7);
8669 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, 6, SVE_MUL_VL));
8670
8671 __ Index(z8.VnS(), 42, 4);
8672 __ Index(z9.VnS(), 43, 4);
8673 __ Index(z10.VnS(), 44, 4);
8674 __ Index(z11.VnS(), 45, 4);
8675 __ Ptrue(p2.VnS());
8676 __ St4w(z8.VnS(), z9.VnS(), z10.VnS(), z11.VnS(), p2, SVEMemOperand(sp));
8677
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008678 // Corresponding loads.
8679 // We have to explicitly zero inactive lanes in the reference values because
8680 // loads have zeroing predication.
8681 __ Dup(z12.VnB(), 0);
8682 __ Dup(z13.VnB(), 0);
8683 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8684 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8685 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, 4, SVE_MUL_VL));
8686
8687 __ Dup(z16.VnH(), 0);
8688 __ Dup(z17.VnH(), 0);
8689 __ Dup(z18.VnH(), 0);
8690 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8691 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8692 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8693 __ Ld3h(z4.VnH(),
8694 z5.VnH(),
8695 z6.VnH(),
8696 p1.Zeroing(),
8697 SVEMemOperand(sp, 6, SVE_MUL_VL));
8698
8699 __ Dup(z20.VnS(), 0);
8700 __ Dup(z21.VnS(), 0);
8701 __ Dup(z22.VnS(), 0);
8702 __ Dup(z23.VnS(), 0);
8703 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8704 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8705 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8706 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8707 __ Ld4w(z8.VnS(),
8708 z9.VnS(),
8709 z10.VnS(),
8710 z11.VnS(),
8711 p2.Zeroing(),
8712 SVEMemOperand(sp));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008713
8714 __ DropVL(4 + 2 + 3);
8715
8716 END();
8717
8718 if (CAN_RUN()) {
8719 RUN();
8720
8721 // The most likely failure mode is that the simulator reads sp as xzr and
8722 // crashes on execution. We already test the address calculations separately
8723 // and sp doesn't change this, so just test that we load the values we
8724 // stored.
8725 // The corresponding loads are implemented above, so check their results here.
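
// Each pair compares a loaded result with the zero-predicated copy of the
// values that were stored, mirroring the scalar-plus-scalar test above.

// st2b/ld2b
ASSERT_EQUAL_SVE(z0, z12);
ASSERT_EQUAL_SVE(z1, z13);

// st3h/ld3h
ASSERT_EQUAL_SVE(z4, z16);
ASSERT_EQUAL_SVE(z5, z17);
ASSERT_EQUAL_SVE(z6, z18);

// st4w/ld4w
ASSERT_EQUAL_SVE(z8, z20);
ASSERT_EQUAL_SVE(z9, z21);
ASSERT_EQUAL_SVE(z10, z22);
ASSERT_EQUAL_SVE(z11, z23);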
8726 }
8727}
8728
TatWai Chong85e15102020-05-04 21:00:40 -07008729// Fill the input buffer with arbitrary data. Also generate random offsets from
8730 // the base address of the buffer and, if the optional arguments are provided,
8731 // the corresponding absolute addresses and their maximum.
8732static void BufferFillingHelper(uint64_t data_ptr,
8733 size_t buffer_size,
8734 unsigned lane_size_in_bytes,
8735 int lane_count,
8736 uint64_t* offsets,
8737 uint64_t* addresses = nullptr,
8738 uint64_t* max_address = nullptr) {
8739 // Use a fixed seed for nrand48() so that test runs are reproducible.
8740 unsigned short seed[3] = {1, 2, 3}; // NOLINT(runtime/int)
8741
8742 // Fill a buffer with arbitrary data.
8743 for (size_t i = 0; i < buffer_size; i++) {
8744 uint8_t byte = nrand48(seed) & 0xff;
8745 memcpy(reinterpret_cast<void*>(data_ptr + i), &byte, 1);
8746 }
8747
8748 if (max_address != nullptr) {
8749 *max_address = 0;
8750 }
8751
8752 // Vectors of random addresses and offsets into the buffer.
8753 for (int i = 0; i < lane_count; i++) {
8754 uint64_t rnd = nrand48(seed);
8755 // Limit the range to the set of completely-accessible elements in memory.
8756 offsets[i] = rnd % (buffer_size - lane_size_in_bytes);
8757 if ((addresses != nullptr) && (max_address != nullptr)) {
8758 addresses[i] = data_ptr + offsets[i];
8759 *max_address = std::max(*max_address, addresses[i]);
8760 }
8761 }
8762}
8763
TatWai Chong85e15102020-05-04 21:00:40 -07008764static void ScalarLoadHelper(MacroAssembler* masm,
8765 Register dst,
8766 Register addr,
8767 int msize_in_bits,
8768 bool is_signed) {
8769 if (is_signed) {
8770 switch (msize_in_bits) {
8771 case kBRegSize:
8772 masm->Ldrsb(dst, MemOperand(addr));
8773 break;
8774 case kHRegSize:
8775 masm->Ldrsh(dst, MemOperand(addr));
8776 break;
8777 case kWRegSize:
8778 masm->Ldrsw(dst, MemOperand(addr));
8779 break;
8780 default:
8781 VIXL_UNIMPLEMENTED();
8782 break;
8783 }
8784 } else {
8785 switch (msize_in_bits) {
8786 case kBRegSize:
8787 masm->Ldrb(dst, MemOperand(addr));
8788 break;
8789 case kHRegSize:
8790 masm->Ldrh(dst, MemOperand(addr));
8791 break;
8792 case kWRegSize:
8793 masm->Ldr(dst.W(), MemOperand(addr));
8794 break;
8795 case kXRegSize:
8796 masm->Ldr(dst, MemOperand(addr));
8797 break;
8798 default:
8799 VIXL_UNIMPLEMENTED();
8800 break;
8801 }
8802 }
8803}
8804
8805// Generate a reference result using scalar loads.
8806// For now this helper doesn't save and restore the caller registers.
8807// Clobber register z30, x28, x29 and p7.
8808template <size_t N>
8809static void ScalarLoadHelper(MacroAssembler* masm,
8810 int vl,
8811 const uint64_t (&addresses)[N],
8812 const ZRegister& zt_ref,
8813 const PRegisterZ& pg,
8814 unsigned esize_in_bits,
8815 unsigned msize_in_bits,
8816 bool is_signed) {
8817 unsigned esize_in_bytes = esize_in_bits / kBitsPerByte;
8818 ZRegister lane_numbers = z30.WithLaneSize(esize_in_bits);
8819 masm->Index(lane_numbers, 0, 1);
8820 masm->Dup(zt_ref, 0);
8821 for (unsigned i = 0; i < (vl / esize_in_bytes); i++) {
8822 masm->Mov(x29, addresses[N - i - 1]);
8823 Register rt(28, std::min(std::max(esize_in_bits, kSRegSize), kDRegSize));
8824 ScalarLoadHelper(masm, rt, x29, msize_in_bits, is_signed);
8825
8826 // Emulate predication.
8827 masm->Cmpeq(p7.WithLaneSize(esize_in_bits), pg, lane_numbers, i);
8828 masm->Cpy(zt_ref, p7.Merging(), rt);
8829 }
8830}
8831
TatWai Chong113d9192020-05-19 01:02:36 -07008832typedef void (MacroAssembler::*Ld1Macro)(const ZRegister& zt,
8833 const PRegisterZ& pg,
8834 const SVEMemOperand& addr);
8835
TatWai Chong6537a9a2020-05-05 14:15:16 -07008836static void Ldff1Helper(Test* config,
8837 uintptr_t data,
8838 unsigned msize_in_bits,
8839 unsigned esize_in_bits,
TatWai Chong1af34f12020-06-01 20:54:06 -07008840 CPURegister::RegisterType base_type,
TatWai Chong6537a9a2020-05-05 14:15:16 -07008841 Ld1Macro ldff1,
8842 Ld1Macro ld1,
TatWai Chong1af34f12020-06-01 20:54:06 -07008843 SVEOffsetModifier mod,
TatWai Chong6537a9a2020-05-05 14:15:16 -07008844 bool scale = false) {
8845 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8846 START();
8847
8848 int vl = config->sve_vl_in_bytes();
8849 size_t page_size = sysconf(_SC_PAGE_SIZE);
8850 VIXL_ASSERT(page_size > static_cast<size_t>(vl));
8851
8852 unsigned esize_in_bytes = esize_in_bits / kBitsPerByte;
8853 unsigned msize_in_bytes = msize_in_bits / kBitsPerByte;
8854 unsigned msize_in_bytes_log2 = std::log2(msize_in_bytes);
8855 VIXL_ASSERT(msize_in_bits <= esize_in_bits);
8856
8857 PRegister all = p7;
8858 __ Ptrue(all.VnB());
8859
8860 size_t offset_modifier = 0;
8861
8862 // The highest address at which a load stopped. Every FF load should fault at
8863 // `data + page_size`, so this value should not exceed that value. However,
8864 // the architecture allows fault-tolerant loads to fault arbitrarily, so the
8865 // real value may be lower.
8866 //
8867 // This is used to check that the `mprotect` above really does make the second
8868 // page inaccessible, and that the resulting FFR from each load reflects that.
8869 Register limit = x22;
8870 __ Mov(limit, 0);
8871
8872 // If the FFR grows unexpectedly, we increment this register by the
8873 // difference. FFR should never grow, except when explicitly set.
8874 Register ffr_grow_count = x23;
8875 __ Mov(ffr_grow_count, 0);
8876
8877 // Set the offset so that the load is guaranteed to start in the
8878 // accessible page, but end in the inaccessible one.
8879 VIXL_ASSERT((page_size % msize_in_bytes) == 0);
8880 VIXL_ASSERT((vl % msize_in_bytes) == 0);
8881 size_t elements_per_page = page_size / msize_in_bytes;
8882 size_t elements_per_access = vl / esize_in_bytes;
8883 size_t min_offset = (elements_per_page - elements_per_access) + 1;
8884 size_t max_offset = elements_per_page - 1;
8885 size_t offset =
8886 min_offset + (offset_modifier % (max_offset - min_offset + 1));
8887 offset_modifier++;
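 // Worked example (assumed numbers, for illustration only): with a 4096-byte
 // page, a 256-bit (32-byte) VL and msize == esize == 4 bytes, there are 1024
 // elements per page and 8 elements per access. Any offset in
 // [1024 - 8 + 1, 1024 - 1] = [1017, 1023] keeps the first element readable
 // while at least the last element falls in the protected page, which is what
 // triggers the first-fault behavior under test.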
8888
8889 __ Setffr();
8890 __ Mov(x20, data);
8891 __ Mov(x21, offset);
8892
TatWai Chong1af34f12020-06-01 20:54:06 -07008893 if (base_type == CPURegister::kRegister) {
8894 // Scalar-plus-scalar mode.
8895 if ((mod == SVE_LSL) || (mod == NO_SVE_OFFSET_MODIFIER)) {
8896 (masm.*ldff1)(z0.WithLaneSize(esize_in_bits),
8897 all.Zeroing(),
8898 SVEMemOperand(x20, x21, mod, msize_in_bytes_log2));
8899 } else {
8900 VIXL_UNIMPLEMENTED();
8901 }
TatWai Chong6537a9a2020-05-05 14:15:16 -07008902
TatWai Chong1af34f12020-06-01 20:54:06 -07008903 } else if (base_type == CPURegister::kZRegister) {
8904 int offs_size;
8905 bool offs_is_unsigned;
8906 if ((mod == SVE_SXTW) || (mod == SVE_UXTW)) {
8907 // Scalar-plus-vector mode with 32-bit packed or unpacked, and unscaled or
8908 // scaled offsets.
8909 if (scale) {
8910 // Gather first-fault byte loads don't support scaled offsets.
8911 VIXL_ASSERT(msize_in_bits != kBRegSize);
8912 }
8913 offs_is_unsigned = (mod == SVE_UXTW);
8914 offs_size = kSRegSize;
8915
8916 } else {
8917 // Scalar-plus-vector mode with 64-bit unscaled or scaled offset.
8918 VIXL_ASSERT((mod == SVE_LSL) || (mod == NO_SVE_OFFSET_MODIFIER));
8919 offs_is_unsigned = false;
8920 offs_size = kDRegSize;
8921 }
8922
8923 // Generate addresses of the form "base address + (index << shift)".
8924 // For unscaled-offset operations, use `msize_in_bytes` as the step between
8925 // successive (decreasing) memory accesses; otherwise step the index by 1
8926 // and let the shift scale it.
8927 int shift = scale ? msize_in_bytes_log2 : 0;
8928 int index_offset = msize_in_bytes >> shift;
8929 VIXL_ASSERT(index_offset > 0);
TatWai Chong6537a9a2020-05-05 14:15:16 -07008930 uint64_t index = 0;
8931 uint64_t base_address = 0;
8932
8933 if (offs_is_unsigned) {
TatWai Chong6537a9a2020-05-05 14:15:16 -07008934 // Base address.
8935 base_address = data;
8936 // Maximum unsigned positive index.
8937 index = page_size >> shift;
8938
8939 } else {
8940 // Base address.
8941 base_address = data + (2 * page_size);
8942 // A large index that acts as a negative offset, to exercise wrap-around.
8943 uint64_t uint_e_max =
8944 (esize_in_bits == kDRegSize) ? UINT64_MAX : UINT32_MAX;
8945 index = uint_e_max - (page_size >> shift) + 1;
8946 }
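 // To illustrate the signed case above (assumed numbers only): for D-sized
 // elements with an unscaled offset and a 4096-byte page, `index` becomes
 // UINT64_MAX - 4096 + 1, which behaves as -4096 in the modulo-2^64 address
 // computation. With base_address = data + (2 * page_size), the last element's
 // address is therefore data + page_size, i.e. the start of the protected
 // page, just as in the unsigned case where base_address = data and the index
 // is +4096.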
8947
8948 __ Mov(x19, base_address);
8949 if ((offs_size == kSRegSize) && (esize_in_bits == kDRegSize)) {
8950 // In this case, the index values are sign- or zero-extended from 32 to
8951 // 64 bits, so assign an arbitrary value to the top 32 bits to check that
8952 // only the low 32 bits are used as the index.
8953 index |= 0x1234567800000000;
8954 }
8955
8956 index -= index_offset * (elements_per_access - 1);
8957 __ Index(z17.WithLaneSize(esize_in_bits), index, index_offset);
8958
8959 // Scalar plus vector mode.
8960 (masm.*
8961 ldff1)(z0.WithLaneSize(esize_in_bits),
8962 all.Zeroing(),
8963 SVEMemOperand(x19, z17.WithLaneSize(esize_in_bits), mod, shift));
TatWai Chong1af34f12020-06-01 20:54:06 -07008964 } else {
8965 VIXL_UNIMPLEMENTED();
TatWai Chong6537a9a2020-05-05 14:15:16 -07008966 }
8967
8968 __ Rdffrs(p0.VnB(), all.Zeroing());
8969
8970 // Execute another Ldff1 with no offset, so that every element could be
8971 // read. It should respect FFR, and load no more than we loaded the
8972 // first time.
8973 (masm.*
8974 ldff1)(z16.WithLaneSize(esize_in_bits), all.Zeroing(), SVEMemOperand(x20));
8975 __ Rdffrs(p1.VnB(), all.Zeroing());
8976 __ Cntp(x0, all, p1.VnB());
8977 __ Uqdecp(x0, p0.VnB());
8978 __ Add(ffr_grow_count, ffr_grow_count, x0);
8979
8980 // Use the FFR to predicate the normal load. If it wasn't properly set,
8981 // the normal load will abort.
8982 (masm.*ld1)(z16.WithLaneSize(esize_in_bits),
8983 p0.Zeroing(),
8984 SVEMemOperand(x20, x21, LSL, msize_in_bytes_log2));
8985
8986 // Work out the address after the one that was just accessed.
8987 __ Incp(x21, p0.WithLaneSize(esize_in_bits));
8988 __ Add(x0, x20, Operand(x21, LSL, msize_in_bytes_log2));
8989 __ Cmp(limit, x0);
8990 __ Csel(limit, limit, x0, hs);
8991
8992 // Clear lanes inactive in FFR. These have an undefined result.
8993 // TODO: Use the 'Not' and 'Mov' aliases once they are implemented.
8994 __ Eor(p0.WithLaneSize(esize_in_bits),
8995 all.Zeroing(),
8996 p0.WithLaneSize(esize_in_bits),
8997 all.WithLaneSize(esize_in_bits));
8998 __ Cpy(z0.WithLaneSize(esize_in_bits), p0.Merging(), 0);
8999
9000 END();
9001
9002 if (CAN_RUN()) {
9003 RUN();
9004
9005 uintptr_t expected_limit = data + page_size;
9006 uintptr_t measured_limit = core.xreg(limit.GetCode());
9007 VIXL_CHECK(measured_limit <= expected_limit);
9008 if (measured_limit < expected_limit) {
9009 // We can't fail the test for this case, but a warning is helpful for
9010 // manually-run tests.
9011 printf(
9012 "WARNING: All fault-tolerant loads detected faults before the\n"
9013 "expected limit. This is architecturally possible, but improbable,\n"
9014 "and could be a symptom of another problem.\n");
9015 }
9016
9017 ASSERT_EQUAL_64(0, ffr_grow_count);
9018
9019 ASSERT_EQUAL_SVE(z0.WithLaneSize(esize_in_bits),
9020 z16.WithLaneSize(esize_in_bits));
9021 }
9022}
9023
9024TEST_SVE(sve_ldff1_scalar_plus_scalar) {
9025 size_t page_size = sysconf(_SC_PAGE_SIZE);
9026 VIXL_ASSERT(page_size > static_cast<size_t>(config->sve_vl_in_bytes()));
9027
9028 // Allocate two pages, then mprotect the second one to make it inaccessible.
9029 uintptr_t data = reinterpret_cast<uintptr_t>(mmap(NULL,
9030 page_size * 2,
9031 PROT_READ | PROT_WRITE,
9032 MAP_PRIVATE | MAP_ANONYMOUS,
9033 -1,
9034 0));
9035 mprotect(reinterpret_cast<void*>(data + page_size), page_size, PROT_NONE);
9036
9037 // Fill the accessible page with arbitrary data.
9038 for (size_t i = 0; i < page_size; i++) {
9039 // Reverse bits so we get a mixture of positive and negative values.
9040 uint8_t byte = ReverseBits(static_cast<uint8_t>(i));
9041 memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
9042 }
9043
TatWai Chong1af34f12020-06-01 20:54:06 -07009044 auto ldff1_unscaled_offset_helper = std::bind(&Ldff1Helper,
9045 config,
9046 data,
9047 std::placeholders::_1,
9048 std::placeholders::_2,
9049 CPURegister::kRegister,
9050 std::placeholders::_3,
9051 std::placeholders::_4,
9052 NO_SVE_OFFSET_MODIFIER,
9053 false);
9054
TatWai Chong6537a9a2020-05-05 14:15:16 -07009055 Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
9056 Ld1Macro ld1b = &MacroAssembler::Ld1b;
TatWai Chong1af34f12020-06-01 20:54:06 -07009057 ldff1_unscaled_offset_helper(kBRegSize, kBRegSize, ldff1b, ld1b);
9058 ldff1_unscaled_offset_helper(kBRegSize, kHRegSize, ldff1b, ld1b);
9059 ldff1_unscaled_offset_helper(kBRegSize, kSRegSize, ldff1b, ld1b);
9060 ldff1_unscaled_offset_helper(kBRegSize, kDRegSize, ldff1b, ld1b);
TatWai Chong6537a9a2020-05-05 14:15:16 -07009061
9062 Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
9063 Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
TatWai Chong1af34f12020-06-01 20:54:06 -07009064 ldff1_unscaled_offset_helper(kBRegSize, kHRegSize, ldff1sb, ld1sb);
9065 ldff1_unscaled_offset_helper(kBRegSize, kSRegSize, ldff1sb, ld1sb);
9066 ldff1_unscaled_offset_helper(kBRegSize, kDRegSize, ldff1sb, ld1sb);
9067
9068 auto ldff1_scaled_offset_helper = std::bind(&Ldff1Helper,
9069 config,
9070 data,
9071 std::placeholders::_1,
9072 std::placeholders::_2,
9073 CPURegister::kRegister,
9074 std::placeholders::_3,
9075 std::placeholders::_4,
9076 SVE_LSL,
9077 true);
9078
9079 Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
9080 Ld1Macro ld1h = &MacroAssembler::Ld1h;
9081 ldff1_scaled_offset_helper(kHRegSize, kHRegSize, ldff1h, ld1h);
9082 ldff1_scaled_offset_helper(kHRegSize, kSRegSize, ldff1h, ld1h);
9083 ldff1_scaled_offset_helper(kHRegSize, kDRegSize, ldff1h, ld1h);
9084
9085 Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
9086 Ld1Macro ld1w = &MacroAssembler::Ld1w;
9087 ldff1_scaled_offset_helper(kSRegSize, kSRegSize, ldff1w, ld1w);
9088 ldff1_scaled_offset_helper(kSRegSize, kDRegSize, ldff1w, ld1w);
9089
9090 Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
9091 Ld1Macro ld1d = &MacroAssembler::Ld1d;
9092 ldff1_scaled_offset_helper(kDRegSize, kDRegSize, ldff1d, ld1d);
TatWai Chong6537a9a2020-05-05 14:15:16 -07009093
9094 Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
9095 Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
TatWai Chong1af34f12020-06-01 20:54:06 -07009096 ldff1_scaled_offset_helper(kHRegSize, kSRegSize, ldff1sh, ld1sh);
9097 ldff1_scaled_offset_helper(kHRegSize, kDRegSize, ldff1sh, ld1sh);
TatWai Chong6537a9a2020-05-05 14:15:16 -07009098
9099 Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
9100 Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
TatWai Chong1af34f12020-06-01 20:54:06 -07009101 ldff1_scaled_offset_helper(kSRegSize, kDRegSize, ldff1sw, ld1sw);
TatWai Chong6537a9a2020-05-05 14:15:16 -07009102
9103 munmap(reinterpret_cast<void*>(data), page_size * 2);
9104}
9105
TatWai Chong1af34f12020-06-01 20:54:06 -07009106static void sve_ldff1_scalar_plus_vector_32_scaled_offset(Test* config,
9107 uintptr_t data) {
9108 auto ldff1_32_scaled_offset_helper = std::bind(&Ldff1Helper,
9109 config,
9110 data,
9111 std::placeholders::_1,
9112 kSRegSize,
9113 CPURegister::kZRegister,
9114 std::placeholders::_2,
9115 std::placeholders::_3,
9116 std::placeholders::_4,
9117 true);
9118 Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
9119 Ld1Macro ld1h = &MacroAssembler::Ld1h;
9120 ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
9121 ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
9122
9123 Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
9124 Ld1Macro ld1w = &MacroAssembler::Ld1w;
9125 ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
9126 ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
9127
9128 Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
9129 Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
9130 ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
9131 ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
9132}
9133
9134static void sve_ldff1_scalar_plus_vector_32_unscaled_offset(Test* config,
9135 uintptr_t data) {
9136 auto ldff1_32_unscaled_offset_helper = std::bind(&Ldff1Helper,
9137 config,
9138 data,
9139 std::placeholders::_1,
9140 kSRegSize,
9141 CPURegister::kZRegister,
9142 std::placeholders::_2,
9143 std::placeholders::_3,
9144 std::placeholders::_4,
9145 false);
9146
9147 Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
9148 Ld1Macro ld1b = &MacroAssembler::Ld1b;
9149 ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_UXTW);
9150 ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_SXTW);
9151
9152 Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
9153 Ld1Macro ld1h = &MacroAssembler::Ld1h;
9154 ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
9155 ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
9156
9157 Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
9158 Ld1Macro ld1w = &MacroAssembler::Ld1w;
9159 ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
9160 ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
9161
9162 Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
9163 Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
9164 ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_UXTW);
9165 ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_SXTW);
9166
9167 Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
9168 Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
9169 ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
9170 ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
9171}
9172
9173static void sve_ldff1_scalar_plus_vector_32_unpacked_scaled_offset(
9174 Test* config, uintptr_t data) {
9175 auto ldff1_32_unpacked_scaled_offset_helper =
9176 std::bind(&Ldff1Helper,
9177 config,
9178 data,
9179 std::placeholders::_1,
9180 kDRegSize,
9181 CPURegister::kZRegister,
9182 std::placeholders::_2,
9183 std::placeholders::_3,
9184 std::placeholders::_4,
9185 true);
9186
9187 Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
9188 Ld1Macro ld1h = &MacroAssembler::Ld1h;
9189 ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
9190 ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
9191
9192 Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
9193 Ld1Macro ld1w = &MacroAssembler::Ld1w;
9194 ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
9195 ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
9196
9197 Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
9198 Ld1Macro ld1d = &MacroAssembler::Ld1d;
9199 ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_UXTW);
9200 ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_SXTW);
9201
9202 Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
9203 Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
9204 ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
9205 ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
9206
9207 Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
9208 Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
9209 ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_UXTW);
9210 ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_SXTW);
9211}
9212
9213static void sve_ldff1_scalar_plus_vector_32_unpacked_unscaled_offset(
9214 Test* config, uintptr_t data) {
9215 auto ldff1_32_unpacked_unscaled_offset_helper =
9216 std::bind(&Ldff1Helper,
9217 config,
9218 data,
9219 std::placeholders::_1,
9220 kDRegSize,
9221 CPURegister::kZRegister,
9222 std::placeholders::_2,
9223 std::placeholders::_3,
9224 std::placeholders::_4,
9225 false);
9226
9227 Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
9228 Ld1Macro ld1b = &MacroAssembler::Ld1b;
9229 ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_UXTW);
9230 ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_SXTW);
9231
9232 Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
9233 Ld1Macro ld1h = &MacroAssembler::Ld1h;
9234 ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
9235 ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
9236
9237 Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
9238 Ld1Macro ld1w = &MacroAssembler::Ld1w;
9239 ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
9240 ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
9241
9242 Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
9243 Ld1Macro ld1d = &MacroAssembler::Ld1d;
9244 ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_UXTW);
9245 ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_SXTW);
9246
9247 Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
9248 Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
9249 ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_UXTW);
9250 ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_SXTW);
9251
9252 Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
9253 Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
9254 ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
9255 ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
9256
9257 Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
9258 Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
9259 ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_UXTW);
9260 ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_SXTW);
9261}
9262
9263static void sve_ldff1_scalar_plus_vector_64_scaled_offset(Test* config,
9264 uintptr_t data) {
9265 auto ldff1_64_scaled_offset_helper = std::bind(&Ldff1Helper,
9266 config,
9267 data,
9268 std::placeholders::_1,
9269 kDRegSize,
9270 CPURegister::kZRegister,
9271 std::placeholders::_2,
9272 std::placeholders::_3,
9273 SVE_LSL,
9274 true);
9275
9276 Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
9277 Ld1Macro ld1h = &MacroAssembler::Ld1h;
9278 ldff1_64_scaled_offset_helper(kHRegSize, ldff1h, ld1h);
9279
9280 Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
9281 Ld1Macro ld1w = &MacroAssembler::Ld1w;
9282 ldff1_64_scaled_offset_helper(kSRegSize, ldff1w, ld1w);
9283
9284 Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
9285 Ld1Macro ld1d = &MacroAssembler::Ld1d;
9286 ldff1_64_scaled_offset_helper(kDRegSize, ldff1d, ld1d);
9287
9288 Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
9289 Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
9290 ldff1_64_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh);
9291
9292 Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
9293 Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
9294 ldff1_64_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw);
9295}
9296
9297static void sve_ldff1_scalar_plus_vector_64_unscaled_offset(Test* config,
9298 uintptr_t data) {
9299 auto ldff1_64_unscaled_offset_helper = std::bind(&Ldff1Helper,
9300 config,
9301 data,
9302 std::placeholders::_1,
9303 kDRegSize,
9304 CPURegister::kZRegister,
9305 std::placeholders::_2,
9306 std::placeholders::_3,
9307 NO_SVE_OFFSET_MODIFIER,
9308 false);
9309
9310 Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
9311 Ld1Macro ld1b = &MacroAssembler::Ld1b;
9312 ldff1_64_unscaled_offset_helper(kBRegSize, ldff1b, ld1b);
9313
9314 Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
9315 Ld1Macro ld1h = &MacroAssembler::Ld1h;
9316 ldff1_64_unscaled_offset_helper(kHRegSize, ldff1h, ld1h);
9317
9318 Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
9319 Ld1Macro ld1w = &MacroAssembler::Ld1w;
9320 ldff1_64_unscaled_offset_helper(kSRegSize, ldff1w, ld1w);
9321
9322 Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
9323 Ld1Macro ld1d = &MacroAssembler::Ld1d;
9324 ldff1_64_unscaled_offset_helper(kDRegSize, ldff1d, ld1d);
9325
9326 Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
9327 Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
9328 ldff1_64_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb);
9329
9330 Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
9331 Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
9332 ldff1_64_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh);
9333
9334 Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
9335 Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
9336 ldff1_64_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw);
9337}
9338
TatWai Chong6537a9a2020-05-05 14:15:16 -07009339TEST_SVE(sve_ldff1_scalar_plus_vector) {
9340 size_t page_size = sysconf(_SC_PAGE_SIZE);
9341 VIXL_ASSERT(page_size > static_cast<size_t>(config->sve_vl_in_bytes()));
9342
9343 // Allocate two pages, then mprotect the second one to make it inaccessible.
9344 uintptr_t data = reinterpret_cast<uintptr_t>(mmap(NULL,
9345 page_size * 2,
9346 PROT_READ | PROT_WRITE,
9347 MAP_PRIVATE | MAP_ANONYMOUS,
9348 -1,
9349 0));
9350 mprotect(reinterpret_cast<void*>(data + page_size), page_size, PROT_NONE);
9351
9352 // Fill the accessible page with arbitrary data.
9353 for (size_t i = 0; i < page_size; i++) {
9354 // Reverse bits so we get a mixture of positive and negative values.
9355 uint8_t byte = ReverseBits(static_cast<uint8_t>(i));
9356 memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
9357 }
9358
TatWai Chong1af34f12020-06-01 20:54:06 -07009359 sve_ldff1_scalar_plus_vector_32_scaled_offset(config, data);
9360 sve_ldff1_scalar_plus_vector_32_unscaled_offset(config, data);
9361 sve_ldff1_scalar_plus_vector_32_unpacked_scaled_offset(config, data);
9362 sve_ldff1_scalar_plus_vector_32_unpacked_unscaled_offset(config, data);
9363 sve_ldff1_scalar_plus_vector_64_scaled_offset(config, data);
9364 sve_ldff1_scalar_plus_vector_64_unscaled_offset(config, data);
TatWai Chong6537a9a2020-05-05 14:15:16 -07009365
9366 munmap(reinterpret_cast<void*>(data), page_size * 2);
9367}
9368
TatWai Chong113d9192020-05-19 01:02:36 -07009369// Test gather loads by comparing them with the result of a set of equivalent
9370// scalar loads.
9371static void GatherLoadScalarPlusVectorHelper(Test* config,
9372 unsigned msize_in_bits,
9373 unsigned esize_in_bits,
9374 Ld1Macro ld1,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009375 Ld1Macro ldff1,
TatWai Chong113d9192020-05-19 01:02:36 -07009376 bool is_signed,
9377 bool is_scaled) {
9378 // SVE supports 32- and 64-bit addressing for gather loads.
9379 VIXL_ASSERT((esize_in_bits == kSRegSize) || (esize_in_bits == kDRegSize));
9380 static const unsigned kMaxLaneCount = kZRegMaxSize / kSRegSize;
9381
9382 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9383 START();
9384
9385 unsigned msize_in_bytes = msize_in_bits / kBitsPerByte;
9386 int vl = config->sve_vl_in_bytes();
9387
9388 uint64_t addresses[kMaxLaneCount];
9389 uint64_t offsets[kMaxLaneCount];
9390 uint64_t max_address = 0;
9391 uint64_t buffer_size = vl * 64;
9392 uint64_t data = reinterpret_cast<uintptr_t>(malloc(buffer_size));
9393 // Fill the buffer with arbitrary data, and generate random addresses and
9394 // offsets into the buffer, storing them in the arrays passed as arguments.
9395 BufferFillingHelper(data,
9396 buffer_size,
9397 msize_in_bytes,
9398 kMaxLaneCount,
9399 offsets,
9400 addresses,
9401 &max_address);
9402
9403 ZRegister zn = z0.WithLaneSize(esize_in_bits);
9404 ZRegister zt_ref = z1.WithLaneSize(esize_in_bits);
9405 ZRegister zt_ux = z2.WithLaneSize(esize_in_bits);
9406 ZRegister zt_sx = z3.WithLaneSize(esize_in_bits);
TatWai Chong6537a9a2020-05-05 14:15:16 -07009407 ZRegister zt_ff_ux = z4.WithLaneSize(esize_in_bits);
9408 ZRegister zt_ff_sx = z5.WithLaneSize(esize_in_bits);
TatWai Chong113d9192020-05-19 01:02:36 -07009409
9410 int shift = 0;
9411 if (is_scaled) {
9412 shift = std::log2(msize_in_bytes);
9413 for (unsigned i = 0; i < kMaxLaneCount; i++) {
9414 // Ensure the offsets are multiples of the scale factor of the
9415 // operation.
9416 offsets[i] = (offsets[i] >> shift) << shift;
9417 addresses[i] = data + offsets[i];
9418 }
9419 }
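 // To make the rounding above concrete (illustration only): with
 // msize_in_bytes == 4, shift == 2, an offset of 0x1ef is rounded down to
 // 0x1ec, so it remains exactly representable once the test scales the offsets
 // back down for the scaled-offset addressing form.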
9420
9421 PRegister all = p6;
9422 __ Ptrue(all.WithLaneSize(esize_in_bits));
9423
9424 PRegisterZ pg = p0.Zeroing();
9425 Initialise(&masm,
9426 pg,
9427 0x9abcdef012345678,
9428 0xabcdef0123456789,
9429 0xf4f3f1f0fefdfcfa,
9430 0xf9f8f6f5f3f2f1ff);
9431
9432 __ Mov(x0, data);
9433
9434 // Generate a reference result for the scalar-plus-vector form using scalar loads.
9435 ScalarLoadHelper(&masm,
9436 vl,
9437 addresses,
9438 zt_ref,
9439 pg,
9440 esize_in_bits,
9441 msize_in_bits,
9442 is_signed);
9443
9444 InsrHelper(&masm, zn, offsets);
9445 if (is_scaled) {
9446 // Scale down the offsets if testing scaled-offset operation.
9447 __ Lsr(zn, zn, shift);
9448 }
9449
9450 // TODO: Also test 64 bit scalar-plus-vector SVEMemOperands.
9451 VIXL_ASSERT(esize_in_bits == kSRegSize);
9452 (masm.*ld1)(zt_ux, pg, SVEMemOperand(x0, zn, UXTW, shift));
9453 (masm.*ld1)(zt_sx, pg, SVEMemOperand(x0, zn, SXTW, shift));
9454
TatWai Chong6537a9a2020-05-05 14:15:16 -07009455 Register ffr_check_count = x17;
9456 __ Mov(ffr_check_count, 0);
9457
9458 // Count the lanes where the load result is inconsistent with the FFR, and
9459 // accumulate the count in `ffr_check_count`.
9460 auto ffr_check = [&](auto zt_ref, auto zt) {
9461 PRegisterWithLaneSize pg_ff = p1.WithLaneSize(esize_in_bits);
9462 PRegisterWithLaneSize pg_diff = p2.WithLaneSize(esize_in_bits);
9463
9464 masm.Rdffrs(pg_ff.VnB(), all.Zeroing());
9465 masm.Cmpeq(pg_diff, all.Zeroing(), zt_ref, zt);
9466 masm.Eor(pg_diff, all.Zeroing(), pg_diff, pg_ff);
9467 masm.Cntp(x12, all, pg_diff);
9468 masm.Add(ffr_check_count, ffr_check_count, x12);
9469 };
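 // A sketch of what ffr_check verifies, with assumed lane values: suppose
 // FFR = {1, 1, 1, 1} after the gather load (the whole buffer is readable
 // here) and the load matched the reference in every lane, so Cmpeq also gives
 // {1, 1, 1, 1}. Then pg_diff = Cmpeq ^ FFR is all-false and nothing is added
 // to ffr_check_count. Any lane where the comparison result disagrees with the
 // FFR, in either direction, is counted as a failure.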
9470
9471 // Test the correctness of data gathered from different addresses. The
9472 // first-fault behavior itself is tested in `Ldff1Helper`.
9473 __ Setffr();
9474 (masm.*ldff1)(zt_ff_ux, pg, SVEMemOperand(x0, zn, UXTW, shift));
9475 ffr_check(zt_ref, zt_ff_ux);
9476 (masm.*ldff1)(zt_ff_sx, pg, SVEMemOperand(x0, zn, SXTW, shift));
9477 ffr_check(zt_ref, zt_ff_sx);
9478
TatWai Chong113d9192020-05-19 01:02:36 -07009479 END();
9480
9481 if (CAN_RUN()) {
9482 RUN();
9483
9484 ASSERT_EQUAL_SVE(zt_ref, zt_ux);
9485 ASSERT_EQUAL_SVE(zt_ref, zt_sx);
TatWai Chong6537a9a2020-05-05 14:15:16 -07009486 ASSERT_EQUAL_64(0, ffr_check_count);
TatWai Chong113d9192020-05-19 01:02:36 -07009487 }
9488
9489 free(reinterpret_cast<void*>(data));
9490}
9491
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009492// Test gather loads by comparing them with the result of a set of equivalent
9493// scalar loads.
9494template <typename F>
TatWai Chong113d9192020-05-19 01:02:36 -07009495static void GatherLoadScalarPlusScalarOrImmHelper(Test* config,
9496 unsigned msize_in_bits,
9497 unsigned esize_in_bits,
9498 F sve_ld1,
9499 bool is_signed) {
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009500 // SVE supports 32- and 64-bit addressing for gather loads.
9501 VIXL_ASSERT((esize_in_bits == kSRegSize) || (esize_in_bits == kDRegSize));
9502 static const unsigned kMaxLaneCount = kZRegMaxSize / kSRegSize;
9503
9504 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9505 START();
9506
9507 unsigned msize_in_bytes = msize_in_bits / kBitsPerByte;
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009508 int vl = config->sve_vl_in_bytes();
9509
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009510 uint64_t addresses[kMaxLaneCount];
9511 uint64_t offsets[kMaxLaneCount];
9512 uint64_t max_address = 0;
TatWai Chong85e15102020-05-04 21:00:40 -07009513 uint64_t buffer_size = vl * 64;
9514 uint64_t data = reinterpret_cast<uintptr_t>(malloc(buffer_size));
9515 BufferFillingHelper(data,
9516 buffer_size,
9517 msize_in_bytes,
9518 kMaxLaneCount,
9519 offsets,
9520 addresses,
9521 &max_address);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009522
9523 // Maximised offsets, to ensure that the address calculation is modulo-2^64,
9524 // and that the vector addresses are not sign-extended.
9525 uint64_t uint_e_max = (esize_in_bits == kDRegSize) ? UINT64_MAX : UINT32_MAX;
9526 uint64_t maxed_offsets[kMaxLaneCount];
9527 uint64_t maxed_offsets_imm = max_address - uint_e_max;
9528 for (unsigned i = 0; i < kMaxLaneCount; i++) {
9529 maxed_offsets[i] = addresses[i] - maxed_offsets_imm;
9530 }
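 // To illustrate the wrap-around being exercised (assumed numbers only): for
 // 64-bit elements, each maxed offset is addresses[i] - maxed_offsets_imm
 // computed modulo 2^64, a value close to UINT64_MAX. The load then adds
 // maxed_offsets_imm back, overflowing modulo 2^64 and landing on addresses[i]
 // again, so per-lane addresses must be neither sign-extended nor saturated on
 // the way.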
9531
9532 ZRegister zn = z0.WithLaneSize(esize_in_bits);
9533 ZRegister zt_addresses = z1.WithLaneSize(esize_in_bits);
9534 ZRegister zt_offsets = z2.WithLaneSize(esize_in_bits);
9535 ZRegister zt_maxed = z3.WithLaneSize(esize_in_bits);
9536 ZRegister zt_ref = z4.WithLaneSize(esize_in_bits);
9537
9538 PRegisterZ pg = p0.Zeroing();
9539 Initialise(&masm,
9540 pg,
9541 0x9abcdef012345678,
9542 0xabcdef0123456789,
9543 0xf4f3f1f0fefdfcfa,
9544 0xf9f8f6f5f3f2f0ff);
9545
9546 // Execute each load.
9547
9548 if (esize_in_bits == kDRegSize) {
9549 // Only test `addresses` if we can use 64-bit pointers. InsrHelper will fail
9550 // if any value won't fit in a lane of zn.
9551 InsrHelper(&masm, zn, addresses);
9552 (masm.*sve_ld1)(zt_addresses, pg, SVEMemOperand(zn));
9553 }
9554
9555 InsrHelper(&masm, zn, offsets);
9556 (masm.*sve_ld1)(zt_offsets, pg, SVEMemOperand(zn, data));
9557
9558 InsrHelper(&masm, zn, maxed_offsets);
9559 (masm.*sve_ld1)(zt_maxed, pg, SVEMemOperand(zn, maxed_offsets_imm));
9560
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009561 // Generate a reference result using scalar loads.
TatWai Chong85e15102020-05-04 21:00:40 -07009562 ScalarLoadHelper(&masm,
9563 vl,
9564 addresses,
9565 zt_ref,
9566 pg,
9567 esize_in_bits,
9568 msize_in_bits,
9569 is_signed);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009570
9571 END();
9572
9573 if (CAN_RUN()) {
9574 RUN();
9575
9576 if (esize_in_bits == kDRegSize) {
9577 ASSERT_EQUAL_SVE(zt_ref, zt_addresses);
9578 }
9579 ASSERT_EQUAL_SVE(zt_ref, zt_offsets);
9580 ASSERT_EQUAL_SVE(zt_ref, zt_maxed);
9581 }
9582
9583 free(reinterpret_cast<void*>(data));
9584}
9585
9586TEST_SVE(sve_ld1b_64bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009587 GatherLoadScalarPlusScalarOrImmHelper(config,
9588 kBRegSize,
9589 kDRegSize,
9590 &MacroAssembler::Ld1b,
9591 false);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009592}
9593
9594TEST_SVE(sve_ld1h_64bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009595 GatherLoadScalarPlusScalarOrImmHelper(config,
9596 kHRegSize,
9597 kDRegSize,
9598 &MacroAssembler::Ld1h,
9599 false);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009600}
9601
9602TEST_SVE(sve_ld1w_64bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009603 GatherLoadScalarPlusScalarOrImmHelper(config,
9604 kSRegSize,
9605 kDRegSize,
9606 &MacroAssembler::Ld1w,
9607 false);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009608}
9609
9610TEST_SVE(sve_ld1d_64bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009611 GatherLoadScalarPlusScalarOrImmHelper(config,
9612 kDRegSize,
9613 kDRegSize,
9614 &MacroAssembler::Ld1d,
9615 false);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009616}
9617
9618TEST_SVE(sve_ld1sb_64bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009619 GatherLoadScalarPlusScalarOrImmHelper(config,
9620 kBRegSize,
9621 kDRegSize,
9622 &MacroAssembler::Ld1sb,
9623 true);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009624}
9625
9626TEST_SVE(sve_ld1sh_64bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009627 GatherLoadScalarPlusScalarOrImmHelper(config,
9628 kHRegSize,
9629 kDRegSize,
9630 &MacroAssembler::Ld1sh,
9631 true);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009632}
9633
9634TEST_SVE(sve_ld1sw_64bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009635 GatherLoadScalarPlusScalarOrImmHelper(config,
9636 kSRegSize,
9637 kDRegSize,
9638 &MacroAssembler::Ld1sw,
9639 true);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009640}
9641
9642TEST_SVE(sve_ld1b_32bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009643 GatherLoadScalarPlusScalarOrImmHelper(config,
9644 kBRegSize,
9645 kSRegSize,
9646 &MacroAssembler::Ld1b,
9647 false);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009648}
9649
9650TEST_SVE(sve_ld1h_32bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009651 GatherLoadScalarPlusScalarOrImmHelper(config,
9652 kHRegSize,
9653 kSRegSize,
9654 &MacroAssembler::Ld1h,
9655 false);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009656}
9657
9658TEST_SVE(sve_ld1w_32bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009659 GatherLoadScalarPlusScalarOrImmHelper(config,
9660 kSRegSize,
9661 kSRegSize,
9662 &MacroAssembler::Ld1w,
9663 false);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009664}
9665
9666TEST_SVE(sve_ld1sb_32bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009667 GatherLoadScalarPlusScalarOrImmHelper(config,
9668 kBRegSize,
9669 kSRegSize,
9670 &MacroAssembler::Ld1sb,
9671 true);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009672}
9673
9674TEST_SVE(sve_ld1sh_32bit_vector_plus_immediate) {
TatWai Chong113d9192020-05-19 01:02:36 -07009675 GatherLoadScalarPlusScalarOrImmHelper(config,
9676 kHRegSize,
9677 kSRegSize,
9678 &MacroAssembler::Ld1sh,
9679 true);
9680}
9681
9682TEST_SVE(sve_ld1b_32bit_scalar_plus_vector) {
9683 bool is_signed = false;
9684 bool is_scaled = false;
9685 GatherLoadScalarPlusVectorHelper(config,
9686 kBRegSize,
9687 kSRegSize,
9688 &MacroAssembler::Ld1b,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009689 &MacroAssembler::Ldff1b,
TatWai Chong113d9192020-05-19 01:02:36 -07009690 is_signed,
9691 is_scaled);
9692}
9693
9694TEST_SVE(sve_ld1h_32bit_scalar_plus_vector) {
9695 bool is_signed = false;
9696 bool is_scaled = false;
9697 GatherLoadScalarPlusVectorHelper(config,
9698 kHRegSize,
9699 kSRegSize,
9700 &MacroAssembler::Ld1h,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009701 &MacroAssembler::Ldff1h,
TatWai Chong113d9192020-05-19 01:02:36 -07009702 is_signed,
9703 is_scaled);
9704
9705 is_scaled = true;
9706 GatherLoadScalarPlusVectorHelper(config,
9707 kHRegSize,
9708 kSRegSize,
9709 &MacroAssembler::Ld1h,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009710 &MacroAssembler::Ldff1h,
TatWai Chong113d9192020-05-19 01:02:36 -07009711 is_signed,
9712 is_scaled);
9713}
9714
9715TEST_SVE(sve_ld1w_32bit_scalar_plus_vector) {
9716 bool is_signed = false;
9717 bool is_scaled = false;
9718 GatherLoadScalarPlusVectorHelper(config,
9719 kSRegSize,
9720 kSRegSize,
9721 &MacroAssembler::Ld1w,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009722 &MacroAssembler::Ldff1w,
TatWai Chong113d9192020-05-19 01:02:36 -07009723 is_signed,
9724 is_scaled);
9725
9726 is_scaled = true;
9727 GatherLoadScalarPlusVectorHelper(config,
9728 kSRegSize,
9729 kSRegSize,
9730 &MacroAssembler::Ld1w,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009731 &MacroAssembler::Ldff1w,
TatWai Chong113d9192020-05-19 01:02:36 -07009732 is_signed,
9733 is_scaled);
9734}
9735
9736TEST_SVE(sve_ld1sb_32bit_scalar_plus_vector) {
9737 bool is_signed = true;
9738 bool is_scaled = false;
9739 GatherLoadScalarPlusVectorHelper(config,
9740 kBRegSize,
9741 kSRegSize,
9742 &MacroAssembler::Ld1sb,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009743 &MacroAssembler::Ldff1sb,
TatWai Chong113d9192020-05-19 01:02:36 -07009744 is_signed,
9745 is_scaled);
9746}
9747
9748TEST_SVE(sve_ld1sh_32bit_scalar_plus_vector) {
9749 bool is_signed = true;
9750 bool is_scaled = false;
9751 GatherLoadScalarPlusVectorHelper(config,
9752 kHRegSize,
9753 kSRegSize,
9754 &MacroAssembler::Ld1sh,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009755 &MacroAssembler::Ldff1sh,
TatWai Chong113d9192020-05-19 01:02:36 -07009756 is_signed,
9757 is_scaled);
9758
9759 is_scaled = true;
9760 GatherLoadScalarPlusVectorHelper(config,
9761 kHRegSize,
9762 kSRegSize,
9763 &MacroAssembler::Ld1sh,
TatWai Chong6537a9a2020-05-05 14:15:16 -07009764 &MacroAssembler::Ldff1sh,
TatWai Chong113d9192020-05-19 01:02:36 -07009765 is_signed,
9766 is_scaled);
Jacob Bramleydcdbd752020-01-20 11:47:36 +00009767}
9768
Martyn Capewell72765d12020-03-23 14:25:53 +00009769TEST_SVE(sve_ldnt1) {
9770 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9771 START();
9772
9773 int data_size = kZRegMaxSizeInBytes * 16;
9774 uint8_t* data = new uint8_t[data_size];
9775 for (int i = 0; i < data_size; i++) {
9776 data[i] = i & 0xff;
9777 }
9778
9779 // Set the base half-way through the buffer so we can use negative indices.
9780 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
9781 __ Ptrue(p0.VnB());
9782 __ Punpklo(p1.VnH(), p0.VnB());
9783 __ Punpklo(p2.VnH(), p1.VnB());
9784 __ Punpklo(p3.VnH(), p2.VnB());
9785 __ Punpklo(p4.VnH(), p3.VnB());
9786
9787 __ Mov(x1, 42);
9788 __ Ld1b(z0.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1));
9789 __ Ldnt1b(z1.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1));
9790
9791 __ Mov(x1, -21);
9792 __ Ld1h(z2.VnH(), p2.Zeroing(), SVEMemOperand(x0, x1, LSL, 1));
9793 __ Ldnt1h(z3.VnH(), p2.Zeroing(), SVEMemOperand(x0, x1, LSL, 1));
9794
9795 __ Mov(x1, 10);
9796 __ Ld1w(z4.VnS(), p3.Zeroing(), SVEMemOperand(x0, x1, LSL, 2));
9797 __ Ldnt1w(z5.VnS(), p3.Zeroing(), SVEMemOperand(x0, x1, LSL, 2));
9798
9799 __ Mov(x1, -5);
9800 __ Ld1d(z6.VnD(), p4.Zeroing(), SVEMemOperand(x0, x1, LSL, 3));
9801 __ Ldnt1d(z7.VnD(), p4.Zeroing(), SVEMemOperand(x0, x1, LSL, 3));
9802
9803 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0, 1, SVE_MUL_VL));
9804 __ Ldnt1b(z9.VnB(), p1.Zeroing(), SVEMemOperand(x0, 1, SVE_MUL_VL));
9805
9806 __ Ld1h(z10.VnH(), p2.Zeroing(), SVEMemOperand(x0, -1, SVE_MUL_VL));
9807 __ Ldnt1h(z11.VnH(), p2.Zeroing(), SVEMemOperand(x0, -1, SVE_MUL_VL));
9808
9809 __ Ld1w(z12.VnS(), p3.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
9810 __ Ldnt1w(z13.VnS(), p3.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
9811
9812 __ Ld1d(z14.VnD(), p4.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
9813 __ Ldnt1d(z15.VnD(), p4.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
9814 END();
9815
9816 if (CAN_RUN()) {
9817 RUN();
9818 ASSERT_EQUAL_SVE(z0, z1);
9819 ASSERT_EQUAL_SVE(z2, z3);
9820 ASSERT_EQUAL_SVE(z4, z5);
9821 ASSERT_EQUAL_SVE(z6, z7);
9822 ASSERT_EQUAL_SVE(z8, z9);
9823 ASSERT_EQUAL_SVE(z10, z11);
9824 ASSERT_EQUAL_SVE(z12, z13);
9825 ASSERT_EQUAL_SVE(z14, z15);
9826 }
9827}
9828
Martyn Capewell3e2fb502020-03-24 12:04:07 +00009829TEST_SVE(sve_stnt1) {
9830 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9831 START();
9832
9833 int data_size = kZRegMaxSizeInBytes * 16;
9834 uint8_t* data = new uint8_t[data_size];
9835
9836 // Set the base half-way through the buffer so we can use negative indices.
9837 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
9838 __ Ptrue(p0.VnB());
9839 __ Punpklo(p1.VnH(), p0.VnB());
9840 __ Punpklo(p2.VnH(), p1.VnB());
9841 __ Punpklo(p3.VnH(), p2.VnB());
9842 __ Punpklo(p4.VnH(), p3.VnB());
9843 __ Dup(z0.VnB(), 0x55);
9844 __ Index(z1.VnB(), 0, 1);
9845
9846 // Store with all-true and patterned predication, load back, and create a
9847 // reference value for later comparison.
9848 __ Rdvl(x1, 1);
9849 __ Stnt1b(z0.VnB(), p0, SVEMemOperand(x0, x1));
9850 __ Stnt1b(z1.VnB(), p1, SVEMemOperand(x0, 1, SVE_MUL_VL));
9851 __ Ld1b(z2.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));
9852 __ Sel(z3.VnB(), p1, z1.VnB(), z0.VnB());
9853
9854 // Repeated, with wider elements and different offsets.
9855 __ Rdvl(x1, -1);
9856 __ Lsr(x1, x1, 1);
9857 __ Stnt1h(z0.VnH(), p0, SVEMemOperand(x0, x1, LSL, 1));
9858 __ Stnt1h(z1.VnH(), p2, SVEMemOperand(x0, -1, SVE_MUL_VL));
9859 __ Ld1b(z4.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1));
9860 __ Sel(z5.VnH(), p2, z1.VnH(), z0.VnH());
9861
9862 __ Rdvl(x1, 7);
9863 __ Lsr(x1, x1, 2);
9864 __ Stnt1w(z0.VnS(), p0, SVEMemOperand(x0, x1, LSL, 2));
9865 __ Stnt1w(z1.VnS(), p3, SVEMemOperand(x0, 7, SVE_MUL_VL));
9866 __ Ld1b(z6.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2));
9867 __ Sel(z7.VnS(), p3, z1.VnS(), z0.VnS());
9868
9869 __ Rdvl(x1, -8);
9870 __ Lsr(x1, x1, 3);
9871 __ Stnt1d(z0.VnD(), p0, SVEMemOperand(x0, x1, LSL, 3));
9872 __ Stnt1d(z1.VnD(), p4, SVEMemOperand(x0, -8, SVE_MUL_VL));
9873 __ Ld1b(z8.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3));
9874 __ Sel(z9.VnD(), p4, z1.VnD(), z0.VnD());
9875 END();
9876
9877 if (CAN_RUN()) {
9878 RUN();
9879 ASSERT_EQUAL_SVE(z2, z3);
9880 ASSERT_EQUAL_SVE(z4, z5);
9881 ASSERT_EQUAL_SVE(z6, z7);
9882 ASSERT_EQUAL_SVE(z8, z9);
9883 }
9884}
9885
Martyn Capewell452ad8b2020-03-19 15:49:57 +00009886TEST_SVE(sve_ld1rq) {
9887 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9888 START();
9889
9890 int data_size = (kQRegSizeInBytes + 128) * 2;
9891 uint8_t* data = new uint8_t[data_size];
9892 for (int i = 0; i < data_size; i++) {
9893 data[i] = i & 0xff;
9894 }
9895
9896 // Set the base half-way through the buffer so we can use negative indices.
9897 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
9898
9899 __ Index(z0.VnB(), 0, 1);
9900 __ Ptrue(p0.VnB());
9901 __ Cmplo(p0.VnB(), p0.Zeroing(), z0.VnB(), 4);
9902 __ Pfalse(p1.VnB());
9903 __ Zip1(p1.VnB(), p0.VnB(), p1.VnB());
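 // A sketch of the predicate built above (assuming at least a 128-bit VL):
 // Cmplo leaves p0 active for byte lanes 0-3 only, and interleaving that with
 // an all-false predicate via Zip1 spreads those lanes over positions 0, 2, 4
 // and 6, so p1 activates every other byte of the first eight.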
9904
9905 // Load and broadcast using scalar offsets.
9906 __ Mov(x1, -42);
9907 __ Ld1rqb(z0.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1));
9908
9909 __ Add(x2, x0, 1);
9910 __ Mov(x1, -21);
9911 __ Punpklo(p2.VnH(), p1.VnB());
9912 __ Ld1rqh(z1.VnH(), p2.Zeroing(), SVEMemOperand(x2, x1, LSL, 1));
9913
9914 __ Add(x2, x2, 1);
9915 __ Mov(x1, -10);
9916 __ Punpklo(p3.VnH(), p2.VnB());
9917 __ Ld1rqw(z2.VnS(), p3.Zeroing(), SVEMemOperand(x2, x1, LSL, 2));
9918
9919 __ Add(x2, x2, 1);
9920 __ Mov(x1, 5);
9921 __ Punpklo(p4.VnH(), p3.VnB());
9922 __ Ld1rqd(z3.VnD(), p4.Zeroing(), SVEMemOperand(x2, x1, LSL, 3));
9923
9924 // Check that all segments match by rotating the vector by one segment,
9925 // eoring, and orring across the vector.
9926 __ Ext(z4.VnB(), z0.VnB(), z0.VnB(), 16);
9927 __ Eor(z4.VnB(), z4.VnB(), z0.VnB());
9928 __ Orv(b4, p0, z4.VnB());
9929 __ Ext(z5.VnB(), z1.VnB(), z1.VnB(), 16);
9930 __ Eor(z5.VnB(), z5.VnB(), z1.VnB());
9931 __ Orv(b5, p0, z5.VnB());
9932 __ Orr(z4, z4, z5);
9933 __ Ext(z5.VnB(), z2.VnB(), z2.VnB(), 16);
9934 __ Eor(z5.VnB(), z5.VnB(), z2.VnB());
9935 __ Orv(b5, p0, z5.VnB());
9936 __ Orr(z4, z4, z5);
9937 __ Ext(z5.VnB(), z3.VnB(), z3.VnB(), 16);
9938 __ Eor(z5.VnB(), z5.VnB(), z3.VnB());
9939 __ Orv(b5, p0, z5.VnB());
9940 __ Orr(z4, z4, z5);
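 // Why this works, as a sketch: Ld1rq* replicates one 128-bit segment across
 // the whole vector, so rotating the result by 16 bytes with Ext should give
 // back an identical vector. When it does, the Eor is all-zero, Orv reduces it
 // to a zero byte and the accumulating Orr keeps z4 zero; any mismatch leaves
 // a non-zero bit for the final check on z4 to catch.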
9941
9942 // Load and broadcast the same values, using immediate offsets.
9943 __ Add(x1, x0, 6);
9944 __ Ld1rqb(z5.VnB(), p1.Zeroing(), SVEMemOperand(x1, -48));
9945 __ Add(x1, x0, -9);
9946 __ Ld1rqh(z6.VnH(), p2.Zeroing(), SVEMemOperand(x1, -32));
9947 __ Add(x1, x0, -70);
9948 __ Ld1rqw(z7.VnS(), p3.Zeroing(), SVEMemOperand(x1, 32));
9949 __ Add(x1, x0, 27);
9950 __ Ld1rqd(z8.VnD(), p4.Zeroing(), SVEMemOperand(x1, 16));
9951 END();
9952
9953 if (CAN_RUN()) {
9954 RUN();
9955 uint64_t expected_z0[] = {0x0000000000000000, 0x006c006a00680066};
9956 uint64_t expected_z1[] = {0x000074730000706f, 0x00006c6b00006867};
9957 uint64_t expected_z2[] = {0x0000000075747372, 0x000000006d6c6b6a};
9958 uint64_t expected_z3[] = {0x0000000000000000, 0xc2c1c0bfbebdbcbb};
9959 uint64_t expected_z4[] = {0, 0};
9960 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
9961 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
9962 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
9963 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
9964 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
9965 ASSERT_EQUAL_SVE(z0, z5);
9966 ASSERT_EQUAL_SVE(z1, z6);
9967 ASSERT_EQUAL_SVE(z2, z7);
9968 ASSERT_EQUAL_SVE(z3, z8);
9969 }
9970}
9971
TatWai Chong6995bfd2019-09-26 10:48:05 +01009972typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
9973 const ZRegister& zn,
9974 const IntegerOperand imm);
9975
9976template <typename F, typename Td, typename Tn>
9977static void IntWideImmHelper(Test* config,
9978 F macro,
9979 unsigned lane_size_in_bits,
9980 const Tn& zn_inputs,
9981 IntegerOperand imm,
9982 const Td& zd_expected) {
9983 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9984 START();
9985
9986 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
9987 InsrHelper(&masm, zd1, zn_inputs);
9988
9989 // Also test with a different zn, to test the movprfx case.
9990 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
9991 InsrHelper(&masm, zn, zn_inputs);
9992 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
9993 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
9994
9995 // Make a copy so we can check that constructive operations preserve zn.
9996 __ Mov(zn_copy, zn);
9997
9998 {
9999 UseScratchRegisterScope temps(&masm);
10000 // The MacroAssembler needs a P scratch register for some of these macros,
10001 // and it doesn't have one by default.
10002 temps.Include(p3);
10003
10004 (masm.*macro)(zd1, zd1, imm);
10005 (masm.*macro)(zd2, zn, imm);
10006 }
10007
10008 END();
10009
10010 if (CAN_RUN()) {
10011 RUN();
10012
10013 ASSERT_EQUAL_SVE(zd_expected, zd1);
10014
10015 // Check that the constructive form (which may need movprfx) gives the same
10016 // result as the destructive, in-place form.
10017 ASSERT_EQUAL_SVE(zd_expected, zd2);
10018
10019 ASSERT_EQUAL_SVE(zn_copy, zn);
10020 }
10021}
10022
10023TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
10024 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
10025 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
10026 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
10027 int64_t in_d[] = {1, 10, 10000, 1000000};
10028
10029 IntWideImmFn fn = &MacroAssembler::Smax;
10030
10031 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
10032 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
10033 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
10034 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
10035
10036 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
10037 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
10038 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
10039 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
10040
10041 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
10042 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
10043 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
10044
10045 // The immediate is in the range [-128, 127], but the macro is able to
10046 // synthesise unencodable immediates.
10047 // B-sized lanes cannot take an immediate out of the range [-128, 127].
10048 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
10049 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
10050 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
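
 // A plausible (but not verified here) expansion for an unencodable immediate
 // such as 2048 is to materialise it in a scratch Z register and fall back on
 // the predicated register form, roughly:
 //   __ Dup(scratch.VnS(), 2048);
 //   __ Ptrue(p_scratch.VnS());
 //   __ Smax(zd.VnS(), p_scratch.Merging(), zd.VnS(), scratch.VnS());
 // which is consistent with the scratch predicate that IntWideImmHelper has to
 // provide for these macros. `scratch` and `p_scratch` are illustrative names,
 // not registers used by this test.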
10051}
10052
10053TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
10054 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
10055 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
10056 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
10057 int64_t in_d[] = {1, 10, 10000, 1000000};
10058
10059 IntWideImmFn fn = &MacroAssembler::Smin;
10060
10061 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
10062 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
10063 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
10064 int64_t exp_d_1[] = {1, 10, 99, 99};
10065
10066 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
10067 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
10068 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
10069 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
10070
10071 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
10072 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
10073 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
10074
10075 // The immediate is in the range [-128, 127], but the macro is able to
10076 // synthesise unencodable immediates.
10077 // B-sized lanes cannot take an immediate out of the range [-128, 127].
10078 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
10079 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
10080 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
10081}
10082
10083TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
10084 int in_b[] = {0, 255, 127, 0x80, 1, 55};
10085 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
10086 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
10087 int64_t in_d[] = {1, 10, 10000, 1000000};
10088
10089 IntWideImmFn fn = &MacroAssembler::Umax;
10090
10091 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
10092 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
10093 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
10094 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
10095
10096 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
10097 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
10098 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
10099 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
10100
10101 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
10102 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
10103 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
10104
10105 // The immediate is in the range [0, 255], but the macro is able to
10106 // synthesise unencodable immediates.
10107 // B-sized lanes cannot take an immediate out of the range [0, 255].
10108 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
10109 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
10110 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
10111}
10112
10113TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
10114 int in_b[] = {0, 255, 127, 0x80, 1, 55};
10115 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
10116 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
10117 int64_t in_d[] = {1, 10, 10000, 1000000};
10118
10119 IntWideImmFn fn = &MacroAssembler::Umin;
10120
10121 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
10122 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
10123 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
10124 int64_t exp_d_1[] = {1, 10, 99, 99};
10125
10126 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
10127 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
10128 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
10129 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
10130
10131 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
10132 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
10133 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
10134
10135 // The immediate is in the range [0, 255], but the macro is able to
10136 // synthesise unencodable immediates.
10137 // B-sized lanes cannot take an immediate out of the range [0, 255].
10138 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
10139 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
10140 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
10141}
10142
10143TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
10144 int in_b[] = {11, -1, 7, -3};
10145 int in_h[] = {111, -1, 17, -123};
10146 int in_s[] = {11111, -1, 117, -12345};
10147 int64_t in_d[] = {0x7fffffff, 0x80000000};
10148
10149 IntWideImmFn fn = &MacroAssembler::Mul;
10150
10151 int exp_b_1[] = {66, -6, 42, -18};
10152 int exp_h_1[] = {-14208, 128, -2176, 15744};
10153 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
10154 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
10155
10156 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
10157 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
10158 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
10159 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
10160
10161 int exp_h_2[] = {-28305, 255, -4335, 31365};
10162 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
10163 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
10164
10165 // The immediate is in the range [-128, 127], but the macro is able to
10166 // synthesise unencodable immediates.
10167 // B-sized lanes cannot take an immediate out of the range [-128, 127].
10168 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
10169 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
10170 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
10171
10172 // Integer overflow on multiplication.
10173 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
10174
10175 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
10176}
10177
10178TEST_SVE(sve_int_wide_imm_unpredicated_add) {
10179 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
10180 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
10181 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
10182 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
10183
10184 IntWideImmFn fn = &MacroAssembler::Add;
10185
10186 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
10187 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
10188 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
10189 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
10190
10191 // Encodable with `add` (shift 0).
10192 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
10193 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
10194 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
10195 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
10196
10197 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
10198 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
10199 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
10200
10201 // Encodable with `add` (shift 8).
10202 // B-sized lanes cannot take a shift of 8.
10203 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
10204 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
10205 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
10206
10207 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
10208
10209 // The macro is able to synthesise unencodable immediates.
10210 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +010010211
10212 unsigned exp_b_4[] = {0x61, 0x5f, 0xf0, 0xdf};
10213 unsigned exp_h_4[] = {0x6181, 0x5f7f, 0xf010, 0x8aaa};
10214 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
10215 uint64_t exp_d_4[] = {0x8000000180018180, 0x7fffffff7fff7f7e};
10216
10217 // Negative immediates use `sub`.
10218 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
10219 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
10220 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
10221 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010222}
10223
10224TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
10225 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
10226 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
10227 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
10228 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
10229
10230 IntWideImmFn fn = &MacroAssembler::Sqadd;
10231
Jacob Bramleyb28f6172019-10-02 12:12:35 +010010232 unsigned exp_b_1[] = {0x02, 0x7f, 0x7f, 0x7f};
TatWai Chong6995bfd2019-09-26 10:48:05 +010010233 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
10234 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
10235 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
10236
10237 // Encodable with `sqadd` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +010010238 // Note that encodable immediates are unsigned, even for signed saturation.
10239 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010240 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
10241 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +010010242 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010243
10244 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
10245 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
10246 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
10247
10248 // Encodable with `sqadd` (shift 8).
10249 // B-sized lanes cannot take a shift of 8.
10250 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
10251 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
10252 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010253}
10254
10255TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
10256 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
10257 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
10258 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
10259 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
10260
10261 IntWideImmFn fn = &MacroAssembler::Uqadd;
10262
10263 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
10264 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
10265 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
10266 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
10267
10268 // Encodable with `uqadd` (shift 0).
10269 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
10270 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
10271 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
10272 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
10273
10274 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
10275 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
10276 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
10277
10278 // Encodable with `uqadd` (shift 8).
10279 // B-sized lanes cannot take a shift of 8.
10280 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
10281 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
10282 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010283}
10284
10285TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
10286 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
10287 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
10288 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
10289 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
10290
10291 IntWideImmFn fn = &MacroAssembler::Sub;
10292
10293 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
10294 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
10295 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
10296 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
10297
10298 // Encodable with `sub` (shift 0).
10299 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
10300 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
10301 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
10302 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
10303
10304 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
10305 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
10306 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
10307
10308 // Encodable with `sub` (shift 8).
10309 // B-sized lanes cannot take a shift of 8.
10310 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
10311 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
10312 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
10313
10314 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
10315
10316 // The macro is able to synthesise unencodable immediates.
10317 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +010010318
10319 unsigned exp_b_4[] = {0xa1, 0x9f, 0x30, 0x1f};
10320 unsigned exp_h_4[] = {0xa181, 0x9f7f, 0x3010, 0xcaaa};
10321 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
10322 uint64_t exp_d_4[] = {0x8000000180018182, 0x7fffffff7fff7f80};
10323
10324 // Negative immediates use `add`.
10325 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
10326 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
10327 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
10328 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010329}
10330
10331TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
10332 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
10333 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
10334 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
10335 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
10336
10337 IntWideImmFn fn = &MacroAssembler::Sqsub;
10338
Jacob Bramleyb28f6172019-10-02 12:12:35 +010010339 unsigned exp_b_1[] = {0x80, 0xfe, 0x8f, 0x80};
TatWai Chong6995bfd2019-09-26 10:48:05 +010010340 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
10341 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
10342 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
10343
10344 // Encodable with `sqsub` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +010010345 // Note that encodable immediates are unsigned, even for signed saturation.
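  // As a worked example for the B lanes below: the input 0x81 is -127 as a
  // signed byte, so subtracting the unsigned immediate 129 would give -256,
  // which saturates to the signed minimum, 0x80.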
10346 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010347 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
10348 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +010010349 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010350
10351 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
10352 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
10353 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
10354
10355 // Encodable with `sqsub` (shift 8).
10356 // B-sized lanes cannot take a shift of 8.
10357 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
10358 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
10359 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010360}
10361
10362TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
10363 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
10364 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
10365 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
10366 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
10367
10368 IntWideImmFn fn = &MacroAssembler::Uqsub;
10369
10370 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
10371 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
10372 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
10373 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
10374
10375 // Encodable with `uqsub` (shift 0).
10376 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
10377 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
10378 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
10379 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
10380
10381 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
10382 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
10383 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
10384
10385 // Encodable with `uqsub` (shift 8).
10386 // B-sized lanes cannot take a shift of 8.
10387 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
10388 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
10389 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +010010390}
10391
10392TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
10393 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10394 START();
10395
10396 // Encodable with `subr` (shift 0).
10397 __ Index(z0.VnD(), 1, 1);
10398 __ Sub(z0.VnD(), 100, z0.VnD());
10399 __ Index(z1.VnS(), 0x7f, 1);
10400 __ Sub(z1.VnS(), 0xf7, z1.VnS());
10401 __ Index(z2.VnH(), 0xaaaa, 0x2222);
10402 __ Sub(z2.VnH(), 0x80, z2.VnH());
10403 __ Index(z3.VnB(), 133, 1);
10404 __ Sub(z3.VnB(), 255, z3.VnB());
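  // For example, z0 above starts as {1, 2, 3, ...} (lane 0 first), so the
  // reversed subtraction 100 - z0 yields {99, 98, 97, ...}.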
10405
10406 // Encodable with `subr` (shift 8).
10407 __ Index(z4.VnD(), 256, -1);
10408 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
10409 __ Index(z5.VnS(), 0x7878, 1);
10410 __ Sub(z5.VnS(), 0x8000, z5.VnS());
10411 __ Index(z6.VnH(), 0x30f0, -1);
10412 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
10413 // B-sized lanes cannot take a shift of 8.
10414
  // The destination is distinct from the source, so the macro has to select a
  // movprfx form.
10416 __ Index(z31.VnD(), 256, 4001);
10417 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
10418
  // Outside the encodable immediate range of `sub`.
10420 __ Index(z30.VnS(), 0x11223344, 1);
10421 __ Sub(z8.VnS(), 0x88776655, z30.VnS());
10422
10423 END();
10424
10425 if (CAN_RUN()) {
10426 RUN();
10427
10428 int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
10429 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
10430
10431 int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
10432 ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
10433
10434 int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
10435 ASSERT_EQUAL_SVE(expected_z2, z2.VnH());
10436
10437 int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
10438 ASSERT_EQUAL_SVE(expected_z3, z3.VnB());
10439
10440 int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
10441 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
10442
10443 int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
10444 ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
10445
10446 int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
10447 ASSERT_EQUAL_SVE(expected_z6, z6.VnH());
10448
10449 int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
10450 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
10451
10452 int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
10453 ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
10454 }
10455}
10456
10457TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
10458 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10459 START();
10460
10461 // Immediates which can be encoded in the instructions.
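  // (The encodable set is the usual AArch64 8-bit FP immediate: a sign bit, a
  // 3-bit exponent and a 4-bit fraction, covering roughly +/-0.125 to +/-31.0
  // in coarse steps. Anything else has to be synthesised, as below.)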
10462 __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
10463 __ Fdup(z1.VnS(), Float16(2.0));
10464 __ Fdup(z2.VnD(), Float16(3.875));
10465 __ Fdup(z3.VnH(), 8.0f);
10466 __ Fdup(z4.VnS(), -4.75f);
10467 __ Fdup(z5.VnD(), 0.5f);
10468 __ Fdup(z6.VnH(), 1.0);
10469 __ Fdup(z7.VnS(), 2.125);
10470 __ Fdup(z8.VnD(), -13.0);
10471
10472 // Immediates which cannot be encoded in the instructions.
10473 __ Fdup(z10.VnH(), Float16(0.0));
10474 __ Fdup(z11.VnH(), kFP16PositiveInfinity);
10475 __ Fdup(z12.VnS(), 255.0f);
10476 __ Fdup(z13.VnS(), kFP32NegativeInfinity);
10477 __ Fdup(z14.VnD(), 12.3456);
10478 __ Fdup(z15.VnD(), kFP64PositiveInfinity);
10479
10480 END();
10481
10482 if (CAN_RUN()) {
10483 RUN();
10484
10485 ASSERT_EQUAL_SVE(0xc500, z0.VnH());
10486 ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
10487 ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
10488 ASSERT_EQUAL_SVE(0x4800, z3.VnH());
10489 ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
10490 ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
10491 ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
10492 ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
10493 ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());
10494
10495 ASSERT_EQUAL_SVE(0x0000, z10.VnH());
10496 ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
10497 ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
10498 ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
10499 ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
10500 ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
10501 }
10502}
10503
TatWai Chong6f111bc2019-10-07 09:20:37 +010010504TEST_SVE(sve_andv_eorv_orv) {
10505 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10506 START();
10507
10508 uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
10509 InsrHelper(&masm, z31.VnD(), in);
10510
10511 // For simplicity, we re-use the same pg for various lane sizes.
10512 // For D lanes: 1, 1, 0
10513 // For S lanes: 1, 1, 1, 0, 0
10514 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
10515 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
10516 Initialise(&masm, p0.VnB(), pg_in);
10517
10518 // Make a copy so we can check that constructive operations preserve zn.
10519 __ Mov(z0, z31);
10520 __ Andv(b0, p0, z0.VnB()); // destructive
10521 __ Andv(h1, p0, z31.VnH());
10522 __ Mov(z2, z31);
10523 __ Andv(s2, p0, z2.VnS()); // destructive
10524 __ Andv(d3, p0, z31.VnD());
10525
10526 __ Eorv(b4, p0, z31.VnB());
10527 __ Mov(z5, z31);
10528 __ Eorv(h5, p0, z5.VnH()); // destructive
10529 __ Eorv(s6, p0, z31.VnS());
10530 __ Mov(z7, z31);
10531 __ Eorv(d7, p0, z7.VnD()); // destructive
10532
10533 __ Mov(z8, z31);
10534 __ Orv(b8, p0, z8.VnB()); // destructive
10535 __ Orv(h9, p0, z31.VnH());
10536 __ Mov(z10, z31);
10537 __ Orv(s10, p0, z10.VnS()); // destructive
10538 __ Orv(d11, p0, z31.VnD());
10539
10540 END();
10541
10542 if (CAN_RUN()) {
10543 RUN();
10544
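    // The `in` and `pg_in` arrays only describe the lowest lanes, so the
    // expected values depend on whether those arrays cover the whole vector
    // (short vector lengths) or just its low part (longer vector lengths).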
10545 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
10546 ASSERT_EQUAL_64(0x10, d0);
10547 ASSERT_EQUAL_64(0x1010, d1);
10548 ASSERT_EQUAL_64(0x33331111, d2);
10549 ASSERT_EQUAL_64(0x7777555533331111, d3);
10550 ASSERT_EQUAL_64(0xbf, d4);
10551 ASSERT_EQUAL_64(0xedcb, d5);
10552 ASSERT_EQUAL_64(0x44444444, d6);
10553 ASSERT_EQUAL_64(0x7777555533331111, d7);
10554 ASSERT_EQUAL_64(0xff, d8);
10555 ASSERT_EQUAL_64(0xffff, d9);
10556 ASSERT_EQUAL_64(0x77775555, d10);
10557 ASSERT_EQUAL_64(0x7777555533331111, d11);
10558 } else {
10559 ASSERT_EQUAL_64(0, d0);
10560 ASSERT_EQUAL_64(0x0010, d1);
10561 ASSERT_EQUAL_64(0x00110011, d2);
10562 ASSERT_EQUAL_64(0x0011001100110011, d3);
10563 ASSERT_EQUAL_64(0x62, d4);
10564 ASSERT_EQUAL_64(0x0334, d5);
10565 ASSERT_EQUAL_64(0x8899aabb, d6);
10566 ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
10567 ASSERT_EQUAL_64(0xff, d8);
10568 ASSERT_EQUAL_64(0xffff, d9);
10569 ASSERT_EQUAL_64(0xffffffff, d10);
10570 ASSERT_EQUAL_64(0xffffffffffffffff, d11);
10571 }
10572
    // Check that the Z register lanes above the top of the V register are all
    // clear.
10574 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
10575 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
10576 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
10577 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
10578 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
10579 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
10580 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
10581 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
10582 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
10583 ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
10584 ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
10585 ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
10586 ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
10587 }
10588 }
10589}
10590
TatWai Chongb2d8d1f2019-10-21 15:19:31 -070010591
10592TEST_SVE(sve_saddv_uaddv) {
10593 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10594 START();
10595
10596 uint64_t in[] = {0x8899aabbccddeeff, 0x8182838485868788, 0x0807060504030201};
10597 InsrHelper(&masm, z31.VnD(), in);
10598
10599 // For simplicity, we re-use the same pg for various lane sizes.
10600 // For D lanes: 1, 1, 0
10601 // For S lanes: 1, 1, 1, 0, 0
10602 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
10603 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
10604 Initialise(&masm, p0.VnB(), pg_in);
10605
10606 // Make a copy so we can check that constructive operations preserve zn.
10607 __ Mov(z0, z31);
10608 __ Saddv(b0, p0, z0.VnB()); // destructive
10609 __ Saddv(h1, p0, z31.VnH());
10610 __ Mov(z2, z31);
10611 __ Saddv(s2, p0, z2.VnS()); // destructive
10612
10613 __ Uaddv(b4, p0, z31.VnB());
10614 __ Mov(z5, z31);
10615 __ Uaddv(h5, p0, z5.VnH()); // destructive
10616 __ Uaddv(s6, p0, z31.VnS());
10617 __ Mov(z7, z31);
10618 __ Uaddv(d7, p0, z7.VnD()); // destructive
10619
10620 END();
10621
10622 if (CAN_RUN()) {
10623 RUN();
10624
10625 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
10626 // Saddv
10627 ASSERT_EQUAL_64(0xfffffffffffffda9, d0);
10628 ASSERT_EQUAL_64(0xfffffffffffe9495, d1);
10629 ASSERT_EQUAL_64(0xffffffff07090b0c, d2);
10630 // Uaddv
10631 ASSERT_EQUAL_64(0x00000000000002a9, d4);
10632 ASSERT_EQUAL_64(0x0000000000019495, d5);
10633 ASSERT_EQUAL_64(0x0000000107090b0c, d6);
10634 ASSERT_EQUAL_64(0x8182838485868788, d7);
10635 } else {
10636 // Saddv
10637 ASSERT_EQUAL_64(0xfffffffffffffd62, d0);
10638 ASSERT_EQUAL_64(0xfffffffffffe8394, d1);
10639 ASSERT_EQUAL_64(0xfffffffed3e6fa0b, d2);
10640 // Uaddv
10641 ASSERT_EQUAL_64(0x0000000000000562, d4);
10642 ASSERT_EQUAL_64(0x0000000000028394, d5);
10643 ASSERT_EQUAL_64(0x00000001d3e6fa0b, d6);
10644 ASSERT_EQUAL_64(0x0a1c2e4052647687, d7);
10645 }
10646
    // Check that the Z register lanes above the top of the V register are all
    // clear.
10648 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
10649 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
10650 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
10651 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
10652 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
10653 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
10654 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
10655 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
10656 }
10657 }
10658}
10659
10660
10661TEST_SVE(sve_sminv_uminv) {
10662 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10663 START();
10664
10665 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
10666 InsrHelper(&masm, z31.VnD(), in);
10667
10668 // For simplicity, we re-use the same pg for various lane sizes.
10669 // For D lanes: 1, 0, 1
10670 // For S lanes: 1, 1, 0, 0, 1
10671 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
10672 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
10673 Initialise(&masm, p0.VnB(), pg_in);
10674
10675 // Make a copy so we can check that constructive operations preserve zn.
10676 __ Mov(z0, z31);
10677 __ Sminv(b0, p0, z0.VnB()); // destructive
10678 __ Sminv(h1, p0, z31.VnH());
10679 __ Mov(z2, z31);
10680 __ Sminv(s2, p0, z2.VnS()); // destructive
10681 __ Sminv(d3, p0, z31.VnD());
10682
10683 __ Uminv(b4, p0, z31.VnB());
10684 __ Mov(z5, z31);
10685 __ Uminv(h5, p0, z5.VnH()); // destructive
10686 __ Uminv(s6, p0, z31.VnS());
10687 __ Mov(z7, z31);
10688 __ Uminv(d7, p0, z7.VnD()); // destructive
10689
10690 END();
10691
10692 if (CAN_RUN()) {
10693 RUN();
10694
10695 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
10696 // Sminv
10697 ASSERT_EQUAL_64(0xaa, d0);
10698 ASSERT_EQUAL_64(0xaabb, d1);
10699 ASSERT_EQUAL_64(0xaabbfc00, d2);
      // The lane holding the smaller value is inactive.
      ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
10701 // Uminv
10702 ASSERT_EQUAL_64(0, d4);
10703 ASSERT_EQUAL_64(0x2233, d5);
10704 ASSERT_EQUAL_64(0x112233, d6);
      // The lane holding the smaller value is inactive.
      ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
10706 } else {
10707 // Sminv
10708 ASSERT_EQUAL_64(0xaa, d0);
10709 ASSERT_EQUAL_64(0xaaaa, d1);
10710 ASSERT_EQUAL_64(0xaaaaaaaa, d2);
10711 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d3);
10712 // Uminv
10713 ASSERT_EQUAL_64(0, d4);
10714 ASSERT_EQUAL_64(0x2233, d5);
10715 ASSERT_EQUAL_64(0x112233, d6);
10716 ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
10717 }
10718
    // Check that the Z register lanes above the top of the V register are all
    // clear.
10720 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
10721 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
10722 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
10723 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
10724 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
10725 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
10726 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
10727 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
10728 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
10729 }
10730 }
10731}
10732
10733TEST_SVE(sve_smaxv_umaxv) {
10734 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10735 START();
10736
10737 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
10738 InsrHelper(&masm, z31.VnD(), in);
10739
10740 // For simplicity, we re-use the same pg for various lane sizes.
10741 // For D lanes: 1, 0, 1
10742 // For S lanes: 1, 1, 0, 0, 1
10743 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
10744 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
10745 Initialise(&masm, p0.VnB(), pg_in);
10746
10747 // Make a copy so we can check that constructive operations preserve zn.
10748 __ Mov(z0, z31);
10749 __ Smaxv(b0, p0, z0.VnB()); // destructive
10750 __ Smaxv(h1, p0, z31.VnH());
10751 __ Mov(z2, z31);
10752 __ Smaxv(s2, p0, z2.VnS()); // destructive
10753 __ Smaxv(d3, p0, z31.VnD());
10754
10755 __ Umaxv(b4, p0, z31.VnB());
10756 __ Mov(z5, z31);
10757 __ Umaxv(h5, p0, z5.VnH()); // destructive
10758 __ Umaxv(s6, p0, z31.VnS());
10759 __ Mov(z7, z31);
10760 __ Umaxv(d7, p0, z7.VnD()); // destructive
10761
10762 END();
10763
10764 if (CAN_RUN()) {
10765 RUN();
10766
10767 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
10768 // Smaxv
10769 ASSERT_EQUAL_64(0x33, d0);
10770 ASSERT_EQUAL_64(0x44aa, d1);
10771 ASSERT_EQUAL_64(0x112233, d2);
10772 ASSERT_EQUAL_64(0x112233aabbfc00, d3);
10773 // Umaxv
10774 ASSERT_EQUAL_64(0xfe, d4);
10775 ASSERT_EQUAL_64(0xfc00, d5);
10776 ASSERT_EQUAL_64(0xaabbfc00, d6);
10777 ASSERT_EQUAL_64(0x112233aabbfc00, d7);
10778 } else {
10779 // Smaxv
10780 ASSERT_EQUAL_64(0x33, d0);
10781 ASSERT_EQUAL_64(0x44aa, d1);
10782 ASSERT_EQUAL_64(0x112233, d2);
10783 ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
10784 // Umaxv
10785 ASSERT_EQUAL_64(0xfe, d4);
10786 ASSERT_EQUAL_64(0xfc00, d5);
10787 ASSERT_EQUAL_64(0xaabbfc00, d6);
10788 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d7);
10789 }
10790
    // Check that the Z register lanes above the top of the V register are all
    // clear.
10792 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
10793 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
10794 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
10795 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
10796 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
10797 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
10798 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
10799 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
10800 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
10801 }
10802 }
10803}
10804
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010805template <typename T, size_t M, size_t N>
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010806static void SdotUdotHelper(Test* config,
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010807 unsigned lane_size_in_bits,
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010808 const T (&zd_inputs)[M],
10809 const T (&za_inputs)[M],
10810 const T (&zn_inputs)[N],
10811 const T (&zm_inputs)[N],
10812 const T (&zd_expected)[M],
10813 const T (&zdnm_expected)[M],
10814 bool is_signed,
10815 int index = -1) {
10816 VIXL_STATIC_ASSERT(N == (M * 4));
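  // Each destination lane of a dot product accumulates four products of
  // quarter-width source lanes, hence the 4:1 ratio between the input arrays.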
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010817 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10818 START();
10819
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010820 auto dot_fn = [&](const ZRegister& zd,
10821 const ZRegister& za,
10822 const ZRegister& zn,
10823 const ZRegister& zm,
10824 bool is_signed,
10825 int index) {
10826 if (is_signed) {
10827 if (index < 0) {
10828 __ Sdot(zd, za, zn, zm);
10829 } else {
10830 __ Sdot(zd, za, zn, zm, index);
10831 }
10832 } else {
10833 if (index < 0) {
10834 __ Udot(zd, za, zn, zm);
10835 } else {
10836 __ Udot(zd, za, zn, zm, index);
10837 }
10838 }
10839 };
10840
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010841 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
10842 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
10843 ZRegister zn = z2.WithLaneSize(lane_size_in_bits / 4);
10844 ZRegister zm = z3.WithLaneSize(lane_size_in_bits / 4);
10845
10846 InsrHelper(&masm, zd, zd_inputs);
10847 InsrHelper(&masm, za, za_inputs);
10848 InsrHelper(&masm, zn, zn_inputs);
10849 InsrHelper(&masm, zm, zm_inputs);
10850
10851 // The Dot macro handles arbitrarily-aliased registers in the argument list.
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010852 ZRegister dm_result = z4.WithLaneSize(lane_size_in_bits);
10853 ZRegister dnm_result = z5.WithLaneSize(lane_size_in_bits);
10854 ZRegister da_result = z6.WithLaneSize(lane_size_in_bits);
10855 ZRegister dn_result = z7.WithLaneSize(lane_size_in_bits);
10856 ZRegister d_result = z8.WithLaneSize(lane_size_in_bits);
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010857
10858 __ Mov(da_result, za);
10859 // zda = zda + (zn . zm)
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010860 dot_fn(da_result, da_result, zn, zm, is_signed, index);
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010861
10862 __ Mov(dn_result, zn);
10863 // zdn = za + (zdn . zm)
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010864 dot_fn(dn_result, za, dn_result.WithSameLaneSizeAs(zn), zm, is_signed, index);
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010865
10866 __ Mov(dm_result, zm);
10867 // zdm = za + (zn . zdm)
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010868 dot_fn(dm_result, za, zn, dm_result.WithSameLaneSizeAs(zm), is_signed, index);
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010869
10870 __ Mov(d_result, zd);
10871 // zd = za + (zn . zm)
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010872 dot_fn(d_result, za, zn, zm, is_signed, index);
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010873
10874 __ Mov(dnm_result, zn);
  // zdnm = za + (zdnm . zdnm)
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010876 dot_fn(dnm_result,
10877 za,
10878 dnm_result.WithSameLaneSizeAs(zn),
10879 dnm_result.WithSameLaneSizeAs(zm),
10880 is_signed,
10881 index);
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010882
10883 END();
10884
10885 if (CAN_RUN()) {
10886 RUN();
10887
10888 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
10889 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits / 4));
10890 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits / 4));
10891
10892 ASSERT_EQUAL_SVE(zd_expected, da_result);
10893 ASSERT_EQUAL_SVE(zd_expected, dn_result);
10894 ASSERT_EQUAL_SVE(zd_expected, dm_result);
10895 ASSERT_EQUAL_SVE(zd_expected, d_result);
10896
10897 ASSERT_EQUAL_SVE(zdnm_expected, dnm_result);
10898 }
10899}
10900
10901TEST_SVE(sve_sdot) {
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010902 int64_t zd_inputs[] = {0x33, 0xee, 0xff};
10903 int64_t za_inputs[] = {INT32_MAX, -3, 2};
10904 int64_t zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
10905 int64_t zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010906
10907 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010908 int64_t zd_expected_s[] = {-2147418113, -183, 133}; // 0x8000ffff
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010909 int64_t zd_expected_d[] = {2147549183, -183, 133}; // 0x8000ffff
10910
10911 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010912 int64_t zdnm_expected_s[] = {-2147418113, 980, 572};
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010913 int64_t zdnm_expected_d[] = {2147549183, 980, 572};
10914
10915 SdotUdotHelper(config,
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010916 kSRegSize,
10917 zd_inputs,
10918 za_inputs,
10919 zn_inputs,
10920 zm_inputs,
10921 zd_expected_s,
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010922 zdnm_expected_s,
10923 true);
10924
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010925 SdotUdotHelper(config,
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010926 kDRegSize,
10927 zd_inputs,
10928 za_inputs,
10929 zn_inputs,
10930 zm_inputs,
10931 zd_expected_d,
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010932 zdnm_expected_d,
10933 true);
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010934}
10935
10936TEST_SVE(sve_udot) {
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010937 int64_t zd_inputs[] = {0x33, 0xee, 0xff};
10938 int64_t za_inputs[] = {INT32_MAX, -3, 2};
10939 int64_t zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
10940 int64_t zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010941
10942 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010943 int64_t zd_expected_s[] = {0x8000ffff, 0x00001749, 0x0000f085};
10944 int64_t zd_expected_d[] = {0x000000047c00ffff,
10945 0x000000000017ff49,
10946 0x00000000fff00085};
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010947
10948 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010949 int64_t zdnm_expected_s[] = {0x8000ffff, 0x000101d4, 0x0001d03c};
10950 int64_t zdnm_expected_d[] = {0x000000047c00ffff,
10951 0x00000000fffe03d4,
10952 0x00000001ffce023c};
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010953
10954 SdotUdotHelper(config,
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010955 kSRegSize,
10956 zd_inputs,
10957 za_inputs,
10958 zn_inputs,
10959 zm_inputs,
10960 zd_expected_s,
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010961 zdnm_expected_s,
10962 false);
10963
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010964 SdotUdotHelper(config,
TatWai Chong4d2a4e92019-10-23 16:19:32 -070010965 kDRegSize,
10966 zd_inputs,
10967 za_inputs,
10968 zn_inputs,
10969 zm_inputs,
10970 zd_expected_d,
TatWai Chongfa3f6bf2020-03-13 00:22:03 -070010971 zdnm_expected_d,
10972 false);
10973}
10974
10975TEST_SVE(sve_sdot_indexed_s) {
10976 int64_t zd_inputs[] = {0xff, 0xff, 0xff, 0xff};
10977 int64_t za_inputs[] = {0, 1, 2, 3};
10978 int64_t zn_inputs[] =
10979 {-1, -1, -1, -1, -2, -2, -2, -2, -3, -3, -3, -3, -4, -4, -4, -4};
10980 int64_t zm_inputs[] =
10981 {127, 127, 127, 127, -128, -128, -128, -128, -1, -1, -1, -1, 0, 0, 0, 0};
10982
10983 constexpr int s = kQRegSize / kSRegSize;
10984
10985 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
10986 int64_t zd_expected_s[][s] = {{0, 1, 2, 3}, // Generated from zm[0]
10987 {4, 9, 14, 19},
10988 {512, 1025, 1538, 2051},
10989 {-508, -1015, -1522, -2029}};
10990
10991 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
10992 int64_t zdnm_expected_s[][s] = {{16, 33, 50, 67},
10993 {12, 25, 38, 51},
10994 {8, 17, 26, 35},
10995 {4, 9, 14, 19}};
10996
10997 for (unsigned i = 0; i < s; i++) {
10998 SdotUdotHelper(config,
10999 kSRegSize,
11000 zd_inputs,
11001 za_inputs,
11002 zn_inputs,
11003 zm_inputs,
11004 zd_expected_s[i],
11005 zdnm_expected_s[i],
11006 true,
11007 i);
11008 }
11009}
11010
11011TEST_SVE(sve_sdot_indexed_d) {
11012 int64_t zd_inputs[] = {0xff, 0xff};
11013 int64_t za_inputs[] = {0, 1};
11014 int64_t zn_inputs[] = {-1, -1, -1, -1, -1, -1, -1, -1};
11015 int64_t zm_inputs[] = {-128, -128, -128, -128, 127, 127, 127, 127};
11016
11017 constexpr int d = kQRegSize / kDRegSize;
11018
11019 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
11020 int64_t zd_expected_d[][d] = {{-508, -507}, // Generated from zm[0]
11021 {512, 513}};
11022
11023 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
11024 int64_t zdnm_expected_d[][d] = {{4, 5}, {4, 5}};
11025
11026 for (unsigned i = 0; i < d; i++) {
11027 SdotUdotHelper(config,
11028 kDRegSize,
11029 zd_inputs,
11030 za_inputs,
11031 zn_inputs,
11032 zm_inputs,
11033 zd_expected_d[i],
11034 zdnm_expected_d[i],
11035 true,
11036 i);
11037 }
11038}
11039
11040TEST_SVE(sve_udot_indexed_s) {
11041 int64_t zd_inputs[] = {0xff, 0xff, 0xff, 0xff};
11042 int64_t za_inputs[] = {0, 1, 2, 3};
11043 int64_t zn_inputs[] = {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4};
11044 int64_t zm_inputs[] =
11045 {127, 127, 127, 127, 255, 255, 255, 255, 1, 1, 1, 1, 0, 0, 0, 0};
11046
11047 constexpr int s = kQRegSize / kSRegSize;
11048
11049 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
11050 int64_t zd_expected_s[][s] = {{0, 1, 2, 3},
11051 {4, 9, 14, 19},
11052 {1020, 2041, 3062, 4083},
11053 {508, 1017, 1526, 2035}};
11054
11055 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
11056 int64_t zdnm_expected_s[][s] = {{16, 33, 50, 67},
11057 {12, 25, 38, 51},
11058 {8, 17, 26, 35},
11059 {4, 9, 14, 19}};
11060
11061 for (unsigned i = 0; i < s; i++) {
11062 SdotUdotHelper(config,
11063 kSRegSize,
11064 zd_inputs,
11065 za_inputs,
11066 zn_inputs,
11067 zm_inputs,
11068 zd_expected_s[i],
11069 zdnm_expected_s[i],
11070 false,
11071 i);
11072 }
11073}
11074
11075TEST_SVE(sve_udot_indexed_d) {
11076 int64_t zd_inputs[] = {0xff, 0xff};
11077 int64_t za_inputs[] = {0, 1};
11078 int64_t zn_inputs[] = {1, 1, 1, 1, 1, 1, 1, 1};
11079 int64_t zm_inputs[] = {255, 255, 255, 255, 127, 127, 127, 127};
11080
11081 constexpr int d = kQRegSize / kDRegSize;
11082
11083 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
11084 int64_t zd_expected_d[][d] = {{508, 509}, {1020, 1021}};
11085
11086 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
11087 int64_t zdnm_expected_d[][d] = {{4, 5}, {4, 5}};
11088
11089 for (unsigned i = 0; i < d; i++) {
11090 SdotUdotHelper(config,
11091 kDRegSize,
11092 zd_inputs,
11093 za_inputs,
11094 zn_inputs,
11095 zm_inputs,
11096 zd_expected_d[i],
11097 zdnm_expected_d[i],
11098 false,
11099 i);
11100 }
11101}
11102
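// Set each lane of `dst` to the corresponding lane of `src` plus the index of
// the 128-bit segment that the lane lives in, i.e.
// dst[i] = src[i] + (i / lanes_per_segment).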
11103static void IntSegmentPatternHelper(MacroAssembler* masm,
11104 const ZRegister& dst,
11105 const ZRegister& src) {
11106 VIXL_ASSERT(AreSameLaneSize(dst, src));
11107 UseScratchRegisterScope temps(masm);
11108 ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(dst);
11109 masm->Index(ztmp, 0, 1);
11110 masm->Asr(ztmp, ztmp, kQRegSizeInBytesLog2 - dst.GetLaneSizeInBytesLog2());
11111 masm->Add(dst, src, ztmp);
11112}
11113
11114TEST_SVE(sve_sdot_udot_indexed_s) {
11115 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11116 START();
11117
11118 const int multiplier = 2;
11119 __ Dup(z9.VnS(), multiplier);
11120
11121 __ Ptrue(p0.VnB());
11122 __ Index(z29.VnS(), 4, 1);
11123
11124 // z29 = [... 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0]
11125 __ And(z29.VnS(), z29.VnS(), 3);
11126
11127 // p7 = [... 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]
11128 __ Cmple(p7.VnS(), p0.Zeroing(), z29.VnS(), 0);
11129
11130 // p6 = [... 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
11131 __ Cmple(p6.VnS(), p0.Zeroing(), z29.VnS(), 1);
11132
11133 // p5 = [... 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1]
11134 __ Cmple(p5.VnS(), p0.Zeroing(), z29.VnS(), 2);
11135
11136 __ Index(z28.VnB(), 1, 1);
11137 __ Dup(z27.VnS(), z28.VnS(), 0);
11138
11139 // z27 = [... 3, 2, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1]
11140 IntSegmentPatternHelper(&masm, z27.VnB(), z27.VnB());
11141
11142 // z27 = [... 6, 4, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 8, 6, 4, 2]
11143 __ Mul(z27.VnS(), p7.Merging(), z27.VnS(), z9.VnS());
11144
11145 // z27 = [... 12, 8, 4, 3, 2, 1, 4, 3, 2, 1, 8, 6, 4, 2, 16, 12, 8, 4]
11146 __ Mul(z27.VnS(), p6.Merging(), z27.VnS(), z9.VnS());
11147
11148 // 2nd segment | 1st segment |
11149 // v v
11150 // z27 = [... 24, 16, 4, 3, 2, 1, 8, 6, 4, 2, 16, 12, 8, 4, 32, 24, 16, 8]
11151 __ Mul(z27.VnS(), p5.Merging(), z27.VnS(), z9.VnS());
11152
11153 __ Dup(z0.VnS(), 0);
11154 __ Dup(z1.VnS(), 0);
11155 __ Dup(z2.VnS(), 0);
11156 __ Dup(z3.VnS(), 0);
11157 __ Dup(z4.VnS(), 0);
11158 __ Dup(z5.VnS(), 0);
11159
  // Skip the lanes from the 129th one onwards, since the values of those lanes
  // overflow after the number sequence is created by `Index`.
11162 __ Cmpls(p3.VnB(), p0.Zeroing(), z28.VnB(), 128);
11163 __ Mov(z0.VnB(), p3.Merging(), z27.VnB());
11164 __ Mov(z1.VnB(), p3.Merging(), z28.VnB());
11165
11166 __ Dup(z2.VnS(), 0);
11167 __ Dup(z3.VnS(), 0);
11168 __ Dup(z4.VnS(), 0);
11169 __ Dup(z5.VnS(), 0);
11170
11171 __ Udot(z2.VnS(), z2.VnS(), z1.VnB(), z0.VnB(), 0);
11172
11173 __ Udot(z3.VnS(), z3.VnS(), z1.VnB(), z0.VnB(), 1);
11174 __ Mul(z3.VnS(), z3.VnS(), 2);
11175
11176 __ Udot(z4.VnS(), z4.VnS(), z1.VnB(), z0.VnB(), 2);
11177 __ Mul(z4.VnS(), z4.VnS(), 4);
11178
11179 __ Udot(z5.VnS(), z5.VnS(), z1.VnB(), z0.VnB(), 3);
11180 __ Mul(z5.VnS(), z5.VnS(), 8);
11181
11182 __ Dup(z7.VnS(), 0);
11183 __ Dup(z8.VnS(), 0);
11184 __ Dup(z9.VnS(), 0);
11185 __ Dup(z10.VnS(), 0);
11186
11187 // Negate the all positive vector for testing signed dot.
11188 __ Neg(z6.VnB(), p0.Merging(), z0.VnB());
11189 __ Sdot(z7.VnS(), z7.VnS(), z1.VnB(), z6.VnB(), 0);
11190
11191 __ Sdot(z8.VnS(), z8.VnS(), z1.VnB(), z6.VnB(), 1);
11192 __ Mul(z8.VnS(), z8.VnS(), 2);
11193
11194 __ Sdot(z9.VnS(), z9.VnS(), z1.VnB(), z6.VnB(), 2);
11195 __ Mul(z9.VnS(), z9.VnS(), 4);
11196
11197 __ Sdot(z10.VnS(), z10.VnS(), z1.VnB(), z6.VnB(), 3);
11198 __ Mul(z10.VnS(), z10.VnS(), 8);
11199
11200 END();
11201
11202 if (CAN_RUN()) {
11203 RUN();
11204
    // Only the first 128-bit segment of the destination register is compared
    // against hard-coded values; the results of the other generated
    // instructions are used to check the remaining segments.
11207 // s_lane[0] = (1 * 8) + (2 * 16) + (3 * 24) + (4 * 32) = 240
11208 // ...
11209 // s_lane[3] = (13 * 8) + (14 * 16) + (15 * 24) + (16 * 32) = 1200
11210 int udot_expected[] = {1200, 880, 560, 240};
11211 ASSERT_EQUAL_SVE(udot_expected, z2.VnS());
11212 ASSERT_EQUAL_SVE(z2.VnS(), z3.VnS());
11213 ASSERT_EQUAL_SVE(z2.VnS(), z4.VnS());
11214 ASSERT_EQUAL_SVE(z2.VnS(), z5.VnS());
11215
11216 int sdot_expected[] = {-1200, -880, -560, -240};
11217 ASSERT_EQUAL_SVE(sdot_expected, z7.VnS());
11218 ASSERT_EQUAL_SVE(z7.VnS(), z8.VnS());
11219 ASSERT_EQUAL_SVE(z7.VnS(), z9.VnS());
11220 ASSERT_EQUAL_SVE(z7.VnS(), z10.VnS());
11221 }
11222}
11223
11224TEST_SVE(sve_sdot_udot_indexed_d) {
11225 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11226 START();
11227
11228 const int multiplier = 2;
11229 __ Dup(z9.VnD(), multiplier);
11230
11231 __ Ptrue(p0.VnD());
11232 __ Pfalse(p1.VnD());
11233
11234 // p2 = [..., 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
11235 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
11236
11237 __ Index(z1.VnH(), 1, 1);
11238 __ Dup(z0.VnD(), z1.VnD(), 0);
11239
11240 // z0 = [... 5, 4, 3, 2, 5, 4, 3, 2, 4, 3, 2, 1, 4, 3, 2, 1]
11241 IntSegmentPatternHelper(&masm, z0.VnH(), z0.VnH());
11242
11243 // 2nd segment | 1st segment |
11244 // v v
11245 // z0 = [... 5, 4, 3, 2, 10, 8, 6, 4, 4, 3, 2, 1, 8, 6, 4, 2]
11246 __ Mul(z0.VnD(), p2.Merging(), z0.VnD(), z9.VnD());
11247
11248 __ Dup(z3.VnD(), 0);
11249 __ Dup(z4.VnD(), 0);
11250
11251 __ Udot(z3.VnD(), z3.VnD(), z1.VnH(), z0.VnH(), 0);
11252
11253 __ Udot(z4.VnD(), z4.VnD(), z1.VnH(), z0.VnH(), 1);
11254 __ Mul(z4.VnD(), z4.VnD(), multiplier);
11255
11256 __ Dup(z12.VnD(), 0);
11257 __ Dup(z13.VnD(), 0);
11258
11259 __ Ptrue(p4.VnH());
11260 __ Neg(z10.VnH(), p4.Merging(), z0.VnH());
11261
11262 __ Sdot(z12.VnD(), z12.VnD(), z1.VnH(), z10.VnH(), 0);
11263
11264 __ Sdot(z13.VnD(), z13.VnD(), z1.VnH(), z10.VnH(), 1);
11265 __ Mul(z13.VnD(), z13.VnD(), multiplier);
11266
11267 END();
11268
11269 if (CAN_RUN()) {
11270 RUN();
11271
    // Only the first 128-bit segment of the destination register is compared
    // against hard-coded values; the results of the other generated
    // instructions are used to check the remaining segments.
11274 // d_lane[0] = (1 * 2) + (2 * 4) + (3 * 6) + (4 * 8) = 60
11275 // d_lane[1] = (5 * 2) + (6 * 4) + (7 * 6) + (8 * 8) = 140
11276 uint64_t udot_expected[] = {416, 304, 140, 60};
11277 ASSERT_EQUAL_SVE(udot_expected, z3.VnD());
11278 ASSERT_EQUAL_SVE(z3.VnD(), z4.VnD());
11279
11280 int64_t sdot_expected[] = {-416, -304, -140, -60};
11281 ASSERT_EQUAL_SVE(sdot_expected, z12.VnD());
11282 ASSERT_EQUAL_SVE(z12.VnD(), z13.VnD());
11283 }
TatWai Chong4d2a4e92019-10-23 16:19:32 -070011284}
11285
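// Convert an array of floating-point inputs to raw bit patterns of the
// requested lane size, so that they can be inserted into Z registers with
// InsrHelper.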
TatWai Chong7a0d3672019-10-23 17:35:18 -070011286template <typename T, size_t N>
11287static void FPToRawbitsWithSize(const T (&inputs)[N],
11288 uint64_t* outputs,
11289 unsigned size_in_bits) {
TatWai Chongfe536042019-10-23 16:34:11 -070011290 for (size_t i = 0; i < N; i++) {
TatWai Chong7a0d3672019-10-23 17:35:18 -070011291 outputs[i] = vixl::FPToRawbitsWithSize(size_in_bits, inputs[i]);
TatWai Chongfe536042019-10-23 16:34:11 -070011292 }
11293}
11294
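// Check an unpredicated FP binary arithmetic macro: convert the inputs to
// rawbits of the requested lane size, apply `macro` to zn and zm, and compare
// zd against the expected rawbits.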
TatWai Chong7a0d3672019-10-23 17:35:18 -070011295template <typename Ti, typename Te, size_t N>
11296static void FPBinArithHelper(Test* config,
11297 ArithFn macro,
11298 int lane_size_in_bits,
11299 const Ti (&zn_inputs)[N],
11300 const Ti (&zm_inputs)[N],
11301 const Te (&zd_expected)[N]) {
TatWai Chongfe536042019-10-23 16:34:11 -070011302 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong7a0d3672019-10-23 17:35:18 -070011303
TatWai Chongfe536042019-10-23 16:34:11 -070011304 START();
11305
11306 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
11307 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
11308 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
11309
11310 uint64_t zn_rawbits[N];
11311 uint64_t zm_rawbits[N];
11312
TatWai Chong7a0d3672019-10-23 17:35:18 -070011313 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
11314 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
TatWai Chongfe536042019-10-23 16:34:11 -070011315
11316 InsrHelper(&masm, zn, zn_rawbits);
11317 InsrHelper(&masm, zm, zm_rawbits);
11318
11319 (masm.*macro)(zd, zn, zm);
11320
11321 END();
11322
11323 if (CAN_RUN()) {
11324 RUN();
11325
11326 ASSERT_EQUAL_SVE(zd_expected, zd);
11327 }
11328}
11329
11330TEST_SVE(sve_fp_arithmetic_unpredicated_fadd) {
11331 double zn_inputs[] = {24.0,
11332 5.5,
11333 0.0,
11334 3.875,
11335 2.125,
11336 kFP64PositiveInfinity,
11337 kFP64NegativeInfinity};
11338
11339 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
11340
TatWai Chong7a0d3672019-10-23 17:35:18 -070011341 ArithFn fn = &MacroAssembler::Fadd;
TatWai Chongfe536042019-10-23 16:34:11 -070011342
11343 uint16_t expected_h[] = {Float16ToRawbits(Float16(1048.0)),
11344 Float16ToRawbits(Float16(2053.5)),
11345 Float16ToRawbits(Float16(0.1)),
11346 Float16ToRawbits(Float16(-0.875)),
11347 Float16ToRawbits(Float16(14.465)),
11348 Float16ToRawbits(kFP16PositiveInfinity),
11349 Float16ToRawbits(kFP16NegativeInfinity)};
11350
TatWai Chong7a0d3672019-10-23 17:35:18 -070011351 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -070011352
11353 uint32_t expected_s[] = {FloatToRawbits(1048.0f),
11354 FloatToRawbits(2053.5f),
11355 FloatToRawbits(0.1f),
11356 FloatToRawbits(-0.875f),
11357 FloatToRawbits(14.465f),
11358 FloatToRawbits(kFP32PositiveInfinity),
11359 FloatToRawbits(kFP32NegativeInfinity)};
11360
TatWai Chong7a0d3672019-10-23 17:35:18 -070011361 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -070011362
11363 uint64_t expected_d[] = {DoubleToRawbits(1048.0),
11364 DoubleToRawbits(2053.5),
11365 DoubleToRawbits(0.1),
11366 DoubleToRawbits(-0.875),
11367 DoubleToRawbits(14.465),
11368 DoubleToRawbits(kFP64PositiveInfinity),
11369 DoubleToRawbits(kFP64NegativeInfinity)};
11370
TatWai Chong7a0d3672019-10-23 17:35:18 -070011371 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -070011372}
11373
11374TEST_SVE(sve_fp_arithmetic_unpredicated_fsub) {
11375 double zn_inputs[] = {24.0,
11376 5.5,
11377 0.0,
11378 3.875,
11379 2.125,
11380 kFP64PositiveInfinity,
11381 kFP64NegativeInfinity};
11382
11383 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
11384
TatWai Chong7a0d3672019-10-23 17:35:18 -070011385 ArithFn fn = &MacroAssembler::Fsub;
TatWai Chongfe536042019-10-23 16:34:11 -070011386
11387 uint16_t expected_h[] = {Float16ToRawbits(Float16(-1000.0)),
11388 Float16ToRawbits(Float16(-2042.5)),
11389 Float16ToRawbits(Float16(-0.1)),
11390 Float16ToRawbits(Float16(8.625)),
11391 Float16ToRawbits(Float16(-10.215)),
11392 Float16ToRawbits(kFP16PositiveInfinity),
11393 Float16ToRawbits(kFP16NegativeInfinity)};
11394
TatWai Chong7a0d3672019-10-23 17:35:18 -070011395 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -070011396
11397 uint32_t expected_s[] = {FloatToRawbits(-1000.0),
11398 FloatToRawbits(-2042.5),
11399 FloatToRawbits(-0.1),
11400 FloatToRawbits(8.625),
11401 FloatToRawbits(-10.215),
11402 FloatToRawbits(kFP32PositiveInfinity),
11403 FloatToRawbits(kFP32NegativeInfinity)};
11404
TatWai Chong7a0d3672019-10-23 17:35:18 -070011405 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -070011406
11407 uint64_t expected_d[] = {DoubleToRawbits(-1000.0),
11408 DoubleToRawbits(-2042.5),
11409 DoubleToRawbits(-0.1),
11410 DoubleToRawbits(8.625),
11411 DoubleToRawbits(-10.215),
11412 DoubleToRawbits(kFP64PositiveInfinity),
11413 DoubleToRawbits(kFP64NegativeInfinity)};
11414
TatWai Chong7a0d3672019-10-23 17:35:18 -070011415 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -070011416}
11417
11418TEST_SVE(sve_fp_arithmetic_unpredicated_fmul) {
11419 double zn_inputs[] = {24.0,
11420 5.5,
11421 0.0,
11422 3.875,
11423 2.125,
11424 kFP64PositiveInfinity,
11425 kFP64NegativeInfinity};
11426
11427 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
11428
TatWai Chong7a0d3672019-10-23 17:35:18 -070011429 ArithFn fn = &MacroAssembler::Fmul;
TatWai Chongfe536042019-10-23 16:34:11 -070011430
11431 uint16_t expected_h[] = {Float16ToRawbits(Float16(24576.0)),
11432 Float16ToRawbits(Float16(11264.0)),
11433 Float16ToRawbits(Float16(0.0)),
11434 Float16ToRawbits(Float16(-18.4)),
11435 Float16ToRawbits(Float16(26.23)),
11436 Float16ToRawbits(kFP16PositiveInfinity),
11437 Float16ToRawbits(kFP16PositiveInfinity)};
11438
TatWai Chong7a0d3672019-10-23 17:35:18 -070011439 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -070011440
11441 uint32_t expected_s[] = {FloatToRawbits(24576.0),
11442 FloatToRawbits(11264.0),
11443 FloatToRawbits(0.0),
11444 FloatToRawbits(-18.40625),
11445 FloatToRawbits(26.2225),
11446 FloatToRawbits(kFP32PositiveInfinity),
11447 FloatToRawbits(kFP32PositiveInfinity)};
11448
TatWai Chong7a0d3672019-10-23 17:35:18 -070011449 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -070011450
11451 uint64_t expected_d[] = {DoubleToRawbits(24576.0),
11452 DoubleToRawbits(11264.0),
11453 DoubleToRawbits(0.0),
11454 DoubleToRawbits(-18.40625),
11455 DoubleToRawbits(26.2225),
11456 DoubleToRawbits(kFP64PositiveInfinity),
11457 DoubleToRawbits(kFP64PositiveInfinity)};
11458
TatWai Chong7a0d3672019-10-23 17:35:18 -070011459 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -070011460}
11461
TatWai Chong7a0d3672019-10-23 17:35:18 -070011462typedef void (MacroAssembler::*FPArithPredicatedFn)(
11463 const ZRegister& zd,
11464 const PRegisterM& pg,
11465 const ZRegister& zn,
11466 const ZRegister& zm,
11467 FPMacroNaNPropagationOption nan_option);
11468
Martyn Capewell37f28182020-01-14 10:15:10 +000011469typedef void (MacroAssembler::*FPArithPredicatedNoNaNOptFn)(
11470 const ZRegister& zd,
11471 const PRegisterM& pg,
11472 const ZRegister& zn,
11473 const ZRegister& zm);
11474
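// Check a predicated FP binary arithmetic macro. Exactly one of `macro` (which
// takes a NaN propagation option) and `macro_nonan` must be non-NULL. Each
// register-aliasing form is exercised: zdn (zd aliases zn), zdm (zd aliases
// zm), and a distinct zd, for which the macro has to emit a predicated
// movprfx.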
TatWai Chong7a0d3672019-10-23 17:35:18 -070011475template <typename Ti, typename Te, size_t N>
11476static void FPBinArithHelper(
11477 Test* config,
11478 FPArithPredicatedFn macro,
Martyn Capewell37f28182020-01-14 10:15:10 +000011479 FPArithPredicatedNoNaNOptFn macro_nonan,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011480 unsigned lane_size_in_bits,
11481 const Ti (&zd_inputs)[N],
11482 const int (&pg_inputs)[N],
11483 const Ti (&zn_inputs)[N],
11484 const Ti (&zm_inputs)[N],
11485 const Te (&zd_expected)[N],
11486 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
Martyn Capewell37f28182020-01-14 10:15:10 +000011487 VIXL_ASSERT((macro == NULL) ^ (macro_nonan == NULL));
TatWai Chongd316c5e2019-10-16 12:22:10 -070011488 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11489 START();
11490
TatWai Chong7a0d3672019-10-23 17:35:18 -070011491 // Avoid choosing default scratch registers.
11492 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
11493 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
11494 ZRegister zm = z28.WithLaneSize(lane_size_in_bits);
TatWai Chongd316c5e2019-10-16 12:22:10 -070011495
TatWai Chong7a0d3672019-10-23 17:35:18 -070011496 uint64_t zn_inputs_rawbits[N];
11497 uint64_t zm_inputs_rawbits[N];
11498 uint64_t zd_inputs_rawbits[N];
TatWai Chongd316c5e2019-10-16 12:22:10 -070011499
TatWai Chong7a0d3672019-10-23 17:35:18 -070011500 FPToRawbitsWithSize(zn_inputs, zn_inputs_rawbits, lane_size_in_bits);
11501 FPToRawbitsWithSize(zm_inputs, zm_inputs_rawbits, lane_size_in_bits);
11502 FPToRawbitsWithSize(zd_inputs, zd_inputs_rawbits, lane_size_in_bits);
11503
11504 InsrHelper(&masm, zn, zn_inputs_rawbits);
11505 InsrHelper(&masm, zm, zm_inputs_rawbits);
11506 InsrHelper(&masm, zd, zd_inputs_rawbits);
TatWai Chongd316c5e2019-10-16 12:22:10 -070011507
11508 PRegisterWithLaneSize pg = p0.WithLaneSize(lane_size_in_bits);
11509 Initialise(&masm, pg, pg_inputs);
11510
11511 // `instr` zdn, pg, zdn, zm
11512 ZRegister dn_result = z0.WithLaneSize(lane_size_in_bits);
11513 __ Mov(dn_result, zn);
Martyn Capewell37f28182020-01-14 10:15:10 +000011514 if (macro_nonan == NULL) {
11515 (masm.*macro)(dn_result, pg.Merging(), dn_result, zm, nan_option);
11516 } else {
11517 (masm.*macro_nonan)(dn_result, pg.Merging(), dn_result, zm);
11518 }
TatWai Chongd316c5e2019-10-16 12:22:10 -070011519
  // When the zd and zm registers are aliased, the macro of instructions
  // (`Instr`) swaps the order of the operands if the operation is commutative;
  // otherwise it falls back to the reversed form of the instruction, such as
  // fdivr.
11523 // `instr` zdm, pg, zn, zdm
11524 ZRegister dm_result = z1.WithLaneSize(lane_size_in_bits);
11525 __ Mov(dm_result, zm);
Martyn Capewell37f28182020-01-14 10:15:10 +000011526 if (macro_nonan == NULL) {
11527 (masm.*macro)(dm_result, pg.Merging(), zn, dm_result, nan_option);
11528 } else {
11529 (masm.*macro_nonan)(dm_result, pg.Merging(), zn, dm_result);
11530 }
TatWai Chongd316c5e2019-10-16 12:22:10 -070011531
11532 // The macro of instructions (`Instr`) automatically selects between `instr`
11533 // and movprfx + `instr` based on whether zd and zn registers are aliased.
  // A generated movprfx instruction is predicated, using the same governing
  // predicate register. In order to keep the result constant,
11536 // initialize the destination register first.
11537 // `instr` zd, pg, zn, zm
11538 ZRegister d_result = z2.WithLaneSize(lane_size_in_bits);
11539 __ Mov(d_result, zd);
Martyn Capewell37f28182020-01-14 10:15:10 +000011540 if (macro_nonan == NULL) {
11541 (masm.*macro)(d_result, pg.Merging(), zn, zm, nan_option);
11542 } else {
11543 (masm.*macro_nonan)(d_result, pg.Merging(), zn, zm);
11544 }
TatWai Chongd316c5e2019-10-16 12:22:10 -070011545
11546 END();
11547
11548 if (CAN_RUN()) {
11549 RUN();
11550
11551 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
11552 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
11553 if (!core.HasSVELane(dn_result, lane)) break;
11554 if ((pg_inputs[i] & 1) != 0) {
11555 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dn_result, lane);
11556 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -070011557 ASSERT_EQUAL_SVE_LANE(zn_inputs_rawbits[i], dn_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -070011558 }
11559 }
11560
11561 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
11562 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
11563 if (!core.HasSVELane(dm_result, lane)) break;
11564 if ((pg_inputs[i] & 1) != 0) {
11565 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dm_result, lane);
11566 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -070011567 ASSERT_EQUAL_SVE_LANE(zm_inputs_rawbits[i], dm_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -070011568 }
11569 }
11570
11571 ASSERT_EQUAL_SVE(zd_expected, d_result);
11572 }
11573}
11574
11575TEST_SVE(sve_binary_arithmetic_predicated_fdiv) {
  // The inputs are shared across the tests for the different precisions.
TatWai Chongd316c5e2019-10-16 12:22:10 -070011577 double zd_in[] = {0.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
11578
11579 double zn_in[] = {24.0,
11580 24.0,
11581 -2.0,
11582 -2.0,
11583 5.5,
11584 5.5,
11585 kFP64PositiveInfinity,
11586 kFP64PositiveInfinity,
11587 kFP64NegativeInfinity,
11588 kFP64NegativeInfinity};
11589
11590 double zm_in[] = {-2.0, -2.0, 24.0, 24.0, 0.5, 0.5, 0.65, 0.65, 24.0, 24.0};
11591
TatWai Chongd316c5e2019-10-16 12:22:10 -070011592 int pg_in[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
11593
TatWai Chong7a0d3672019-10-23 17:35:18 -070011594 uint16_t exp_h[] = {Float16ToRawbits(Float16(0.1)),
TatWai Chongd316c5e2019-10-16 12:22:10 -070011595 Float16ToRawbits(Float16(-12.0)),
11596 Float16ToRawbits(Float16(2.2)),
11597 Float16ToRawbits(Float16(-0.0833)),
11598 Float16ToRawbits(Float16(4.4)),
11599 Float16ToRawbits(Float16(11.0)),
11600 Float16ToRawbits(Float16(6.6)),
11601 Float16ToRawbits(kFP16PositiveInfinity),
11602 Float16ToRawbits(Float16(8.8)),
11603 Float16ToRawbits(kFP16NegativeInfinity)};
11604
TatWai Chong7a0d3672019-10-23 17:35:18 -070011605 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +000011606 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011607 &MacroAssembler::Fdiv,
11608 kHRegSize,
11609 zd_in,
11610 pg_in,
11611 zn_in,
11612 zm_in,
11613 exp_h);
TatWai Chongd316c5e2019-10-16 12:22:10 -070011614
11615 uint32_t exp_s[] = {FloatToRawbits(0.1),
11616 FloatToRawbits(-12.0),
11617 FloatToRawbits(2.2),
11618 0xbdaaaaab,
11619 FloatToRawbits(4.4),
11620 FloatToRawbits(11.0),
11621 FloatToRawbits(6.6),
11622 FloatToRawbits(kFP32PositiveInfinity),
11623 FloatToRawbits(8.8),
11624 FloatToRawbits(kFP32NegativeInfinity)};
11625
TatWai Chong7a0d3672019-10-23 17:35:18 -070011626 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +000011627 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011628 &MacroAssembler::Fdiv,
11629 kSRegSize,
11630 zd_in,
11631 pg_in,
11632 zn_in,
11633 zm_in,
11634 exp_s);
TatWai Chongd316c5e2019-10-16 12:22:10 -070011635
11636 uint64_t exp_d[] = {DoubleToRawbits(0.1),
11637 DoubleToRawbits(-12.0),
11638 DoubleToRawbits(2.2),
11639 0xbfb5555555555555,
11640 DoubleToRawbits(4.4),
11641 DoubleToRawbits(11.0),
11642 DoubleToRawbits(6.6),
11643 DoubleToRawbits(kFP64PositiveInfinity),
11644 DoubleToRawbits(8.8),
11645 DoubleToRawbits(kFP64NegativeInfinity)};
11646
TatWai Chong7a0d3672019-10-23 17:35:18 -070011647 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +000011648 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011649 &MacroAssembler::Fdiv,
11650 kDRegSize,
11651 zd_in,
11652 pg_in,
11653 zn_in,
11654 zm_in,
11655 exp_d);
TatWai Chongd316c5e2019-10-16 12:22:10 -070011656}
11657
Martyn Capewell9cc3f142019-10-29 14:06:35 +000011658TEST_SVE(sve_select) {
11659 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11660 START();
11661
11662 uint64_t in0[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
11663 uint64_t in1[] = {0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa};
11664
11665 // For simplicity, we re-use the same pg for various lane sizes.
11666 // For D lanes: 1, 1, 0
11667 // For S lanes: 1, 1, 1, 0, 0
11668 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
11669 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
11670 Initialise(&masm, p0.VnB(), pg_in);
11671 PRegisterM pg = p0.Merging();
11672
11673 InsrHelper(&masm, z30.VnD(), in0);
11674 InsrHelper(&masm, z31.VnD(), in1);
11675
11676 __ Sel(z0.VnB(), pg, z30.VnB(), z31.VnB());
11677 __ Sel(z1.VnH(), pg, z30.VnH(), z31.VnH());
11678 __ Sel(z2.VnS(), pg, z30.VnS(), z31.VnS());
11679 __ Sel(z3.VnD(), pg, z30.VnD(), z31.VnD());
11680
11681 END();
11682
11683 if (CAN_RUN()) {
11684 RUN();
11685
11686 uint64_t expected_z0[] = {0xaaaaaaaa05aa07f8,
11687 0xfeaaaaf0aac3870f,
11688 0xaaaa56aa9abcdeaa};
11689 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
11690
11691 uint64_t expected_z1[] = {0xaaaaaaaaaaaa07f8,
11692 0xaaaaf8f0e1c3870f,
11693 0xaaaaaaaa9abcaaaa};
11694 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
11695
11696 uint64_t expected_z2[] = {0xaaaaaaaa05f607f8,
11697 0xfefcf8f0e1c3870f,
11698 0xaaaaaaaaaaaaaaaa};
11699 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11700
11701 uint64_t expected_z3[] = {0x01f203f405f607f8,
11702 0xfefcf8f0e1c3870f,
11703 0xaaaaaaaaaaaaaaaa};
11704 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11705 }
11706}
TatWai Chongd316c5e2019-10-16 12:22:10 -070011707
TatWai Chong7a0d3672019-10-23 17:35:18 -070011708TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_h) {
11709 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
11710 double zn_inputs[] = {-2.1,
11711 8.5,
11712 225.5,
11713 0.0,
11714 8.8,
11715 -4.75,
11716 kFP64PositiveInfinity,
11717 kFP64NegativeInfinity};
11718 double zm_inputs[] = {-2.0,
11719 -13.0,
11720 24.0,
11721 0.01,
11722 0.5,
11723 300.75,
11724 kFP64NegativeInfinity,
11725 kFP64PositiveInfinity};
11726 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
11727
11728 uint16_t zd_expected_max[] = {Float16ToRawbits(Float16(-2.0)),
11729 Float16ToRawbits(Float16(8.5)),
11730 Float16ToRawbits(Float16(3.3)),
11731 Float16ToRawbits(Float16(0.01)),
11732 Float16ToRawbits(Float16(5.5)),
11733 Float16ToRawbits(Float16(300.75)),
11734 Float16ToRawbits(kFP16PositiveInfinity),
11735 Float16ToRawbits(kFP16PositiveInfinity)};
11736 FPBinArithHelper(config,
11737 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000011738 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011739 kHRegSize,
11740 zd_inputs,
11741 pg_inputs,
11742 zn_inputs,
11743 zm_inputs,
11744 zd_expected_max);
11745
11746 uint16_t zd_expected_min[] = {Float16ToRawbits(Float16(-2.1)),
11747 Float16ToRawbits(Float16(-13.0)),
11748 Float16ToRawbits(Float16(3.3)),
11749 Float16ToRawbits(Float16(0.0)),
11750 Float16ToRawbits(Float16(5.5)),
11751 Float16ToRawbits(Float16(-4.75)),
11752 Float16ToRawbits(kFP16NegativeInfinity),
11753 Float16ToRawbits(kFP16NegativeInfinity)};
11754 FPBinArithHelper(config,
11755 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000011756 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011757 kHRegSize,
11758 zd_inputs,
11759 pg_inputs,
11760 zn_inputs,
11761 zm_inputs,
11762 zd_expected_min);
11763}
11764
11765TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_s) {
11766 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
11767 double zn_inputs[] = {-2.1,
11768 8.5,
11769 225.5,
11770 0.0,
11771 8.8,
11772 -4.75,
11773 kFP64PositiveInfinity,
11774 kFP64NegativeInfinity};
11775 double zm_inputs[] = {-2.0,
11776 -13.0,
11777 24.0,
11778 0.01,
11779 0.5,
11780 300.75,
11781 kFP64NegativeInfinity,
11782 kFP64PositiveInfinity};
11783 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
11784
11785 uint32_t zd_expected_max[] = {FloatToRawbits(-2.0),
11786 FloatToRawbits(8.5),
11787 FloatToRawbits(3.3),
11788 FloatToRawbits(0.01),
11789 FloatToRawbits(5.5),
11790 FloatToRawbits(300.75),
11791 FloatToRawbits(kFP32PositiveInfinity),
11792 FloatToRawbits(kFP32PositiveInfinity)};
11793 FPBinArithHelper(config,
11794 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000011795 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011796 kSRegSize,
11797 zd_inputs,
11798 pg_inputs,
11799 zn_inputs,
11800 zm_inputs,
11801 zd_expected_max);
11802
11803 uint32_t zd_expected_min[] = {FloatToRawbits(-2.1),
11804 FloatToRawbits(-13.0),
11805 FloatToRawbits(3.3),
11806 FloatToRawbits(0.0),
11807 FloatToRawbits(5.5),
11808 FloatToRawbits(-4.75),
11809 FloatToRawbits(kFP32NegativeInfinity),
11810 FloatToRawbits(kFP32NegativeInfinity)};
11811 FPBinArithHelper(config,
11812 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000011813 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011814 kSRegSize,
11815 zd_inputs,
11816 pg_inputs,
11817 zn_inputs,
11818 zm_inputs,
11819 zd_expected_min);
11820}
11821
11822TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_d) {
11823 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
11824 double zn_inputs[] = {-2.1,
11825 8.5,
11826 225.5,
11827 0.0,
11828 8.8,
11829 -4.75,
11830 kFP64PositiveInfinity,
11831 kFP64NegativeInfinity};
11832 double zm_inputs[] = {-2.0,
11833 -13.0,
11834 24.0,
11835 0.01,
11836 0.5,
11837 300.75,
11838 kFP64NegativeInfinity,
11839 kFP64PositiveInfinity};
11840 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
11841
11842 uint64_t zd_expected_max[] = {DoubleToRawbits(-2.0),
11843 DoubleToRawbits(8.5),
11844 DoubleToRawbits(3.3),
11845 DoubleToRawbits(0.01),
11846 DoubleToRawbits(5.5),
11847 DoubleToRawbits(300.75),
11848 DoubleToRawbits(kFP64PositiveInfinity),
11849 DoubleToRawbits(kFP64PositiveInfinity)};
11850 FPBinArithHelper(config,
11851 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000011852 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011853 kDRegSize,
11854 zd_inputs,
11855 pg_inputs,
11856 zn_inputs,
11857 zm_inputs,
11858 zd_expected_max);
11859
11860 uint64_t zd_expected_min[] = {DoubleToRawbits(-2.1),
11861 DoubleToRawbits(-13.0),
11862 DoubleToRawbits(3.3),
11863 DoubleToRawbits(0.0),
11864 DoubleToRawbits(5.5),
11865 DoubleToRawbits(-4.75),
11866 DoubleToRawbits(kFP64NegativeInfinity),
11867 DoubleToRawbits(kFP64NegativeInfinity)};
11868 FPBinArithHelper(config,
11869 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000011870 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070011871 kDRegSize,
11872 zd_inputs,
11873 pg_inputs,
11874 zn_inputs,
11875 zm_inputs,
11876 zd_expected_min);
11877}
TatWai Chong29a0c432019-11-06 22:20:44 -080011878
11879template <typename T, size_t N>
11880static void BitwiseShiftImmHelper(Test* config,
11881 int lane_size_in_bits,
11882 const T (&zn_inputs)[N],
11883 int shift) {
11884 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11885 START();
11886
11887 ZRegister zd_asr = z25.WithLaneSize(lane_size_in_bits);
11888 ZRegister zd_lsr = z26.WithLaneSize(lane_size_in_bits);
11889 ZRegister zd_lsl = z27.WithLaneSize(lane_size_in_bits);
11890 ZRegister zn = z28.WithLaneSize(lane_size_in_bits);
11891
11892 InsrHelper(&masm, zn, zn_inputs);
11893
11894 __ Asr(zd_asr, zn, shift);
11895 __ Lsr(zd_lsr, zn, shift);
Martyn Capewell147b0ba2020-02-19 11:16:02 +000011896  __ Lsl(zd_lsl, zn, shift - 1);  // Lsl supports shifts of 0 to lane_size - 1.
TatWai Chong29a0c432019-11-06 22:20:44 -080011897
11898 END();
11899
11900 if (CAN_RUN()) {
11901 RUN();
11902
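    // Compute reference results lane by lane. For Asr, a shift of lane_size or
    // more fills the lane with copies of the sign bit; for Lsr it produces 0.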
11903 const uint64_t mask = GetUintMask(lane_size_in_bits);
11904 for (int i = 0; i < static_cast<int>(N); i++) {
11905 int lane = N - i - 1;
11906 if (!core.HasSVELane(zd_asr, lane)) break;
11907 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
11908 uint64_t result;
11909 if (shift >= lane_size_in_bits) {
11910 result = is_negative ? mask : 0;
11911 } else {
11912 result = zn_inputs[i] >> shift;
11913 if (is_negative) {
11914 result |= mask << (lane_size_in_bits - shift);
11915 result &= mask;
11916 }
11917 }
11918 ASSERT_EQUAL_SVE_LANE(result, zd_asr, lane);
11919 }
11920
11921 for (int i = 0; i < static_cast<int>(N); i++) {
11922 int lane = N - i - 1;
11923 if (!core.HasSVELane(zd_lsr, lane)) break;
11924 uint64_t result =
11925 (shift >= lane_size_in_bits) ? 0 : zn_inputs[i] >> shift;
11926 ASSERT_EQUAL_SVE_LANE(result, zd_lsr, lane);
11927 }
11928
11929 for (int i = 0; i < static_cast<int>(N); i++) {
11930 int lane = N - i - 1;
11931 if (!core.HasSVELane(zd_lsl, lane)) break;
Jacob Bramley504d5e92020-05-21 11:40:21 +010011932 uint64_t result =
11933 (shift > lane_size_in_bits) ? 0 : zn_inputs[i] << (shift - 1);
TatWai Chong29a0c432019-11-06 22:20:44 -080011934 ASSERT_EQUAL_SVE_LANE(result & mask, zd_lsl, lane);
11935 }
11936 }
11937}
11938
11939TEST_SVE(sve_bitwise_shift_imm_unpredicated) {
11940 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
11941 int shift_b[] = {1, 3, 5, 8};
11942 for (size_t i = 0; i < ArrayLength(shift_b); i++) {
11943 BitwiseShiftImmHelper(config, kBRegSize, inputs_b, shift_b[i]);
11944 }
11945
11946 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233};
11947 int shift_h[] = {1, 8, 11, 16};
11948 for (size_t i = 0; i < ArrayLength(shift_h); i++) {
11949 BitwiseShiftImmHelper(config, kHRegSize, inputs_h, shift_h[i]);
11950 }
11951
11952 uint64_t inputs_s[] = {0xfedcba98, 0xfffa55aa, 0x00112233};
11953 int shift_s[] = {1, 9, 17, 32};
11954 for (size_t i = 0; i < ArrayLength(shift_s); i++) {
11955 BitwiseShiftImmHelper(config, kSRegSize, inputs_s, shift_s[i]);
11956 }
11957
11958 uint64_t inputs_d[] = {0xfedcba98fedcba98,
11959 0xfffa5555aaaaaaaa,
11960 0x0011223344aafe80};
11961 int shift_d[] = {1, 23, 45, 64};
11962 for (size_t i = 0; i < ArrayLength(shift_d); i++) {
11963 BitwiseShiftImmHelper(config, kDRegSize, inputs_d, shift_d[i]);
11964 }
11965}
11966
11967template <typename T, typename R, size_t N>
11968static void BitwiseShiftWideElementsHelper(Test* config,
11969 Shift shift_type,
11970 int lane_size_in_bits,
11971 const T (&zn_inputs)[N],
11972 const R& zm_inputs,
11973 const T (&zd_expected)[N]) {
11974 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11975 START();
11976
11977 ArithFn macro;
11978  // A logical shift left or right by the full lane width produces 0, so
11979  // initialise the expected-value array to 0 for convenience.
11980 uint64_t zd_expected_max_shift_amount[N] = {0};
11981 switch (shift_type) {
11982 case ASR: {
11983 macro = &MacroAssembler::Asr;
11984 uint64_t mask = GetUintMask(lane_size_in_bits);
11985 for (size_t i = 0; i < ArrayLength(zn_inputs); i++) {
11986 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
11987 zd_expected_max_shift_amount[i] = is_negative ? mask : 0;
11988 }
11989 break;
11990 }
11991 case LSR:
11992 macro = &MacroAssembler::Lsr;
11993 break;
11994 case LSL:
11995 macro = &MacroAssembler::Lsl;
11996 break;
11997 default:
11998 VIXL_UNIMPLEMENTED();
11999 macro = NULL;
12000 break;
12001 }
12002
12003 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
12004 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
12005 ZRegister zm = z28.WithLaneSize(kDRegSize);
12006
12007 InsrHelper(&masm, zn, zn_inputs);
12008 InsrHelper(&masm, zm, zm_inputs);
12009
12010 (masm.*macro)(zd, zn, zm);
12011
12012 ZRegister zm_max_shift_amount = z25.WithLaneSize(kDRegSize);
12013 ZRegister zd_max_shift_amount = z24.WithLaneSize(lane_size_in_bits);
12014
12015 __ Dup(zm_max_shift_amount, lane_size_in_bits);
12016 (masm.*macro)(zd_max_shift_amount, zn, zm_max_shift_amount);
12017
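  // A shift amount greater than the lane size is expected to behave like a
  // shift by exactly the lane size; the two results are compared below.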
12018 ZRegister zm_out_of_range = z23.WithLaneSize(kDRegSize);
12019 ZRegister zd_out_of_range = z22.WithLaneSize(lane_size_in_bits);
12020
12021 __ Dup(zm_out_of_range, GetUintMask(lane_size_in_bits));
12022 (masm.*macro)(zd_out_of_range, zn, zm_out_of_range);
12023
12024 END();
12025
12026 if (CAN_RUN()) {
12027 RUN();
12028
12029 ASSERT_EQUAL_SVE(zd_expected, zd);
12030 ASSERT_EQUAL_SVE(zd_expected_max_shift_amount, zd_max_shift_amount);
12031 ASSERT_EQUAL_SVE(zd_max_shift_amount, zd_out_of_range);
12032 }
12033}
12034
12035TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_asr) {
12036 // clang-format off
12037 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
12038 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
12039 int shift_b[] = {1, 3};
12040 uint64_t expected_b[] = {0xff, 0xee, 0xdd, 0xcc, 0xff, 0x2a, 0xd5, 0xc0,
12041 0xff, 0xfb, 0xf7, 0xf3, 0xff, 0x0a, 0xf5, 0xf0};
12042 BitwiseShiftWideElementsHelper(config,
12043 ASR,
12044 kBRegSize,
12045 inputs_b,
12046 shift_b,
12047 expected_b);
12048
12049 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
12050 0xfedc, 0xfa55, 0x0011, 0x2233,
12051 0xfedc, 0xfa55, 0x0011, 0x2233};
12052 int shift_h[] = {1, 8, 11};
12053 uint64_t expected_h[] = {0xff6e, 0xfd2a, 0x0008, 0x1119,
12054 0xfffe, 0xfffa, 0x0000, 0x0022,
12055 0xffff, 0xffff, 0x0000, 0x0004};
12056 BitwiseShiftWideElementsHelper(config,
12057 ASR,
12058 kHRegSize,
12059 inputs_h,
12060 shift_h,
12061 expected_h);
12062
12063 uint64_t inputs_s[] =
12064 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
12065 int shift_s[] = {1, 9, 23};
12066 uint64_t expected_s[] =
12067 {0xff6e5d4c, 0xfffd2ad5, 0x00000891, 0x000091a2, 0xffffff55, 0xffffff11};
12068 BitwiseShiftWideElementsHelper(config,
12069 ASR,
12070 kSRegSize,
12071 inputs_s,
12072 shift_s,
12073 expected_s);
12074 // clang-format on
12075}
12076
12077TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsr) {
12078 // clang-format off
12079 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
12080 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
12081 int shift_b[] = {1, 3};
12082 uint64_t expected_b[] = {0x7f, 0x6e, 0x5d, 0x4c, 0x7f, 0x2a, 0x55, 0x40,
12083 0x1f, 0x1b, 0x17, 0x13, 0x1f, 0x0a, 0x15, 0x10};
12084
12085 BitwiseShiftWideElementsHelper(config,
12086 LSR,
12087 kBRegSize,
12088 inputs_b,
12089 shift_b,
12090 expected_b);
12091
12092 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
12093 0xfedc, 0xfa55, 0x0011, 0x2233,
12094 0xfedc, 0xfa55, 0x0011, 0x2233};
12095 int shift_h[] = {1, 8, 11};
12096 uint64_t expected_h[] = {0x7f6e, 0x7d2a, 0x0008, 0x1119,
12097 0x00fe, 0x00fa, 0x0000, 0x0022,
12098 0x001f, 0x001f, 0x0000, 0x0004};
12099 BitwiseShiftWideElementsHelper(config,
12100 LSR,
12101 kHRegSize,
12102 inputs_h,
12103 shift_h,
12104 expected_h);
12105
12106 uint64_t inputs_s[] =
12107 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
12108 int shift_s[] = {1, 9, 23};
12109 uint64_t expected_s[] =
12110 {0x7f6e5d4c, 0x7ffd2ad5, 0x00000891, 0x000091a2, 0x00000155, 0x00000111};
12111 BitwiseShiftWideElementsHelper(config,
12112 LSR,
12113 kSRegSize,
12114 inputs_s,
12115 shift_s,
12116 expected_s);
12117 // clang-format on
12118}
12119
12120TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsl) {
12121 // clang-format off
12122 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
12123 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
12124 int shift_b[] = {1, 5};
12125
12126 uint64_t expected_b[] = {0xfc, 0xb8, 0x74, 0x30, 0xfe, 0xaa, 0x54, 0x00,
12127 0xc0, 0x80, 0x40, 0x00, 0xe0, 0xa0, 0x40, 0x00};
12128
12129 BitwiseShiftWideElementsHelper(config,
12130 LSL,
12131 kBRegSize,
12132 inputs_b,
12133 shift_b,
12134 expected_b);
12135 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
12136 0xfedc, 0xfa55, 0x0011, 0x2233,
12137 0xfedc, 0xfa55, 0x0011, 0x2233};
12138 int shift_h[] = {1, 2, 14};
12139
12140 uint64_t expected_h[] = {0xfdb8, 0xf4aa, 0x0022, 0x4466,
12141 0xfb70, 0xe954, 0x0044, 0x88cc,
12142 0x0000, 0x4000, 0x4000, 0xc000};
12143 BitwiseShiftWideElementsHelper(config,
12144 LSL,
12145 kHRegSize,
12146 inputs_h,
12147 shift_h,
12148 expected_h);
12149 uint64_t inputs_s[] =
12150 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
12151 int shift_s[] = {1, 19, 26};
12152 uint64_t expected_s[] =
12153 {0xfdb97530, 0xfff4ab54, 0x11980000, 0x2b380000, 0xa8000000, 0x20000000};
12154 BitwiseShiftWideElementsHelper(config,
12155 LSL,
12156 kSRegSize,
12157 inputs_s,
12158 shift_s,
12159 expected_s);
Martyn Capewell3bf2d162020-02-17 15:04:36 +000012160
12161  // Test shift amounts that do not fit in an `unsigned` int, to check they are not truncated.
12162 uint64_t inputs_b2[] = {1, 2, 4, 8, 3, 5, 7, 9,
12163 1, 2, 4, 8, 3, 5, 7, 9};
12164 uint64_t shift_b2[] = {1, 0x1000000001};
12165 uint64_t expected_b2[] = {2, 4, 8, 16, 6, 10, 14, 18,
12166 0, 0, 0, 0, 0, 0, 0, 0};
12167 BitwiseShiftWideElementsHelper(config, LSL, kBRegSize, inputs_b2, shift_b2,
12168 expected_b2);
12169
TatWai Chong29a0c432019-11-06 22:20:44 -080012170 // clang-format on
12171}
12172
Martyn Capewell76c094a2020-02-13 17:26:49 +000012173TEST_SVE(sve_shift_by_vector) {
12174 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12175
12176 START();
12177 __ Ptrue(p0.VnB());
12178 __ Pfalse(p1.VnB());
12179 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
12180 __ Zip1(p3.VnH(), p0.VnH(), p1.VnH());
12181 __ Zip1(p4.VnS(), p0.VnS(), p1.VnS());
12182 __ Zip1(p5.VnD(), p0.VnD(), p1.VnD());
12183
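  // Shift an all-ones vector and 0x8000000080008080 by per-lane amounts taken
  // from an index vector, using both all-true predicates and interleaved
  // predicates that merge into a 0x55 background.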
12184 __ Dup(z31.VnD(), 0x8000000080008080);
12185 __ Dup(z0.VnB(), -1);
12186
12187 __ Index(z1.VnB(), 0, 1);
12188 __ Dup(z2.VnB(), 0x55);
12189 __ Lsr(z2.VnB(), p2.Merging(), z0.VnB(), z1.VnB());
12190 __ Lsl(z3.VnB(), p0.Merging(), z0.VnB(), z1.VnB());
12191 __ Asr(z4.VnB(), p0.Merging(), z31.VnB(), z1.VnB());
12192
12193 __ Index(z1.VnH(), 0, 1);
12194 __ Dup(z6.VnB(), 0x55);
12195 __ Lsr(z5.VnH(), p0.Merging(), z0.VnH(), z1.VnH());
12196 __ Lsl(z6.VnH(), p3.Merging(), z0.VnH(), z1.VnH());
12197 __ Asr(z7.VnH(), p0.Merging(), z31.VnH(), z1.VnH());
12198
12199 __ Index(z1.VnS(), 0, 1);
12200 __ Dup(z10.VnB(), 0x55);
12201 __ Lsr(z8.VnS(), p0.Merging(), z0.VnS(), z1.VnS());
12202 __ Lsl(z9.VnS(), p0.Merging(), z0.VnS(), z1.VnS());
12203 __ Asr(z10.VnS(), p4.Merging(), z31.VnS(), z1.VnS());
12204
12205 __ Index(z1.VnD(), 0, 1);
12206 __ Lsr(z0.VnD(), p5.Merging(), z0.VnD(), z1.VnD());
12207 __ Lsl(z12.VnD(), p0.Merging(), z0.VnD(), z1.VnD());
12208 __ Asr(z13.VnD(), p0.Merging(), z31.VnD(), z1.VnD());
12209
12210 __ Dup(z11.VnD(), 0x100000001);
12211 __ Lsl(z14.VnD(), p0.Merging(), z1.VnD(), z11.VnD());
12212
12213 __ Index(z0.VnH(), 7, -1);
12214 __ Lsr(z0.VnH(), p0.Merging(), z31.VnH(), z0.VnH());
12215 END();
12216
12217 if (CAN_RUN()) {
12218 RUN();
12219
12220 uint64_t expected_z0[] = {0x8000000020001010, 0x0800000002000101};
12221 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
12222 uint64_t expected_z2[] = {0x5500550055005500, 0x5503550f553f55ff};
12223 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12224 uint64_t expected_z3[] = {0x0000000000000000, 0x80c0e0f0f8fcfeff};
12225 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12226 uint64_t expected_z4[] = {0xff000000ff00ffff, 0xff000000f000c080};
12227 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12228 uint64_t expected_z5[] = {0x01ff03ff07ff0fff, 0x1fff3fff7fffffff};
12229 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12230 uint64_t expected_z6[] = {0x5555ffc05555fff0, 0x5555fffc5555ffff};
12231 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12232 uint64_t expected_z7[] = {0xff000000fc00f808, 0xf0000000c0008080};
12233 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12234 uint64_t expected_z8[] = {0x1fffffff3fffffff, 0x7fffffffffffffff};
12235 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12236 uint64_t expected_z9[] = {0xfffffff8fffffffc, 0xfffffffeffffffff};
12237 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12238 uint64_t expected_z10[] = {0x55555555e0002020, 0x5555555580008080};
12239 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
12240 uint64_t expected_z12[] = {0xfffffffffffffffe, 0xffffffffffffffff};
12241 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
12242 uint64_t expected_z13[] = {0xc000000040004040, 0x8000000080008080};
12243 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
12244 uint64_t expected_z14[] = {0, 0};
12245 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
12246 }
12247}
12248
12249TEST_SVE(sve_shift_by_wide_vector) {
12250 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12251
12252 START();
12253 __ Ptrue(p0.VnB());
12254 __ Pfalse(p1.VnB());
12255 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
12256 __ Zip1(p3.VnH(), p0.VnH(), p1.VnH());
12257 __ Zip1(p4.VnS(), p0.VnS(), p1.VnS());
12258
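  // In the wide-element forms, the shift amount comes from the D-sized lanes
  // of the second source: every element within a 64-bit segment is shifted by
  // the amount held in the corresponding D lane.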
12259 __ Dup(z31.VnD(), 0x8000000080008080);
12260 __ Dup(z0.VnB(), -1);
12261 __ Index(z1.VnD(), 1, 5);
12262
12263 __ Dup(z2.VnB(), 0x55);
12264 __ Lsr(z2.VnB(), p2.Merging(), z2.VnB(), z1.VnD());
12265 __ Lsl(z3.VnB(), p0.Merging(), z0.VnB(), z1.VnD());
12266 __ Asr(z4.VnB(), p0.Merging(), z31.VnB(), z1.VnD());
12267
12268 __ Dup(z6.VnB(), 0x55);
12269 __ Lsr(z5.VnH(), p0.Merging(), z0.VnH(), z1.VnD());
12270 __ Lsl(z6.VnH(), p3.Merging(), z6.VnH(), z1.VnD());
12271 __ Asr(z7.VnH(), p0.Merging(), z31.VnH(), z1.VnD());
12272
12273 __ Dup(z10.VnB(), 0x55);
12274 __ Lsr(z8.VnS(), p0.Merging(), z0.VnS(), z1.VnD());
12275 __ Lsl(z9.VnS(), p0.Merging(), z0.VnS(), z1.VnD());
12276 __ Asr(z10.VnS(), p4.Merging(), z31.VnS(), z1.VnD());
12277 END();
12278
12279 if (CAN_RUN()) {
12280 RUN();
12281
12282 uint64_t expected_z2[] = {0x5501550155015501, 0x552a552a552a552a};
12283 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12284 uint64_t expected_z3[] = {0xc0c0c0c0c0c0c0c0, 0xfefefefefefefefe};
12285 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12286 uint64_t expected_z4[] = {0xfe000000fe00fefe, 0xc0000000c000c0c0};
12287 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12288 uint64_t expected_z5[] = {0x03ff03ff03ff03ff, 0x7fff7fff7fff7fff};
12289 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12290 uint64_t expected_z6[] = {0x5555554055555540, 0x5555aaaa5555aaaa};
12291 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12292 uint64_t expected_z7[] = {0xfe000000fe00fe02, 0xc0000000c000c040};
12293 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12294 uint64_t expected_z8[] = {0x03ffffff03ffffff, 0x7fffffff7fffffff};
12295 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12296 uint64_t expected_z9[] = {0xffffffc0ffffffc0, 0xfffffffefffffffe};
12297 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12298 uint64_t expected_z10[] = {0x55555555fe000202, 0x55555555c0004040};
12299 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
12300 }
12301}
12302
Martyn Capewell83e86612020-02-19 15:46:15 +000012303TEST_SVE(sve_pred_shift_imm) {
12304 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12305
12306 START();
12307 __ Ptrue(p0.VnB());
12308 __ Pfalse(p1.VnB());
12309 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
12310 __ Zip1(p3.VnH(), p0.VnH(), p1.VnH());
12311 __ Zip1(p4.VnS(), p0.VnS(), p1.VnS());
12312 __ Zip1(p5.VnD(), p0.VnD(), p1.VnD());
12313
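  // Each group shifts right by an immediate, partially shifts back left under
  // an interleaved predicate, then shifts right arithmetically, so every
  // intermediate value can be checked independently.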
12314 __ Dup(z31.VnD(), 0x8000000080008080);
12315 __ Lsr(z0.VnB(), p0.Merging(), z31.VnB(), 1);
12316 __ Mov(z1, z0);
12317 __ Lsl(z1.VnB(), p2.Merging(), z1.VnB(), 1);
12318 __ Asr(z2.VnB(), p0.Merging(), z1.VnB(), 2);
12319
12320 __ Lsr(z3.VnH(), p0.Merging(), z31.VnH(), 2);
12321 __ Mov(z4, z3);
12322 __ Lsl(z4.VnH(), p3.Merging(), z4.VnH(), 2);
12323 __ Asr(z5.VnH(), p0.Merging(), z4.VnH(), 3);
12324
12325 __ Lsr(z6.VnS(), p0.Merging(), z31.VnS(), 3);
12326 __ Mov(z7, z6);
12327 __ Lsl(z7.VnS(), p4.Merging(), z7.VnS(), 3);
12328 __ Asr(z8.VnS(), p0.Merging(), z7.VnS(), 4);
12329
12330 __ Lsr(z9.VnD(), p0.Merging(), z31.VnD(), 4);
12331 __ Mov(z10, z9);
12332 __ Lsl(z10.VnD(), p5.Merging(), z10.VnD(), 4);
12333 __ Asr(z11.VnD(), p0.Merging(), z10.VnD(), 5);
12334 END();
12335
12336 if (CAN_RUN()) {
12337 RUN();
12338 uint64_t expected_z0[] = {0x4000000040004040, 0x4000000040004040};
12339 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
12340 uint64_t expected_z1[] = {0x4000000040004080, 0x4000000040004080};
12341 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
12342 uint64_t expected_z2[] = {0x10000000100010e0, 0x10000000100010e0};
12343 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12344 uint64_t expected_z3[] = {0x2000000020002020, 0x2000000020002020};
12345 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12346 uint64_t expected_z4[] = {0x2000000020008080, 0x2000000020008080};
12347 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12348 uint64_t expected_z5[] = {0x040000000400f010, 0x040000000400f010};
12349 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12350 uint64_t expected_z6[] = {0x1000000010001010, 0x1000000010001010};
12351 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12352 uint64_t expected_z7[] = {0x1000000080008080, 0x1000000080008080};
12353 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12354 uint64_t expected_z8[] = {0x01000000f8000808, 0x01000000f8000808};
12355 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12356 uint64_t expected_z9[] = {0x0800000008000808, 0x0800000008000808};
12357 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12358 uint64_t expected_z10[] = {0x0800000008000808, 0x8000000080008080};
12359 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
12360 uint64_t expected_z11[] = {0x0040000000400040, 0xfc00000004000404};
12361 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
12362 }
12363}
12364
12365TEST_SVE(sve_asrd) {
12366 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12367
12368 START();
12369 __ Ptrue(p0.VnB());
12370 __ Pfalse(p1.VnB());
12371 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
12372 __ Zip1(p3.VnH(), p0.VnH(), p1.VnH());
12373 __ Zip1(p4.VnS(), p0.VnS(), p1.VnS());
12374 __ Zip1(p5.VnD(), p0.VnD(), p1.VnD());
12375
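  // Asrd is an arithmetic shift right for divide: it divides by a power of
  // two, rounding towards zero rather than towards minus infinity. A scalar
  // sketch of the assumed behaviour, for illustration only (not the reference
  // used by this test):
  //
  //   int64_t asrd(int64_t x, unsigned shift) {
  //     int64_t bias = (x < 0) ? ((INT64_C(1) << shift) - 1) : 0;
  //     return (x + bias) >> shift;  // e.g. asrd(-5, 1) == -2, but -5 >> 1 == -3
  //   }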
12376 __ Index(z31.VnB(), 0x7f - 3, 1);
12377 __ Asrd(z0.VnB(), p0.Merging(), z31.VnB(), 1);
12378 __ Mov(z1, z31);
12379 __ Asrd(z1.VnB(), p2.Merging(), z1.VnB(), 2);
12380 __ Asrd(z2.VnB(), p0.Merging(), z31.VnB(), 7);
12381 __ Asrd(z3.VnB(), p0.Merging(), z31.VnB(), 8);
12382
12383 __ Index(z31.VnH(), 0x7fff - 3, 1);
12384 __ Asrd(z4.VnH(), p0.Merging(), z31.VnH(), 1);
12385 __ Mov(z5, z31);
12386 __ Asrd(z5.VnH(), p3.Merging(), z5.VnH(), 2);
12387 __ Asrd(z6.VnH(), p0.Merging(), z31.VnH(), 15);
12388 __ Asrd(z7.VnH(), p0.Merging(), z31.VnH(), 16);
12389
12390 __ Index(z31.VnS(), 0x7fffffff - 1, 1);
12391 __ Asrd(z8.VnS(), p0.Merging(), z31.VnS(), 1);
12392 __ Mov(z9, z31);
12393 __ Asrd(z9.VnS(), p4.Merging(), z9.VnS(), 2);
12394 __ Asrd(z10.VnS(), p0.Merging(), z31.VnS(), 31);
12395 __ Asrd(z11.VnS(), p0.Merging(), z31.VnS(), 32);
12396
12397 __ Index(z31.VnD(), 0x7fffffffffffffff, 1);
12398 __ Asrd(z12.VnD(), p0.Merging(), z31.VnD(), 1);
12399 __ Mov(z13, z31);
12400 __ Asrd(z13.VnD(), p5.Merging(), z13.VnD(), 2);
12401 __ Asrd(z14.VnD(), p0.Merging(), z31.VnD(), 63);
12402 __ Asrd(z31.VnD(), p0.Merging(), z31.VnD(), 64);
12403 END();
12404
12405 if (CAN_RUN()) {
12406 RUN();
12407 uint64_t expected_z0[] = {0xc6c5c5c4c4c3c3c2, 0xc2c1c1c03f3f3e3e};
12408 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
12409 uint64_t expected_z1[] = {0x8be389e287e285e1, 0x83e181e07f1f7d1f};
12410 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
12411 uint64_t expected_z2[] = {0x0000000000000000, 0x000000ff00000000};
12412 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12413 uint64_t expected_z3[] = {0x0000000000000000, 0x0000000000000000};
12414 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12415 uint64_t expected_z4[] = {0xc002c001c001c000, 0x3fff3fff3ffe3ffe};
12416 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12417 uint64_t expected_z5[] = {0x8003e0018001e000, 0x7fff1fff7ffd1fff};
12418 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12419 uint64_t expected_z6[] = {0x000000000000ffff, 0x0000000000000000};
12420 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12421 uint64_t expected_z7[] = {0x0000000000000000, 0x0000000000000000};
12422 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12423 uint64_t expected_z8[] = {0xc0000001c0000000, 0x3fffffff3fffffff};
12424 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12425 uint64_t expected_z9[] = {0x80000001e0000000, 0x7fffffff1fffffff};
12426 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12427 uint64_t expected_z10[] = {0x00000000ffffffff, 0x0000000000000000};
12428 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
12429 uint64_t expected_z11[] = {0x0000000000000000, 0x0000000000000000};
12430 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
12431 uint64_t expected_z12[] = {0xc000000000000000, 0x3fffffffffffffff};
12432 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
12433 uint64_t expected_z13[] = {0x8000000000000000, 0x1fffffffffffffff};
12434 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
12435 uint64_t expected_z14[] = {0xffffffffffffffff, 0x0000000000000000};
12436 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
12437 uint64_t expected_z31[] = {0x0000000000000000, 0x0000000000000000};
12438 ASSERT_EQUAL_SVE(expected_z31, z31.VnD());
12439 }
12440}
12441
TatWai Chong4023d7a2019-11-18 14:16:28 -080012442TEST_SVE(sve_setffr) {
12443 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12444 START();
12445
12446 __ Ptrue(p15.VnB());
12447 __ Setffr();
12448 __ Rdffr(p14.VnB());
12449
12450 END();
12451
12452 if (CAN_RUN()) {
12453 RUN();
12454
12455 ASSERT_EQUAL_SVE(p14.VnB(), p15.VnB());
12456 }
12457}
12458
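// Build an FFR value with the lowest `active_lanes` lanes set, write it with
// Wrffr, read it back with Rdffr, and check that the two predicates match.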
12459static void WrffrHelper(Test* config, unsigned active_lanes) {
12460 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12461 START();
12462
12463 int inputs[kPRegMaxSize] = {0};
12464 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
12465 for (unsigned i = 0; i < active_lanes; i++) {
12466 // The rightmost (highest-indexed) array element maps to the lowest-numbered
12467 // lane.
12468 inputs[kPRegMaxSize - i - 1] = 1;
12469 }
12470
12471 Initialise(&masm, p1.VnB(), inputs);
12472 __ Wrffr(p1.VnB());
12473 __ Rdffr(p2.VnB());
12474
12475 END();
12476
12477 if (CAN_RUN()) {
12478 RUN();
12479
12480 ASSERT_EQUAL_SVE(p1.VnB(), p2.VnB());
12481 }
12482}
12483
12484TEST_SVE(sve_wrffr) {
12485 int active_lanes_inputs[] = {0, 1, 7, 10, 32, 48, kPRegMaxSize};
12486 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
12487 WrffrHelper(config, active_lanes_inputs[i]);
12488 }
12489}
12490
TatWai Chonga3e8b172019-11-22 21:48:56 -080012491template <size_t N>
12492static void RdffrHelper(Test* config,
12493 size_t active_lanes,
12494 const int (&pg_inputs)[N]) {
12495 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12496 START();
12497
12498 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
12499
12500 // The rightmost (highest-indexed) array element maps to the lowest-numbered
12501 // lane.
12502 int pd[kPRegMaxSize] = {0};
12503 for (unsigned i = 0; i < active_lanes; i++) {
12504 pd[kPRegMaxSize - i - 1] = 1;
12505 }
12506
12507 int pg[kPRegMaxSize] = {0};
12508 for (unsigned i = 0; i < N; i++) {
12509 pg[kPRegMaxSize - i - 1] = pg_inputs[i];
12510 }
12511
12512 int pd_expected[kPRegMaxSize] = {0};
12513 for (unsigned i = 0; i < std::min(active_lanes, N); i++) {
12514 int lane = kPRegMaxSize - i - 1;
12515 pd_expected[lane] = pd[lane] & pg[lane];
12516 }
12517
12518 Initialise(&masm, p0.VnB(), pg);
12519 Initialise(&masm, p1.VnB(), pd);
12520
12521 // The unpredicated form of rdffr has been tested in `WrffrHelper`.
12522 __ Wrffr(p1.VnB());
12523 __ Rdffr(p14.VnB(), p0.Zeroing());
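  // Rdffrs performs the same read, and additionally sets NZCV from the
  // predicate test of the result against the governing predicate.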
12524 __ Rdffrs(p13.VnB(), p0.Zeroing());
12525 __ Mrs(x8, NZCV);
12526
12527 END();
12528
12529 if (CAN_RUN()) {
12530 RUN();
12531
12532 ASSERT_EQUAL_SVE(pd_expected, p14.VnB());
12533 ASSERT_EQUAL_SVE(pd_expected, p13.VnB());
12534 StatusFlags nzcv_expected =
12535 GetPredTestFlags(pd_expected, pg, core.GetSVELaneCount(kBRegSize));
12536 ASSERT_EQUAL_64(nzcv_expected, x8);
12537 }
12538}
12539
12540TEST_SVE(sve_rdffr_rdffrs) {
12541 // clang-format off
12542 int active_lanes_inputs[] = {0, 1, 15, 26, 39, 47, kPRegMaxSize};
12543 int pg_inputs_0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12544 int pg_inputs_1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
12545 int pg_inputs_2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
12546 int pg_inputs_3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
12547 int pg_inputs_4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12548 // clang-format on
12549
12550 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
12551 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_0);
12552 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_1);
12553 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_2);
12554 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_3);
12555 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_4);
12556 }
12557}
12558
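// Brkpa and Brkpb propagate a break condition: if the last active element of
// `pn` is true, active destination lanes are set to true up to the first
// active `pm` lane that is true (including that lane for Brkpa, excluding it
// for Brkpb); otherwise the result is all-false. Inactive lanes are zeroed.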
TatWai Chong38303d92019-12-02 15:49:29 -080012559typedef void (MacroAssembler::*BrkpFn)(const PRegisterWithLaneSize& pd,
12560 const PRegisterZ& pg,
12561 const PRegisterWithLaneSize& pn,
12562 const PRegisterWithLaneSize& pm);
12563
12564template <typename Tg, typename Tn, typename Td>
12565static void BrkpaBrkpbHelper(Test* config,
12566 BrkpFn macro,
12567 BrkpFn macro_set_flags,
12568 const Tg& pg_inputs,
12569 const Tn& pn_inputs,
12570 const Tn& pm_inputs,
12571 const Td& pd_expected) {
12572 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12573 START();
12574
12575 PRegister pg = p15;
12576 PRegister pn = p14;
12577 PRegister pm = p13;
12578 Initialise(&masm, pg.VnB(), pg_inputs);
12579 Initialise(&masm, pn.VnB(), pn_inputs);
12580 Initialise(&masm, pm.VnB(), pm_inputs);
12581
12582 // Initialise NZCV to an impossible value, to check that we actually write it.
12583 __ Mov(x10, NZCVFlag);
12584 __ Msr(NZCV, x10);
12585
12586 (masm.*macro_set_flags)(p0.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
12587 __ Mrs(x0, NZCV);
12588
12589 (masm.*macro)(p1.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
12590
12591 END();
12592
12593 if (CAN_RUN()) {
12594 RUN();
12595
12596 ASSERT_EQUAL_SVE(pd_expected, p0.VnB());
12597
12598 // Check that the flags were properly set.
12599 StatusFlags nzcv_expected =
12600 GetPredTestFlags(pd_expected,
12601 pg_inputs,
12602 core.GetSVELaneCount(kBRegSize));
12603 ASSERT_EQUAL_64(nzcv_expected, x0);
12604 ASSERT_EQUAL_SVE(p0.VnB(), p1.VnB());
12605 }
12606}
12607
12608template <typename Tg, typename Tn, typename Td>
12609static void BrkpaHelper(Test* config,
12610 const Tg& pg_inputs,
12611 const Tn& pn_inputs,
12612 const Tn& pm_inputs,
12613 const Td& pd_expected) {
12614 BrkpaBrkpbHelper(config,
12615 &MacroAssembler::Brkpa,
12616 &MacroAssembler::Brkpas,
12617 pg_inputs,
12618 pn_inputs,
12619 pm_inputs,
12620 pd_expected);
12621}
12622
12623template <typename Tg, typename Tn, typename Td>
12624static void BrkpbHelper(Test* config,
12625 const Tg& pg_inputs,
12626 const Tn& pn_inputs,
12627 const Tn& pm_inputs,
12628 const Td& pd_expected) {
12629 BrkpaBrkpbHelper(config,
12630 &MacroAssembler::Brkpb,
12631 &MacroAssembler::Brkpbs,
12632 pg_inputs,
12633 pn_inputs,
12634 pm_inputs,
12635 pd_expected);
12636}
12637
12638TEST_SVE(sve_brkpb) {
12639 // clang-format off
12640  // The last active element of `pn` is `true` in all vector length configurations.
12641 // | boundary of 128-bits VL.
12642 // v
12643 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12644 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12645 int pg_3[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
12646
12647 // | highest-numbered lane lowest-numbered lane |
12648 // v v
12649 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
12650 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
12651 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1};
12652
12653 int pm_1[] = {1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
12654 int pm_2[] = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12655 int pm_3[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
12656
12657 // | first active
12658 // v
12659 int exp_1_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
12660 // | first active
12661 // v
12662 int exp_1_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12663 // | first active
12664 // v
12665 int exp_1_3_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
12666
12667 BrkpbHelper(config, pg_1, pn_1, pm_1, exp_1_1_1);
12668 BrkpbHelper(config, pg_1, pn_2, pm_2, exp_1_2_2);
12669 BrkpbHelper(config, pg_1, pn_3, pm_3, exp_1_3_3);
12670
12671 // | first active
12672 // v
12673 int exp_2_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12674 // | first active
12675 // v
12676 int exp_2_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12677 // | first active
12678 // v
12679 int exp_2_3_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
12680 BrkpbHelper(config, pg_2, pn_1, pm_2, exp_2_1_2);
12681 BrkpbHelper(config, pg_2, pn_2, pm_3, exp_2_2_3);
12682 BrkpbHelper(config, pg_2, pn_3, pm_1, exp_2_3_1);
12683
12684 // | first active
12685 // v
12686 int exp_3_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
12687 // | first active
12688 // v
12689 int exp_3_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
12690 // | first active
12691 // v
12692 int exp_3_3_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
12693 BrkpbHelper(config, pg_3, pn_1, pm_3, exp_3_1_3);
12694 BrkpbHelper(config, pg_3, pn_2, pm_1, exp_3_2_1);
12695 BrkpbHelper(config, pg_3, pn_3, pm_2, exp_3_3_2);
12696
12697  // The last active element of `pn` is `false` in all vector length configurations.
12698 // | last active lane when VL > 128 bits.
12699 // v
12700 // | last active lane when VL == 128 bits.
12701 // v
12702 int pg_4[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
12703 int exp_4_x_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12704 BrkpbHelper(config, pg_4, pn_1, pm_1, exp_4_x_x);
12705 BrkpbHelper(config, pg_4, pn_2, pm_2, exp_4_x_x);
12706 BrkpbHelper(config, pg_4, pn_3, pm_3, exp_4_x_x);
12707 // clang-format on
12708}
12709
12710TEST_SVE(sve_brkpa) {
12711 // clang-format off
12712  // The last active element of `pn` is `true` in all vector length configurations.
12713 // | boundary of 128-bits VL.
12714 // v
12715 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12716 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12717 int pg_3[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
12718
12719 // | highest-numbered lane lowest-numbered lane |
12720 // v v
12721 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
12722 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
12723 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1};
12724
12725 int pm_1[] = {1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
12726 int pm_2[] = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12727 int pm_3[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
12728
12729 // | first active
12730 // v
12731 int exp_1_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
12732 // | first active
12733 // v
12734 int exp_1_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12735 // | first active
12736 // v
12737 int exp_1_3_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
12738
12739 BrkpaHelper(config, pg_1, pn_1, pm_1, exp_1_1_1);
12740 BrkpaHelper(config, pg_1, pn_2, pm_2, exp_1_2_2);
12741 BrkpaHelper(config, pg_1, pn_3, pm_3, exp_1_3_3);
12742
12743 // | first active
12744 // v
12745 int exp_2_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12746 // | first active
12747 // v
12748 int exp_2_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12749 // | first active
12750 // v
12751 int exp_2_3_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1};
12752 BrkpaHelper(config, pg_2, pn_1, pm_2, exp_2_1_2);
12753 BrkpaHelper(config, pg_2, pn_2, pm_3, exp_2_2_3);
12754 BrkpaHelper(config, pg_2, pn_3, pm_1, exp_2_3_1);
12755
12756 // | first active
12757 // v
12758 int exp_3_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
12759 // | first active
12760 // v
12761 int exp_3_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
12762 // | first active
12763 // v
12764 int exp_3_3_2[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
12765 BrkpaHelper(config, pg_3, pn_1, pm_3, exp_3_1_3);
12766 BrkpaHelper(config, pg_3, pn_2, pm_1, exp_3_2_1);
12767 BrkpaHelper(config, pg_3, pn_3, pm_2, exp_3_3_2);
12768
12769  // The last active element of `pn` is `false` in all vector length configurations.
12770 // | last active lane when VL > 128 bits.
12771 // v
12772 // | last active lane when VL == 128 bits.
12773 // v
12774 int pg_4[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
12775 int exp_4_x_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12776 BrkpaHelper(config, pg_4, pn_1, pm_1, exp_4_x_x);
12777 BrkpaHelper(config, pg_4, pn_2, pm_2, exp_4_x_x);
12778 BrkpaHelper(config, pg_4, pn_3, pm_3, exp_4_x_x);
12779 // clang-format on
12780}
12781
Martyn Capewell77b6d982019-12-02 18:34:59 +000012782TEST_SVE(sve_rbit) {
12783 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12784 START();
12785
12786 uint64_t inputs[] = {0xaaaaaaaa55555555, 0xaaaa5555aa55aa55};
12787 InsrHelper(&masm, z0.VnD(), inputs);
12788
12789 __ Ptrue(p1.VnB());
12790 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
12791 Initialise(&masm, p2.VnB(), pred);
12792
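  // Rbit reverses the bit order within each element. Applying it twice with an
  // all-true predicate restores the original value, which is checked below.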
12793 __ Rbit(z0.VnB(), p1.Merging(), z0.VnB());
12794 __ Rbit(z0.VnB(), p1.Merging(), z0.VnB());
12795
12796 __ Rbit(z1.VnB(), p1.Merging(), z0.VnB());
12797 __ Rbit(z2.VnH(), p1.Merging(), z0.VnH());
12798 __ Rbit(z3.VnS(), p1.Merging(), z0.VnS());
12799 __ Rbit(z4.VnD(), p1.Merging(), z0.VnD());
12800
12801 __ Dup(z5.VnB(), 0x42);
12802 __ Rbit(z5.VnB(), p2.Merging(), z0.VnB());
12803 __ Dup(z6.VnB(), 0x42);
12804 __ Rbit(z6.VnS(), p2.Merging(), z0.VnS());
12805
12806 END();
12807
12808 if (CAN_RUN()) {
12809 RUN();
12810
12811 ASSERT_EQUAL_SVE(inputs, z0.VnD());
12812
12813 uint64_t expected_z1[] = {0x55555555aaaaaaaa, 0x5555aaaa55aa55aa};
12814 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
12815 uint64_t expected_z2[] = {0x55555555aaaaaaaa, 0x5555aaaaaa55aa55};
12816 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12817 uint64_t expected_z3[] = {0x55555555aaaaaaaa, 0xaaaa5555aa55aa55};
12818 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12819 uint64_t expected_z4[] = {0xaaaaaaaa55555555, 0xaa55aa55aaaa5555};
12820 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12821 uint64_t expected_z5[] = {0x4255425542aa42aa, 0x4255424242aa42aa};
12822 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12823 uint64_t expected_z6[] = {0x55555555aaaaaaaa, 0x42424242aa55aa55};
12824 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12825 }
12826}
12827
12828TEST_SVE(sve_rev_bhw) {
12829 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12830 START();
12831
12832 uint64_t inputs[] = {0xaaaaaaaa55555555, 0xaaaa5555aa55aa55};
12833 InsrHelper(&masm, z0.VnD(), inputs);
12834
12835 __ Ptrue(p1.VnB());
12836 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
12837 Initialise(&masm, p2.VnB(), pred);
12838
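  // Revb, Revh and Revw reverse the order of the bytes, halfwords and words
  // (respectively) within each element of the destination element size.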
12839 __ Revb(z1.VnH(), p1.Merging(), z0.VnH());
12840 __ Revb(z2.VnS(), p1.Merging(), z0.VnS());
12841 __ Revb(z3.VnD(), p1.Merging(), z0.VnD());
12842 __ Revh(z4.VnS(), p1.Merging(), z0.VnS());
12843 __ Revh(z5.VnD(), p1.Merging(), z0.VnD());
12844 __ Revw(z6.VnD(), p1.Merging(), z0.VnD());
12845
12846 __ Dup(z7.VnB(), 0x42);
12847 __ Revb(z7.VnH(), p2.Merging(), z0.VnH());
12848 __ Dup(z8.VnB(), 0x42);
12849 __ Revh(z8.VnS(), p2.Merging(), z0.VnS());
12850
12851 END();
12852
12853 if (CAN_RUN()) {
12854 RUN();
12855
12856 uint64_t expected_z1[] = {0xaaaaaaaa55555555, 0xaaaa555555aa55aa};
12857 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
12858 uint64_t expected_z2[] = {0xaaaaaaaa55555555, 0x5555aaaa55aa55aa};
12859 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12860 uint64_t expected_z3[] = {0x55555555aaaaaaaa, 0x55aa55aa5555aaaa};
12861 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12862 uint64_t expected_z4[] = {0xaaaaaaaa55555555, 0x5555aaaaaa55aa55};
12863 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12864 uint64_t expected_z5[] = {0x55555555aaaaaaaa, 0xaa55aa555555aaaa};
12865 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12866 uint64_t expected_z6[] = {0x55555555aaaaaaaa, 0xaa55aa55aaaa5555};
12867 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12868 uint64_t expected_z7[] = {0xaaaaaaaa55555555, 0xaaaa424255aa55aa};
12869 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12870 uint64_t expected_z8[] = {0xaaaaaaaa55555555, 0x42424242aa55aa55};
12871 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12872 }
12873}
12874
Martyn Capewell43782632019-12-12 13:22:10 +000012875TEST_SVE(sve_ftssel) {
12876 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12877 START();
12878
12879 uint64_t in[] = {0x1111777766665555, 0xaaaabbbbccccdddd};
12880 uint64_t q[] = {0x0001000300000002, 0x0001000200000003};
12881 InsrHelper(&masm, z0.VnD(), in);
12882 InsrHelper(&masm, z1.VnD(), q);
12883
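  // Ftssel selects, for each lane, either the first operand or +1.0 according
  // to bit 0 of the second operand, and negates the result if bit 1 is set.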
12884 __ Ftssel(z2.VnH(), z0.VnH(), z1.VnH());
12885 __ Ftssel(z3.VnS(), z0.VnS(), z1.VnS());
12886 __ Ftssel(z4.VnD(), z0.VnD(), z1.VnD());
12887
12888 END();
12889
12890 if (CAN_RUN()) {
12891 RUN();
12892
12893 uint64_t expected_z2[] = {0x3c00bc006666d555, 0x3c003bbbccccbc00};
12894 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12895 uint64_t expected_z3[] = {0xbf800000e6665555, 0x2aaabbbbbf800000};
12896 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12897 uint64_t expected_z4[] = {0x9111777766665555, 0xbff0000000000000};
12898 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12899 }
12900}
12901
12902TEST_SVE(sve_fexpa) {
12903 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12904 START();
12905
12906 uint64_t in0[] = {0x3ff0000000000000, 0x3ff0000000011001};
12907 uint64_t in1[] = {0x3ff000000002200f, 0xbff000000003301f};
12908 uint64_t in2[] = {0xbff000000004403f, 0x3ff0000000055040};
12909 uint64_t in3[] = {0x3f800000bf800001, 0x3f80000f3f80001f};
12910 uint64_t in4[] = {0x3f80002f3f82203f, 0xbf8000403f833041};
12911 uint64_t in5[] = {0x3c003c01bc00bc07, 0x3c08bc0f3c1fbc20};
12912 InsrHelper(&masm, z0.VnD(), in0);
12913 InsrHelper(&masm, z1.VnD(), in1);
12914 InsrHelper(&masm, z2.VnD(), in2);
12915 InsrHelper(&masm, z3.VnD(), in3);
12916 InsrHelper(&masm, z4.VnD(), in4);
12917 InsrHelper(&masm, z5.VnD(), in5);
12918
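  // Fexpa is the floating-point exponential accelerator: broadly, the low bits
  // of each element index a coefficient table that forms the fraction of the
  // result and the following bits supply its exponent.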
12919 __ Fexpa(z6.VnD(), z0.VnD());
12920 __ Fexpa(z7.VnD(), z1.VnD());
12921 __ Fexpa(z8.VnD(), z2.VnD());
12922 __ Fexpa(z9.VnS(), z3.VnS());
12923 __ Fexpa(z10.VnS(), z4.VnS());
12924 __ Fexpa(z11.VnH(), z5.VnH());
12925
12926 END();
12927
12928 if (CAN_RUN()) {
12929 RUN();
12930 uint64_t expected_z6[] = {0x0000000000000000, 0x44002c9a3e778061};
12931 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12932 uint64_t expected_z7[] = {0x0802d285a6e4030b, 0x4c06623882552225};
12933 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12934 uint64_t expected_z8[] = {0x100fa7c1819e90d8, 0x5410000000000000};
12935 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12936 uint64_t expected_z9[] = {0x00000000000164d2, 0x0016942d003311c4};
12937 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12938 uint64_t expected_z10[] = {0x0054f35b407d3e0c, 0x00800000608164d2};
12939 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
12940 uint64_t expected_z11[] = {0x00000016000000a8, 0x00c2018903d40400};
12941 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
12942 }
12943}
12944
Martyn Capewell7fd6fd52019-12-06 14:50:15 +000012945TEST_SVE(sve_rev_p) {
12946 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12947 START();
12948
12949 Initialise(&masm,
12950 p0.VnB(),
12951 0xabcdabcdabcdabcd,
12952 0xabcdabcdabcdabcd,
12953 0xabcdabcdabcdabcd,
12954 0xabcdabcdabcdabcd);
12955
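  // Rev reverses the order of the predicate elements at each element size.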
12956 __ Rev(p1.VnB(), p0.VnB());
12957 __ Rev(p2.VnH(), p0.VnH());
12958 __ Rev(p3.VnS(), p0.VnS());
12959 __ Rev(p4.VnD(), p0.VnD());
12960
12961 END();
12962
12963 if (CAN_RUN()) {
12964 RUN();
12965
12966 int p1_expected[] = {1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1};
12967 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
12968 int p2_expected[] = {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0};
12969 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
12970 int p3_expected[] = {1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0};
12971 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
12972 int p4_expected[] = {1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1};
12973 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
12974 }
12975}
12976
12977TEST_SVE(sve_trn_p_bh) {
12978 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12979 START();
12980
12981 Initialise(&masm, p0.VnB(), 0xa5a55a5a);
12982 __ Pfalse(p1.VnB());
12983
12984 __ Trn1(p2.VnB(), p0.VnB(), p0.VnB());
12985 __ Trn2(p3.VnB(), p0.VnB(), p0.VnB());
12986 __ Trn1(p4.VnB(), p1.VnB(), p0.VnB());
12987 __ Trn2(p5.VnB(), p1.VnB(), p0.VnB());
12988 __ Trn1(p6.VnB(), p0.VnB(), p1.VnB());
12989 __ Trn2(p7.VnB(), p0.VnB(), p1.VnB());
12990
12991 __ Trn1(p8.VnH(), p0.VnH(), p0.VnH());
12992 __ Trn2(p9.VnH(), p0.VnH(), p0.VnH());
12993 __ Trn1(p10.VnH(), p1.VnH(), p0.VnH());
12994 __ Trn2(p11.VnH(), p1.VnH(), p0.VnH());
12995 __ Trn1(p12.VnH(), p0.VnH(), p1.VnH());
12996 __ Trn2(p13.VnH(), p0.VnH(), p1.VnH());
12997
12998 END();
12999
13000 if (CAN_RUN()) {
13001 RUN();
13002 int p2_expected[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
13003 int p3_expected[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
13004 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
13005 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
13006
13007 int p4_expected[] = {1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
13008 int p5_expected[] = {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0};
13009 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
13010 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
13011
13012 int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0};
13013 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1};
13014 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
13015 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
13016
13017 int p8_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13018 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13019 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
13020 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
13021
13022 int p10_expected[] = {0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0};
13023 int p11_expected[] = {0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0};
13024 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
13025 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
13026
13027 int p12_expected[] = {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0};
13028 int p13_expected[] = {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0};
13029 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
13030 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
13031 }
13032}
13033
13034TEST_SVE(sve_trn_p_sd) {
13035 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13036 START();
13037
13038 Initialise(&masm, p0.VnB(), 0x55a55aaa);
13039 __ Pfalse(p1.VnB());
13040
13041 __ Trn1(p2.VnS(), p0.VnS(), p0.VnS());
13042 __ Trn2(p3.VnS(), p0.VnS(), p0.VnS());
13043 __ Trn1(p4.VnS(), p1.VnS(), p0.VnS());
13044 __ Trn2(p5.VnS(), p1.VnS(), p0.VnS());
13045 __ Trn1(p6.VnS(), p0.VnS(), p1.VnS());
13046 __ Trn2(p7.VnS(), p0.VnS(), p1.VnS());
13047
13048 __ Trn1(p8.VnD(), p0.VnD(), p0.VnD());
13049 __ Trn2(p9.VnD(), p0.VnD(), p0.VnD());
13050 __ Trn1(p10.VnD(), p1.VnD(), p0.VnD());
13051 __ Trn2(p11.VnD(), p1.VnD(), p0.VnD());
13052 __ Trn1(p12.VnD(), p0.VnD(), p1.VnD());
13053 __ Trn2(p13.VnD(), p0.VnD(), p1.VnD());
13054
13055 END();
13056
13057 if (CAN_RUN()) {
13058 RUN();
13059 int p2_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
13060 int p3_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
13061 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
13062 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
13063
13064 int p4_expected[] = {1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
13065 int p5_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
13066 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
13067 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
13068
13069 int p6_expected[] = {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0};
13070 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
13071 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
13072 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
13073
13074 int p8_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
13075 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13076 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
13077 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
13078
13079 int p10_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13080 int p11_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13081 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
13082 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
13083
13084 int p12_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0};
13085 int p13_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13086 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
13087 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
13088 }
13089}
13090
13091TEST_SVE(sve_zip_p_bh) {
13092 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13093 START();
13094
13095 Initialise(&masm,
13096 p0.VnB(),
13097 0x5a5a5a5a5a5a5a5a,
13098 0x5a5a5a5a5a5a5a5a,
13099 0x5a5a5a5a5a5a5a5a,
13100 0x5a5a5a5a5a5a5a5a);
13101 __ Pfalse(p1.VnB());
13102
13103 __ Zip1(p2.VnB(), p0.VnB(), p0.VnB());
13104 __ Zip2(p3.VnB(), p0.VnB(), p0.VnB());
13105 __ Zip1(p4.VnB(), p1.VnB(), p0.VnB());
13106 __ Zip2(p5.VnB(), p1.VnB(), p0.VnB());
13107 __ Zip1(p6.VnB(), p0.VnB(), p1.VnB());
13108 __ Zip2(p7.VnB(), p0.VnB(), p1.VnB());
13109
13110 __ Zip1(p8.VnH(), p0.VnH(), p0.VnH());
13111 __ Zip2(p9.VnH(), p0.VnH(), p0.VnH());
13112 __ Zip1(p10.VnH(), p1.VnH(), p0.VnH());
13113 __ Zip2(p11.VnH(), p1.VnH(), p0.VnH());
13114 __ Zip1(p12.VnH(), p0.VnH(), p1.VnH());
13115 __ Zip2(p13.VnH(), p0.VnH(), p1.VnH());
13116
13117 END();
13118
13119 if (CAN_RUN()) {
13120 RUN();
13121 int p2_expected[] = {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0};
13122 int p3_expected[] = {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0};
13123 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
13124 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
13125
13126 int p4_expected[] = {0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0};
13127 int p5_expected[] = {0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0};
13128 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
13129 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
13130
13131 int p6_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0};
13132 int p7_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0};
13133 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
13134 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
13135
13136 int p8_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
13137 int p9_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
13138 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
13139 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
13140
13141 int p10_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
13142 int p11_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
13143 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
13144 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
13145
13146 int p12_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0};
13147 int p13_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0};
13148 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
13149 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
13150 }
13151}
13152
13153TEST_SVE(sve_zip_p_sd) {
13154 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13155 START();
13156
13157 Initialise(&masm,
13158 p0.VnB(),
13159 0x5a5a5a5a5a5a5a5a,
13160 0x5a5a5a5a5a5a5a5a,
13161 0x5a5a5a5a5a5a5a5a,
13162 0x5a5a5a5a5a5a5a5a);
13163 __ Pfalse(p1.VnB());
13164
13165 __ Zip1(p2.VnS(), p0.VnS(), p0.VnS());
13166 __ Zip2(p3.VnS(), p0.VnS(), p0.VnS());
13167 __ Zip1(p4.VnS(), p1.VnS(), p0.VnS());
13168 __ Zip2(p5.VnS(), p1.VnS(), p0.VnS());
13169 __ Zip1(p6.VnS(), p0.VnS(), p1.VnS());
13170 __ Zip2(p7.VnS(), p0.VnS(), p1.VnS());
13171
13172 __ Zip1(p8.VnD(), p0.VnD(), p0.VnD());
13173 __ Zip2(p9.VnD(), p0.VnD(), p0.VnD());
13174 __ Zip1(p10.VnD(), p1.VnD(), p0.VnD());
13175 __ Zip2(p11.VnD(), p1.VnD(), p0.VnD());
13176 __ Zip1(p12.VnD(), p0.VnD(), p1.VnD());
13177 __ Zip2(p13.VnD(), p0.VnD(), p1.VnD());
13178
13179 END();
13180
13181 if (CAN_RUN()) {
13182 RUN();
13183 int p2_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
13184 int p3_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
13185 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
13186 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
13187
13188 int p4_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
13189 int p5_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
13190 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
13191 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
13192
13193 int p6_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
13194 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
13195 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
13196 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
13197
13198 int p8_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13199 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13200 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
13201 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
13202
13203 int p10_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13204 int p11_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13205 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
13206 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
13207
13208 int p12_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13209 int p13_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13210 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
13211 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
13212 }
13213}
13214
13215TEST_SVE(sve_uzp_p) {
13216 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13217 START();
13218
13219 Initialise(&masm,
13220 p0.VnB(),
13221 0xf0f0ff00ffff0000,
13222 0x4242424242424242,
13223 0x5a5a5a5a5a5a5a5a,
13224 0x0123456789abcdef);
13225 __ Rev(p1.VnB(), p0.VnB());
13226
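  // Zip1/Zip2 interleave p0 and p1, and Uzp1/Uzp2 de-interleave them again, so
  // each round trip is expected to reproduce the original operands.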
13227 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
13228 __ Zip2(p3.VnB(), p0.VnB(), p1.VnB());
13229 __ Uzp1(p4.VnB(), p2.VnB(), p3.VnB());
13230 __ Uzp2(p5.VnB(), p2.VnB(), p3.VnB());
13231
13232 __ Zip1(p2.VnH(), p0.VnH(), p1.VnH());
13233 __ Zip2(p3.VnH(), p0.VnH(), p1.VnH());
13234 __ Uzp1(p6.VnH(), p2.VnH(), p3.VnH());
13235 __ Uzp2(p7.VnH(), p2.VnH(), p3.VnH());
13236
13237 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
13238 __ Zip2(p3.VnS(), p0.VnS(), p1.VnS());
13239 __ Uzp1(p8.VnS(), p2.VnS(), p3.VnS());
13240 __ Uzp2(p9.VnS(), p2.VnS(), p3.VnS());
13241
13242 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
13243 __ Zip2(p3.VnD(), p0.VnD(), p1.VnD());
13244 __ Uzp1(p10.VnD(), p2.VnD(), p3.VnD());
13245 __ Uzp2(p11.VnD(), p2.VnD(), p3.VnD());
13246
13247 END();
13248
13249 if (CAN_RUN()) {
13250 RUN();
13251
13252 ASSERT_EQUAL_SVE(p0, p4);
13253 ASSERT_EQUAL_SVE(p1, p5);
13254 ASSERT_EQUAL_SVE(p0, p6);
13255 ASSERT_EQUAL_SVE(p1, p7);
13256 ASSERT_EQUAL_SVE(p0, p8);
13257 ASSERT_EQUAL_SVE(p1, p9);
13258 ASSERT_EQUAL_SVE(p0, p10);
13259 ASSERT_EQUAL_SVE(p1, p11);
13260 }
13261}
13262
13263TEST_SVE(sve_punpk) {
13264 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13265 START();
13266
13267 Initialise(&masm,
13268 p0.VnB(),
13269 0xf0a0f0a0f0a0f0a0,
13270 0xf0a0f0a0f0a0f0a0,
13271 0xa0f0a0f0a0f0a0f0,
13272 0xa0f0a0f0a0f0a0f0);
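  // Punpklo/Punpkhi unpack the low/high half of a byte-granularity predicate
  // to halfword granularity, so every other byte lane of the result is clear.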
13273 __ Punpklo(p1.VnH(), p0.VnB());
13274 __ Punpkhi(p2.VnH(), p0.VnB());
13275
13276 END();
13277
13278 if (CAN_RUN()) {
13279 RUN();
13280
13281 int p1_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
13282 int p2_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13283 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
13284 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
13285 }
13286}
13287
TatWai Chong5d872292020-01-02 15:39:51 -080013288typedef void (MacroAssembler::*BrkFn)(const PRegisterWithLaneSize& pd,
13289 const PRegister& pg,
13290 const PRegisterWithLaneSize& pn);
13291
13292typedef void (MacroAssembler::*BrksFn)(const PRegisterWithLaneSize& pd,
13293 const PRegisterZ& pg,
13294 const PRegisterWithLaneSize& pn);
13295
13296template <typename T, size_t N>
13297static void BrkaBrkbHelper(Test* config,
13298 BrkFn macro,
13299 BrksFn macro_set_flags,
13300 const T (&pd_inputs)[N],
13301 const T (&pg_inputs)[N],
13302 const T (&pn_inputs)[N],
13303 const T (&pd_z_expected)[N]) {
13304 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13305 START();
13306
13307 PRegister pg = p10;
13308 PRegister pn = p9;
13309 PRegister pd_z = p0;
13310 PRegister pd_z_s = p1;
13311 PRegister pd_m = p2;
13312 Initialise(&masm, pg.VnB(), pg_inputs);
13313 Initialise(&masm, pn.VnB(), pn_inputs);
13314 Initialise(&masm, pd_m.VnB(), pd_inputs);
13315
13316 // Initialise NZCV to an impossible value, to check that we actually write it.
13317 __ Mov(x10, NZCVFlag);
13318 __ Msr(NZCV, x10);
13319
13320 (masm.*macro)(pd_z.VnB(), pg.Zeroing(), pn.VnB());
13321 (masm.*macro_set_flags)(pd_z_s.VnB(), pg.Zeroing(), pn.VnB());
13322 __ Mrs(x0, NZCV);
13323
13324 (masm.*macro)(pd_m.VnB(), pg.Merging(), pn.VnB());
13325
13326 END();
13327
13328 if (CAN_RUN()) {
13329 RUN();
13330
13331 ASSERT_EQUAL_SVE(pd_z_expected, pd_z.VnB());
13332
13333 // Check that the flags were properly set.
13334 StatusFlags nzcv_expected =
13335 GetPredTestFlags(pd_z_expected,
13336 pg_inputs,
13337 core.GetSVELaneCount(kBRegSize));
13338 ASSERT_EQUAL_64(nzcv_expected, x0);
13339 ASSERT_EQUAL_SVE(pd_z.VnB(), pd_z_s.VnB());
13340
13341 T pd_m_expected[N];
13342 // Set expected `pd` result on merging predication.
13343 for (size_t i = 0; i < N; i++) {
13344 pd_m_expected[i] = pg_inputs[i] ? pd_z_expected[i] : pd_inputs[i];
13345 }
13346 ASSERT_EQUAL_SVE(pd_m_expected, pd_m.VnB());
13347 }
13348}
13349
13350template <typename T>
13351static void BrkaHelper(Test* config,
13352 const T& pd_inputs,
13353 const T& pg_inputs,
13354 const T& pn_inputs,
13355 const T& pd_expected) {
13356 BrkaBrkbHelper(config,
13357 &MacroAssembler::Brka,
13358 &MacroAssembler::Brkas,
13359 pd_inputs,
13360 pg_inputs,
13361 pn_inputs,
13362 pd_expected);
13363}
13364
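// Brka ("break after") scans the active lanes of pn from the lowest-numbered
// lane upwards: active lanes up to and including the first active true lane
// of pn are set in pd, and all later lanes are cleared. With zeroing,
// inactive lanes are also cleared; merging behaviour is checked by the helper.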
13365TEST_SVE(sve_brka) {
13366 // clang-format off
13367 // | boundary of 128-bit VL.
13368 // v
13369 int pd[] = {1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13370
13371 // | highest-numbered lane lowest-numbered lane |
13372 // v v
13373 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
13374 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
13375
13376 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
13377 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13378 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1};
13379
13380 // | first break
13381 // v
13382 int exp_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
13383 // | first break
13384 // v
13385 int exp_1_2[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
13386 // | first break
13387 // v
13388 int exp_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
13389
13390 BrkaHelper(config, pd, pg_1, pn_1, exp_1_1);
13391 BrkaHelper(config, pd, pg_1, pn_2, exp_1_2);
13392 BrkaHelper(config, pd, pg_1, pn_3, exp_1_3);
13393
13394 // | first break
13395 // v
13396 int exp_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1};
13397 // | first break
13398 // v
13399 int exp_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
13400 // | first break
13401 // v
13402 int exp_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
13403 BrkaHelper(config, pd, pg_2, pn_1, exp_2_1);
13404 BrkaHelper(config, pd, pg_2, pn_2, exp_2_2);
13405 BrkaHelper(config, pd, pg_2, pn_3, exp_2_3);
13406
13407 // An all-inactive governing predicate with zeroing sets the destination predicate to all-false.
13408 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13409 int exp_3_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13410 BrkaHelper(config, pd, pg_3, pn_1, exp_3_x);
13411 BrkaHelper(config, pd, pg_3, pn_2, exp_3_x);
13412 BrkaHelper(config, pd, pg_3, pn_3, exp_3_x);
13413 // clang-format on
13414}
13415
13416template <typename T>
13417static void BrkbHelper(Test* config,
13418 const T& pd_inputs,
13419 const T& pg_inputs,
13420 const T& pn_inputs,
13421 const T& pd_expected) {
13422 BrkaBrkbHelper(config,
13423 &MacroAssembler::Brkb,
13424 &MacroAssembler::Brkbs,
13425 pd_inputs,
13426 pg_inputs,
13427 pn_inputs,
13428 pd_expected);
13429}
13430
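// Brkb ("break before") is like Brka, except that the first active true lane
// of pn is excluded: only the active lanes strictly before it are set in pd.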
13431TEST_SVE(sve_brkb) {
13432 // clang-format off
13433 // | boundary of 128-bit VL.
13434 // v
13435 int pd[] = {1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13436
13437 // | highest-numbered lane lowest-numbered lane |
13438 // v v
13439 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
13440 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
13441
13442 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
13443 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13444 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1};
13445
13446 // | first break
13447 // v
13448 int exp_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
13449 // | first break
13450 // v
13451 int exp_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
13452 // | first break
13453 // v
13454 int exp_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0};
13455
13456 BrkbHelper(config, pd, pg_1, pn_1, exp_1_1);
13457 BrkbHelper(config, pd, pg_1, pn_2, exp_1_2);
13458 BrkbHelper(config, pd, pg_1, pn_3, exp_1_3);
13459
13460 // | first break
13461 // v
13462 int exp_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
13463 // | first break
13464 // v
13465 int exp_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
13466 // | first break
13467 // v
13468 int exp_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13469 BrkbHelper(config, pd, pg_2, pn_1, exp_2_1);
13470 BrkbHelper(config, pd, pg_2, pn_2, exp_2_2);
13471 BrkbHelper(config, pd, pg_2, pn_3, exp_2_3);
13472
13473 // An all-inactive governing predicate with zeroing sets the destination predicate to all-false.
13474 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13475 int exp_3_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
13476 BrkbHelper(config, pd, pg_3, pn_1, exp_3_x);
13477 BrkbHelper(config, pd, pg_3, pn_2, exp_3_x);
13478 BrkbHelper(config, pd, pg_3, pn_3, exp_3_x);
13479 // clang-format on
13480}
13481
13482typedef void (MacroAssembler::*BrknFn)(const PRegisterWithLaneSize& pd,
13483 const PRegisterZ& pg,
13484 const PRegisterWithLaneSize& pn,
13485 const PRegisterWithLaneSize& pm);
13486
13487typedef void (MacroAssembler::*BrknsFn)(const PRegisterWithLaneSize& pd,
13488 const PRegisterZ& pg,
13489 const PRegisterWithLaneSize& pn,
13490 const PRegisterWithLaneSize& pm);
13491
13492enum BrknDstPredicateState { kAllFalse, kUnchanged };
13493
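// Brkn ("propagate break to next partition") either copies pm to the
// destination or clears it entirely: if pn is true at the highest-numbered
// active lane of the governing predicate, the destination takes the value of
// pm (kUnchanged); otherwise it is set all-false (kAllFalse).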
13494template <typename T, size_t N>
13495static void BrknHelper(Test* config,
13496 BrknFn macro,
13497 BrknsFn macro_set_flags,
13498 const T (&pd_inputs)[N],
13499 const T (&pg_inputs)[N],
13500 const T (&pn_inputs)[N],
13501 const T (&pm_inputs)[N],
13502 BrknDstPredicateState expected_pd_state) {
13503 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13504 START();
13505
13506 PRegister pg = p10;
13507 PRegister pn = p9;
13508 PRegister pm = p8;
13509 PRegister pdm = p0;
13510 PRegister pd = p1;
13511 PRegister pd_s = p2;
13512 Initialise(&masm, pg.VnB(), pg_inputs);
13513 Initialise(&masm, pn.VnB(), pn_inputs);
13514 Initialise(&masm, pm.VnB(), pm_inputs);
13515 Initialise(&masm, pdm.VnB(), pm_inputs);
13516 Initialise(&masm, pd.VnB(), pd_inputs);
13517 Initialise(&masm, pd_s.VnB(), pd_inputs);
13518
13519 // Initialise NZCV to an impossible value, to check that we actually write it.
13520 __ Mov(x10, NZCVFlag);
13521 __ Msr(NZCV, x10);
13522
13523 (masm.*macro)(pdm.VnB(), pg.Zeroing(), pn.VnB(), pdm.VnB());
13524 // !pd.Aliases(pm).
13525 (masm.*macro)(pd.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
13526 (masm.*macro_set_flags)(pd_s.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
13527 __ Mrs(x0, NZCV);
13528
13529 END();
13530
13531 if (CAN_RUN()) {
13532 RUN();
13533
13534 T all_false[N] = {0};
13535 if (expected_pd_state == kAllFalse) {
13536 ASSERT_EQUAL_SVE(all_false, pd.VnB());
13537 } else {
13538 ASSERT_EQUAL_SVE(pm_inputs, pd.VnB());
13539 }
13540 ASSERT_EQUAL_SVE(pm_inputs, pm.VnB());
13541
13542 // Check that the flags were properly set.
13543 StatusFlags nzcv_expected =
13544 GetPredTestFlags((expected_pd_state == kAllFalse) ? all_false
13545 : pm_inputs,
13546 pg_inputs,
13547 core.GetSVELaneCount(kBRegSize));
13548 ASSERT_EQUAL_64(nzcv_expected, x0);
13549 ASSERT_EQUAL_SVE(pd.VnB(), pdm.VnB());
13550 ASSERT_EQUAL_SVE(pd.VnB(), pd_s.VnB());
13551 }
13552}
13553
13554TEST_SVE(sve_brkn) {
13555 // clang-format off
13556 int pd[] = {1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
13557 int pm[] = {0, 1, 1, 1, 1, 0, 0, 1, 0, 1};
13558
13559 int pg_1[] = {1, 1, 0, 0, 1, 0, 1, 1, 0, 0};
13560 int pg_2[] = {0, 0, 0, 1, 1, 1, 0, 0, 1, 1};
13561 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // all-false
13562
13563 int pn_1[] = {1, 0, 0, 0, 0, 1, 1, 0, 0, 0};
13564 int pn_2[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
13565 int pn_3[] = {0, 0, 0, 0, 1, 1, 0, 0, 1, 1};
13566
13567 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_1, pm, kUnchanged);
13568 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_2, pm, kAllFalse);
13569 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_3, pm, kAllFalse);
13570
13571 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_1, pm, kAllFalse);
13572 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_2, pm, kUnchanged);
13573 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_3, pm, kAllFalse);
13574
13575 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_1, pm, kAllFalse);
13576 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_2, pm, kAllFalse);
13577 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_3, pm, kAllFalse);
13578 // clang-format on
13579}
13580
13581TEST_SVE(sve_trn) {
13582 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13583 START();
13584
13585 uint64_t in0[] = {0xffeeddccbbaa9988, 0x7766554433221100};
13586 uint64_t in1[] = {0xaa55aa55aa55aa55, 0x55aa55aa55aa55aa};
13587 InsrHelper(&masm, z0.VnD(), in0);
13588 InsrHelper(&masm, z1.VnD(), in1);
13589
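  // Trn1 interleaves the even-numbered elements of the two sources and Trn2
  // the odd-numbered elements, so each result pairs an element of z0 with the
  // like-numbered element of z1.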
13590 __ Trn1(z2.VnB(), z0.VnB(), z1.VnB());
13591 __ Trn2(z3.VnB(), z0.VnB(), z1.VnB());
13592 __ Trn1(z4.VnH(), z0.VnH(), z1.VnH());
13593 __ Trn2(z5.VnH(), z0.VnH(), z1.VnH());
13594 __ Trn1(z6.VnS(), z0.VnS(), z1.VnS());
13595 __ Trn2(z7.VnS(), z0.VnS(), z1.VnS());
13596 __ Trn1(z8.VnD(), z0.VnD(), z1.VnD());
13597 __ Trn2(z9.VnD(), z0.VnD(), z1.VnD());
13598
13599 END();
13600
13601 if (CAN_RUN()) {
13602 RUN();
13603 uint64_t expected_z2[] = {0x55ee55cc55aa5588, 0xaa66aa44aa22aa00};
13604 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
13605 uint64_t expected_z3[] = {0xaaffaaddaabbaa99, 0x5577555555335511};
13606 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
13607 uint64_t expected_z4[] = {0xaa55ddccaa559988, 0x55aa554455aa1100};
13608 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
13609 uint64_t expected_z5[] = {0xaa55ffeeaa55bbaa, 0x55aa776655aa3322};
13610 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
13611 uint64_t expected_z6[] = {0xaa55aa55bbaa9988, 0x55aa55aa33221100};
13612 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
13613 uint64_t expected_z7[] = {0xaa55aa55ffeeddcc, 0x55aa55aa77665544};
13614 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
13615 uint64_t expected_z8[] = {0x55aa55aa55aa55aa, 0x7766554433221100};
13616 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
13617 uint64_t expected_z9[] = {0xaa55aa55aa55aa55, 0xffeeddccbbaa9988};
13618 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
13619 }
13620}
13621
13622TEST_SVE(sve_zip_uzp) {
13623 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13624 START();
13625
13626 __ Dup(z0.VnD(), 0xffeeddccbbaa9988);
13627 __ Insr(z0.VnD(), 0x7766554433221100);
13628 __ Dup(z1.VnD(), 0xaa55aa55aa55aa55);
13629 __ Insr(z1.VnD(), 0x55aa55aa55aa55aa);
13630
13631 __ Zip1(z2.VnB(), z0.VnB(), z1.VnB());
13632 __ Zip2(z3.VnB(), z0.VnB(), z1.VnB());
13633 __ Zip1(z4.VnH(), z0.VnH(), z1.VnH());
13634 __ Zip2(z5.VnH(), z0.VnH(), z1.VnH());
13635 __ Zip1(z6.VnS(), z0.VnS(), z1.VnS());
13636 __ Zip2(z7.VnS(), z0.VnS(), z1.VnS());
13637 __ Zip1(z8.VnD(), z0.VnD(), z1.VnD());
13638 __ Zip2(z9.VnD(), z0.VnD(), z1.VnD());
13639
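  // Uzp1 concatenates the even-numbered elements of its sources and Uzp2 the
  // odd-numbered elements, so unzipping the zipped pairs should reconstruct
  // the original z0 and z1.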
13640 __ Uzp1(z10.VnB(), z2.VnB(), z3.VnB());
13641 __ Uzp2(z11.VnB(), z2.VnB(), z3.VnB());
13642 __ Uzp1(z12.VnH(), z4.VnH(), z5.VnH());
13643 __ Uzp2(z13.VnH(), z4.VnH(), z5.VnH());
13644 __ Uzp1(z14.VnS(), z6.VnS(), z7.VnS());
13645 __ Uzp2(z15.VnS(), z6.VnS(), z7.VnS());
13646 __ Uzp1(z16.VnD(), z8.VnD(), z9.VnD());
13647 __ Uzp2(z17.VnD(), z8.VnD(), z9.VnD());
13648
13649 END();
13650
13651 if (CAN_RUN()) {
13652 RUN();
13653 uint64_t expected_z2[] = {0x5577aa665555aa44, 0x5533aa225511aa00};
13654 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
13655 uint64_t expected_z3[] = {0xaaff55eeaadd55cc, 0xaabb55aaaa995588};
13656 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
13657 uint64_t expected_z4[] = {0x55aa776655aa5544, 0x55aa332255aa1100};
13658 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
13659 uint64_t expected_z5[] = {0xaa55ffeeaa55ddcc, 0xaa55bbaaaa559988};
13660 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
13661 uint64_t expected_z6[] = {0x55aa55aa77665544, 0x55aa55aa33221100};
13662 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
13663 uint64_t expected_z7[] = {0xaa55aa55ffeeddcc, 0xaa55aa55bbaa9988};
13664 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
13665 uint64_t expected_z8[] = {0x55aa55aa55aa55aa, 0x7766554433221100};
13666 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
13667 uint64_t expected_z9[] = {0xaa55aa55aa55aa55, 0xffeeddccbbaa9988};
13668 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
13669
13670 // Check uzp is the opposite of zip.
13671 ASSERT_EQUAL_SVE(z0.VnD(), z10.VnD());
13672 ASSERT_EQUAL_SVE(z1.VnD(), z11.VnD());
13673 ASSERT_EQUAL_SVE(z0.VnD(), z12.VnD());
13674 ASSERT_EQUAL_SVE(z1.VnD(), z13.VnD());
13675 ASSERT_EQUAL_SVE(z0.VnD(), z14.VnD());
13676 ASSERT_EQUAL_SVE(z1.VnD(), z15.VnD());
13677 ASSERT_EQUAL_SVE(z0.VnD(), z16.VnD());
13678 ASSERT_EQUAL_SVE(z1.VnD(), z17.VnD());
13679 }
13680}
13681
13682TEST_SVE(sve_fcadd) {
13683 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13684 START();
13685
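  // Fcadd treats each pair of adjacent lanes as a complex number, with the
  // real part in the even-numbered lane and the imaginary part in the
  // odd-numbered lane. A rotation of 90 multiplies the second operand by i
  // before the addition; a rotation of 270 multiplies it by -i.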
13686 __ Dup(z30.VnS(), 0);
13687
13688 __ Ptrue(p0.VnB());
13689 __ Pfalse(p1.VnB());
13690 __ Zip1(p2.VnH(), p0.VnH(), p1.VnH()); // Real elements.
13691 __ Zip1(p3.VnH(), p1.VnH(), p0.VnH()); // Imaginary elements.
13692
13693 __ Fdup(z0.VnH(), 10.0); // 10i + 10
13694 __ Fdup(z1.VnH(), 5.0); // 5i + 5
13695 __ Index(z7.VnH(), 1, 1);
13696 __ Scvtf(z7.VnH(), p0.Merging(), z7.VnH()); // Ai + B
13697
13698 __ Sel(z2.VnH(), p3, z1.VnH(), z30.VnH()); // 5i + 0
13699 __ Sel(z3.VnH(), p2, z1.VnH(), z30.VnH()); // 0i + 5
13700 __ Sel(z7.VnH(), p3, z7.VnH(), z0.VnH()); // Ai + 10
13701 __ Ext(z8.VnB(), z7.VnB(), z7.VnB(), 2);
13702 __ Sel(z8.VnH(), p2, z8.VnH(), z30.VnH()); // 0i + A
13703
13704 // (10i + 10) + rotate(5i + 0, 90)
13705 // = (10i + 10) + (0i - 5)
13706 // = 10i + 5
13707 __ Fcadd(z4.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 90);
13708
13709 // (10i + 5) + rotate(0i + 5, 270)
13710 // = (10i + 5) + (-5i + 0)
13711 // = 5i + 5
13712 __ Fcadd(z4.VnH(), p0.Merging(), z4.VnH(), z3.VnH(), 270);
13713
13714 // The same calculation, but selecting real/imaginary using predication.
13715 __ Mov(z5, z0);
13716 __ Fcadd(z5.VnH(), p2.Merging(), z5.VnH(), z1.VnH(), 90);
13717 __ Fcadd(z5.VnH(), p3.Merging(), z5.VnH(), z1.VnH(), 270);
13718
13719 // Reference calculation: (10i + 10) - (5i + 5)
13720 __ Fsub(z6.VnH(), z0.VnH(), z1.VnH());
13721
13722 // Calculation using varying imaginary values.
13723 // (Ai + 10) + rotate(5i + 0, 90)
13724 // = (Ai + 10) + (0i - 5)
13725 // = Ai + 5
13726 __ Fcadd(z7.VnH(), p0.Merging(), z7.VnH(), z2.VnH(), 90);
13727
13728 // (Ai + 5) + rotate(0i + A, 270)
13729 // = (Ai + 5) + (-Ai + 0)
13730 // = 5
13731 __ Fcadd(z7.VnH(), p0.Merging(), z7.VnH(), z8.VnH(), 270);
13732
13733 // Repeated, but for wider elements.
13734 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
13735 __ Zip1(p3.VnS(), p1.VnS(), p0.VnS());
13736 __ Fdup(z0.VnS(), 42.0);
13737 __ Fdup(z1.VnS(), 21.0);
13738 __ Index(z11.VnS(), 1, 1);
13739 __ Scvtf(z11.VnS(), p0.Merging(), z11.VnS());
13740 __ Sel(z2.VnS(), p3, z1.VnS(), z30.VnS());
13741 __ Sel(z29.VnS(), p2, z1.VnS(), z30.VnS());
13742 __ Sel(z11.VnS(), p3, z11.VnS(), z0.VnS());
13743 __ Ext(z12.VnB(), z11.VnB(), z11.VnB(), 4);
13744 __ Sel(z12.VnS(), p2, z12.VnS(), z30.VnS());
13745 __ Fcadd(z8.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 90);
13746 __ Fcadd(z8.VnS(), p0.Merging(), z8.VnS(), z29.VnS(), 270);
13747 __ Mov(z9, z0);
13748 __ Fcadd(z9.VnS(), p2.Merging(), z9.VnS(), z1.VnS(), 90);
13749 __ Fcadd(z9.VnS(), p3.Merging(), z9.VnS(), z1.VnS(), 270);
13750 __ Fsub(z10.VnS(), z0.VnS(), z1.VnS());
13751 __ Fcadd(z11.VnS(), p0.Merging(), z11.VnS(), z2.VnS(), 90);
13752 __ Fcadd(z11.VnS(), p0.Merging(), z11.VnS(), z12.VnS(), 270);
13753
13754 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
13755 __ Zip1(p3.VnD(), p1.VnD(), p0.VnD());
13756 __ Fdup(z0.VnD(), -42.0);
13757 __ Fdup(z1.VnD(), -21.0);
13758 __ Index(z15.VnD(), 1, 1);
13759 __ Scvtf(z15.VnD(), p0.Merging(), z15.VnD());
13760 __ Sel(z2.VnD(), p3, z1.VnD(), z30.VnD());
13761 __ Sel(z28.VnD(), p2, z1.VnD(), z30.VnD());
13762 __ Sel(z15.VnD(), p3, z15.VnD(), z0.VnD());
13763 __ Ext(z16.VnB(), z15.VnB(), z15.VnB(), 8);
13764 __ Sel(z16.VnD(), p2, z16.VnD(), z30.VnD());
13765 __ Fcadd(z12.VnD(), p0.Merging(), z0.VnD(), z2.VnD(), 90);
13766 __ Fcadd(z12.VnD(), p0.Merging(), z12.VnD(), z28.VnD(), 270);
13767 __ Mov(z13, z0);
13768 __ Fcadd(z13.VnD(), p2.Merging(), z13.VnD(), z1.VnD(), 90);
13769 __ Fcadd(z13.VnD(), p3.Merging(), z13.VnD(), z1.VnD(), 270);
13770 __ Fsub(z14.VnD(), z0.VnD(), z1.VnD());
13771 __ Fcadd(z15.VnD(), p0.Merging(), z15.VnD(), z2.VnD(), 90);
13772 __ Fcadd(z15.VnD(), p0.Merging(), z15.VnD(), z16.VnD(), 270);
13773 END();
13774
13775 if (CAN_RUN()) {
13776 RUN();
13777 ASSERT_EQUAL_SVE(z6.VnH(), z4.VnH());
13778 ASSERT_EQUAL_SVE(z6.VnH(), z5.VnH());
13779 ASSERT_EQUAL_SVE(z3.VnH(), z7.VnH());
13780 ASSERT_EQUAL_SVE(z10.VnS(), z8.VnS());
13781 ASSERT_EQUAL_SVE(z10.VnS(), z9.VnS());
13782 ASSERT_EQUAL_SVE(z29.VnS(), z11.VnS());
13783 ASSERT_EQUAL_SVE(z14.VnD(), z12.VnD());
13784 ASSERT_EQUAL_SVE(z14.VnD(), z13.VnD());
13785 ASSERT_EQUAL_SVE(z28.VnS(), z15.VnS());
13786 }
13787}
13788
13789TEST_SVE(sve_fcmla_index) {
13790 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13791 START();
13792
13793 __ Ptrue(p0.VnB());
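  // The indexed form of Fcmla multiplies by the complex number at the given
  // index within each 128-bit segment of the second source, so the reference
  // result below duplicates that complex number across each segment and uses
  // the predicated vector form instead.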
13794
13795 __ Fdup(z0.VnH(), 10.0);
13796 __ Fdup(z2.VnH(), 2.0);
13797 __ Zip1(z0.VnH(), z0.VnH(), z2.VnH());
13798
13799 // Duplicate complex numbers across z2 segments. First segment has 1i+0,
13800 // second has 3i+2, etc.
13801 __ Index(z1.VnH(), 0, 1);
13802 __ Scvtf(z1.VnH(), p0.Merging(), z1.VnH());
13803 __ Zip1(z2.VnS(), z1.VnS(), z1.VnS());
13804 __ Zip1(z2.VnS(), z2.VnS(), z2.VnS());
13805
13806 // Derive a vector from z2 where only the third element in each segment
13807 // contains a complex number, with other elements zero.
13808 __ Index(z3.VnS(), 0, 1);
13809 __ And(z3.VnS(), z3.VnS(), 3);
13810 __ Cmpeq(p2.VnS(), p0.Zeroing(), z3.VnS(), 2);
13811 __ Dup(z3.VnB(), 0);
13812 __ Sel(z3.VnS(), p2, z2.VnS(), z3.VnS());
13813
13814 // Use indexed complex multiply on this vector, indexing the third element.
13815 __ Dup(z4.VnH(), 0);
13816 __ Fcmla(z4.VnH(), z0.VnH(), z3.VnH(), 2, 0);
13817 __ Fcmla(z4.VnH(), z0.VnH(), z3.VnH(), 2, 90);
13818
13819 // Rotate the indexed complex number and repeat, negated, and with a different
13820 // index.
13821 __ Ext(z3.VnH(), z3.VnH(), z3.VnH(), 4);
13822 __ Dup(z5.VnH(), 0);
13823 __ Fcmla(z5.VnH(), z0.VnH(), z3.VnH(), 1, 180);
13824 __ Fcmla(z5.VnH(), z0.VnH(), z3.VnH(), 1, 270);
13825 __ Fneg(z5.VnH(), p0.Merging(), z5.VnH());
13826
13827 // Create a reference result from a vector complex multiply.
13828 __ Dup(z6.VnH(), 0);
13829 __ Fcmla(z6.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 0);
13830 __ Fcmla(z6.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 90);
13831
13832 // Repeated, but for wider elements.
13833 __ Fdup(z0.VnS(), 42.0);
13834 __ Fdup(z2.VnS(), 24.0);
13835 __ Zip1(z0.VnS(), z0.VnS(), z2.VnS());
13836 __ Index(z1.VnS(), -42, 13);
13837 __ Scvtf(z1.VnS(), p0.Merging(), z1.VnS());
13838 __ Zip1(z2.VnD(), z1.VnD(), z1.VnD());
13839 __ Zip1(z2.VnD(), z2.VnD(), z2.VnD());
13840 __ Index(z3.VnD(), 0, 1);
13841 __ And(z3.VnD(), z3.VnD(), 1);
13842 __ Cmpeq(p2.VnD(), p0.Zeroing(), z3.VnD(), 1);
13843 __ Dup(z3.VnB(), 0);
13844 __ Sel(z3.VnD(), p2, z2.VnD(), z3.VnD());
13845 __ Dup(z7.VnS(), 0);
13846 __ Fcmla(z7.VnS(), z0.VnS(), z3.VnS(), 1, 0);
13847 __ Fcmla(z7.VnS(), z0.VnS(), z3.VnS(), 1, 90);
13848 __ Ext(z3.VnB(), z3.VnB(), z3.VnB(), 8);
13849 __ Dup(z8.VnS(), 0);
13850 __ Fcmla(z8.VnS(), z0.VnS(), z3.VnS(), 0, 180);
13851 __ Fcmla(z8.VnS(), z0.VnS(), z3.VnS(), 0, 270);
13852 __ Fneg(z8.VnS(), p0.Merging(), z8.VnS());
13853 __ Dup(z9.VnS(), 0);
13854 __ Fcmla(z9.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 0);
13855 __ Fcmla(z9.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 90);
13856 END();
13857
13858 if (CAN_RUN()) {
13859 RUN();
13860 ASSERT_EQUAL_SVE(z6.VnH(), z4.VnH());
13861 ASSERT_EQUAL_SVE(z6.VnH(), z5.VnH());
13862 ASSERT_EQUAL_SVE(z9.VnS(), z7.VnS());
13863 ASSERT_EQUAL_SVE(z9.VnS(), z8.VnS());
13864 }
13865}
13866
13867TEST_SVE(sve_fcmla) {
13868 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13869 START();
13870
13871 __ Ptrue(p0.VnB());
13872 __ Pfalse(p1.VnB());
13873 __ Zip1(p2.VnH(), p0.VnH(), p1.VnH()); // Real elements.
13874 __ Zip1(p3.VnH(), p1.VnH(), p0.VnH()); // Imaginary elements.
13875
13876 __ Fdup(z0.VnH(), 10.0);
13877 __ Fdup(z2.VnH(), 2.0);
13878
13879 // Create pairs of complex numbers, Ai + A. A is chosen to be non-zero, as
13880 // the later fneg will result in a failed comparison otherwise.
13881 __ Index(z1.VnH(), -4, 3);
13882 __ Zip1(z1.VnH(), z1.VnH(), z1.VnH());
13883 __ Zip1(z1.VnH(), z1.VnH(), z1.VnH());
13884 __ Scvtf(z1.VnH(), p0.Merging(), z1.VnH());
13885
13886 __ Sel(z3.VnH(), p2, z0.VnH(), z1.VnH()); // Ai + 10
13887 __ Sel(z4.VnH(), p2, z1.VnH(), z2.VnH()); // 2i + A
13888
13889 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS()); // Even complex numbers.
13890 __ Zip1(p3.VnS(), p1.VnS(), p0.VnS()); // Odd complex numbers.
13891
13892 // Calculate (Ai + 10) * (2i + A) = (20 + A^2)i + 8A, using predication to
13893 // select only the complex numbers in odd-numbered element pairs. This leaves
13894 // results in elements 2/3, 6/7, etc. with zero in elements 0/1, 4/5, etc.
13895 // ... 7 6 5 4 3 2 1 0 <-- element
13896 // ... | 20+A^2 | 8A | 0 | 0 | 20+A^2 | 8A | 0 | 0 | <-- value
13897 __ Dup(z5.VnH(), 0);
13898 __ Fcmla(z5.VnH(), p3.Merging(), z4.VnH(), z3.VnH(), 0);
13899 __ Fcmla(z5.VnH(), p3.Merging(), z4.VnH(), z3.VnH(), 90);
13900
13901 // Move the odd results to the even result positions.
13902 // ... 7 6 5 4 3 2 1 0 <-- element
13903 // ... | 0 | 0 | 20+A^2 | 8A | 0 | 0 | 20+A^2 | 8A | <-- value
13904 __ Ext(z5.VnB(), z5.VnB(), z5.VnB(), 4);
13905
13906 // Calculate -(Ai + 10) * (2i + A) = -(20 + A^2)i - 8A for the even complex
13907 // numbers.
13908 // ... 7 6 5 4 3 2 1 0 <-- element
13909 // ... | 0 | 0 | -20-A^2 | -8A | 0 | 0 | -20-A^2 | -8A | <-- value
13910 __ Dup(z6.VnH(), 0);
13911 __ Fcmla(z6.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), 180);
13912 __ Fcmla(z6.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), 270);
13913
13914 // Negate the even results. The results in z6 should now match the results
13915 // computed earlier in z5.
13916 // ... 7 6 5 4 3 2 1 0 <-- element
13917 // ... | 0 | 0 | 20+A^2 | 8A | 0 | 0 | 20+A^2 | 8A | <-- value
13918 __ Fneg(z6.VnH(), p2.Merging(), z6.VnH());
13919
13920
13921 // Similarly, but for wider elements.
13922 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
13923 __ Zip1(p3.VnS(), p1.VnS(), p0.VnS());
13924 __ Index(z1.VnS(), -4, 3);
13925 __ Zip1(z1.VnS(), z1.VnS(), z1.VnS());
13926 __ Zip1(z1.VnS(), z1.VnS(), z1.VnS());
13927 __ Scvtf(z1.VnS(), p0.Merging(), z1.VnS());
13928 __ Fdup(z0.VnS(), 20.0);
13929 __ Fdup(z2.VnS(), 21.0);
13930 __ Sel(z3.VnS(), p2, z0.VnS(), z1.VnS());
13931 __ Sel(z4.VnS(), p2, z1.VnS(), z2.VnS());
13932 __ Punpklo(p2.VnH(), p2.VnB());
13933 __ Punpklo(p3.VnH(), p3.VnB());
13934 __ Dup(z7.VnS(), 0);
13935 __ Fcmla(z7.VnS(), p3.Merging(), z4.VnS(), z3.VnS(), 0);
13936 __ Fcmla(z7.VnS(), p3.Merging(), z4.VnS(), z3.VnS(), 90);
13937 __ Ext(z7.VnB(), z7.VnB(), z7.VnB(), 8);
13938 __ Dup(z8.VnS(), 0);
13939 __ Fcmla(z8.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 180);
13940 __ Fcmla(z8.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 270);
13941 __ Fneg(z8.VnS(), p2.Merging(), z8.VnS());
13942
13943 // Double precision computed for even lanes only.
13944 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
13945 __ Index(z1.VnD(), -4, 3);
13946 __ Zip1(z1.VnD(), z1.VnD(), z1.VnD());
13947 __ Zip1(z1.VnD(), z1.VnD(), z1.VnD());
13948 __ Scvtf(z1.VnD(), p0.Merging(), z1.VnD());
13949 __ Fdup(z0.VnD(), 20.0);
13950 __ Fdup(z2.VnD(), 21.0);
13951 __ Sel(z3.VnD(), p2, z0.VnD(), z1.VnD());
13952 __ Sel(z4.VnD(), p2, z1.VnD(), z2.VnD());
13953 __ Punpklo(p2.VnH(), p2.VnB());
13954 __ Dup(z9.VnD(), 0);
13955 __ Fcmla(z9.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 0);
13956 __ Fcmla(z9.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 90);
13957 __ Dup(z10.VnD(), 0);
13958 __ Fcmla(z10.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 180);
13959 __ Fcmla(z10.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 270);
13960 __ Fneg(z10.VnD(), p2.Merging(), z10.VnD());
13961 END();
13962
13963 if (CAN_RUN()) {
13964 RUN();
13965 ASSERT_EQUAL_SVE(z5.VnH(), z6.VnH());
13966 ASSERT_EQUAL_SVE(z7.VnS(), z8.VnS());
13967 ASSERT_EQUAL_SVE(z9.VnD(), z10.VnD());
13968 }
13969}
13970
13971TEST_SVE(sve_fpmul_index) {
13972 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13973 START();
13974
13975 uint64_t in0[] = {0x3ff000003f803c00, 0xbff00000bf80bc00};
13976 uint64_t in1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76};
13977
13978 InsrHelper(&masm, z0.VnD(), in0);
13979 InsrHelper(&masm, z1.VnD(), in1);
13980
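  // The indexed form of Fmul multiplies each element of the first source by
  // the element selected by the index within the corresponding 128-bit
  // segment of the second source. The reference results further down
  // duplicate the indexed element and use the unpredicated vector multiply.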
13981 __ Fmul(z2.VnH(), z1.VnH(), z0.VnH(), 0);
13982 __ Fmul(z3.VnH(), z1.VnH(), z0.VnH(), 1);
13983 __ Fmul(z4.VnH(), z1.VnH(), z0.VnH(), 4);
13984 __ Fmul(z5.VnH(), z1.VnH(), z0.VnH(), 7);
13985
13986 __ Fmul(z6.VnS(), z1.VnS(), z0.VnS(), 0);
13987 __ Fmul(z7.VnS(), z1.VnS(), z0.VnS(), 1);
13988 __ Fmul(z8.VnS(), z1.VnS(), z0.VnS(), 2);
13989 __ Fmul(z9.VnS(), z1.VnS(), z0.VnS(), 3);
13990
13991 __ Fmul(z10.VnD(), z1.VnD(), z0.VnD(), 0);
13992 __ Fmul(z11.VnD(), z1.VnD(), z0.VnD(), 1);
13993
13994 // Compute the results using other instructions.
13995 __ Dup(z12.VnH(), z0.VnH(), 0);
13996 __ Fmul(z12.VnH(), z1.VnH(), z12.VnH());
13997 __ Dup(z13.VnH(), z0.VnH(), 1);
13998 __ Fmul(z13.VnH(), z1.VnH(), z13.VnH());
13999 __ Dup(z14.VnH(), z0.VnH(), 4);
14000 __ Fmul(z14.VnH(), z1.VnH(), z14.VnH());
14001 __ Dup(z15.VnH(), z0.VnH(), 7);
14002 __ Fmul(z15.VnH(), z1.VnH(), z15.VnH());
14003
14004 __ Dup(z16.VnS(), z0.VnS(), 0);
14005 __ Fmul(z16.VnS(), z1.VnS(), z16.VnS());
14006 __ Dup(z17.VnS(), z0.VnS(), 1);
14007 __ Fmul(z17.VnS(), z1.VnS(), z17.VnS());
14008 __ Dup(z18.VnS(), z0.VnS(), 2);
14009 __ Fmul(z18.VnS(), z1.VnS(), z18.VnS());
14010 __ Dup(z19.VnS(), z0.VnS(), 3);
14011 __ Fmul(z19.VnS(), z1.VnS(), z19.VnS());
14012
14013 __ Dup(z20.VnD(), z0.VnD(), 0);
14014 __ Fmul(z20.VnD(), z1.VnD(), z20.VnD());
14015 __ Dup(z21.VnD(), z0.VnD(), 1);
14016 __ Fmul(z21.VnD(), z1.VnD(), z21.VnD());
14017
14018 END();
14019
14020 if (CAN_RUN()) {
14021 RUN();
14022 ASSERT_EQUAL_SVE(z12.VnH(), z2.VnH());
14023 ASSERT_EQUAL_SVE(z13.VnH(), z3.VnH());
14024 ASSERT_EQUAL_SVE(z14.VnH(), z4.VnH());
14025 ASSERT_EQUAL_SVE(z15.VnH(), z5.VnH());
14026 ASSERT_EQUAL_SVE(z16.VnS(), z6.VnS());
14027 ASSERT_EQUAL_SVE(z17.VnS(), z7.VnS());
14028 ASSERT_EQUAL_SVE(z18.VnS(), z8.VnS());
14029 ASSERT_EQUAL_SVE(z19.VnS(), z9.VnS());
14030 ASSERT_EQUAL_SVE(z20.VnD(), z10.VnD());
14031 ASSERT_EQUAL_SVE(z21.VnD(), z11.VnD());
14032 }
14033}
14034
14035TEST_SVE(sve_ftmad) {
14036 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14037 START();
14038
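  // Ftmad performs a multiply-add using a hard-wired coefficient selected by
  // the immediate index (the table is intended for sin/cos polynomial
  // evaluation; see the Arm ARM for the exact values), so re-running the same
  // inputs with different indices exercises different coefficients.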
14039 uint64_t in_h0[] = {0x7c027e01fc02fe01,
14040 0x3c003c00bc00bc00,
14041 0x3c003c00bc00bc00};
14042 uint64_t in_h1[] = {0xfe01fc027e017e01,
14043 0x3c00bc003c00bc00,
14044 0x3c00bc003c00bc00};
14045 uint64_t in_s0[] = {0x7f800002ffc00001,
14046 0x3f8000003f800000,
14047 0xbf800000bf800000};
14048 uint64_t in_s1[] = {0xffc00001ffc00001,
14049 0x3f800000bf800000,
14050 0x3f800000bf800000};
14051 uint64_t in_d0[] = {0x7ff8000000000001,
14052 0x3ff0000000000000,
14053 0xbff0000000000000};
14054 uint64_t in_d1[] = {0xfff0000000000002,
14055 0xbff0000000000000,
14056 0x3ff0000000000000};
14057 InsrHelper(&masm, z0.VnD(), in_h0);
14058 InsrHelper(&masm, z1.VnD(), in_h1);
14059 InsrHelper(&masm, z2.VnD(), in_s0);
14060 InsrHelper(&masm, z3.VnD(), in_s1);
14061 InsrHelper(&masm, z4.VnD(), in_d0);
14062 InsrHelper(&masm, z5.VnD(), in_d1);
14063
14064 __ Mov(z6, z0);
14065 __ Ftmad(z6.VnH(), z6.VnH(), z1.VnH(), 0);
14066 __ Mov(z7, z0);
14067 __ Ftmad(z7.VnH(), z7.VnH(), z1.VnH(), 1);
14068 __ Mov(z8, z0);
14069 __ Ftmad(z8.VnH(), z8.VnH(), z1.VnH(), 2);
14070
14071 __ Mov(z9, z2);
14072 __ Ftmad(z9.VnS(), z9.VnS(), z3.VnS(), 0);
14073 __ Mov(z10, z2);
14074 __ Ftmad(z10.VnS(), z10.VnS(), z3.VnS(), 3);
14075 __ Mov(z11, z2);
14076 __ Ftmad(z11.VnS(), z11.VnS(), z3.VnS(), 4);
14077
14078 __ Mov(z12, z4);
14079 __ Ftmad(z12.VnD(), z12.VnD(), z5.VnD(), 0);
14080 __ Mov(z13, z4);
14081 __ Ftmad(z13.VnD(), z13.VnD(), z5.VnD(), 5);
14082 __ Mov(z14, z4);
14083 __ Ftmad(z14.VnD(), z14.VnD(), z5.VnD(), 7);
14084
14085 END();
14086
14087 if (CAN_RUN()) {
14088 RUN();
14089 uint64_t expected_z6[] = {0x7e027e02fe02fe01,
14090 0x4000400000000000,
14091 0x4000400000000000};
14092 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
14093 uint64_t expected_z7[] = {0x7e027e02fe02fe01,
14094 0x3aab3800bcabbe00,
14095 0x3aab3800bcabbe00};
14096 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
14097 uint64_t expected_z8[] = {0x7e027e02fe02fe01,
14098 0x3c083c2abbefbbac,
14099 0x3c083c2abbefbbac};
14100 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
14101 uint64_t expected_z9[] = {0x7fc00002ffc00001,
14102 0x4000000040000000,
14103 0x0000000000000000};
14104 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
14105 uint64_t expected_z10[] = {0x7fc00002ffc00001,
14106 0x3f7ff2ff3f7fa4fc,
14107 0xbf800680bf802d82};
14108 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
14109 uint64_t expected_z11[] = {0x7fc00002ffc00001,
14110 0x3f8000173f8000cd,
14111 0xbf7fffd2bf7ffe66};
14112 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
14113 uint64_t expected_z12[] = {0x7ff8000000000002,
14114 0x4000000000000000,
14115 0x0000000000000000};
14116 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
14117 uint64_t expected_z13[] = {0x7ff8000000000002,
14118 0x3fefffff6c0d846c,
14119 0xbff0000006b978ae};
14120 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
14121 uint64_t expected_z14[] = {0x7ff8000000000002,
14122 0x3feffffffffe708a,
14123 0xbff0000000000000};
14124 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
14125 }
14126}
14127
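// Helper for the predicated FP arithmetic tests below: it fills z0 with the
// given inputs, reverses them into another vector, and applies Fadd, Fsub
// (both operand orders), Fabd, Fmul, Fmulx, Fminnm and Fmaxnm under a mixed
// active/inactive predicate, so each test can check both the arithmetic and
// the merging behaviour.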
14128static void BasicFPArithHelper(MacroAssembler* masm,
14129 int lane_size_in_bits,
14130 const uint64_t (&inputs)[2],
14131 const uint64_t (&inputs_fmulx)[2],
14132 const uint64_t (&inputs_nans)[2]) {
14133 int ls = lane_size_in_bits;
14134
14135 for (int i = 0; i < 16; i++) {
14136 InsrHelper(masm, z0.VnD(), inputs);
14137 }
14138 ZRegister rvrs = z1.WithLaneSize(ls);
14139 masm->Rev(rvrs, z0.WithLaneSize(ls));
14140
14141 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
14142 Initialise(masm, p2.VnB(), pred);
14143 PRegisterM p2m = p2.Merging();
14144
14145 masm->Mov(z2, z0);
14146 masm->Fadd(z2.WithLaneSize(ls),
14147 p2m,
14148 z2.WithLaneSize(ls),
14149 rvrs,
14150 FastNaNPropagation);
14151 masm->Mov(z3, z0);
14152 masm->Fsub(z3.WithLaneSize(ls), p2m, z3.WithLaneSize(ls), rvrs);
14153 masm->Mov(z4, z0);
14154 masm->Fsub(z4.WithLaneSize(ls), p2m, rvrs, z4.WithLaneSize(ls));
14155 masm->Mov(z5, z0);
14156 masm->Fabd(z5.WithLaneSize(ls),
14157 p2m,
14158 z5.WithLaneSize(ls),
14159 rvrs,
14160 FastNaNPropagation);
14161 masm->Mov(z6, z0);
14162 masm->Fmul(z6.WithLaneSize(ls),
14163 p2m,
14164 z6.WithLaneSize(ls),
14165 rvrs,
14166 FastNaNPropagation);
14167
14168 for (int i = 0; i < 16; i++) {
14169 InsrHelper(masm, z7.VnD(), inputs_fmulx);
14170 }
14171 masm->Rev(z8.WithLaneSize(ls), z7.WithLaneSize(ls));
14172 masm->Fmulx(z7.WithLaneSize(ls),
14173 p2m,
14174 z7.WithLaneSize(ls),
14175 z8.WithLaneSize(ls),
14176 FastNaNPropagation);
14177
14178 InsrHelper(masm, z8.VnD(), inputs_nans);
14179 masm->Mov(z9, z8);
14180 masm->Fminnm(z9.WithLaneSize(ls),
14181 p2m,
14182 z9.WithLaneSize(ls),
14183 rvrs,
14184 FastNaNPropagation);
14185 masm->Mov(z10, z8);
14186 masm->Fmaxnm(z10.WithLaneSize(ls),
14187 p2m,
14188 z10.WithLaneSize(ls),
14189 rvrs,
14190 FastNaNPropagation);
14191}
14192
14193TEST_SVE(sve_fp_arith_pred_h) {
14194 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14195 START();
14196
14197 uint64_t inputs[] = {0x4800470046004500, 0x4400420040003c00};
14198 uint64_t inputs_fmulx[] = {0x7c00fc007c00fc00, 0x0000800000008000};
14199 uint64_t inputs_nans[] = {0x7fffffff7fffffff, 0x7bfffbff7fbbfbff};
14200
14201 BasicFPArithHelper(&masm, kHRegSize, inputs, inputs_fmulx, inputs_nans);
14202
14203 END();
14204
14205 if (CAN_RUN()) {
14206 RUN();
14207 uint64_t expected_z2[] = {0x4880488048804880, 0x4880420048804880};
14208 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
14209 uint64_t expected_z3[] = {0x4700450042003c00, 0xbc004200c500c700};
14210 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
14211 uint64_t expected_z4[] = {0xc700c500c200bc00, 0x3c00420045004700};
14212 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
14213 uint64_t expected_z5[] = {0x4700450042003c00, 0x3c00420045004700};
14214 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
14215 uint64_t expected_z6[] = {0x48004b004c804d00, 0x4d0042004b004800};
14216 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
14217 uint64_t expected_z7[] = {0xc000c000c000c000, 0xc0008000c000c000};
14218 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
14219 uint64_t expected_z9[] = {0x3c00400042004400, 0x4500fbff4700fbff};
14220 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
14221 uint64_t expected_z10[] = {0x3c00400042004400, 0x7bfffbff47004800};
14222 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
14223 }
14224}
14225
14226TEST_SVE(sve_fp_arith_pred_s) {
14227 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14228 START();
14229
14230 uint64_t inputs[] = {0x4080000040400000, 0x400000003f800000};
14231 uint64_t inputs_fmulx[] = {0x7f800000ff800000, 0x0000000080000000};
14232 uint64_t inputs_nans[] = {0x7fffffffffffffff, 0x41000000c1000000};
14233
14234 BasicFPArithHelper(&masm, kSRegSize, inputs, inputs_fmulx, inputs_nans);
14235
14236 END();
14237
14238 if (CAN_RUN()) {
14239 RUN();
14240 uint64_t expected_z2[] = {0x40a0000040a00000, 0x4000000040a00000};
14241 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
14242 uint64_t expected_z3[] = {0x404000003f800000, 0x40000000c0400000};
14243 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
14244 uint64_t expected_z4[] = {0xc0400000bf800000, 0x4000000040400000};
14245 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
14246 uint64_t expected_z5[] = {0x404000003f800000, 0x4000000040400000};
14247 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
14248 uint64_t expected_z6[] = {0x4080000040c00000, 0x4000000040800000};
14249 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
14250 uint64_t expected_z7[] = {0xc0000000c0000000, 0x00000000c0000000};
14251 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
14252 uint64_t expected_z9[] = {0x3f80000040000000, 0x41000000c1000000};
14253 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
14254 uint64_t expected_z10[] = {0x3f80000040000000, 0x4100000040800000};
14255 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
14256 }
14257}
14258
14259TEST_SVE(sve_fp_arith_pred_d) {
14260 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14261 START();
14262
14263 uint64_t inputs[] = {0x4000000000000000, 0x3ff0000000000000};
14264 uint64_t inputs_fmulx[] = {0x7ff0000000000000, 0x8000000000000000};
14265 uint64_t inputs_nans[] = {0x7fffffffffffffff, 0x4100000000000000};
14266
14267 BasicFPArithHelper(&masm, kDRegSize, inputs, inputs_fmulx, inputs_nans);
14268
14269 END();
14270
14271 if (CAN_RUN()) {
14272 RUN();
14273 uint64_t expected_z2[] = {0x4008000000000000, 0x4008000000000000};
14274 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
14275 uint64_t expected_z3[] = {0x3ff0000000000000, 0xbff0000000000000};
14276 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
14277 uint64_t expected_z4[] = {0xbff0000000000000, 0x3ff0000000000000};
14278 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
14279 uint64_t expected_z5[] = {0x3ff0000000000000, 0x3ff0000000000000};
14280 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
14281 uint64_t expected_z6[] = {0x4000000000000000, 0x4000000000000000};
14282 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
14283 uint64_t expected_z7[] = {0xc000000000000000, 0xc000000000000000};
14284 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
14285 uint64_t expected_z9[] = {0x3ff0000000000000, 0x4000000000000000};
14286 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
14287 uint64_t expected_z10[] = {0x3ff0000000000000, 0x4100000000000000};
14288 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
14289 }
14290}
14291
14292TEST_SVE(sve_fp_arith_pred_imm) {
14293 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14294 START();
14295
14296 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
14297 Initialise(&masm, p0.VnB(), pred);
14298 PRegisterM p0m = p0.Merging();
14299 __ Ptrue(p1.VnB());
14300
14301 __ Fdup(z0.VnD(), 0.0);
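  // The immediate forms of these instructions encode only a one-bit choice of
  // constant (0.5 or 1.0 for Fadd/Fsub, 0.5 or 2.0 for Fmul, and 0.0 or 1.0
  // for Fmax/Fmin/Fmaxnm/Fminnm), which is why only those values appear below.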
14302
14303 __ Mov(z1, z0);
14304 __ Fdiv(z1.VnH(), p1.Merging(), z1.VnH(), z1.VnH());
14305 __ Mov(z2, z0);
14306 __ Fadd(z2.VnH(), p0m, z2.VnH(), 0.5);
14307 __ Mov(z3, z2);
14308 __ Fsub(z3.VnH(), p0m, z3.VnH(), 1.0);
14309 __ Mov(z4, z3);
14310 __ Fsub(z4.VnH(), p0m, 1.0, z4.VnH());
14311 __ Mov(z5, z4);
14312 __ Fmul(z5.VnH(), p0m, z5.VnH(), 2.0);
14313 __ Mov(z6, z1);
14314 __ Fminnm(z6.VnH(), p0m, z6.VnH(), 0.0);
14315 __ Mov(z7, z1);
14316 __ Fmaxnm(z7.VnH(), p0m, z7.VnH(), 1.0);
14317 __ Mov(z8, z5);
14318 __ Fmin(z8.VnH(), p0m, z8.VnH(), 1.0);
14319 __ Mov(z9, z5);
14320 __ Fmax(z9.VnH(), p0m, z9.VnH(), 0.0);
14321
14322 __ Mov(z11, z0);
14323 __ Fdiv(z11.VnS(), p1.Merging(), z11.VnS(), z11.VnS());
14324 __ Mov(z12, z0);
14325 __ Fadd(z12.VnS(), p0m, z12.VnS(), 0.5);
14326 __ Mov(z13, z12);
14327 __ Fsub(z13.VnS(), p0m, z13.VnS(), 1.0);
14328 __ Mov(z14, z13);
14329 __ Fsub(z14.VnS(), p0m, 1.0, z14.VnS());
14330 __ Mov(z15, z14);
14331 __ Fmul(z15.VnS(), p0m, z15.VnS(), 2.0);
14332 __ Mov(z16, z11);
14333 __ Fminnm(z16.VnS(), p0m, z16.VnS(), 0.0);
14334 __ Mov(z17, z11);
14335 __ Fmaxnm(z17.VnS(), p0m, z17.VnS(), 1.0);
14336 __ Mov(z18, z15);
14337 __ Fmin(z18.VnS(), p0m, z18.VnS(), 1.0);
14338 __ Mov(z19, z15);
14339 __ Fmax(z19.VnS(), p0m, z19.VnS(), 0.0);
14340
14341 __ Mov(z21, z0);
14342 __ Fdiv(z21.VnD(), p1.Merging(), z21.VnD(), z21.VnD());
14343 __ Mov(z22, z0);
14344 __ Fadd(z22.VnD(), p0m, z22.VnD(), 0.5);
14345 __ Mov(z23, z22);
14346 __ Fsub(z23.VnD(), p0m, z23.VnD(), 1.0);
14347 __ Mov(z24, z23);
14348 __ Fsub(z24.VnD(), p0m, 1.0, z24.VnD());
14349 __ Mov(z25, z24);
14350 __ Fmul(z25.VnD(), p0m, z25.VnD(), 2.0);
14351 __ Mov(z26, z21);
14352 __ Fminnm(z26.VnD(), p0m, z26.VnD(), 0.0);
14353 __ Mov(z27, z21);
14354 __ Fmaxnm(z27.VnD(), p0m, z27.VnD(), 1.0);
14355 __ Mov(z28, z25);
14356 __ Fmin(z28.VnD(), p0m, z28.VnD(), 1.0);
14357 __ Mov(z29, z25);
14358 __ Fmax(z29.VnD(), p0m, z29.VnD(), 0.0);
14359
14360 __ Index(z0.VnH(), -3, 1);
14361 __ Scvtf(z0.VnH(), p1.Merging(), z0.VnH());
14362 __ Fmax(z0.VnH(), p1.Merging(), z0.VnH(), 0.0);
14363 __ Index(z1.VnS(), -4, 2);
14364 __ Scvtf(z1.VnS(), p1.Merging(), z1.VnS());
14365 __ Fadd(z1.VnS(), p1.Merging(), z1.VnS(), 1.0);
14366
14367 END();
14368
14369 if (CAN_RUN()) {
14370 RUN();
14371 uint64_t expected_z2[] = {0x3800380038003800, 0x3800000038003800};
14372 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
14373 uint64_t expected_z3[] = {0xb800b800b800b800, 0xb8000000b800b800};
14374 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
14375 uint64_t expected_z4[] = {0x3e003e003e003e00, 0x3e0000003e003e00};
14376 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
14377 uint64_t expected_z5[] = {0x4200420042004200, 0x4200000042004200};
14378 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
14379 uint64_t expected_z6[] = {0x0000000000000000, 0x00007e0000000000};
14380 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
14381 uint64_t expected_z7[] = {0x3c003c003c003c00, 0x3c007e003c003c00};
14382 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
14383 uint64_t expected_z8[] = {0x3c003c003c003c00, 0x3c0000003c003c00};
14384 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
14385 uint64_t expected_z9[] = {0x4200420042004200, 0x4200000042004200};
14386 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
14387
14388 uint64_t expected_z12[] = {0x3f0000003f000000, 0x000000003f000000};
14389 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
14390 uint64_t expected_z13[] = {0xbf000000bf000000, 0x00000000bf000000};
14391 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
14392 uint64_t expected_z14[] = {0x3fc000003fc00000, 0x000000003fc00000};
14393 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
14394 uint64_t expected_z15[] = {0x4040000040400000, 0x0000000040400000};
14395 ASSERT_EQUAL_SVE(expected_z15, z15.VnD());
14396 uint64_t expected_z16[] = {0x0000000000000000, 0x7fc0000000000000};
14397 ASSERT_EQUAL_SVE(expected_z16, z16.VnD());
14398 uint64_t expected_z17[] = {0x3f8000003f800000, 0x7fc000003f800000};
14399 ASSERT_EQUAL_SVE(expected_z17, z17.VnD());
14400 uint64_t expected_z18[] = {0x3f8000003f800000, 0x000000003f800000};
14401 ASSERT_EQUAL_SVE(expected_z18, z18.VnD());
14402 uint64_t expected_z19[] = {0x4040000040400000, 0x0000000040400000};
14403 ASSERT_EQUAL_SVE(expected_z19, z19.VnD());
14404
14405 uint64_t expected_z22[] = {0x3fe0000000000000, 0x3fe0000000000000};
14406 ASSERT_EQUAL_SVE(expected_z22, z22.VnD());
14407 uint64_t expected_z23[] = {0xbfe0000000000000, 0xbfe0000000000000};
14408 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
14409 uint64_t expected_z24[] = {0x3ff8000000000000, 0x3ff8000000000000};
14410 ASSERT_EQUAL_SVE(expected_z24, z24.VnD());
14411 uint64_t expected_z25[] = {0x4008000000000000, 0x4008000000000000};
14412 ASSERT_EQUAL_SVE(expected_z25, z25.VnD());
14413 uint64_t expected_z26[] = {0x0000000000000000, 0x0000000000000000};
14414 ASSERT_EQUAL_SVE(expected_z26, z26.VnD());
14415 uint64_t expected_z27[] = {0x3ff0000000000000, 0x3ff0000000000000};
14416 ASSERT_EQUAL_SVE(expected_z27, z27.VnD());
14417 uint64_t expected_z28[] = {0x3ff0000000000000, 0x3ff0000000000000};
14418 ASSERT_EQUAL_SVE(expected_z28, z28.VnD());
14419 uint64_t expected_z29[] = {0x4008000000000000, 0x4008000000000000};
14420 ASSERT_EQUAL_SVE(expected_z29, z29.VnD());
14421 uint64_t expected_z0[] = {0x4400420040003c00, 0x0000000000000000};
14422 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
14423 uint64_t expected_z1[] = {0x404000003f800000, 0xbf800000c0400000};
14424 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
14425 }
14426}
14427
14428TEST_SVE(sve_fscale) {
14429 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14430 START();
14431
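  // Fscale computes zd = zd * 2^zm, treating the elements of zm as signed
  // integers, so it adjusts the exponent of each floating-point element.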
14432 uint64_t inputs_h[] = {0x4800470046004500, 0x4400420040003c00};
14433 InsrHelper(&masm, z0.VnD(), inputs_h);
14434 uint64_t inputs_s[] = {0x4080000040400000, 0x400000003f800000};
14435 InsrHelper(&masm, z1.VnD(), inputs_s);
14436 uint64_t inputs_d[] = {0x40f0000000000000, 0x4000000000000000};
14437 InsrHelper(&masm, z2.VnD(), inputs_d);
14438
14439 uint64_t scales[] = {0x00080002fff8fffe, 0x00100001fff0ffff};
14440 InsrHelper(&masm, z3.VnD(), scales);
14441
14442 __ Ptrue(p0.VnB());
14443 int pred[] = {0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1};
14444 Initialise(&masm, p1.VnB(), pred);
14445
14446 __ Mov(z4, z0);
14447 __ Fscale(z4.VnH(), p0.Merging(), z4.VnH(), z3.VnH());
14448 __ Mov(z5, z0);
14449 __ Fscale(z5.VnH(), p1.Merging(), z5.VnH(), z3.VnH());
14450
14451 __ Sunpklo(z3.VnS(), z3.VnH());
14452 __ Mov(z6, z1);
14453 __ Fscale(z6.VnS(), p0.Merging(), z6.VnS(), z3.VnS());
14454 __ Mov(z7, z1);
14455 __ Fscale(z7.VnS(), p1.Merging(), z7.VnS(), z3.VnS());
14456
14457 __ Sunpklo(z3.VnD(), z3.VnS());
14458 __ Mov(z8, z2);
14459 __ Fscale(z8.VnD(), p0.Merging(), z8.VnD(), z3.VnD());
14460 __ Mov(z9, z2);
14461 __ Fscale(z9.VnD(), p1.Merging(), z9.VnD(), z3.VnD());
14462
14463 // Test full double precision range scaling.
14464 __ Dup(z10.VnD(), 2045);
14465 __ Dup(z11.VnD(), 0x0010000000000000); // 2^-1022
14466 __ Fscale(z11.VnD(), p0.Merging(), z11.VnD(), z10.VnD());
14467
14468 END();
14469
14470 if (CAN_RUN()) {
14471 RUN();
14472
14473 uint64_t expected_z4[] = {0x68004f0026003d00, 0x7c00460002003800};
14474 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
14475 uint64_t expected_z5[] = {0x68004f0026004500, 0x7c00420002003800};
14476 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
14477
14478 uint64_t expected_z6[] = {0x4880000040c00000, 0x380000003f000000};
14479 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
14480 uint64_t expected_z7[] = {0x4880000040400000, 0x400000003f000000};
14481 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
14482
14483 uint64_t expected_z8[] = {0x3ff0000000000000, 0x3ff0000000000000};
14484 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
14485 uint64_t expected_z9[] = {0x40f0000000000000, 0x3ff0000000000000};
14486 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
14487
14488 uint64_t expected_z11[] = {0x7fe0000000000000, 0x7fe0000000000000};
14489 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
14490 }
14491}
14492
14493typedef void (MacroAssembler::*FcvtFrintMFn)(const ZRegister& zd,
14494 const PRegisterM& pg,
14495 const ZRegister& zn);
14496
14497typedef void (MacroAssembler::*FcvtFrintZFn)(const ZRegister& zd,
14498 const PRegisterZ& pg,
14499 const ZRegister& zn);
14500
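// Shared helper for the Fcvtz*/Frint* tests: it applies the macro with an
// all-active predicate, then again with a partially-active predicate in
// merging mode (and, when a zeroing form is supplied, in zeroing mode), and
// checks each result against expectations derived from
// `zd_expected_all_active`.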
14501template <typename F, size_t N>
14502static void TestFcvtFrintHelper(Test* config,
14503 FcvtFrintMFn macro_m,
14504 FcvtFrintZFn macro_z,
14505 int dst_type_size_in_bits,
14506 int src_type_size_in_bits,
14507 const F (&zn_inputs)[N],
14508 const int (&pg_inputs)[N],
14509 const uint64_t (&zd_expected_all_active)[N]) {
14510 VIXL_ASSERT(macro_m != NULL);
14511 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14512 START();
14513
14514 // If the input and result types have different sizes, the instruction
14515 // operates on elements of the larger type, so use that to determine the
14516 // lane size for the test.
14517 int lane_size_in_bits =
14518 std::max(dst_type_size_in_bits, src_type_size_in_bits);
14519
14520 ZRegister zd_all_active = z25;
14521 ZRegister zd_merging = z26;
14522 ZRegister zn = z27;
14523
14524 uint64_t zn_rawbits[N];
14525 FPToRawbitsWithSize(zn_inputs, zn_rawbits, src_type_size_in_bits);
14526 InsrHelper(&masm, zn.WithLaneSize(lane_size_in_bits), zn_rawbits);
14527
14528 PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
14529 __ Ptrue(pg_all_active);
14530
14531 // Test floating-point conversions with all lanes active.
14532 (masm.*macro_m)(zd_all_active.WithLaneSize(dst_type_size_in_bits),
14533 pg_all_active.Merging(),
14534 zn.WithLaneSize(src_type_size_in_bits));
14535
14536 PRegisterWithLaneSize pg_merging = p1.WithLaneSize(lane_size_in_bits);
14537 Initialise(&masm, pg_merging, pg_inputs);
14538
14539 __ Dup(zd_merging.VnD(), 0x0bad0bad0bad0bad);
14540
14541 // Use the same `zn` inputs to test floating-point conversions, but with
14542 // some lanes set inactive.
14543 (masm.*macro_m)(zd_merging.WithLaneSize(dst_type_size_in_bits),
14544 pg_merging.Merging(),
14545 zn.WithLaneSize(src_type_size_in_bits));
14546
14547 ZRegister zd_zeroing = z24;
14548 PRegisterWithLaneSize pg_zeroing = p1.WithLaneSize(lane_size_in_bits);
14549 Initialise(&masm, pg_zeroing, pg_inputs);
14550
14551 if (macro_z != NULL) {
14552 __ Dup(zd_zeroing.VnD(), 0x0bad0bad0bad0bad);
14553 (masm.*macro_z)(zd_zeroing.WithLaneSize(dst_type_size_in_bits),
14554 pg_zeroing.Zeroing(),
14555 zn.WithLaneSize(src_type_size_in_bits));
14556 }
14557
14558 END();
14559
14560 if (CAN_RUN()) {
14561 RUN();
14562
14563 ASSERT_EQUAL_SVE(zd_expected_all_active,
14564 zd_all_active.WithLaneSize(lane_size_in_bits));
14565
14566 uint64_t zd_expected_merging[N];
14567 for (unsigned i = 0; i < N; i++) {
14568 zd_expected_merging[i] =
14569 pg_inputs[i] ? zd_expected_all_active[i]
14570 : 0x0bad0bad0bad0bad & GetUintMask(lane_size_in_bits);
14571 }
14572 ASSERT_EQUAL_SVE(zd_expected_merging,
14573 zd_merging.WithLaneSize(lane_size_in_bits));
14574
14575 if (macro_z != NULL) {
14576 uint64_t zd_expected_zeroing[N] = {0};
14577 for (unsigned i = 0; i < N; i++) {
14578 if (pg_inputs[i]) {
14579 zd_expected_zeroing[i] = zd_expected_all_active[i];
14580 }
14581 }
14582 ASSERT_EQUAL_SVE(zd_expected_zeroing,
14583 zd_zeroing.WithLaneSize(lane_size_in_bits));
14584 }
14585 }
14586}
14587
14588template <typename F, size_t N>
14589static void TestFcvtzHelper(Test* config,
14590 FcvtFrintMFn macro_m,
14591 int dst_type_size_in_bits,
14592 int src_type_size_in_bits,
14593 const F (&zn_inputs)[N],
14594 const int (&pg_inputs)[N],
14595 const uint64_t (&zd_expected_all_active)[N]) {
14596 TestFcvtFrintHelper(config,
14597 macro_m,
14598 // Fcvt variants have no zeroing predication form.
14599 NULL,
14600 dst_type_size_in_bits,
14601 src_type_size_in_bits,
14602 zn_inputs,
14603 pg_inputs,
14604 zd_expected_all_active);
14605}
14606
14607TEST_SVE(fcvtzs_fcvtzu_float16) {
14608 const double h_max_float16 = kHMaxInt; // Largest float16 == INT16_MAX.
14609 const double h_min_float16 = -h_max_float16; // Smallest float16 > INT16_MIN.
14610 const double largest_float16 = 0xffe0; // 65504
14611 const double smallest_float16 = -largest_float16;
14612 const double h_max_int_sub_one = kHMaxInt - 1;
14613 const double h_min_int_add_one = kHMinInt + 1;
14614
14615 double zn_inputs[] = {1.0,
14616 1.1,
14617 1.5,
14618 -1.5,
14619 h_max_float16,
14620 h_min_float16,
14621 largest_float16,
14622 smallest_float16,
14623 kFP64PositiveInfinity,
14624 kFP64NegativeInfinity,
14625 h_max_int_sub_one,
14626 h_min_int_add_one};
14627
14628 int pg_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
14629
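  // Conversions to integer saturate: values (including infinities) outside
  // the destination range produce the most negative or most positive
  // representable integer, as the expected results below show.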
14630 uint64_t expected_fcvtzs_fp162h[] = {1,
14631 1,
14632 1,
14633 0xffff,
14634 0x7fff,
14635 0x8000,
14636 0x7fff,
14637 0x8000,
14638 0x7fff,
14639 0x8000,
14640 0x7fff,
14641 0x8000};
14642
14643 uint64_t expected_fcvtzu_fp162h[] =
14644 {1, 1, 1, 0, 0x8000, 0, 0xffe0, 0, 0xffff, 0, 0x8000, 0};
14645
14646 // Float16 to 16-bit integers.
14647 TestFcvtzHelper(config,
14648 &MacroAssembler::Fcvtzs,
14649 kHRegSize,
14650 kHRegSize,
14651 zn_inputs,
14652 pg_inputs,
14653 expected_fcvtzs_fp162h);
14654
14655 TestFcvtzHelper(config,
14656 &MacroAssembler::Fcvtzu,
14657 kHRegSize,
14658 kHRegSize,
14659 zn_inputs,
14660 pg_inputs,
14661 expected_fcvtzu_fp162h);
14662
14663 uint64_t expected_fcvtzs_fp162w[] = {1,
14664 1,
14665 1,
14666 0xffffffff,
14667 0x8000,
14668 0xffff8000,
14669 0xffe0,
14670 0xffff0020,
14671 0x7fffffff,
14672 0x80000000,
14673 0x8000,
14674 0xffff8000};
14675
14676 uint64_t expected_fcvtzu_fp162w[] =
14677 {1, 1, 1, 0, 0x8000, 0, 0xffe0, 0, 0xffffffff, 0, 0x8000, 0};
14678
14679 // Float16 to 32-bit integers.
14680 TestFcvtzHelper(config,
14681 &MacroAssembler::Fcvtzs,
14682 kSRegSize,
14683 kHRegSize,
14684 zn_inputs,
14685 pg_inputs,
14686 expected_fcvtzs_fp162w);
14687
14688 TestFcvtzHelper(config,
14689 &MacroAssembler::Fcvtzu,
14690 kSRegSize,
14691 kHRegSize,
14692 zn_inputs,
14693 pg_inputs,
14694 expected_fcvtzu_fp162w);
14695
14696 uint64_t expected_fcvtzs_fp162x[] = {1,
14697 1,
14698 1,
14699 0xffffffffffffffff,
14700 0x8000,
14701 0xffffffffffff8000,
14702 0xffe0,
14703 0xffffffffffff0020,
14704 0x7fffffffffffffff,
14705 0x8000000000000000,
14706 0x8000,
14707 0xffffffffffff8000};
14708
14709 uint64_t expected_fcvtzu_fp162x[] =
14710 {1, 1, 1, 0, 0x8000, 0, 0xffe0, 0, 0xffffffffffffffff, 0, 0x8000, 0};
14711
14712 // Float16 to 64-bit integers.
14713 TestFcvtzHelper(config,
14714 &MacroAssembler::Fcvtzs,
14715 kDRegSize,
14716 kHRegSize,
14717 zn_inputs,
14718 pg_inputs,
14719 expected_fcvtzs_fp162x);
14720
14721 TestFcvtzHelper(config,
14722 &MacroAssembler::Fcvtzu,
14723 kDRegSize,
14724 kHRegSize,
14725 zn_inputs,
14726 pg_inputs,
14727 expected_fcvtzu_fp162x);
14728}
14729
14730TEST_SVE(fcvtzs_fcvtzu_float) {
14731 const double w_max_float = 0x7fffff80; // Largest float < INT32_MAX.
14732 const double w_min_float = -w_max_float; // Smallest float > INT32_MIN.
14733 const double x_max_float = 0x7fffff8000000000; // Largest float < INT64_MAX.
14734 const double x_min_float = -x_max_float; // Smallest float > INT64_MIN.
14735 const double w_max_int_sub_one = kWMaxInt - 1;
14736 const double w_min_int_add_one = kWMinInt + 1;
14737 const double x_max_int_sub_one = kXMaxInt - 1;
14738 const double x_min_int_add_one = kXMinInt + 1;
14739
14740 double zn_inputs[] = {1.0,
14741 1.1,
14742 1.5,
14743 -1.5,
14744 w_max_float,
14745 w_min_float,
14746 x_max_float,
14747 x_min_float,
14748 kFP64PositiveInfinity,
14749 kFP64NegativeInfinity,
14750 w_max_int_sub_one,
14751 w_min_int_add_one,
14752 x_max_int_sub_one,
14753 x_min_int_add_one};
14754
14755 int pg_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0};
14756
14757 uint64_t expected_fcvtzs_s2w[] = {1,
14758 1,
14759 1,
14760 0xffffffff,
14761 0x7fffff80,
14762 0x80000080,
14763 0x7fffffff,
14764 0x80000000,
14765 0x7fffffff,
14766 0x80000000,
14767 0x7fffffff,
14768 0x80000000,
14769 0x7fffffff,
14770 0x80000000};
14771
14772 uint64_t expected_fcvtzu_s2w[] = {1,
14773 1,
14774 1,
14775 0,
14776 0x7fffff80,
14777 0,
14778 0xffffffff,
14779 0,
14780 0xffffffff,
14781 0,
14782 0x80000000,
14783 0,
14784 0xffffffff,
14785 0};
14786
14787 // Float to 32-bit integers.
14788 TestFcvtzHelper(config,
14789 &MacroAssembler::Fcvtzs,
14790 kSRegSize,
14791 kSRegSize,
14792 zn_inputs,
14793 pg_inputs,
14794 expected_fcvtzs_s2w);
14795
14796 TestFcvtzHelper(config,
14797 &MacroAssembler::Fcvtzu,
14798 kSRegSize,
14799 kSRegSize,
14800 zn_inputs,
14801 pg_inputs,
14802 expected_fcvtzu_s2w);
14803
14804 uint64_t expected_fcvtzs_s2x[] = {1,
14805 1,
14806 1,
14807 0xffffffffffffffff,
14808 0x7fffff80,
14809 0xffffffff80000080,
14810 0x7fffff8000000000,
14811 0x8000008000000000,
14812 0x7fffffffffffffff,
14813 0x8000000000000000,
14814 0x80000000,
14815 0xffffffff80000000,
14816 0x7fffffffffffffff,
14817 0x8000000000000000};
14818
14819 uint64_t expected_fcvtzu_s2x[] = {1,
14820 1,
14821 1,
14822 0,
14823 0x7fffff80,
14824 0,
14825 0x7fffff8000000000,
14826 0,
14827 0xffffffffffffffff,
14828 0,
14829 0x0000000080000000,
14830 0,
14831 0x8000000000000000,
14832 0};
14833
14834 // Float to 64-bit integers.
14835 TestFcvtzHelper(config,
14836 &MacroAssembler::Fcvtzs,
14837 kDRegSize,
14838 kSRegSize,
14839 zn_inputs,
14840 pg_inputs,
14841 expected_fcvtzs_s2x);
14842
14843   TestFcvtzHelper(config,
14844 &MacroAssembler::Fcvtzu,
14845 kDRegSize,
14846 kSRegSize,
14847 zn_inputs,
14848 pg_inputs,
14849 expected_fcvtzu_s2x);
14850 }
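
// For reference, a minimal scalar sketch (illustrative only, not part of the
// VIXL API or of the test suite) of the saturating, round-towards-zero
// conversion that the expected_fcvtzs_* arrays above encode. `dst_bits` is 32
// or 64, and the result is returned as a sign-extended raw lane value, the
// layout used by the 64-bit expected arrays. NaN inputs convert to zero.
inline uint64_t FcvtzsReferenceSketch(double value, int dst_bits) {
  uint64_t int_max = (uint64_t{1} << (dst_bits - 1)) - 1;  // e.g. 0x7fffffff.
  uint64_t int_min = ~int_max;                             // Sign-extended minimum.
  if (std::isnan(value)) return 0;
  double limit = std::ldexp(1.0, dst_bits - 1);  // 2^(dst_bits - 1), exact.
  if (value >= limit) return int_max;            // Saturate positive overflow.
  if (value < -limit) return int_min;            // Saturate negative overflow.
  return static_cast<uint64_t>(static_cast<int64_t>(std::trunc(value)));
}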
14851
14852TEST_SVE(fcvtzs_fcvtzu_double) {
14853   const double w_max_float = 0x7fffff80;          // Largest float < INT32_MAX.
14854 const double w_min_float = -w_max_float; // Smallest float > INT32_MIN.
14855 const double x_max_float = 0x7fffff8000000000; // Largest float < INT64_MAX.
14856 const double x_min_float = -x_max_float; // Smallest float > INT64_MIN.
14857   const double w_max_double = kWMaxInt;            // Largest double == INT32_MAX.
14858 const double w_min_double = -w_max_double; // Smallest double > INT32_MIN.
14859 const double x_max_double =
14860 0x7ffffffffffffc00; // Largest double < INT64_MAX.
14861 const double x_min_double = -x_max_double; // Smallest double > INT64_MIN.
14862   const double w_max_int_sub_one = kWMaxInt - 1;
14863 const double w_min_int_add_one = kWMinInt + 1;
14864 const double x_max_int_sub_one = kXMaxInt - 1;
14865 const double x_min_int_add_one = kXMinInt + 1;
14866
14867 double zn_inputs[] = {1.0,
14868 1.1,
14869 1.5,
14870 -1.5,
14871 w_max_float,
14872 w_min_float,
14873 x_max_float,
14874 x_min_float,
14875 w_max_double,
14876 w_min_double,
14877 x_max_double,
14878 x_min_double,
14879 kFP64PositiveInfinity,
14880 kFP64NegativeInfinity,
14881 w_max_int_sub_one,
14882 w_min_int_add_one,
14883 x_max_int_sub_one,
14884 x_min_int_add_one};
14885
14886 int pg_inputs[] = {1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0};
14887
14888 uint64_t expected_fcvtzs_d2w[] = {1,
14889 1,
14890 1,
14891 0xffffffffffffffff,
14892 0x7fffff80,
14893 0xffffffff80000080,
14894 0x7fffffff,
14895 0xffffffff80000000,
14896 0x7fffffff,
14897 0xffffffff80000001,
14898 0x7fffffff,
14899 0xffffffff80000000,
14900 0x7fffffff,
14901 0xffffffff80000000,
14902 0x7ffffffe,
14903 0xffffffff80000001,
14904 0x7fffffff,
14905 0xffffffff80000000};
14906
14907 uint64_t expected_fcvtzu_d2w[] = {1,
14908 1,
14909 1,
14910 0,
14911 0x7fffff80,
14912 0,
14913 0xffffffff,
14914 0,
14915 0x7fffffff,
14916 0,
14917 0xffffffff,
14918 0,
14919 0xffffffff,
14920 0,
14921 0x7ffffffe,
14922 0,
14923 0xffffffff,
14924 0};
14925
14926 // Double to 32-bit integers.
14927   TestFcvtzHelper(config,
14928 &MacroAssembler::Fcvtzs,
14929 kSRegSize,
14930 kDRegSize,
14931 zn_inputs,
14932 pg_inputs,
14933 expected_fcvtzs_d2w);
14934
14935   TestFcvtzHelper(config,
14936 &MacroAssembler::Fcvtzu,
14937 kSRegSize,
14938 kDRegSize,
14939 zn_inputs,
14940 pg_inputs,
14941 expected_fcvtzu_d2w);
14942
14943 uint64_t expected_fcvtzs_d2x[] = {1,
14944 1,
14945 1,
14946 0xffffffffffffffff,
14947 0x7fffff80,
14948 0xffffffff80000080,
14949 0x7fffff8000000000,
14950 0x8000008000000000,
14951 0x7fffffff,
14952 0xffffffff80000001,
14953 0x7ffffffffffffc00,
14954 0x8000000000000400,
14955 0x7fffffffffffffff,
14956 0x8000000000000000,
14957 0x7ffffffe,
14958 0xffffffff80000001,
14959 0x7fffffffffffffff,
14960 0x8000000000000000};
14961
14962 uint64_t expected_fcvtzu_d2x[] = {1,
14963 1,
14964 1,
14965 0,
14966 0x7fffff80,
14967 0,
14968 0x7fffff8000000000,
14969 0,
14970 0x7fffffff,
14971 0,
14972 0x7ffffffffffffc00,
14973 0,
14974 0xffffffffffffffff,
14975 0,
14976 0x000000007ffffffe,
14977 0,
14978 0x8000000000000000,
14979 0};
14980
14981 // Double to 64-bit integers.
14982   TestFcvtzHelper(config,
14983 &MacroAssembler::Fcvtzs,
14984 kDRegSize,
14985 kDRegSize,
14986 zn_inputs,
14987 pg_inputs,
14988 expected_fcvtzs_d2x);
14989
14990   TestFcvtzHelper(config,
14991 &MacroAssembler::Fcvtzu,
14992 kDRegSize,
14993 kDRegSize,
14994 zn_inputs,
14995 pg_inputs,
14996 expected_fcvtzu_d2x);
14997}
14998
14999template <typename F, size_t N>
15000static void TestFrintHelper(Test* config,
15001 FcvtFrintMFn macro_m,
15002 FcvtFrintZFn macro_z,
15003 int lane_size_in_bits,
15004 const F (&zn_inputs)[N],
15005 const int (&pg_inputs)[N],
15006 const F (&zd_expected)[N]) {
15007 uint64_t zd_expected_rawbits[N];
15008 FPToRawbitsWithSize(zd_expected, zd_expected_rawbits, lane_size_in_bits);
15009 TestFcvtFrintHelper(config,
15010 macro_m,
15011 macro_z,
15012 lane_size_in_bits,
15013 lane_size_in_bits,
15014 zn_inputs,
15015 pg_inputs,
15016 zd_expected_rawbits);
15017}
15018
15019TEST_SVE(frint) {
15020 const double inf_pos = kFP64PositiveInfinity;
15021 const double inf_neg = kFP64NegativeInfinity;
15022
15023 double zn_inputs[] =
15024 {1.1, 1.5, 1.9, 2.5, -1.5, -2.5, 0.0, -0.0, -0.2, inf_pos, inf_neg};
15025 double zd_expected_a[] =
15026 {1.0, 2.0, 2.0, 3.0, -2.0, -3.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
15027 double zd_expected_i[] =
15028 {1.0, 2.0, 2.0, 2.0, -2.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
15029 double zd_expected_m[] =
15030 {1.0, 1.0, 1.0, 2.0, -2.0, -3.0, 0.0, -0.0, -1.0, inf_pos, inf_neg};
15031 double zd_expected_n[] =
15032 {1.0, 2.0, 2.0, 2.0, -2.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
15033 double zd_expected_p[] =
15034 {2.0, 2.0, 2.0, 3.0, -1.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
15035 double zd_expected_x[] =
15036 {1.0, 2.0, 2.0, 2.0, -2.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
15037 double zd_expected_z[] =
15038 {1.0, 1.0, 1.0, 2.0, -1.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
15039
15040 int pg_inputs[] = {0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0};
15041
15042 struct TestDataSet {
15043 FcvtFrintMFn macro_m; // merging form.
15044 FcvtFrintZFn macro_z; // zeroing form.
15045 double (&expected)[11];
15046 };
15047
15048 TestDataSet test_data[] =
15049 {{&MacroAssembler::Frinta, &MacroAssembler::Frinta, zd_expected_a},
15050 {&MacroAssembler::Frinti, &MacroAssembler::Frinti, zd_expected_i},
15051 {&MacroAssembler::Frintm, &MacroAssembler::Frintm, zd_expected_m},
15052 {&MacroAssembler::Frintn, &MacroAssembler::Frintn, zd_expected_n},
15053 {&MacroAssembler::Frintp, &MacroAssembler::Frintp, zd_expected_p},
15054 {&MacroAssembler::Frintx, &MacroAssembler::Frintx, zd_expected_x},
15055 {&MacroAssembler::Frintz, &MacroAssembler::Frintz, zd_expected_z}};
15056
15057 unsigned lane_sizes[] = {kHRegSize, kSRegSize, kDRegSize};
15058
15059 for (size_t i = 0; i < sizeof(test_data) / sizeof(TestDataSet); i++) {
15060 for (size_t j = 0; j < ArrayLength(lane_sizes); j++) {
15061 TestFrintHelper(config,
15062 test_data[i].macro_m,
15063 test_data[i].macro_z,
15064 lane_sizes[j],
15065 zn_inputs,
15066 pg_inputs,
15067 test_data[i].expected);
15068 }
15069 }
15070 }
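
// An illustrative mapping (not VIXL API) from the Frint* forms tested above to
// scalar C++ rounding operations. Frinti and Frintx use the current rounding
// mode, which is round-to-nearest-even by default in these tests, and Frintx
// additionally raises Inexact, which this sketch does not model.
inline double FrintReferenceSketch(double value, char mode) {
  switch (mode) {
    case 'a':
      return std::round(value);  // To nearest, ties away from zero.
    case 'm':
      return std::floor(value);  // Towards minus infinity.
    case 'p':
      return std::ceil(value);  // Towards plus infinity.
    case 'z':
      return std::trunc(value);  // Towards zero.
    case 'n':  // To nearest, ties to even.
    case 'i':  // Current rounding mode.
    case 'x':  // Current rounding mode, raising Inexact.
    default:
      return std::nearbyint(value);  // Nearest-even in the default FP mode.
  }
}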
15071
15072 struct CvtfTestDataSet {
15073 uint64_t int_value;
15074 uint64_t scvtf_result;
15075 uint64_t ucvtf_result;
15076};
15077
15078template <size_t N>
15079static void TestUScvtfHelper(Test* config,
15080 int dst_type_size_in_bits,
15081 int src_type_size_in_bits,
15082 const int (&pg_inputs)[N],
15083 const CvtfTestDataSet (&data_set)[N]) {
15084 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15085 START();
15086
15087   // Unpack the data from the array of structs into individual arrays, to
15088   // simplify the testing.
15089 uint64_t zn_inputs[N];
15090 uint64_t expected_zd_scvtf_all_active[N];
15091 uint64_t expected_zd_ucvtf_all_active[N];
15092 for (size_t i = 0; i < N; i++) {
15093 zn_inputs[i] = data_set[i].int_value;
15094 expected_zd_scvtf_all_active[i] = data_set[i].scvtf_result;
15095 expected_zd_ucvtf_all_active[i] = data_set[i].ucvtf_result;
15096 }
15097
15098 // If the input and result types have a different size, the instruction
15099 // operates on elements of the largest specified type.
15100 int lane_size_in_bits =
15101 std::max(dst_type_size_in_bits, src_type_size_in_bits);
15102
15103 ZRegister zd_scvtf_all_active = z25;
15104 ZRegister zd_ucvtf_all_active = z26;
15105 ZRegister zn = z27;
15106 InsrHelper(&masm, zn.WithLaneSize(lane_size_in_bits), zn_inputs);
15107
15108 PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
15109 __ Ptrue(pg_all_active);
15110
15111   // Test integer conversions with all lanes active.
15112 __ Scvtf(zd_scvtf_all_active.WithLaneSize(dst_type_size_in_bits),
15113 pg_all_active.Merging(),
15114 zn.WithLaneSize(src_type_size_in_bits));
15115 __ Ucvtf(zd_ucvtf_all_active.WithLaneSize(dst_type_size_in_bits),
15116 pg_all_active.Merging(),
15117 zn.WithLaneSize(src_type_size_in_bits));
15118
15119 ZRegister zd_scvtf_merged = z23;
15120 ZRegister zd_ucvtf_merged = z24;
15121
15122 PRegisterWithLaneSize pg_merged = p1.WithLaneSize(lane_size_in_bits);
15123 Initialise(&masm, pg_merged, pg_inputs);
15124
15125 uint64_t snan;
15126 switch (lane_size_in_bits) {
15127 case kHRegSize:
15128 snan = 0x7c11;
15129 break;
15130 case kSRegSize:
15131 snan = 0x7f951111;
15132 break;
15133 case kDRegSize:
15134 snan = 0x7ff5555511111111;
15135 break;
15136 }
15137 __ Dup(zd_scvtf_merged.WithLaneSize(lane_size_in_bits), snan);
15138 __ Dup(zd_ucvtf_merged.WithLaneSize(lane_size_in_bits), snan);
15139
15140   // Use the same `zn` inputs to test the integer conversions, but with some
15141   // lanes set inactive.
15142 __ Scvtf(zd_scvtf_merged.WithLaneSize(dst_type_size_in_bits),
15143 pg_merged.Merging(),
15144 zn.WithLaneSize(src_type_size_in_bits));
15145 __ Ucvtf(zd_ucvtf_merged.WithLaneSize(dst_type_size_in_bits),
15146 pg_merged.Merging(),
15147 zn.WithLaneSize(src_type_size_in_bits));
15148
15149 END();
15150
15151 if (CAN_RUN()) {
15152 RUN();
15153
15154 ASSERT_EQUAL_SVE(expected_zd_scvtf_all_active,
15155 zd_scvtf_all_active.WithLaneSize(lane_size_in_bits));
15156 ASSERT_EQUAL_SVE(expected_zd_ucvtf_all_active,
15157 zd_ucvtf_all_active.WithLaneSize(lane_size_in_bits));
15158
15159 uint64_t expected_zd_scvtf_merged[N];
15160 for (size_t i = 0; i < N; i++) {
15161 expected_zd_scvtf_merged[i] =
15162 pg_inputs[i] ? expected_zd_scvtf_all_active[i] : snan;
15163 }
15164 ASSERT_EQUAL_SVE(expected_zd_scvtf_merged,
15165 zd_scvtf_merged.WithLaneSize(lane_size_in_bits));
15166
15167 uint64_t expected_zd_ucvtf_merged[N];
15168 for (size_t i = 0; i < N; i++) {
15169 expected_zd_ucvtf_merged[i] =
15170 pg_inputs[i] ? expected_zd_ucvtf_all_active[i] : snan;
15171 }
15172 ASSERT_EQUAL_SVE(expected_zd_ucvtf_merged,
15173 zd_ucvtf_merged.WithLaneSize(lane_size_in_bits));
15174 }
15175}
15176
15177TEST_SVE(scvtf_ucvtf_h_s_d_to_float16) {
15178 // clang-format off
15179 CvtfTestDataSet data_set_1[] = {
15180 // Simple conversions of positive numbers which require no rounding; the
15180     // results should not depend on the rounding mode, and ucvtf and scvtf should
15182 // produce the same result.
15183 {0x0000, 0x0000, 0x0000},
15184 {0x0001, 0x3c00, 0x3c00},
15185 {0x0010, 0x4c00, 0x4c00},
15186 {0x0080, 0x5800, 0x5800},
15187 {0x0400, 0x6400, 0x6400},
15188 // Conversions which require rounding.
15189 {0x4000, 0x7400, 0x7400},
15190 {0x4001, 0x7400, 0x7400},
15191 // Round up to produce a result that's too big for the input to represent.
15192 {0x7ff0, 0x77ff, 0x77ff},
15193 {0x7ff1, 0x77ff, 0x77ff},
15194 {0x7ffe, 0x7800, 0x7800},
15195 {0x7fff, 0x7800, 0x7800}};
15196 int pg_1[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
15197 TestUScvtfHelper(config, kHRegSize, kDRegSize, pg_1, data_set_1);
15198 TestUScvtfHelper(config, kHRegSize, kSRegSize, pg_1, data_set_1);
15199 TestUScvtfHelper(config, kHRegSize, kHRegSize, pg_1, data_set_1);
15200
15201 CvtfTestDataSet data_set_2[] = {
15202 // Test mantissa extremities.
15203 {0x0401, 0x6401, 0x6401},
15204 {0x4020, 0x7402, 0x7402},
15205 // The largest int16_t that fits in a float16.
15206 {0xffef, 0xcc40, 0x7bff},
15207 // Values that would be negative if treated as an int16_t.
15208 {0xff00, 0xdc00, 0x7bf8},
15209 {0x8000, 0xf800, 0x7800},
15210 {0x8100, 0xf7f0, 0x7808},
15211 // Check for bit pattern reproduction.
15212 {0x0123, 0x5c8c, 0x5c8c},
15213 {0x0cde, 0x6a6f, 0x6a6f},
15214 // Simple conversions of negative int64_t values. These require no rounding,
15215 // and the results should not depend on the rounding mode.
15216 {0xf800, 0xe800, 0x7bc0},
15217 {0xfc00, 0xe400, 0x7be0},
15218 {0xc000, 0xf400, 0x7a00},
15219 // Check rounding of negative int16_t values.
15220 {0x8ffe, 0xf700, 0x7880},
15221 {0x8fff, 0xf700, 0x7880},
15222 {0xffee, 0xcc80, 0x7bff},
15223 {0xffef, 0xcc40, 0x7bff}};
15224 int pg_2[] = {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1};
15225   // The `32-bit to float16` and `64-bit to float16` forms of the tests above
15226   // have already been covered by the `ucvtf` `16-bit to float16` cases.
15227 TestUScvtfHelper(config, kHRegSize, kHRegSize, pg_2, data_set_2);
15228 // clang-format on
15229}
15230
15231TEST_SVE(scvtf_ucvtf_s_to_float) {
15232 // clang-format off
15233 int dst_lane_size = kSRegSize;
15234 int src_lane_size = kSRegSize;
15235
15236 // Simple conversions of positive numbers which require no rounding; the
15237   // results should not depend on the rounding mode, and ucvtf and scvtf should
15238 // produce the same result.
15239 CvtfTestDataSet data_set_1[] = {
15240 {0x00000000, 0x00000000, 0x00000000},
15241 {0x00000001, 0x3f800000, 0x3f800000},
15242 {0x00004000, 0x46800000, 0x46800000},
15243 {0x00010000, 0x47800000, 0x47800000},
15244 {0x40000000, 0x4e800000, 0x4e800000}};
15245 int pg_1[] = {1, 0, 1, 0, 0};
15246 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
15247
15248 CvtfTestDataSet data_set_2[] = {
15249 // Test mantissa extremities.
15250 {0x00800001, 0x4b000001, 0x4b000001},
15251 {0x40400000, 0x4e808000, 0x4e808000},
15252     // The largest int32_t that fits in a float.
15253 {0x7fffff80, 0x4effffff, 0x4effffff},
15254 // Values that would be negative if treated as an int32_t.
15255 {0xffffffff, 0xbf800000, 0x4f800000},
15256 {0xffffff00, 0xc3800000, 0x4f7fffff},
15257 {0x80000000, 0xcf000000, 0x4f000000},
15258 {0x80000001, 0xcf000000, 0x4f000000},
15259 // Check for bit pattern reproduction.
15260 {0x089abcde, 0x4d09abce, 0x4d09abce},
15261 {0x12345678, 0x4d91a2b4, 0x4d91a2b4}};
15262 int pg_2[] = {1, 0, 1, 0, 1, 1, 1, 0, 0};
15263 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
15264
15265 // Simple conversions of negative int32_t values. These require no rounding,
15266 // and the results should not depend on the rounding mode.
15267 CvtfTestDataSet data_set_3[] = {
15268 {0xffffc000, 0xc6800000, 0x4f7fffc0},
15269 {0xffff0000, 0xc7800000, 0x4f7fff00},
15270 {0xc0000000, 0xce800000, 0x4f400000},
15271 // Conversions which require rounding.
15272 {0x72800000, 0x4ee50000, 0x4ee50000},
15273 {0x72800001, 0x4ee50000, 0x4ee50000},
15274 {0x73000000, 0x4ee60000, 0x4ee60000},
15275 // Check rounding of negative int32_t values.
15276 {0x80000140, 0xcefffffe, 0x4f000001},
15277 {0x80000141, 0xcefffffd, 0x4f000001},
15278 {0x80000180, 0xcefffffd, 0x4f000002},
15279 // Round up to produce a result that's too big for the input to represent.
15280 {0x7fffffc0, 0x4f000000, 0x4f000000},
15281 {0x7fffffff, 0x4f000000, 0x4f000000}};
15282 int pg_3[] = {1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0};
15283 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
15284 // clang-format on
15285}
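
// Illustrative only (these helpers are not VIXL API): the scalar model behind
// the 32-bit-to-float entries above. Scvtf interprets the lane as signed and
// Ucvtf as unsigned; both round to nearest-even, the default mode assumed by
// these tests.
inline uint32_t ScvtfWordToFloatReferenceSketch(uint32_t lane) {
  float converted = static_cast<float>(static_cast<int32_t>(lane));  // Signed view.
  return FloatToRawbits(converted);
}
inline uint32_t UcvtfWordToFloatReferenceSketch(uint32_t lane) {
  float converted = static_cast<float>(lane);  // Unsigned view.
  return FloatToRawbits(converted);
}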
15286
15287TEST_SVE(scvtf_ucvtf_d_to_float) {
15288 // clang-format off
15289 int dst_lane_size = kSRegSize;
15290 int src_lane_size = kDRegSize;
15291
15292 // Simple conversions of positive numbers which require no rounding; the
15293   // results should not depend on the rounding mode, and ucvtf and scvtf should
15294 // produce the same result.
15295 CvtfTestDataSet data_set_1[] = {
15296 {0x0000000000000000, 0x00000000, 0x00000000},
15297 {0x0000000000000001, 0x3f800000, 0x3f800000},
15298 {0x0000000040000000, 0x4e800000, 0x4e800000},
15299 {0x0000000100000000, 0x4f800000, 0x4f800000},
15300 {0x4000000000000000, 0x5e800000, 0x5e800000}};
15301 int pg_1[] = {1, 1, 0, 1, 0};
15302 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
15303
15304 CvtfTestDataSet data_set_2[] = {
15305 // Test mantissa extremities.
15306 {0x0010000000000001, 0x59800000, 0x59800000},
15307 {0x4008000000000000, 0x5e801000, 0x5e801000},
15308 // The largest int32_t that fits in a float.
15309 {0x000000007fffff80, 0x4effffff, 0x4effffff},
15310 // Values that would be negative if treated as an int32_t.
15311 {0x00000000ffffffff, 0x4f800000, 0x4f800000},
15312 {0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff},
15313 {0x0000000080000000, 0x4f000000, 0x4f000000},
15314 {0x0000000080000100, 0x4f000001, 0x4f000001},
15315 // The largest int64_t that fits in a float.
15316 {0x7fffff8000000000, 0x5effffff, 0x5effffff},
15317 // Check for bit pattern reproduction.
15318 {0x0123456789abcde0, 0x5b91a2b4, 0x5b91a2b4},
15319 {0x0000000000876543, 0x4b076543, 0x4b076543}};
15320 int pg_2[] = {1, 0, 0, 0, 1, 0, 0, 0, 0, 1};
15321 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
15322
15323 CvtfTestDataSet data_set_3[] = {
15324 // Simple conversions of negative int64_t values. These require no rounding,
15325 // and the results should not depend on the rounding mode.
15326 {0xffffffffc0000000, 0xce800000, 0x5f800000},
15327 {0xffffffff00000000, 0xcf800000, 0x5f800000},
15328 {0xc000000000000000, 0xde800000, 0x5f400000},
15329 // Conversions which require rounding.
15330 {0x0000800002800000, 0x57000002, 0x57000002},
15331 {0x0000800002800001, 0x57000003, 0x57000003},
15332 {0x0000800003000000, 0x57000003, 0x57000003},
15333 // Check rounding of negative int64_t values.
15334 {0x8000014000000000, 0xdefffffe, 0x5f000001},
15335 {0x8000014000000001, 0xdefffffd, 0x5f000001},
15336 {0x8000018000000000, 0xdefffffd, 0x5f000002},
15337 // Round up to produce a result that's too big for the input to represent.
15338 {0x00000000ffffff80, 0x4f800000, 0x4f800000},
15339 {0x00000000ffffffff, 0x4f800000, 0x4f800000},
15340 {0xffffff8000000000, 0xd3000000, 0x5f800000},
15341 {0xffffffffffffffff, 0xbf800000, 0x5f800000}};
15342 int pg_3[] = {0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1};
15343 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
15344 // clang-format on
15345}
15346
15347TEST_SVE(scvtf_ucvtf_d_to_double) {
15348 // clang-format off
15349 int dst_lane_size = kDRegSize;
15350 int src_lane_size = kDRegSize;
15351
15352 // Simple conversions of positive numbers which require no rounding; the
15353   // results should not depend on the rounding mode, and ucvtf and scvtf should
15354 // produce the same result.
15355 CvtfTestDataSet data_set_1[] = {
15356 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
15357 {0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000},
15358 {0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000},
15359 {0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000},
15360 {0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000}};
15361 int pg_1[] = {0, 1, 1, 0, 0};
15362 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
15363
15364 CvtfTestDataSet data_set_2[] = {
15365 // Test mantissa extremities.
15366 {0x0010000000000001, 0x4330000000000001, 0x4330000000000001},
15367 {0x4008000000000000, 0x43d0020000000000, 0x43d0020000000000},
15368 // The largest int32_t that fits in a double.
15369 {0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000},
15370 // Values that would be negative if treated as an int32_t.
15371 {0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000},
15372 {0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000},
15373 {0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000},
15374 // The largest int64_t that fits in a double.
15375 {0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff},
15376 // Check for bit pattern reproduction.
15377 {0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde},
15378 {0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000}};
15379 int pg_2[] = {1, 1, 1, 1, 1, 0, 0, 0, 0};
15380 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
15381
15382 CvtfTestDataSet data_set_3[] = {
15383 // Simple conversions of negative int64_t values. These require no rounding,
15384 // and the results should not depend on the rounding mode.
15385 {0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000},
15386 {0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000},
15387 {0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000},
15388 // Conversions which require rounding.
15389 {0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002},
15390 {0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003},
15391 {0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003},
15392 // Check rounding of negative int64_t values.
15393 {0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001},
15394 {0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001},
15395 {0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002},
15396 // Round up to produce a result that's too big for the input to represent.
15397 {0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000},
15398 {0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000},
15399 {0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000},
15400 {0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000}};
15401 int pg_3[] = {1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0};
15402 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
15403 // clang-format on
15404}
15405
15406TEST_SVE(scvtf_ucvtf_s_to_double) {
15407 // clang-format off
15408 int dst_lane_size = kDRegSize;
15409 int src_lane_size = kSRegSize;
15410
15411 // Simple conversions of positive numbers which require no rounding; the
15412   // results should not depend on the rounding mode, and ucvtf and scvtf should
15413 // produce the same result.
15414 CvtfTestDataSet data_set_1[] = {
15415 {0x00000000, 0x0000000000000000, 0x0000000000000000},
15416 {0x00000001, 0x3ff0000000000000, 0x3ff0000000000000},
15417 {0x00004000, 0x40d0000000000000, 0x40d0000000000000},
15418 {0x00010000, 0x40f0000000000000, 0x40f0000000000000},
15419 {0x40000000, 0x41d0000000000000, 0x41d0000000000000}};
15420 int pg_1[] = {1, 0, 0, 0, 1};
15421 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
15422
15423 CvtfTestDataSet data_set_2[] = {
15424 // Test mantissa extremities.
15425 {0x40000400, 0x41d0000100000000, 0x41d0000100000000},
15426 // The largest int32_t that fits in a double.
15427 {0x7fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000},
15428 // Values that would be negative if treated as an int32_t.
15429 {0xffffffff, 0xbff0000000000000, 0x41efffffffe00000},
15430 {0x80000000, 0xc1e0000000000000, 0x41e0000000000000},
15431 {0x80000001, 0xc1dfffffffc00000, 0x41e0000000200000},
15432 // Check for bit pattern reproduction.
15433 {0x089abcde, 0x41a13579bc000000, 0x41a13579bc000000},
15434 {0x12345678, 0x41b2345678000000, 0x41b2345678000000},
15435 // Simple conversions of negative int32_t values. These require no rounding,
15436 // and the results should not depend on the rounding mode.
15437 {0xffffc000, 0xc0d0000000000000, 0x41effff800000000},
15438 {0xffff0000, 0xc0f0000000000000, 0x41efffe000000000},
15439 {0xc0000000, 0xc1d0000000000000, 0x41e8000000000000}};
15440 int pg_2[] = {1, 0, 1, 0, 0, 1, 1, 0, 1, 1};
15441 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
15442
15443   // Note that the IEEE 754 double-precision format has a 52-bit fraction, so
15444   // all 32-bit integers are representable in double.
15445 // clang-format on
15446}
15447
15448 TEST_SVE(sve_fadda) {
15449 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
15450 CPUFeatures::kFP,
15451 CPUFeatures::kFPHalf);
15452 START();
15453
15454 __ Ptrue(p0.VnB());
15455 __ Pfalse(p1.VnB());
15456 __ Zip1(p1.VnH(), p0.VnH(), p1.VnH());
15457
15458 __ Index(z0.VnS(), 3, 3);
15459 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
15460 __ Fmov(s2, 2.0);
15461 __ Fadda(s2, p0, s2, z0.VnS());
15462
15463 __ Index(z0.VnD(), -7, -7);
15464 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
15465 __ Fmov(d3, 3.0);
15466 __ Fadda(d3, p0, d3, z0.VnD());
15467
15468 __ Index(z0.VnH(), 1, 1);
15469 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
15470 __ Fmov(h4, 0);
15471 __ Fadda(h4, p1, h4, z0.VnH());
15472 END();
15473
15474 if (CAN_RUN()) {
15475 RUN();
15476     // Sum of 1 .. n is n(n+1)/2; n is even here, so (n + 1) * (n / 2) is exact.
15477 int n = core.GetSVELaneCount(kSRegSize);
15478 ASSERT_EQUAL_FP32(2 + 3 * ((n + 1) * (n / 2)), s2);
15479
15480 n /= 2; // Half as many lanes.
15481 ASSERT_EQUAL_FP64(3 + -7 * ((n + 1) * (n / 2)), d3);
15482
15483 // Sum of first n odd numbers is n^2.
15484 n = core.GetSVELaneCount(kHRegSize) / 2; // Half are odd numbers.
15485 ASSERT_EQUAL_FP16(Float16(n * n), h4);
15486 }
15487}
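
// Fadda is a strictly-ordered accumulation: active lanes are added to the
// scalar accumulator one at a time, from the lowest lane upwards. A scalar
// sketch (illustrative only) of the value checked for s2 above, assuming the
// same Index/Scvtf inputs:
inline float FaddaExpectedS2ReferenceSketch(int lane_count) {
  float acc = 2.0f;  // Initial accumulator value placed in s2.
  for (int i = 0; i < lane_count; i++) {
    acc += 3.0f * (i + 1);  // Lanes hold 3.0, 6.0, 9.0, ...
  }
  return acc;  // Equals 2 + 3 * n * (n + 1) / 2.
}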
15488
15489 TEST_SVE(sve_extract) {
15490 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15491 START();
15492
15493 __ Index(z0.VnB(), 0, 1);
15494
15495 __ Mov(z1, z0);
15496 __ Mov(z2, z0);
15497 __ Mov(z3, z0);
15498 __ Mov(z4, z0);
15499 __ Mov(z5, z0);
15500 __ Mov(z6, z0);
15501
15502 __ Ext(z1, z1, z0, 0);
15503 __ Ext(z2, z2, z0, 1);
15504 __ Ext(z3, z3, z0, 15);
15505 __ Ext(z4, z4, z0, 31);
15506 __ Ext(z5, z5, z0, 47);
15507 __ Ext(z6, z6, z0, 255);
15508
15509 END();
15510
15511 if (CAN_RUN()) {
15512 RUN();
15513
15514 ASSERT_EQUAL_SVE(z1, z0);
15515
15516 int lane_count = core.GetSVELaneCount(kBRegSize);
15517 if (lane_count == 16) {
15518 uint64_t z2_expected[] = {0x000f0e0d0c0b0a09, 0x0807060504030201};
15519 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
15520 } else {
15521 uint64_t z2_expected[] = {0x100f0e0d0c0b0a09, 0x0807060504030201};
15522 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
15523 }
15524
15525 if (lane_count == 16) {
15526 uint64_t z3_expected[] = {0x0e0d0c0b0a090807, 0x060504030201000f};
15527 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
15528 } else {
15529 uint64_t z3_expected[] = {0x1e1d1c1b1a191817, 0x161514131211100f};
15530 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
15531 }
15532
15533 if (lane_count < 32) {
15534 ASSERT_EQUAL_SVE(z4, z0);
15535 } else if (lane_count == 32) {
15536 uint64_t z4_expected[] = {0x0e0d0c0b0a090807, 0x060504030201001f};
15537 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
15538 } else {
15539 uint64_t z4_expected[] = {0x2e2d2c2b2a292827, 0x262524232221201f};
15540 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
15541 }
15542
15543 if (lane_count < 48) {
15544 ASSERT_EQUAL_SVE(z5, z0);
15545 } else if (lane_count == 48) {
15546 uint64_t z5_expected[] = {0x0e0d0c0b0a090807, 0x060504030201002f};
15547 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
15548 } else {
15549 uint64_t z5_expected[] = {0x3e3d3c3b3a393837, 0x363534333231302f};
15550 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
15551 }
15552
15553 if (lane_count < 256) {
15554 ASSERT_EQUAL_SVE(z6, z0);
15555 } else {
15556 uint64_t z6_expected[] = {0x0e0d0c0b0a090807, 0x06050403020100ff};
15557 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
15558 }
15559 }
15560}
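
// Illustrative sketch (not VIXL API) of the EXT semantics that the
// expectations above rely on: the result is read byte by byte from the
// concatenation {zm:zdn}, starting at the immediate offset. The expectations
// are also consistent with an offset at or beyond the vector length leaving
// the first operand unchanged, modelled here by clamping the offset to zero.
// `zd` is assumed not to alias the sources, purely for clarity.
inline void ExtReferenceSketch(uint8_t* zd,
                               const uint8_t* zdn,
                               const uint8_t* zm,
                               int offset,
                               int vl_in_bytes) {
  if (offset >= vl_in_bytes) offset = 0;  // Out-of-range offset: no change.
  for (int i = 0; i < vl_in_bytes; i++) {
    int pos = offset + i;
    zd[i] = (pos < vl_in_bytes) ? zdn[pos] : zm[pos - vl_in_bytes];
  }
}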
15561
15562 TEST_SVE(sve_fp_paired_across) {
15563 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15564
15565 START();
15566
15567 __ Ptrue(p0.VnB());
15568 __ Pfalse(p1.VnB());
15569 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
15570 __ Zip1(p3.VnD(), p0.VnD(), p1.VnD());
15571 __ Zip1(p4.VnH(), p0.VnH(), p1.VnH());
15572
15573 __ Index(z0.VnS(), 3, 3);
15574 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
15575 __ Faddv(s1, p0, z0.VnS());
15576 __ Fminv(s2, p2, z0.VnS());
15577 __ Fmaxv(s3, p2, z0.VnS());
15578
15579 __ Index(z0.VnD(), -7, -7);
15580 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
15581 __ Faddv(d4, p0, z0.VnD());
15582 __ Fminv(d5, p3, z0.VnD());
15583 __ Fmaxv(d6, p3, z0.VnD());
15584
15585 __ Index(z0.VnH(), 1, 1);
15586 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
15587 __ Faddv(h7, p4, z0.VnH());
15588 __ Fminv(h8, p4, z0.VnH());
15589 __ Fmaxv(h9, p4, z0.VnH());
15590
15591 __ Dup(z10.VnH(), 0);
15592 __ Fdiv(z10.VnH(), p0.Merging(), z10.VnH(), z10.VnH());
15593 __ Insr(z10.VnH(), 0x5140);
15594 __ Insr(z10.VnH(), 0xd140);
15595 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 2);
15596 __ Fmaxnmv(h11, p0, z10.VnH());
15597 __ Fmaxnmv(h12, p4, z10.VnH());
15598 __ Fminnmv(h13, p0, z10.VnH());
15599 __ Fminnmv(h14, p4, z10.VnH());
15600
15601 __ Dup(z10.VnS(), 0);
15602 __ Fdiv(z10.VnS(), p0.Merging(), z10.VnS(), z10.VnS());
15603 __ Insr(z10.VnS(), 0x42280000);
15604 __ Insr(z10.VnS(), 0xc2280000);
15605 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 4);
15606 __ Fmaxnmv(s15, p0, z10.VnS());
15607 __ Fmaxnmv(s16, p2, z10.VnS());
15608 __ Fminnmv(s17, p0, z10.VnS());
15609 __ Fminnmv(s18, p2, z10.VnS());
15610
15611 __ Dup(z10.VnD(), 0);
15612 __ Fdiv(z10.VnD(), p0.Merging(), z10.VnD(), z10.VnD());
15613 __ Insr(z10.VnD(), 0x4045000000000000);
15614 __ Insr(z10.VnD(), 0xc045000000000000);
15615 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 8);
15616 __ Fmaxnmv(d19, p0, z10.VnD());
15617 __ Fmaxnmv(d20, p3, z10.VnD());
15618 __ Fminnmv(d21, p0, z10.VnD());
15619 __ Fminnmv(d22, p3, z10.VnD());
15620 END();
15621
15622 if (CAN_RUN()) {
15623 RUN();
15624     // Sum of 1 .. n is n(n+1)/2; n is even here, so (n + 1) * (n / 2) is exact.
15625 int n = core.GetSVELaneCount(kSRegSize);
15626 ASSERT_EQUAL_FP32(3 * ((n + 1) * (n / 2)), s1);
15627 ASSERT_EQUAL_FP32(3, s2);
15628 ASSERT_EQUAL_FP32(3 * n - 3, s3);
15629
15630 n /= 2; // Half as many lanes.
15631 ASSERT_EQUAL_FP64(-7 * ((n + 1) * (n / 2)), d4);
15632 ASSERT_EQUAL_FP64(-7 * (n - 1), d5);
15633 ASSERT_EQUAL_FP64(-7, d6);
15634
15635 // Sum of first n odd numbers is n^2.
15636 n = core.GetSVELaneCount(kHRegSize) / 2; // Half are odd numbers.
15637 ASSERT_EQUAL_FP16(Float16(n * n), h7);
15638 ASSERT_EQUAL_FP16(Float16(1), h8);
15639
15640 n = core.GetSVELaneCount(kHRegSize);
15641 ASSERT_EQUAL_FP16(Float16(n - 1), h9);
15642
15643 ASSERT_EQUAL_FP16(Float16(42), h11);
15644 ASSERT_EQUAL_FP16(Float16(42), h12);
15645 ASSERT_EQUAL_FP16(Float16(-42), h13);
15646 ASSERT_EQUAL_FP16(Float16(42), h14);
15647 ASSERT_EQUAL_FP32(42, s15);
15648 ASSERT_EQUAL_FP32(42, s16);
15649 ASSERT_EQUAL_FP32(-42, s17);
15650 ASSERT_EQUAL_FP32(42, s18);
15651 ASSERT_EQUAL_FP64(42, d19);
15652 ASSERT_EQUAL_FP64(42, d20);
15653 ASSERT_EQUAL_FP64(-42, d21);
15654 ASSERT_EQUAL_FP64(42, d22);
15655 }
15656}
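
// Fmaxnmv and Fminnmv above follow the IEEE maxNum/minNum rules: a single
// quiet NaN operand (produced here by the 0.0 / 0.0 divisions) is ignored in
// favour of the numeric operand. A scalar sketch of that rule, with the
// both-NaN and signalling-NaN cases deliberately left unmodelled:
inline double FmaxnmReferenceSketch(double a, double b) {
  if (std::isnan(a)) return b;  // A single NaN operand is ignored.
  if (std::isnan(b)) return a;
  return (a > b) ? a : b;
}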
15657
15658 TEST_SVE(sve_frecpe_frsqrte) {
15659 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15660
15661 START();
15662
15663 __ Ptrue(p0.VnB());
15664
15665 __ Index(z0.VnH(), 0, 1);
15666 __ Fdup(z1.VnH(), Float16(1));
15667 __ Fscale(z1.VnH(), p0.Merging(), z1.VnH(), z0.VnH());
15668 __ Insr(z1.VnH(), 0);
15669 __ Frsqrte(z2.VnH(), z1.VnH());
15670 __ Frecpe(z1.VnH(), z1.VnH());
15671
15672 __ Index(z0.VnS(), 0, 1);
15673 __ Fdup(z3.VnS(), Float16(1));
15674 __ Fscale(z3.VnS(), p0.Merging(), z3.VnS(), z0.VnS());
15675 __ Insr(z3.VnS(), 0);
15676 __ Frsqrte(z4.VnS(), z3.VnS());
15677 __ Frecpe(z3.VnS(), z3.VnS());
15678
15679 __ Index(z0.VnD(), 0, 1);
15680 __ Fdup(z5.VnD(), Float16(1));
15681 __ Fscale(z5.VnD(), p0.Merging(), z5.VnD(), z0.VnD());
15682 __ Insr(z5.VnD(), 0);
15683 __ Frsqrte(z6.VnD(), z5.VnD());
15684 __ Frecpe(z5.VnD(), z5.VnD());
15685 END();
15686
15687 if (CAN_RUN()) {
15688 RUN();
15689 uint64_t z1_expected[] = {0x23fc27fc2bfc2ffc, 0x33fc37fc3bfc7c00};
15690 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
15691 uint64_t z2_expected[] = {0x2ffc31a433fc35a4, 0x37fc39a43bfc7c00};
15692 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
15693
15694 uint64_t z3_expected[] = {0x3e7f80003eff8000, 0x3f7f80007f800000};
15695 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
15696 uint64_t z4_expected[] = {0x3eff80003f348000, 0x3f7f80007f800000};
15697 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
15698
15699 uint64_t z5_expected[] = {0x3feff00000000000, 0x7ff0000000000000};
15700 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
15701 uint64_t z6_expected[] = {0x3feff00000000000, 0x7ff0000000000000};
15702 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
15703 }
15704}
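
// The inputs above are built with Fscale, which multiplies each lane by two
// raised to the power held in the corresponding integer lane of the second
// operand, so scaling 1.0 by an index vector gives exact powers of two. A
// scalar sketch (illustrative only):
inline double FscaleReferenceSketch(double value, int64_t exponent) {
  return std::ldexp(value, static_cast<int>(exponent));
}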
15705
15706 TEST_SVE(sve_frecps_frsqrts) {
15707 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15708
15709 START();
15710 __ Ptrue(p0.VnB());
15711
15712 __ Index(z0.VnH(), 0, -1);
15713 __ Fdup(z1.VnH(), Float16(1));
15714 __ Fscale(z1.VnH(), p0.Merging(), z1.VnH(), z0.VnH());
15715 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
15716 __ Insr(z1.VnH(), 0);
15717 __ Frsqrts(z2.VnH(), z1.VnH(), z0.VnH());
15718 __ Frecps(z1.VnH(), z1.VnH(), z0.VnH());
15719
15720 __ Index(z0.VnS(), 0, -1);
15721 __ Fdup(z3.VnS(), Float16(1));
15722 __ Fscale(z3.VnS(), p0.Merging(), z3.VnS(), z0.VnS());
15723 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
15724 __ Insr(z3.VnS(), 0);
15725 __ Frsqrts(z4.VnS(), z3.VnS(), z0.VnS());
15726 __ Frecps(z3.VnS(), z3.VnS(), z0.VnS());
15727
15728 __ Index(z0.VnD(), 0, -1);
15729 __ Fdup(z5.VnD(), Float16(1));
15730 __ Fscale(z5.VnD(), p0.Merging(), z5.VnD(), z0.VnD());
15731 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
15732 __ Insr(z5.VnD(), 0);
15733 __ Frsqrts(z6.VnD(), z5.VnD(), z0.VnD());
15734 __ Frecps(z5.VnD(), z5.VnD(), z0.VnD());
15735 END();
15736
15737 if (CAN_RUN()) {
15738 RUN();
15739 uint64_t z1_expected[] = {0x4038406040a04100, 0x4180420042004000};
15740 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
15741 uint64_t z2_expected[] = {0x3e383e603ea03f00, 0x3f80400040003e00};
15742 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
15743
15744 uint64_t z3_expected[] = {0x4030000040400000, 0x4040000040000000};
15745 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
15746 uint64_t z4_expected[] = {0x3ff0000040000000, 0x400000003fc00000};
15747 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
15748
15749 uint64_t z5_expected[] = {0x4008000000000000, 0x4000000000000000};
15750 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
15751 uint64_t z6_expected[] = {0x4000000000000000, 0x3ff8000000000000};
15752 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
15753 }
15754}
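
// Illustrative scalar models (not VIXL API) of the Newton-Raphson step
// instructions exercised above; apart from the NaN and infinity special
// cases, the expected bit patterns encode exactly these expressions:
inline double FrecpsReferenceSketch(double op1, double op2) {
  return 2.0 - (op1 * op2);
}
inline double FrsqrtsReferenceSketch(double op1, double op2) {
  return (3.0 - (op1 * op2)) / 2.0;
}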
15755
15756TEST_SVE(sve_ftsmul) {
15757 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15758
15759 START();
15760 __ Ptrue(p0.VnB());
15761
15762 __ Index(z0.VnH(), 0, 1);
15763 __ Rev(z1.VnH(), z0.VnH());
15764 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
15765 __ Dup(z2.VnH(), 0);
15766 __ Fdiv(z2.VnH(), p0.Merging(), z2.VnH(), z2.VnH());
15767 __ Ftsmul(z3.VnH(), z0.VnH(), z1.VnH());
15768 __ Ftsmul(z4.VnH(), z2.VnH(), z1.VnH());
15769
15770 __ Index(z0.VnS(), -7, 1);
15771 __ Rev(z1.VnS(), z0.VnS());
15772 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
15773 __ Dup(z2.VnS(), 0);
15774 __ Fdiv(z2.VnS(), p0.Merging(), z2.VnS(), z2.VnS());
15775 __ Ftsmul(z5.VnS(), z0.VnS(), z1.VnS());
15776 __ Ftsmul(z6.VnS(), z2.VnS(), z1.VnS());
15777
15778 __ Index(z0.VnD(), 2, -1);
15779 __ Rev(z1.VnD(), z0.VnD());
15780 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
15781 __ Dup(z2.VnD(), 0);
15782 __ Fdiv(z2.VnD(), p0.Merging(), z2.VnD(), z2.VnD());
15783 __ Ftsmul(z7.VnD(), z0.VnD(), z1.VnD());
15784 __ Ftsmul(z8.VnD(), z2.VnD(), z1.VnD());
15785 END();
15786
15787 if (CAN_RUN()) {
15788 RUN();
15789 uint64_t z3_expected[] = {0x5220d0804e40cc00, 0x4880c4003c008000};
15790 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
15791 uint64_t z4_expected[] = {0x7e007e007e007e00, 0x7e007e007e007e00};
15792 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
15793
15794 uint64_t z5_expected[] = {0x41800000c1c80000, 0x42100000c2440000};
15795 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
15796 uint64_t z6_expected[] = {0x7fc000007fc00000, 0x7fc000007fc00000};
15797 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
15798
15799 uint64_t z7_expected[] = {0x3ff0000000000000, 0xc010000000000000};
15800 ASSERT_EQUAL_SVE(z7_expected, z7.VnD());
15801 uint64_t z8_expected[] = {0x7ff8000000000000, 0x7ff8000000000000};
15802 ASSERT_EQUAL_SVE(z8_expected, z8.VnD());
15803 }
15804}
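
// Illustrative sketch (not VIXL API): FTSMUL squares the first operand and
// negates the result when bit 0 of the second operand's raw lane value is
// set; NaN inputs (the 0.0 / 0.0 lanes above) produce the default quiet NaN,
// as the z4, z6 and z8 expectations show.
inline double FtsmulReferenceSketch(double op1, uint64_t op2_rawbits) {
  double square = op1 * op1;
  return ((op2_rawbits & 1) != 0) ? -square : square;
}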
15805
15806typedef void (MacroAssembler::*FPMulAccFn)(
15807 const ZRegister& zd,
15808 const PRegisterM& pg,
15809 const ZRegister& za,
15810 const ZRegister& zn,
15811 const ZRegister& zm,
15812 FPMacroNaNPropagationOption nan_option);
15813
15814 // `pg_inputs` is used internally to check that the predication is applied
15815 // correctly. It does not determine the `result` argument: `result` holds the
15816 // expected values for an all-true predicate.
15817template <typename T, size_t N>
15818static void FPMulAccHelper(
15819 Test* config,
15820 FPMulAccFn macro,
15821 unsigned lane_size_in_bits,
15822 const int (&pg_inputs)[N],
15823 const T (&za_inputs)[N],
15824 const T (&zn_inputs)[N],
15825 const T (&zm_inputs)[N],
15826 const uint64_t (&result)[N],
15827 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
15828 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15829 START();
15830
15831 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
15832 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
15833 ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
15834 ZRegister zm = z3.WithLaneSize(lane_size_in_bits);
15835
15836 uint64_t za_rawbits[N];
15837 uint64_t zn_rawbits[N];
15838 uint64_t zm_rawbits[N];
15839
15840 FPToRawbitsWithSize(za_inputs, za_rawbits, lane_size_in_bits);
15841 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
15842 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
15843
15844 InsrHelper(&masm, za, za_rawbits);
15845 InsrHelper(&masm, zn, zn_rawbits);
15846 InsrHelper(&masm, zm, zm_rawbits);
15847
15848   // Initialize `zd` with a signalling NaN.
15849 uint64_t sn = GetSignallingNan(lane_size_in_bits);
15850 __ Mov(x29, sn);
15851 __ Dup(zd, x29);
15852
15853 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
15854
15855   // The Fmla macro automatically selects between fmla, fmad and movprfx + fmla;
15856   // similarly, Fmls selects between fmls, fmsb and movprfx + fmls,
15857   // Fnmla selects between fnmla, fnmad and movprfx + fnmla, and
15858   // Fnmls selects between fnmls, fnmsb and movprfx + fnmls,
15859   // all based on which registers are aliased.
15860 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
15861 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
15862 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
15863 ZRegister d_result = z13.WithLaneSize(lane_size_in_bits);
15864
15865 __ Mov(da_result, za);
15866 (masm.*macro)(da_result, p0.Merging(), da_result, zn, zm, nan_option);
15867
15868 __ Mov(dn_result, zn);
15869 (masm.*macro)(dn_result, p0.Merging(), za, dn_result, zm, nan_option);
15870
15871 __ Mov(dm_result, zm);
15872 (masm.*macro)(dm_result, p0.Merging(), za, zn, dm_result, nan_option);
15873
15874 __ Mov(d_result, zd);
15875 (masm.*macro)(d_result, p0.Merging(), za, zn, zm, nan_option);
15876
15877 END();
15878
15879 if (CAN_RUN()) {
15880 RUN();
15881
15882 ASSERT_EQUAL_SVE(za_rawbits, za);
15883 ASSERT_EQUAL_SVE(zn_rawbits, zn);
15884 ASSERT_EQUAL_SVE(zm_rawbits, zm);
15885
15886 uint64_t da_expected[N];
15887 uint64_t dn_expected[N];
15888 uint64_t dm_expected[N];
15889 uint64_t d_expected[N];
15890 for (size_t i = 0; i < N; i++) {
15891 da_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : za_rawbits[i];
15892 dn_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zn_rawbits[i];
15893 dm_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zm_rawbits[i];
15894       d_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : sn;
15895     }
15896
15897 ASSERT_EQUAL_SVE(da_expected, da_result);
15898 ASSERT_EQUAL_SVE(dn_expected, dn_result);
15899 ASSERT_EQUAL_SVE(dm_expected, dm_result);
15900 ASSERT_EQUAL_SVE(d_expected, d_result);
15901 }
15902}
15903
15904TEST_SVE(sve_fmla_fmad) {
15905 // fmla : zd = za + zn * zm
15906 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
15907 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
15908 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
15909 int pg_inputs[] = {1, 1, 0, 1};
15910
15911 uint64_t fmla_result_h[] = {Float16ToRawbits(Float16(-84.0)),
15912 Float16ToRawbits(Float16(101.0)),
15913 Float16ToRawbits(Float16(33.0)),
15914 Float16ToRawbits(Float16(42.0))};
15915
15916 // `fmad` has been tested in the helper.
15917 FPMulAccHelper(config,
15918 &MacroAssembler::Fmla,
15919 kHRegSize,
15920 pg_inputs,
15921 za_inputs,
15922 zn_inputs,
15923 zm_inputs,
15924 fmla_result_h);
15925
15926 uint64_t fmla_result_s[] = {FloatToRawbits(-84.0f),
15927 FloatToRawbits(101.0f),
15928 FloatToRawbits(33.0f),
15929 FloatToRawbits(42.0f)};
15930
15931 FPMulAccHelper(config,
15932 &MacroAssembler::Fmla,
15933 kSRegSize,
15934 pg_inputs,
15935 za_inputs,
15936 zn_inputs,
15937 zm_inputs,
15938 fmla_result_s);
15939
15940 uint64_t fmla_result_d[] = {DoubleToRawbits(-84.0),
15941 DoubleToRawbits(101.0),
15942 DoubleToRawbits(33.0),
15943 DoubleToRawbits(42.0)};
15944
15945 FPMulAccHelper(config,
15946 &MacroAssembler::Fmla,
15947 kDRegSize,
15948 pg_inputs,
15949 za_inputs,
15950 zn_inputs,
15951 zm_inputs,
15952 fmla_result_d);
15953}
15954
15955TEST_SVE(sve_fmls_fmsb) {
15956 // fmls : zd = za - zn * zm
15957 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
15958 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
15959 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
15960 int pg_inputs[] = {1, 0, 1, 1};
15961
15962 uint64_t fmls_result_h[] = {Float16ToRawbits(Float16(6.0)),
15963 Float16ToRawbits(Float16(-99.0)),
15964 Float16ToRawbits(Float16(-39.0)),
15965 Float16ToRawbits(Float16(-38.0))};
15966
15967 // `fmsb` has been tested in the helper.
15968 FPMulAccHelper(config,
15969 &MacroAssembler::Fmls,
15970 kHRegSize,
15971 pg_inputs,
15972 za_inputs,
15973 zn_inputs,
15974 zm_inputs,
15975 fmls_result_h);
15976
15977 uint64_t fmls_result_s[] = {FloatToRawbits(6.0f),
15978 FloatToRawbits(-99.0f),
15979 FloatToRawbits(-39.0f),
15980 FloatToRawbits(-38.0f)};
15981
15982 FPMulAccHelper(config,
15983 &MacroAssembler::Fmls,
15984 kSRegSize,
15985 pg_inputs,
15986 za_inputs,
15987 zn_inputs,
15988 zm_inputs,
15989 fmls_result_s);
15990
15991 uint64_t fmls_result_d[] = {DoubleToRawbits(6.0),
15992 DoubleToRawbits(-99.0),
15993 DoubleToRawbits(-39.0),
15994 DoubleToRawbits(-38.0)};
15995
15996 FPMulAccHelper(config,
15997 &MacroAssembler::Fmls,
15998 kDRegSize,
15999 pg_inputs,
16000 za_inputs,
16001 zn_inputs,
16002 zm_inputs,
16003 fmls_result_d);
16004}
16005
16006TEST_SVE(sve_fnmla_fnmad) {
16007 // fnmla : zd = -za - zn * zm
16008 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
16009 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
16010 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
16011 int pg_inputs[] = {0, 1, 1, 1};
16012
16013 uint64_t fnmla_result_h[] = {Float16ToRawbits(Float16(84.0)),
16014 Float16ToRawbits(Float16(-101.0)),
16015 Float16ToRawbits(Float16(-33.0)),
16016 Float16ToRawbits(Float16(-42.0))};
16017
16018 // `fnmad` has been tested in the helper.
16019 FPMulAccHelper(config,
16020 &MacroAssembler::Fnmla,
16021 kHRegSize,
16022 pg_inputs,
16023 za_inputs,
16024 zn_inputs,
16025 zm_inputs,
16026 fnmla_result_h);
16027
16028 uint64_t fnmla_result_s[] = {FloatToRawbits(84.0f),
16029 FloatToRawbits(-101.0f),
16030 FloatToRawbits(-33.0f),
16031 FloatToRawbits(-42.0f)};
16032
16033 FPMulAccHelper(config,
16034 &MacroAssembler::Fnmla,
16035 kSRegSize,
16036 pg_inputs,
16037 za_inputs,
16038 zn_inputs,
16039 zm_inputs,
16040 fnmla_result_s);
16041
16042 uint64_t fnmla_result_d[] = {DoubleToRawbits(84.0),
16043 DoubleToRawbits(-101.0),
16044 DoubleToRawbits(-33.0),
16045 DoubleToRawbits(-42.0)};
16046
16047 FPMulAccHelper(config,
16048 &MacroAssembler::Fnmla,
16049 kDRegSize,
16050 pg_inputs,
16051 za_inputs,
16052 zn_inputs,
16053 zm_inputs,
16054 fnmla_result_d);
16055}
16056
16057TEST_SVE(sve_fnmls_fnmsb) {
16058 // fnmls : zd = -za + zn * zm
16059 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
16060 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
16061 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
16062 int pg_inputs[] = {1, 1, 1, 0};
16063
16064 uint64_t fnmls_result_h[] = {Float16ToRawbits(Float16(-6.0)),
16065 Float16ToRawbits(Float16(99.0)),
16066 Float16ToRawbits(Float16(39.0)),
16067 Float16ToRawbits(Float16(38.0))};
16068
16069 // `fnmsb` has been tested in the helper.
16070 FPMulAccHelper(config,
16071 &MacroAssembler::Fnmls,
16072 kHRegSize,
16073 pg_inputs,
16074 za_inputs,
16075 zn_inputs,
16076 zm_inputs,
16077 fnmls_result_h);
16078
16079 uint64_t fnmls_result_s[] = {FloatToRawbits(-6.0f),
16080 FloatToRawbits(99.0f),
16081 FloatToRawbits(39.0f),
16082 FloatToRawbits(38.0f)};
16083
16084 FPMulAccHelper(config,
16085 &MacroAssembler::Fnmls,
16086 kSRegSize,
16087 pg_inputs,
16088 za_inputs,
16089 zn_inputs,
16090 zm_inputs,
16091 fnmls_result_s);
16092
16093 uint64_t fnmls_result_d[] = {DoubleToRawbits(-6.0),
16094 DoubleToRawbits(99.0),
16095 DoubleToRawbits(39.0),
16096 DoubleToRawbits(38.0)};
16097
16098 FPMulAccHelper(config,
16099 &MacroAssembler::Fnmls,
16100 kDRegSize,
16101 pg_inputs,
16102 za_inputs,
16103 zn_inputs,
16104 zm_inputs,
16105 fnmls_result_d);
16106}
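
// Scalar models (illustrative only, ignoring the fused rounding of the real
// instructions) of the four predicated multiply-accumulate forms tested above:
inline double FmlaReferenceSketch(double za, double zn, double zm) {
  return za + (zn * zm);  // fmla : zd = za + zn * zm
}
inline double FmlsReferenceSketch(double za, double zn, double zm) {
  return za - (zn * zm);  // fmls : zd = za - zn * zm
}
inline double FnmlaReferenceSketch(double za, double zn, double zm) {
  return -za - (zn * zm);  // fnmla : zd = -za - zn * zm
}
inline double FnmlsReferenceSketch(double za, double zn, double zm) {
  return -za + (zn * zm);  // fnmls : zd = -za + zn * zm
}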
16107
16108 // Create a pattern in dst where the value of each element in src is incremented
16109 // by the segment number. This allows varying a short input by a predictable
16110 // pattern for each segment.
16111 static void FPSegmentPatternHelper(MacroAssembler* masm,
16112 const ZRegister& dst,
16113 const PRegisterM& ptrue,
16114 const ZRegister& src) {
16115   VIXL_ASSERT(AreSameLaneSize(dst, src));
16116 UseScratchRegisterScope temps(masm);
16117 ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(dst);
16118   masm->Index(ztmp, 0, 1);
16119 masm->Asr(ztmp, ztmp, kQRegSizeInBytesLog2 - dst.GetLaneSizeInBytesLog2());
16120 masm->Scvtf(ztmp, ptrue, ztmp);
16121 masm->Fadd(dst, src, ztmp);
16122}
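
// A scalar sketch (illustrative only) of the pattern the helper above
// produces: each lane of `src` is offset by the index of its 128-bit segment.
inline double SegmentPatternReferenceSketch(double src_lane,
                                            int lane_index,
                                            int lane_size_in_bytes) {
  int lanes_per_segment = 16 / lane_size_in_bytes;  // 16 bytes per segment.
  return src_lane + (lane_index / lanes_per_segment);
}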
16123
16124 typedef void (MacroAssembler::*FPMulAccIdxFn)(const ZRegister& zd,
16125 const ZRegister& za,
16126 const ZRegister& zn,
16127 const ZRegister& zm,
16128 int index);
16129
16130template <typename T, size_t N>
16131static void FPMulAccIdxHelper(Test* config,
16132 FPMulAccFn macro,
16133 FPMulAccIdxFn macro_idx,
16134 const T (&za_inputs)[N],
16135 const T (&zn_inputs)[N],
16136 const T (&zm_inputs)[N]) {
16137 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
16138 START();
16139
16140   __ Ptrue(p0.VnB());
16141
16142 // Repeat indexed vector across up to 2048-bit VL.
16143 for (size_t i = 0; i < (kZRegMaxSize / kDRegSize); i += N) {
16144 InsrHelper(&masm, z30.VnD(), zm_inputs);
16145 }
16146
16147   FPSegmentPatternHelper(&masm, z0.VnH(), p0.Merging(), z30.VnH());
16148
16149   InsrHelper(&masm, z1.VnD(), zn_inputs);
16150 InsrHelper(&masm, z2.VnD(), za_inputs);
16151
16152 __ Mov(z3, z0);
16153 (masm.*macro_idx)(z3.VnH(), z2.VnH(), z1.VnH(), z3.VnH(), 0); // zd == zm
16154 __ Mov(z4, z1);
16155 (masm.*macro_idx)(z4.VnH(), z2.VnH(), z4.VnH(), z0.VnH(), 1); // zd == zn
16156 __ Mov(z5, z2);
16157 (masm.*macro_idx)(z5.VnH(), z5.VnH(), z1.VnH(), z0.VnH(), 4); // zd == za
16158 (masm.*macro_idx)(z6.VnH(), z2.VnH(), z1.VnH(), z0.VnH(), 7);
16159
16160   FPSegmentPatternHelper(&masm, z0.VnS(), p0.Merging(), z30.VnS());
16161
16162   __ Mov(z7, z0);
16163 (masm.*macro_idx)(z7.VnS(), z2.VnS(), z1.VnS(), z7.VnS(), 0); // zd == zm
16164 __ Mov(z8, z1);
16165 (masm.*macro_idx)(z8.VnS(), z2.VnS(), z8.VnS(), z0.VnS(), 1); // zd == zn
16166 __ Mov(z9, z2);
16167 (masm.*macro_idx)(z9.VnS(), z9.VnS(), z1.VnS(), z0.VnS(), 2); // zd == za
16168 (masm.*macro_idx)(z10.VnS(), z2.VnS(), z1.VnS(), z0.VnS(), 3);
16169
16170   FPSegmentPatternHelper(&masm, z0.VnD(), p0.Merging(), z30.VnD());
16171
16172   __ Mov(z11, z0);
16173 (masm.*macro_idx)(z11.VnD(), z2.VnD(), z1.VnD(), z11.VnD(), 0); // zd == zm
16174 __ Mov(z12, z1);
16175 (masm.*macro_idx)(z12.VnD(), z2.VnD(), z12.VnD(), z0.VnD(), 1); // zd == zn
16176 __ Mov(z13, z2);
16177 (masm.*macro_idx)(z13.VnD(), z13.VnD(), z1.VnD(), z0.VnD(), 0); // zd == za
16178 __ Mov(z14, z0);
16179 // zd == zn == zm
16180 (masm.*macro_idx)(z14.VnD(), z2.VnD(), z14.VnD(), z14.VnD(), 1);
16181
16182   // The indexed forms of Fmla and Fmls never swap their arguments, so pass
16183   // strict NaN propagation mode to ensure that the vector-form macros below
16184   // do not swap arguments either.
16185 FPMacroNaNPropagationOption option = StrictNaNPropagation;
16186 // Compute the results using other instructions.
16187   __ Dup(z0.VnH(), z30.VnH(), 0);
16188   FPSegmentPatternHelper(&masm, z0.VnH(), p0.Merging(), z0.VnH());
16189   (masm.*macro)(z15.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z0.VnH(), option);
16190   __ Dup(z0.VnH(), z30.VnH(), 1);
16191   FPSegmentPatternHelper(&masm, z0.VnH(), p0.Merging(), z0.VnH());
16192   (masm.*macro)(z16.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z0.VnH(), option);
16193   __ Dup(z0.VnH(), z30.VnH(), 4);
16194   FPSegmentPatternHelper(&masm, z0.VnH(), p0.Merging(), z0.VnH());
16195   (masm.*macro)(z17.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z0.VnH(), option);
16196   __ Dup(z0.VnH(), z30.VnH(), 7);
16197   FPSegmentPatternHelper(&masm, z0.VnH(), p0.Merging(), z0.VnH());
16198   (masm.*macro)(z18.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z0.VnH(), option);
16199
16200   __ Dup(z0.VnS(), z30.VnS(), 0);
16201   FPSegmentPatternHelper(&masm, z0.VnS(), p0.Merging(), z0.VnS());
16202   (masm.*macro)(z19.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z0.VnS(), option);
16203   __ Dup(z0.VnS(), z30.VnS(), 1);
16204   FPSegmentPatternHelper(&masm, z0.VnS(), p0.Merging(), z0.VnS());
16205   (masm.*macro)(z20.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z0.VnS(), option);
16206   __ Dup(z0.VnS(), z30.VnS(), 2);
16207   FPSegmentPatternHelper(&masm, z0.VnS(), p0.Merging(), z0.VnS());
16208   (masm.*macro)(z21.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z0.VnS(), option);
16209   __ Dup(z0.VnS(), z30.VnS(), 3);
16210   FPSegmentPatternHelper(&masm, z0.VnS(), p0.Merging(), z0.VnS());
16211   (masm.*macro)(z22.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z0.VnS(), option);
16212
16213   __ Dup(z0.VnD(), z30.VnD(), 0);
16214   FPSegmentPatternHelper(&masm, z0.VnD(), p0.Merging(), z0.VnD());
16215   (masm.*macro)(z23.VnD(), p0.Merging(), z2.VnD(), z1.VnD(), z0.VnD(), option);
16216   __ Dup(z0.VnD(), z30.VnD(), 1);
16217   FPSegmentPatternHelper(&masm, z0.VnD(), p0.Merging(), z0.VnD());
16218   (masm.*macro)(z24.VnD(), p0.Merging(), z2.VnD(), z1.VnD(), z0.VnD(), option);
16219   __ Dup(z0.VnD(), z30.VnD(), 1);
16220   FPSegmentPatternHelper(&masm, z0.VnD(), p0.Merging(), z0.VnD());
16221   (masm.*macro)(z25.VnD(), p0.Merging(), z2.VnD(), z30.VnD(), z0.VnD(), option);
16222
16223 END();
16224
16225 if (CAN_RUN()) {
16226 RUN();
16227
16228 ASSERT_EQUAL_SVE(z15.VnH(), z3.VnH());
16229 ASSERT_EQUAL_SVE(z16.VnH(), z4.VnH());
16230 ASSERT_EQUAL_SVE(z17.VnH(), z5.VnH());
16231 ASSERT_EQUAL_SVE(z18.VnH(), z6.VnH());
16232
16233 ASSERT_EQUAL_SVE(z19.VnS(), z7.VnS());
16234 ASSERT_EQUAL_SVE(z20.VnS(), z8.VnS());
16235 ASSERT_EQUAL_SVE(z21.VnS(), z9.VnS());
16236 ASSERT_EQUAL_SVE(z22.VnS(), z10.VnS());
16237
16238 ASSERT_EQUAL_SVE(z23.VnD(), z11.VnD());
16239 ASSERT_EQUAL_SVE(z24.VnD(), z12.VnD());
16240 ASSERT_EQUAL_SVE(z11.VnD(), z13.VnD());
16241 ASSERT_EQUAL_SVE(z25.VnD(), z14.VnD());
16242 }
16243}
16244
16245TEST_SVE(sve_fmla_fmls_index) {
16246 uint64_t zm_inputs_1[] = {0x3ff000003f803c00, 0xbff00000bf80bc00};
16247 uint64_t zn_inputs_1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76};
16248 uint64_t za_inputs_1[] = {0x3c004000bc00c000, 0x64006800e400e800};
16249
16250 // Using the vector form of Fmla and Fmls to verify the indexed form.
16251 FPMulAccIdxHelper(config,
16252 &MacroAssembler::Fmla, // vector form
16253 &MacroAssembler::Fmla, // indexed form
16254 za_inputs_1,
16255 zn_inputs_1,
16256 zm_inputs_1);
16257
16258 FPMulAccIdxHelper(config,
16259 &MacroAssembler::Fmls, // vector form
16260 &MacroAssembler::Fmls, // indexed form
16261 za_inputs_1,
16262 zn_inputs_1,
16263 zm_inputs_1);
16264
16265 uint64_t zm_inputs_2[] = {0x7ff5555511111111, // NaN
16266 0xfff0000000000000}; // Infinity
16267 uint64_t zn_inputs_2[] = {0x7f9511117fc00000, // NaN
16268 0x7f800000ff800000}; // Infinity
16269 uint64_t za_inputs_2[] = {0x7c11000000007e00, // NaN
16270 0x000000007c00fc00}; // Infinity
16271 FPMulAccIdxHelper(config,
16272 &MacroAssembler::Fmla, // vector form
16273 &MacroAssembler::Fmla, // indexed form
16274 za_inputs_2,
16275 zn_inputs_2,
16276 zm_inputs_2);
16277
16278 FPMulAccIdxHelper(config,
16279 &MacroAssembler::Fmls, // vector form
16280 &MacroAssembler::Fmls, // indexed form
16281 za_inputs_2,
16282 zn_inputs_2,
16283 zm_inputs_2);
16284}
16285
16286 // Execute a number of instructions which all use ProcessNaNs, and check that
16287// they all propagate NaNs correctly.
16288template <typename Ti, typename Td, size_t N>
16289static void ProcessNaNsHelper(Test* config,
16290 int lane_size_in_bits,
16291 const Ti (&zn_inputs)[N],
16292 const Ti (&zm_inputs)[N],
16293 const Td (&zd_expected)[N],
16294 FPMacroNaNPropagationOption nan_option) {
16295 ArithFn arith_unpredicated_macro[] = {&MacroAssembler::Fadd,
16296 &MacroAssembler::Fsub,
16297 &MacroAssembler::Fmul};
16298
16299 for (size_t i = 0; i < ArrayLength(arith_unpredicated_macro); i++) {
16300 FPBinArithHelper(config,
16301 arith_unpredicated_macro[i],
16302 lane_size_in_bits,
16303 zn_inputs,
16304 zm_inputs,
16305 zd_expected);
16306 }
16307
16308 FPArithPredicatedFn arith_predicated_macro[] = {&MacroAssembler::Fmax,
16309 &MacroAssembler::Fmin};
16310 int pg_inputs[N];
16311 // With an all-true predicate, this helper aims to compare with special
16312 // numbers.
16313 for (size_t i = 0; i < N; i++) {
16314 pg_inputs[i] = 1;
16315 }
16316
16317 // fdivr propagates the quotient (Zm) preferentially, so we don't actually
16318 // need any special handling for StrictNaNPropagation.
16319 FPBinArithHelper(config,
16320 NULL,
16321 &MacroAssembler::Fdiv,
16322 lane_size_in_bits,
16323 // With an all-true predicate, the value in zd is
16324 // irrelevant to the operations.
16325 zn_inputs,
16326 pg_inputs,
16327 zn_inputs,
16328 zm_inputs,
16329 zd_expected);
16330
16331 for (size_t i = 0; i < ArrayLength(arith_predicated_macro); i++) {
16332 FPBinArithHelper(config,
16333 arith_predicated_macro[i],
16334 NULL,
16335 lane_size_in_bits,
16336 // With an all-true predicate, the value in zd is
16337 // irrelevant to the operations.
16338 zn_inputs,
16339 pg_inputs,
16340 zn_inputs,
16341 zm_inputs,
16342 zd_expected,
16343 nan_option);
16344 }
16345}
16346
16347template <typename Ti, typename Td, size_t N>
16348static void ProcessNaNsHelper3(Test* config,
16349 int lane_size_in_bits,
16350 const Ti (&za_inputs)[N],
16351 const Ti (&zn_inputs)[N],
16352 const Ti (&zm_inputs)[N],
16353 const Td (&zd_expected_fmla)[N],
16354 const Td (&zd_expected_fmls)[N],
16355 const Td (&zd_expected_fnmla)[N],
16356 const Td (&zd_expected_fnmls)[N],
16357 FPMacroNaNPropagationOption nan_option) {
16358 int pg_inputs[N];
16359 // Use an all-true predicate: this helper is only concerned with how special
16360 // numbers (NaNs) are handled, not with predication.
16361 for (size_t i = 0; i < N; i++) {
16362 pg_inputs[i] = 1;
16363 }
16364
16365 FPMulAccHelper(config,
16366 &MacroAssembler::Fmla,
16367 lane_size_in_bits,
16368 pg_inputs,
16369 za_inputs,
16370 zn_inputs,
16371 zm_inputs,
16372 zd_expected_fmla,
16373 nan_option);
16374
16375 FPMulAccHelper(config,
16376 &MacroAssembler::Fmls,
16377 lane_size_in_bits,
16378 pg_inputs,
16379 za_inputs,
16380 zn_inputs,
16381 zm_inputs,
16382 zd_expected_fmls,
16383 nan_option);
16384
16385 FPMulAccHelper(config,
16386 &MacroAssembler::Fnmla,
16387 lane_size_in_bits,
16388 pg_inputs,
16389 za_inputs,
16390 zn_inputs,
16391 zm_inputs,
16392 zd_expected_fnmla,
16393 nan_option);
16394
16395 FPMulAccHelper(config,
16396 &MacroAssembler::Fnmls,
16397 lane_size_in_bits,
16398 pg_inputs,
16399 za_inputs,
16400 zn_inputs,
16401 zm_inputs,
16402 zd_expected_fnmls,
16403 nan_option);
16404}
16405
16406TEST_SVE(sve_process_nans_double) {
16407 // Use non-standard NaNs to check that the payload bits are preserved.
16408 double sa = RawbitsToDouble(0x7ff5555511111111);
16409 double sn = RawbitsToDouble(0x7ff5555522222222);
16410 double sm = RawbitsToDouble(0x7ff5555533333333);
16411 double qa = RawbitsToDouble(0x7ffaaaaa11111111);
16412 double qn = RawbitsToDouble(0x7ffaaaaa22222222);
16413 double qm = RawbitsToDouble(0x7ffaaaaa33333333);
16414 VIXL_ASSERT(IsSignallingNaN(sa));
16415 VIXL_ASSERT(IsSignallingNaN(sn));
16416 VIXL_ASSERT(IsSignallingNaN(sm));
16417 VIXL_ASSERT(IsQuietNaN(qa));
16418 VIXL_ASSERT(IsQuietNaN(qn));
16419 VIXL_ASSERT(IsQuietNaN(qm));
16420
16421 // The input NaNs after passing through ProcessNaN.
16422 uint64_t sa_proc = 0x7ffd555511111111;
16423 uint64_t sn_proc = 0x7ffd555522222222;
16424 uint64_t sm_proc = 0x7ffd555533333333;
16425 uint64_t qa_proc = DoubleToRawbits(qa);
16426 uint64_t qn_proc = DoubleToRawbits(qn);
16427 uint64_t qm_proc = DoubleToRawbits(qm);
16428 uint64_t sa_proc_n = sa_proc ^ kDSignMask;
16429 uint64_t sn_proc_n = sn_proc ^ kDSignMask;
16430 uint64_t qa_proc_n = qa_proc ^ kDSignMask;
16431 uint64_t qn_proc_n = qn_proc ^ kDSignMask;
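
  // A sketch of the quieting rule that the *_proc constants above assume
  // (FPProcessNaN): set the most significant fraction bit and keep the
  // payload, e.g. for doubles:
  //   sa_proc = DoubleToRawbits(sa) | (UINT64_C(1) << 51);
  //   // 0x7ff5555511111111 -> 0x7ffd555511111111
  // The *_proc_n variants simply flip the sign bit (^ kDSignMask), for cases
  // where the operation negates the propagated operand.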
16432
16433 // Quiet NaNs are propagated.
16434 double zn_inputs_1[] = {qn, 0.0, 0.0, qm, qn, qm};
16435 double zm_inputs_1[] = {0.0, qn, qm, 0.0, qm, qn};
16436 uint64_t zd_expected_1[] =
16437 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
16438
16439 ProcessNaNsHelper(config,
16440 kDRegSize,
16441 zn_inputs_1,
16442 zm_inputs_1,
16443 zd_expected_1,
16444 StrictNaNPropagation);
16445
16446 // Signalling NaNs are propagated.
16447 double zn_inputs_2[] = {sn, 0.0, 0.0, sm, sn, sm};
16448 double zm_inputs_2[] = {0.0, sn, sm, 0.0, sm, sn};
16449 uint64_t zd_expected_2[] =
16450 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
16451 ProcessNaNsHelper(config,
16452 kDRegSize,
16453 zn_inputs_2,
16454 zm_inputs_2,
16455 zd_expected_2,
16456 StrictNaNPropagation);
16457
16458 // Signalling NaNs take precedence over quiet NaNs.
16459 double zn_inputs_3[] = {sn, qn, sn, sn, qn};
16460 double zm_inputs_3[] = {qm, sm, sm, qn, sn};
16461 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
16462 ProcessNaNsHelper(config,
16463 kDRegSize,
16464 zn_inputs_3,
16465 zm_inputs_3,
16466 zd_expected_3,
16467 StrictNaNPropagation);
16468
16469 double za_inputs_4[] = {qa, qa, 0.0, 0.0, qa, qa};
16470 double zn_inputs_4[] = {qn, 0.0, 0.0, qn, qn, qn};
16471 double zm_inputs_4[] = {0.0, qm, qm, qm, qm, 0.0};
16472
16473 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
16474 // If `n` is propagated, its sign is inverted by fmls and fnmla.
16475 // If `m` is propagated, its sign is never inverted.
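  // For reference, the underlying operations (as in the Arm pseudocode) are:
  //   fmla:  zd =  za + zn * zm
  //   fmls:  zd =  za + (-zn) * zm
  //   fnmla: zd = -za + (-zn) * zm
  //   fnmls: zd = -za + zn * zm
  // so a propagated `a` or `n` NaN can have its sign flipped, while a
  // propagated `m` NaN never does.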
16476 uint64_t zd_expected_fmla_4[] =
16477 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
16478 uint64_t zd_expected_fmls_4[] =
16479 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
16480 uint64_t zd_expected_fnmla_4[] =
16481 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
16482 uint64_t zd_expected_fnmls_4[] =
16483 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
16484
16485 ProcessNaNsHelper3(config,
16486 kDRegSize,
16487 za_inputs_4,
16488 zn_inputs_4,
16489 zm_inputs_4,
16490 zd_expected_fmla_4,
16491 zd_expected_fmls_4,
16492 zd_expected_fnmla_4,
16493 zd_expected_fnmls_4,
16494 StrictNaNPropagation);
16495
16496 // Signalling NaNs take precedence over quiet NaNs.
16497 double za_inputs_5[] = {qa, qa, sa, sa, sa};
16498 double zn_inputs_5[] = {qn, sn, sn, sn, qn};
16499 double zm_inputs_5[] = {sm, qm, sm, qa, sm};
16500 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
16501 uint64_t zd_expected_fmls_5[] = {sm_proc,
16502 sn_proc_n,
16503 sa_proc,
16504 sa_proc,
16505 sa_proc};
16506 uint64_t zd_expected_fnmla_5[] = {sm_proc,
16507 sn_proc_n,
16508 sa_proc_n,
16509 sa_proc_n,
16510 sa_proc_n};
16511 uint64_t zd_expected_fnmls_5[] = {sm_proc,
16512 sn_proc,
16513 sa_proc_n,
16514 sa_proc_n,
16515 sa_proc_n};
16516
16517 ProcessNaNsHelper3(config,
16518 kDRegSize,
16519 za_inputs_5,
16520 zn_inputs_5,
16521 zm_inputs_5,
16522 zd_expected_fmla_5,
16523 zd_expected_fmls_5,
16524 zd_expected_fnmla_5,
16525 zd_expected_fnmls_5,
16526 StrictNaNPropagation);
16527
16528 const double inf = kFP64PositiveInfinity;
16529 const double inf_n = kFP64NegativeInfinity;
16530 uint64_t inf_proc = DoubleToRawbits(inf);
16531 uint64_t inf_proc_n = DoubleToRawbits(inf_n);
16532 uint64_t d_inf_proc = DoubleToRawbits(kFP64DefaultNaN);
16533
16534 double za_inputs_6[] = {qa, qa, 0.0f, -0.0f, qa, sa};
16535 double zn_inputs_6[] = {inf, -0.0f, -0.0f, inf, inf_n, inf};
16536 double zm_inputs_6[] = {0.0f, inf_n, inf, inf, inf, 0.0f};
16537
16538 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
16539 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
16540 // quiet_nan.
16541 uint64_t zd_expected_fmla_6[] =
16542 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
16543 uint64_t zd_expected_fmls_6[] =
16544 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
16545 uint64_t zd_expected_fnmla_6[] =
16546 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
16547 uint64_t zd_expected_fnmls_6[] =
16548 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
16549
16550 ProcessNaNsHelper3(config,
16551 kDRegSize,
16552 za_inputs_6,
16553 zn_inputs_6,
16554 zm_inputs_6,
16555 zd_expected_fmla_6,
16556 zd_expected_fmls_6,
16557 zd_expected_fnmla_6,
16558 zd_expected_fnmls_6,
16559 StrictNaNPropagation);
16560}
16561
16562TEST_SVE(sve_process_nans_float) {
16563 // Use non-standard NaNs to check that the payload bits are preserved.
16564 float sa = RawbitsToFloat(0x7f951111);
16565 float sn = RawbitsToFloat(0x7f952222);
16566 float sm = RawbitsToFloat(0x7f953333);
16567 float qa = RawbitsToFloat(0x7fea1111);
16568 float qn = RawbitsToFloat(0x7fea2222);
16569 float qm = RawbitsToFloat(0x7fea3333);
16570 VIXL_ASSERT(IsSignallingNaN(sa));
16571 VIXL_ASSERT(IsSignallingNaN(sn));
16572 VIXL_ASSERT(IsSignallingNaN(sm));
16573 VIXL_ASSERT(IsQuietNaN(qa));
16574 VIXL_ASSERT(IsQuietNaN(qn));
16575 VIXL_ASSERT(IsQuietNaN(qm));
16576
16577 // The input NaNs after passing through ProcessNaN.
16578 uint32_t sa_proc = 0x7fd51111;
16579 uint32_t sn_proc = 0x7fd52222;
16580 uint32_t sm_proc = 0x7fd53333;
16581 uint32_t qa_proc = FloatToRawbits(qa);
16582 uint32_t qn_proc = FloatToRawbits(qn);
16583 uint32_t qm_proc = FloatToRawbits(qm);
16584 uint32_t sa_proc_n = sa_proc ^ kSSignMask;
16585 uint32_t sn_proc_n = sn_proc ^ kSSignMask;
16586 uint32_t qa_proc_n = qa_proc ^ kSSignMask;
16587 uint32_t qn_proc_n = qn_proc ^ kSSignMask;
16588
16589 // Quiet NaNs are propagated.
16590 float zn_inputs_1[] = {qn, 0.0f, 0.0f, qm, qn, qm};
16591 float zm_inputs_1[] = {0.0f, qn, qm, 0.0f, qm, qn};
16592 uint64_t zd_expected_1[] =
16593 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
16594
16595 ProcessNaNsHelper(config,
16596 kSRegSize,
16597 zn_inputs_1,
16598 zm_inputs_1,
16599 zd_expected_1,
16600 StrictNaNPropagation);
16601
16602 // Signalling NaNs are propagated.
16603 float zn_inputs_2[] = {sn, 0.0f, 0.0f, sm, sn, sm};
16604 float zm_inputs_2[] = {0.0f, sn, sm, 0.0f, sm, sn};
16605 uint64_t zd_expected_2[] =
16606 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
16607 ProcessNaNsHelper(config,
16608 kSRegSize,
16609 zn_inputs_2,
16610 zm_inputs_2,
16611 zd_expected_2,
16612 StrictNaNPropagation);
16613
16614 // Signalling NaNs take precedence over quiet NaNs.
16615 float zn_inputs_3[] = {sn, qn, sn, sn, qn};
16616 float zm_inputs_3[] = {qm, sm, sm, qn, sn};
16617 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
16618 ProcessNaNsHelper(config,
16619 kSRegSize,
16620 zn_inputs_3,
16621 zm_inputs_3,
16622 zd_expected_3,
16623 StrictNaNPropagation);
16624
16625 float za_inputs_4[] = {qa, qa, 0.0f, 0.0f, qa, qa};
16626 float zn_inputs_4[] = {qn, 0.0f, 0.0f, qn, qn, qn};
16627 float zm_inputs_4[] = {0.0f, qm, qm, qm, qm, 0.0f};
16628
16629 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
16630 // If `n` is propagated, its sign is inverted by fmls and fnmla.
16631 // If `m` is propagated, its sign is never inverted.
16632 uint64_t zd_expected_fmla_4[] =
16633 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
16634 uint64_t zd_expected_fmls_4[] =
16635 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
16636 uint64_t zd_expected_fnmla_4[] =
16637 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
16638 uint64_t zd_expected_fnmls_4[] =
16639 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
16640
16641 ProcessNaNsHelper3(config,
16642 kSRegSize,
16643 za_inputs_4,
16644 zn_inputs_4,
16645 zm_inputs_4,
16646 zd_expected_fmla_4,
16647 zd_expected_fmls_4,
16648 zd_expected_fnmla_4,
16649 zd_expected_fnmls_4,
16650 StrictNaNPropagation);
16651
16652 // Signalling NaNs take precedence over quiet NaNs.
16653 float za_inputs_5[] = {qa, qa, sa, sa, sa};
16654 float zn_inputs_5[] = {qn, sn, sn, sn, qn};
16655 float zm_inputs_5[] = {sm, qm, sm, qa, sm};
16656 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
16657 uint64_t zd_expected_fmls_5[] = {sm_proc,
16658 sn_proc_n,
16659 sa_proc,
16660 sa_proc,
16661 sa_proc};
16662 uint64_t zd_expected_fnmla_5[] = {sm_proc,
16663 sn_proc_n,
16664 sa_proc_n,
16665 sa_proc_n,
16666 sa_proc_n};
16667 uint64_t zd_expected_fnmls_5[] = {sm_proc,
16668 sn_proc,
16669 sa_proc_n,
16670 sa_proc_n,
16671 sa_proc_n};
16672
16673 ProcessNaNsHelper3(config,
16674 kSRegSize,
16675 za_inputs_5,
16676 zn_inputs_5,
16677 zm_inputs_5,
16678 zd_expected_fmla_5,
16679 zd_expected_fmls_5,
16680 zd_expected_fnmla_5,
16681 zd_expected_fnmls_5,
16682 StrictNaNPropagation);
16683
16684 const float inf = kFP32PositiveInfinity;
16685 const float inf_n = kFP32NegativeInfinity;
16686 uint32_t inf_proc = FloatToRawbits(inf);
16687 uint32_t inf_proc_n = FloatToRawbits(inf_n);
16688 uint32_t d_inf_proc = FloatToRawbits(kFP32DefaultNaN);
16689
16690 float za_inputs_6[] = {qa, qa, 0.0f, 0.0f, qa, sa};
16691 float zn_inputs_6[] = {inf, 0.0f, 0.0f, inf, inf_n, inf};
16692 float zm_inputs_6[] = {0.0f, inf_n, inf, inf, inf, 0.0f};
16693
16694 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
16695 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
16696 // quiet_nan.
16697 uint64_t zd_expected_fmla_6[] =
16698 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
16699 uint64_t zd_expected_fmls_6[] =
16700 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
16701 uint64_t zd_expected_fnmla_6[] =
16702 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
16703 uint64_t zd_expected_fnmls_6[] =
16704 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
16705
16706 ProcessNaNsHelper3(config,
16707 kSRegSize,
16708 za_inputs_6,
16709 zn_inputs_6,
16710 zm_inputs_6,
16711 zd_expected_fmla_6,
16712 zd_expected_fmls_6,
16713 zd_expected_fnmla_6,
16714 zd_expected_fnmls_6,
16715 StrictNaNPropagation);
16716}
16717
16718TEST_SVE(sve_process_nans_half) {
16719 // Use non-standard NaNs to check that the payload bits are preserved.
16720 Float16 sa(RawbitsToFloat16(0x7c11));
16721 Float16 sn(RawbitsToFloat16(0x7c22));
16722 Float16 sm(RawbitsToFloat16(0x7c33));
16723 Float16 qa(RawbitsToFloat16(0x7e44));
16724 Float16 qn(RawbitsToFloat16(0x7e55));
16725 Float16 qm(RawbitsToFloat16(0x7e66));
16726 VIXL_ASSERT(IsSignallingNaN(sa));
16727 VIXL_ASSERT(IsSignallingNaN(sn));
16728 VIXL_ASSERT(IsSignallingNaN(sm));
16729 VIXL_ASSERT(IsQuietNaN(qa));
16730 VIXL_ASSERT(IsQuietNaN(qn));
16731 VIXL_ASSERT(IsQuietNaN(qm));
16732
16733 // The input NaNs after passing through ProcessNaN.
16734 uint16_t sa_proc = 0x7e11;
16735 uint16_t sn_proc = 0x7e22;
16736 uint16_t sm_proc = 0x7e33;
16737 uint16_t qa_proc = Float16ToRawbits(qa);
16738 uint16_t qn_proc = Float16ToRawbits(qn);
16739 uint16_t qm_proc = Float16ToRawbits(qm);
16740 uint16_t sa_proc_n = sa_proc ^ kHSignMask;
16741 uint16_t sn_proc_n = sn_proc ^ kHSignMask;
16742 uint16_t qa_proc_n = qa_proc ^ kHSignMask;
16743 uint16_t qn_proc_n = qn_proc ^ kHSignMask;
16744 Float16 zero(0.0);
16745
16746 // Quiet NaNs are propagated.
16747 Float16 zn_inputs_1[] = {qn, zero, zero, qm, qn, qm};
16748 Float16 zm_inputs_1[] = {zero, qn, qm, zero, qm, qn};
16749 uint64_t zd_expected_1[] =
16750 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
16751
16752 ProcessNaNsHelper(config,
16753 kHRegSize,
16754 zn_inputs_1,
16755 zm_inputs_1,
16756 zd_expected_1,
16757 StrictNaNPropagation);
16758
16759 // Signalling NaNs are propagated.
16760 Float16 zn_inputs_2[] = {sn, zero, zero, sm, sn, sm};
16761 Float16 zm_inputs_2[] = {zero, sn, sm, zero, sm, sn};
16762 uint64_t zd_expected_2[] =
16763 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
16764 ProcessNaNsHelper(config,
16765 kHRegSize,
16766 zn_inputs_2,
16767 zm_inputs_2,
16768 zd_expected_2,
16769 StrictNaNPropagation);
16770
16771 // Signalling NaNs take precedence over quiet NaNs.
16772 Float16 zn_inputs_3[] = {sn, qn, sn, sn, qn};
16773 Float16 zm_inputs_3[] = {qm, sm, sm, qn, sn};
16774 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
16775 ProcessNaNsHelper(config,
16776 kHRegSize,
16777 zn_inputs_3,
16778 zm_inputs_3,
16779 zd_expected_3,
16780 StrictNaNPropagation);
16781
16782 Float16 za_inputs_4[] = {qa, qa, zero, zero, qa, qa};
16783 Float16 zn_inputs_4[] = {qn, zero, zero, qn, qn, qn};
16784 Float16 zm_inputs_4[] = {zero, qm, qm, qm, qm, zero};
16785
16786 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
16787 // If `n` is propagated, its sign is inverted by fmls and fnmla.
16788 // If `m` is propagated, its sign is never inverted.
16789 uint64_t zd_expected_fmla_4[] =
16790 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
16791 uint64_t zd_expected_fmls_4[] =
16792 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
16793 uint64_t zd_expected_fnmla_4[] =
16794 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
16795 uint64_t zd_expected_fnmls_4[] =
16796 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
16797
16798 ProcessNaNsHelper3(config,
16799 kHRegSize,
16800 za_inputs_4,
16801 zn_inputs_4,
16802 zm_inputs_4,
16803 zd_expected_fmla_4,
16804 zd_expected_fmls_4,
16805 zd_expected_fnmla_4,
16806 zd_expected_fnmls_4,
16807 StrictNaNPropagation);
16808
16809 // Signalling NaNs take precedence over quiet NaNs.
16810 Float16 za_inputs_5[] = {qa, qa, sa, sa, sa};
16811 Float16 zn_inputs_5[] = {qn, sn, sn, sn, qn};
16812 Float16 zm_inputs_5[] = {sm, qm, sm, qa, sm};
16813 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
16814 uint64_t zd_expected_fmls_5[] = {sm_proc,
16815 sn_proc_n,
16816 sa_proc,
16817 sa_proc,
16818 sa_proc};
16819 uint64_t zd_expected_fnmla_5[] = {sm_proc,
16820 sn_proc_n,
16821 sa_proc_n,
16822 sa_proc_n,
16823 sa_proc_n};
16824 uint64_t zd_expected_fnmls_5[] = {sm_proc,
16825 sn_proc,
16826 sa_proc_n,
16827 sa_proc_n,
16828 sa_proc_n};
16829
16830 ProcessNaNsHelper3(config,
16831 kHRegSize,
16832 za_inputs_5,
16833 zn_inputs_5,
16834 zm_inputs_5,
16835 zd_expected_fmla_5,
16836 zd_expected_fmls_5,
16837 zd_expected_fnmla_5,
16838 zd_expected_fnmls_5,
16839 StrictNaNPropagation);
16840
16841 const Float16 inf = kFP16PositiveInfinity;
16842 const Float16 inf_n = kFP16NegativeInfinity;
16843 uint64_t inf_proc = Float16ToRawbits(inf);
16844 uint64_t inf_proc_n = Float16ToRawbits(inf_n);
16845 uint64_t d_inf_proc = Float16ToRawbits(kFP16DefaultNaN);
16846
16847 Float16 za_inputs_6[] = {qa, qa, zero, zero, qa, sa};
16848 Float16 zn_inputs_6[] = {inf, zero, zero, inf, inf_n, inf};
16849 Float16 zm_inputs_6[] = {zero, inf_n, inf, inf, inf, zero};
16850
16851 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
16852 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
16853 // quiet_nan.
16854 uint64_t zd_expected_fmla_6[] =
16855 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
16856 uint64_t zd_expected_fmls_6[] =
16857 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
16858 uint64_t zd_expected_fnmla_6[] =
16859 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
16860 uint64_t zd_expected_fnmls_6[] =
16861 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
16862
16863 ProcessNaNsHelper3(config,
16864 kHRegSize,
16865 za_inputs_6,
16866 zn_inputs_6,
16867 zm_inputs_6,
16868 zd_expected_fmla_6,
16869 zd_expected_fmls_6,
16870 zd_expected_fnmla_6,
16871 zd_expected_fnmls_6,
16872 StrictNaNPropagation);
16873}
16874
TatWai Chong47c26842020-02-10 01:51:32 -080016875typedef void (MacroAssembler::*FCmpFn)(const PRegisterWithLaneSize& pd,
16876 const PRegisterZ& pg,
16877 const ZRegister& zn,
16878 const ZRegister& zm);
16879
TatWai Chonge3775132020-02-16 22:13:17 -080016880typedef void (MacroAssembler::*FCmpZeroFn)(const PRegisterWithLaneSize& pd,
16881 const PRegisterZ& pg,
16882 const ZRegister& zn);
16883
TatWai Chong47c26842020-02-10 01:51:32 -080016884typedef void (MacroAssembler::*CmpFn)(const PRegisterWithLaneSize& pd,
16885 const PRegisterZ& pg,
16886 const ZRegister& zn,
16887 const ZRegister& zm);
16888
16889static FCmpFn GetFpAbsCompareFn(Condition cond) {
16890 switch (cond) {
16891 case ge:
16892 return &MacroAssembler::Facge;
16893 case gt:
16894 return &MacroAssembler::Facgt;
16895 case le:
16896 return &MacroAssembler::Facle;
16897 case lt:
16898 return &MacroAssembler::Faclt;
16899 default:
16900 VIXL_UNIMPLEMENTED();
16901 return NULL;
16902 }
16903}
16904
16905static FCmpFn GetFpCompareFn(Condition cond) {
16906 switch (cond) {
16907 case ge:
16908 return &MacroAssembler::Fcmge;
16909 case gt:
16910 return &MacroAssembler::Fcmgt;
16911 case le:
16912 return &MacroAssembler::Fcmle;
16913 case lt:
16914 return &MacroAssembler::Fcmlt;
16915 case eq:
16916 return &MacroAssembler::Fcmeq;
16917 case ne:
16918 return &MacroAssembler::Fcmne;
16919 case uo:
16920 return &MacroAssembler::Fcmuo;
16921 default:
16922 VIXL_UNIMPLEMENTED();
16923 return NULL;
16924 }
16925}
16926
TatWai Chonge3775132020-02-16 22:13:17 -080016927static FCmpZeroFn GetFpCompareZeroFn(Condition cond) {
16928 switch (cond) {
16929 case ge:
16930 return &MacroAssembler::Fcmge;
16931 case gt:
16932 return &MacroAssembler::Fcmgt;
16933 case le:
16934 return &MacroAssembler::Fcmle;
16935 case lt:
16936 return &MacroAssembler::Fcmlt;
16937 case eq:
16938 return &MacroAssembler::Fcmeq;
16939 case ne:
16940 return &MacroAssembler::Fcmne;
16941 default:
16942 VIXL_UNIMPLEMENTED();
16943 return NULL;
16944 }
16945}
16946
TatWai Chong47c26842020-02-10 01:51:32 -080016947static CmpFn GetIntCompareFn(Condition cond) {
16948 switch (cond) {
16949 case ge:
16950 return &MacroAssembler::Cmpge;
16951 case gt:
16952 return &MacroAssembler::Cmpgt;
16953 case le:
16954 return &MacroAssembler::Cmple;
16955 case lt:
16956 return &MacroAssembler::Cmplt;
16957 case eq:
16958 return &MacroAssembler::Cmpeq;
16959 case ne:
16960 return &MacroAssembler::Cmpne;
16961 default:
16962 VIXL_UNIMPLEMENTED();
16963 return NULL;
16964 }
16965}
16966
16967template <size_t N>
16968static void TestFpCompareHelper(Test* config,
16969 int lane_size_in_bits,
16970 Condition cond,
16971 const double (&zn_inputs)[N],
16972 const double (&zm_inputs)[N],
16973 const int (&pd_expected)[N],
16974 bool is_absolute = false) {
16975 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
16976 START();
16977
16978 ZRegister zt_int_1 = z1.WithLaneSize(lane_size_in_bits);
16979 ZRegister zt_int_2 = z2.WithLaneSize(lane_size_in_bits);
16980 ZRegister zt_int_3 = z3.WithLaneSize(lane_size_in_bits);
16981 ZRegister zt_fp_1 = z11.WithLaneSize(lane_size_in_bits);
16982 ZRegister zt_fp_2 = z12.WithLaneSize(lane_size_in_bits);
16983 ZRegister zt_fp_3 = z13.WithLaneSize(lane_size_in_bits);
16984 ZRegister fp_one = z31.WithLaneSize(lane_size_in_bits);
16985
16986 PRegisterWithLaneSize pd_result_int_1 = p15.WithLaneSize(lane_size_in_bits);
16987 PRegisterWithLaneSize pd_result_fp_1 = p14.WithLaneSize(lane_size_in_bits);
16988 PRegisterWithLaneSize pd_result_int_2 = p13.WithLaneSize(lane_size_in_bits);
16989 PRegisterWithLaneSize pd_result_fp_2 = p12.WithLaneSize(lane_size_in_bits);
16990
16991 FCmpFn fcmp = is_absolute ? GetFpAbsCompareFn(cond) : GetFpCompareFn(cond);
16992 __ Ptrue(p1.VnB());
16993
16994 if (cond != uo) {
16995 int pg_inputs[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1};
16996 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
16997
16998 __ Fdup(fp_one, 0.1f);
16999
17000 __ Index(zt_int_1, 3, 3);
17001 __ Scvtf(zt_fp_1, p0.Merging(), zt_int_1);
17002 __ Fadd(zt_fp_1, zt_fp_1, fp_one);
17003
17004 __ Index(zt_int_2, 3, -10);
17005 __ Scvtf(zt_fp_2, p0.Merging(), zt_int_2);
17006 __ Fadd(zt_fp_2, zt_fp_2, fp_one);
17007
17008 __ Index(zt_int_3, 3, 2);
17009 __ Scvtf(zt_fp_3, p0.Merging(), zt_int_3);
17010 __ Fadd(zt_fp_3, zt_fp_3, fp_one);
17011
17012
17013 // There is no integer absolute-compare instruction, so use `abs` with
17014 // `cmp<cc>` to synthesize the expected result for `fac<cc>`.
17015 if (is_absolute) {
17016 __ Abs(zt_int_2, p1.Merging(), zt_int_2);
17017 }
17018
17019 CmpFn cmp = GetIntCompareFn(cond);
17020 (masm.*cmp)(pd_result_int_1, p0.Zeroing(), zt_int_1, zt_int_2);
17021 (masm.*fcmp)(pd_result_fp_1, p0.Zeroing(), zt_fp_1, zt_fp_2);
17022
17023 (masm.*cmp)(pd_result_int_2, p0.Zeroing(), zt_int_1, zt_int_3);
17024 (masm.*fcmp)(pd_result_fp_2, p0.Zeroing(), zt_fp_1, zt_fp_3);
17025 }
17026
17027 uint64_t zn_inputs_rawbits[N];
17028 uint64_t zm_inputs_rawbits[N];
17029 FPToRawbitsWithSize(zn_inputs, zn_inputs_rawbits, lane_size_in_bits);
17030 FPToRawbitsWithSize(zm_inputs, zm_inputs_rawbits, lane_size_in_bits);
17031
17032 ZRegister zn_fp = z14.WithLaneSize(lane_size_in_bits);
17033 ZRegister zm_fp = z15.WithLaneSize(lane_size_in_bits);
17034 InsrHelper(&masm, zn_fp, zn_inputs_rawbits);
17035 InsrHelper(&masm, zm_fp, zm_inputs_rawbits);
17036
17037 PRegisterWithLaneSize pd_result_fp_3 = p11.WithLaneSize(lane_size_in_bits);
17038 (masm.*fcmp)(pd_result_fp_3, p1.Zeroing(), zn_fp, zm_fp);
17039
17040 END();
17041
17042 if (CAN_RUN()) {
17043 RUN();
17044
17045 if (cond != uo) {
17046 ASSERT_EQUAL_SVE(pd_result_int_1, pd_result_fp_1);
17047 ASSERT_EQUAL_SVE(pd_result_int_2, pd_result_fp_2);
17048 }
17049 ASSERT_EQUAL_SVE(pd_expected, pd_result_fp_3);
17050 }
17051}
17052
17053TEST_SVE(sve_fp_compare_vectors) {
17054 double inf_p = kFP64PositiveInfinity;
17055 double inf_n = kFP64NegativeInfinity;
17056 double nan = kFP64DefaultNaN;
17057
17058 // Normal floating-point comparisons have been tested in the helper, so the
17058 // inputs here focus on special values.
17059 double zn[] = {0.0, inf_n, 1.0, inf_p, inf_p, nan, 0.0, nan};
17060 double zm[] = {-0.0, inf_n, inf_n, -2.0, inf_n, nan, nan, inf_p};
17061
17062 int pd_fcm_gt[] = {0, 0, 1, 1, 1, 0, 0, 0};
17063 int pd_fcm_lt[] = {0, 0, 0, 0, 0, 0, 0, 0};
17064 int pd_fcm_ge[] = {1, 1, 1, 1, 1, 0, 0, 0};
17065 int pd_fcm_le[] = {1, 1, 0, 0, 0, 0, 0, 0};
17066 int pd_fcm_eq[] = {1, 1, 0, 0, 0, 0, 0, 0};
17067 int pd_fcm_ne[] = {0, 0, 1, 1, 1, 0, 0, 0};
17068 int pd_fcm_uo[] = {0, 0, 0, 0, 0, 1, 1, 1};
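  // Fcmuo (compare unordered) is true only when at least one operand is a NaN,
  // hence only the last three entries are set.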
17069 int pd_fac_gt[] = {0, 0, 0, 1, 0, 0, 0, 0};
17070 int pd_fac_lt[] = {0, 0, 1, 0, 0, 0, 0, 0};
17071 int pd_fac_ge[] = {1, 1, 0, 1, 1, 0, 0, 0};
17072 int pd_fac_le[] = {1, 1, 1, 0, 1, 0, 0, 0};
17073
17074 int lane_sizes[] = {kHRegSize, kSRegSize, kDRegSize};
17075
17076 for (size_t i = 0; i < ArrayLength(lane_sizes); i++) {
17077 int lane_size = lane_sizes[i];
17078 // Test floating-point compare vectors.
17079 TestFpCompareHelper(config, lane_size, gt, zn, zm, pd_fcm_gt);
17080 TestFpCompareHelper(config, lane_size, lt, zn, zm, pd_fcm_lt);
17081 TestFpCompareHelper(config, lane_size, ge, zn, zm, pd_fcm_ge);
17082 TestFpCompareHelper(config, lane_size, le, zn, zm, pd_fcm_le);
17083 TestFpCompareHelper(config, lane_size, eq, zn, zm, pd_fcm_eq);
17084 TestFpCompareHelper(config, lane_size, ne, zn, zm, pd_fcm_ne);
17085 TestFpCompareHelper(config, lane_size, uo, zn, zm, pd_fcm_uo);
17086
17087 // Test floating-point absolute compare vectors.
17088 TestFpCompareHelper(config, lane_size, gt, zn, zm, pd_fac_gt, true);
17089 TestFpCompareHelper(config, lane_size, lt, zn, zm, pd_fac_lt, true);
17090 TestFpCompareHelper(config, lane_size, ge, zn, zm, pd_fac_ge, true);
17091 TestFpCompareHelper(config, lane_size, le, zn, zm, pd_fac_le, true);
17092 }
17093}
17094
TatWai Chonge3775132020-02-16 22:13:17 -080017095template <size_t N, typename T>
17096static void TestFpCompareZeroHelper(Test* config,
17097 int lane_size_in_bits,
17098 Condition cond,
17099 const T (&zn_inputs)[N],
17100 const int (&pd_expected)[N]) {
17101 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
17102 START();
17103
17104 ZRegister zn = z28.WithLaneSize(lane_size_in_bits);
17105 PRegisterWithLaneSize pd = p14.WithLaneSize(lane_size_in_bits);
17106
17107 uint64_t zn_rawbits[N];
17108 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
17109 InsrHelper(&masm, zn, zn_rawbits);
17110
17111 __ Ptrue(p0.VnB());
17112 (masm.*GetFpCompareZeroFn(cond))(pd, p0.Zeroing(), zn);
17113
17114 END();
17115
17116 if (CAN_RUN()) {
17117 RUN();
17118
17119 ASSERT_EQUAL_SVE(pd_expected, pd);
17120 }
17121}
17122
17123TEST_SVE(sve_fp_compare_vector_zero) {
17124 Float16 fp16_inf_p = kFP16PositiveInfinity;
17125 Float16 fp16_inf_n = kFP16NegativeInfinity;
17126 Float16 fp16_dn = kFP16DefaultNaN;
17127 Float16 fp16_sn = RawbitsToFloat16(0x7c22);
17128 Float16 fp16_qn = RawbitsToFloat16(0x7e55);
17129
17130 float fp32_inf_p = kFP32PositiveInfinity;
17131 float fp32_inf_n = kFP32NegativeInfinity;
17132 float fp32_dn = kFP32DefaultNaN;
17133 float fp32_sn = RawbitsToFloat(0x7f952222);
17134 float fp32_qn = RawbitsToFloat(0x7fea2222);
17135
17136 double fp64_inf_p = kFP64PositiveInfinity;
17137 double fp64_inf_n = kFP64NegativeInfinity;
17138 double fp64_dn = kFP64DefaultNaN;
17139 double fp64_sn = RawbitsToDouble(0x7ff5555511111111);
17140 double fp64_qn = RawbitsToDouble(0x7ffaaaaa11111111);
17141
17142 // Normal floating-point comparisons have been tested in the non-zero form,
17142 // so the inputs here focus on special values.
17143 Float16 zn_inputs_h[] = {Float16(0.0),
17144 Float16(-0.0),
17145 fp16_inf_p,
17146 fp16_inf_n,
17147 fp16_dn,
17148 fp16_sn,
17149 fp16_qn};
17150 float zn_inputs_s[] =
17151 {0.0, -0.0, fp32_inf_p, fp32_inf_n, fp32_dn, fp32_sn, fp32_qn};
17152 double zn_inputs_d[] =
17153 {0.0, -0.0, fp64_inf_p, fp64_inf_n, fp64_dn, fp64_sn, fp64_qn};
17154
17155 int pd_expected_gt[] = {0, 0, 1, 0, 0, 0, 0};
17156 int pd_expected_lt[] = {0, 0, 0, 1, 0, 0, 0};
17157 int pd_expected_ge[] = {1, 1, 1, 0, 0, 0, 0};
17158 int pd_expected_le[] = {1, 1, 0, 1, 0, 0, 0};
17159 int pd_expected_eq[] = {1, 1, 0, 0, 0, 0, 0};
17160 int pd_expected_ne[] = {0, 0, 1, 1, 0, 0, 0};
17161
17162 TestFpCompareZeroHelper(config, kDRegSize, gt, zn_inputs_d, pd_expected_gt);
17163 TestFpCompareZeroHelper(config, kDRegSize, lt, zn_inputs_d, pd_expected_lt);
17164 TestFpCompareZeroHelper(config, kDRegSize, ge, zn_inputs_d, pd_expected_ge);
17165 TestFpCompareZeroHelper(config, kDRegSize, le, zn_inputs_d, pd_expected_le);
17166 TestFpCompareZeroHelper(config, kDRegSize, eq, zn_inputs_d, pd_expected_eq);
17167 TestFpCompareZeroHelper(config, kDRegSize, ne, zn_inputs_d, pd_expected_ne);
17168
17169 TestFpCompareZeroHelper(config, kSRegSize, gt, zn_inputs_s, pd_expected_gt);
17170 TestFpCompareZeroHelper(config, kSRegSize, lt, zn_inputs_s, pd_expected_lt);
17171 TestFpCompareZeroHelper(config, kSRegSize, ge, zn_inputs_s, pd_expected_ge);
17172 TestFpCompareZeroHelper(config, kSRegSize, le, zn_inputs_s, pd_expected_le);
17173 TestFpCompareZeroHelper(config, kSRegSize, eq, zn_inputs_s, pd_expected_eq);
17174 TestFpCompareZeroHelper(config, kSRegSize, ne, zn_inputs_s, pd_expected_ne);
17175
17176 TestFpCompareZeroHelper(config, kHRegSize, gt, zn_inputs_h, pd_expected_gt);
17177 TestFpCompareZeroHelper(config, kHRegSize, lt, zn_inputs_h, pd_expected_lt);
17178 TestFpCompareZeroHelper(config, kHRegSize, ge, zn_inputs_h, pd_expected_ge);
17179 TestFpCompareZeroHelper(config, kHRegSize, le, zn_inputs_h, pd_expected_le);
17180 TestFpCompareZeroHelper(config, kHRegSize, eq, zn_inputs_h, pd_expected_eq);
17181 TestFpCompareZeroHelper(config, kHRegSize, ne, zn_inputs_h, pd_expected_ne);
17182}
17183
TatWai Chong2cb1b612020-03-04 23:51:21 -080017184typedef void (MacroAssembler::*FPUnaryMFn)(const ZRegister& zd,
17185 const PRegisterM& pg,
17186 const ZRegister& zn);
17187
17188typedef void (MacroAssembler::*FPUnaryZFn)(const ZRegister& zd,
17189 const PRegisterZ& pg,
17190 const ZRegister& zn);
17191
17192template <size_t N, size_t M>
17193static void TestFPUnaryPredicatedHelper(Test* config,
17194 int src_size_in_bits,
17195 int dst_size_in_bits,
17196 uint64_t (&zn_inputs)[N],
17197 const uint64_t (&pg_inputs)[M],
17198 const uint64_t (&zd_expected)[N],
17199 FPUnaryMFn macro_m,
17200 FPUnaryZFn macro_z) {
17201 // Provide the full predicate input.
17202 VIXL_ASSERT(M == (kPRegMaxSize / kDRegSize));
17203 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
17204 START();
17205
17206 int ds = dst_size_in_bits;
17207 int ss = src_size_in_bits;
17208 int ls = std::max(ss, ds);
17209
17210 // When the destination type is larger than the source type, fill the high
17211 // parts with noise values, which should be ignored.
17212 if (ds > ss) {
17213 VIXL_ASSERT(ss < 64);
17214 uint64_t zn_inputs_mod[N];
17215 uint64_t sn = GetSignallingNan(ss);
17216 for (unsigned i = 0; i < N; i++) {
17217 zn_inputs_mod[i] = zn_inputs[i] | ((sn + i) << ss);
17218 }
17219 InsrHelper(&masm, z29.WithLaneSize(ls), zn_inputs_mod);
17220 } else {
17221 InsrHelper(&masm, z29.WithLaneSize(ls), zn_inputs);
17222 }
17223
17224 // Make a copy so we can check that constructive operations preserve zn.
17225 __ Mov(z28, z29);
17226
17227 // Run the operation on all lanes.
17228 __ Ptrue(p0.WithLaneSize(ls));
17229 (masm.*macro_m)(z27.WithLaneSize(ds), p0.Merging(), z28.WithLaneSize(ss));
17230
17231 Initialise(&masm,
17232 p1.VnB(),
17233 pg_inputs[3],
17234 pg_inputs[2],
17235 pg_inputs[1],
17236 pg_inputs[0]);
17237
17238 // Clear the irrelevant lanes.
17239 __ Index(z31.WithLaneSize(ls), 0, 1);
17240 __ Cmplt(p1.WithLaneSize(ls), p1.Zeroing(), z31.WithLaneSize(ls), N);
17241
17242 // Check merging predication.
17243 __ Index(z11.WithLaneSize(ls), 42, 1);
17244 // Preserve the base value so we can derive the expected result.
17245 __ Mov(z21, z11);
17246 __ Mov(z9, z11);
17247 (masm.*macro_m)(z11.WithLaneSize(ds), p1.Merging(), z28.WithLaneSize(ss));
17248
17249 // Generate expected values using explicit merging operations.
17250 InsrHelper(&masm, z25.WithLaneSize(ls), zd_expected);
17251 __ Mov(z21.WithLaneSize(ls), p1.Merging(), z25.WithLaneSize(ls));
17252
17253 // Check zeroing predication.
17254 __ Index(z12.WithLaneSize(ds), 42, -1);
17255 (masm.*macro_z)(z12.WithLaneSize(ds), p1.Zeroing(), z28.WithLaneSize(ss));
17256
17257 // Generate expected values using explicit zeroing operations.
17258 InsrHelper(&masm, z30.WithLaneSize(ls), zd_expected);
17259 // Emulate zeroing predication.
17260 __ Dup(z22.WithLaneSize(ls), 0);
17261 __ Mov(z22.WithLaneSize(ls), p1.Merging(), z30.WithLaneSize(ls));
17262
17263 // Check an in-place update.
17264 __ Mov(z9.WithLaneSize(ls), p1.Merging(), z28.WithLaneSize(ls));
17265 (masm.*macro_m)(z9.WithLaneSize(ds), p1.Merging(), z9.WithLaneSize(ss));
17266
17267 END();
17268
17269 if (CAN_RUN()) {
17270 RUN();
17271
17272 // Check all lanes.
17273 ASSERT_EQUAL_SVE(zd_expected, z27.WithLaneSize(ls));
17274
17275 // Check that constructive operations preserve their inputs.
17276 ASSERT_EQUAL_SVE(z28, z29);
17277
17278 // Check merging predication.
17279 ASSERT_EQUAL_SVE(z21.WithLaneSize(ls), z11.WithLaneSize(ls));
17280
17281 // Check zeroing predication.
17282 ASSERT_EQUAL_SVE(z22.WithLaneSize(ls), z12.WithLaneSize(ls));
17283
17284 // Check in-place operation where zd == zn.
17285 ASSERT_EQUAL_SVE(z21.WithLaneSize(ls), z9.WithLaneSize(ls));
17286 }
17287}
17288
17289template <size_t N, typename T>
17290static void TestFPUnaryPredicatedHelper(Test* config,
17291 int src_size_in_bits,
17292 int dst_size_in_bits,
17293 T (&zn_inputs)[N],
17294 const T (&zd_expected)[N],
17295 FPUnaryMFn macro_m,
17296 FPUnaryZFn macro_z) {
17297 uint64_t pg_inputs[] = {0xa55aa55aa55aa55a,
17298 0xa55aa55aa55aa55a,
17299 0xa55aa55aa55aa55a,
17300 0xa55aa55aa55aa55a};
17301
17302 TestFPUnaryPredicatedHelper(config,
17303 src_size_in_bits,
17304 dst_size_in_bits,
17305 zn_inputs,
17306 pg_inputs,
17307 zd_expected,
17308 macro_m,
17309 macro_z);
17310
17311 // The complement of the above predicate, to get full input coverage.
17312 uint64_t pg_c_inputs[] = {0x5aa55aa55aa55aa5,
17313 0x5aa55aa55aa55aa5,
17314 0x5aa55aa55aa55aa5,
17315 0x5aa55aa55aa55aa5};
17316
17317 TestFPUnaryPredicatedHelper(config,
17318 src_size_in_bits,
17319 dst_size_in_bits,
17320 zn_inputs,
17321 pg_c_inputs,
17322 zd_expected,
17323 macro_m,
17324 macro_z);
17325}
17326
17327template <size_t N, typename T>
17328static void TestFcvtHelper(Test* config,
17329 int src_size_in_bits,
17330 int dst_size_in_bits,
17331 T (&zn_inputs)[N],
17332 const T (&zd_expected)[N]) {
17333 TestFPUnaryPredicatedHelper(config,
17334 src_size_in_bits,
17335 dst_size_in_bits,
17336 zn_inputs,
17337 zd_expected,
17338 &MacroAssembler::Fcvt, // Merging form.
17339 &MacroAssembler::Fcvt); // Zeroing form.
17340}
17341
17342TEST_SVE(sve_fcvt) {
17343 uint64_t h_vals[] = {0x7c00,
17344 0xfc00,
17345 0,
17346 0x8000,
17347 0x7bff, // Max half precision.
17348 0x0400, // Min positive normal.
17349 0x03ff, // Max subnormal.
17350 0x0001}; // Min positive subnormal.
17351
17352 uint64_t s_vals[] = {0x7f800000,
17353 0xff800000,
17354 0,
17355 0x80000000,
17356 0x477fe000,
17357 0x38800000,
17358 0x387fc000,
17359 0x33800000};
17360
17361 uint64_t d_vals[] = {0x7ff0000000000000,
17362 0xfff0000000000000,
17363 0,
17364 0x8000000000000000,
17365 0x40effc0000000000,
17366 0x3f10000000000000,
17367 0x3f0ff80000000000,
17368 0x3e70000000000000};
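
 // Each row of h_vals, s_vals and d_vals encodes the same value at half,
 // single and double precision; for example 65504.0 (the FP16 maximum) is
 // 0x7bff as FP16, 0x477fe000 as FP32 and 0x40effc0000000000 as FP64, so a
 // conversion in either direction must reproduce the corresponding entry
 // exactly.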
17369
17370 TestFcvtHelper(config, kHRegSize, kSRegSize, h_vals, s_vals);
17371 TestFcvtHelper(config, kSRegSize, kHRegSize, s_vals, h_vals);
17372 TestFcvtHelper(config, kSRegSize, kDRegSize, s_vals, d_vals);
17373 TestFcvtHelper(config, kDRegSize, kSRegSize, d_vals, s_vals);
17374 TestFcvtHelper(config, kHRegSize, kDRegSize, h_vals, d_vals);
17375 TestFcvtHelper(config, kDRegSize, kHRegSize, d_vals, h_vals);
17376}
17377
17378TEST_SVE(sve_fcvt_nan) {
17379 uint64_t h_inputs[] = {0x7e55, // Quiet NaN.
17380 0x7c22}; // Signalling NaN.
17381
17382 uint64_t h2s_expected[] = {0x7fcaa000, 0x7fc44000};
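 // These expected values follow the usual NaN conversion rule: the ten
 // fraction (payload) bits of 0x7e55 (0b10'0101'0101) become the top ten
 // fraction bits of the FP32 result, 0x7f800000 | (0x255 << 13) = 0x7fcaa000,
 // while the signalling NaN 0x7c22 is first quieted (top fraction bit set)
 // before widening, giving 0x7fc44000.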
17383
17384 uint64_t h2d_expected[] = {0x7ff9540000000000, 0x7ff8880000000000};
17385
17386 uint64_t s_inputs[] = {0x7fc12345, // Quiet NaN.
17387 0x7f812345}; // Signalling NaN.
17388
17389 uint64_t s2h_expected[] = {0x7e09, 0x7e09};
17390
17391 uint64_t s2d_expected[] = {0x7ff82468a0000000, 0x7ff82468a0000000};
17392
17393 uint64_t d_inputs[] = {0x7ffaaaaa22222222, // Quiet NaN.
17394 0x7ff5555511111111}; // Signalling NaN.
17395
17396 uint64_t d2h_expected[] = {0x7eaa, 0x7f55};
17397
17398 uint64_t d2s_expected[] = {0x7fd55551, 0x7feaaaa8};
17399
17400 TestFcvtHelper(config, kHRegSize, kSRegSize, h_inputs, h2s_expected);
17401 TestFcvtHelper(config, kSRegSize, kHRegSize, s_inputs, s2h_expected);
17402 TestFcvtHelper(config, kHRegSize, kDRegSize, h_inputs, h2d_expected);
17403 TestFcvtHelper(config, kDRegSize, kHRegSize, d_inputs, d2h_expected);
17404 TestFcvtHelper(config, kSRegSize, kDRegSize, s_inputs, s2d_expected);
17405 TestFcvtHelper(config, kDRegSize, kSRegSize, d_inputs, d2s_expected);
17406}
17407
TatWai Chongf60f6dc2020-02-21 10:48:11 -080017408template <size_t N, typename T>
17409static void TestFrecpxHelper(Test* config,
17410 int lane_size_in_bits,
17411 T (&zn_inputs)[N],
17412 const T (&zd_expected)[N]) {
17413 TestFPUnaryPredicatedHelper(config,
17414 lane_size_in_bits,
17415 lane_size_in_bits,
17416 zn_inputs,
17417 zd_expected,
17418 &MacroAssembler::Frecpx, // Merging form.
17419 &MacroAssembler::Frecpx); // Zerging form.
17420}
17421
17422TEST_SVE(sve_frecpx_h) {
17423 uint64_t zn_inputs[] = {Float16ToRawbits(kFP16PositiveInfinity),
17424 Float16ToRawbits(kFP16NegativeInfinity),
17425 Float16ToRawbits(Float16(0.0)),
17426 Float16ToRawbits(Float16(-0.0)),
17427 0x0001, // Smallest positive subnormal number.
17428 0x03ff, // Largest subnormal number.
17429 0x0400, // Smallest positive normal number.
17430 0x7bff, // Largest normal number.
17431 0x3bff, // Largest number less than one.
17432 0x3c01, // Smallest number larger than one.
17433 0x7c22, // Signalling NaN.
17434 0x7e55}; // Quiet NaN.
17435
17436 uint64_t zd_expected[] = {0,
17437 0x8000,
17438 0x7800,
17439 0xf800,
17440 // The exponent field of subnormal numbers is zero.
17441 0x7800,
17442 0x7800,
17443 0x7800,
17444 0x0400,
17445 0x4400,
17446 0x4000,
17447 0x7e22, // To quiet NaN.
17448 0x7e55};
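 // For normal, non-NaN inputs, Frecpx keeps the sign, bitwise-inverts the
 // exponent field and zeroes the fraction; e.g. 0x3c01 (exponent 0b01111)
 // maps to 0x4000 (exponent 0b10000, i.e. 2.0) and 0x7bff (exponent 0b11110)
 // maps to 0x0400. Zeros, subnormals and NaNs are special-cased, as above.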
17449
17450 TestFrecpxHelper(config, kHRegSize, zn_inputs, zd_expected);
17451}
17452
17453TEST_SVE(sve_frecpx_s) {
17454 uint64_t zn_inputs[] = {FloatToRawbits(kFP32PositiveInfinity),
17455 FloatToRawbits(kFP32NegativeInfinity),
17456 FloatToRawbits(65504), // Max half precision.
17457 FloatToRawbits(6.10352e-5), // Min positive normal.
17458 FloatToRawbits(6.09756e-5), // Max subnormal.
17459 FloatToRawbits(
17460 5.96046e-8), // Min positive subnormal.
17461 FloatToRawbits(5e-9), // Not representable -> zero.
17462 FloatToRawbits(-0.0),
17463 FloatToRawbits(0.0),
17464 0x7f952222, // Signalling NaN.
17465 0x7fea2222}; // Quiet NaN.
17466
17467 uint64_t zd_expected[] = {0, // 0.0
17468 0x80000000, // -0.0
17469 0x38800000, // 6.10352e-05
17470 0x47000000, // 32768
17471 0x47800000, // 65536
17472 0x4c800000, // 6.71089e+07
17473 0x4e000000, // 5.36871e+08
17474 0xff000000, // -1.70141e+38
17475 0x7f000000, // 1.70141e+38
17476 0x7fd52222,
17477 0x7fea2222};
17478
17479 TestFrecpxHelper(config, kSRegSize, zn_inputs, zd_expected);
17480}
17481
17482TEST_SVE(sve_frecpx_d) {
17483 uint64_t zn_inputs[] = {DoubleToRawbits(kFP64PositiveInfinity),
17484 DoubleToRawbits(kFP64NegativeInfinity),
17485 DoubleToRawbits(65504), // Max half precision.
17486 DoubleToRawbits(6.10352e-5), // Min positive normal.
17487 DoubleToRawbits(6.09756e-5), // Max subnormal.
17488 DoubleToRawbits(
17489 5.96046e-8), // Min positive subnormal.
17490 DoubleToRawbits(5e-9), // Not representable -> zero.
17491 DoubleToRawbits(-0.0),
17492 DoubleToRawbits(0.0),
17493 0x7ff5555511111111, // Signalling NaN.
17494 0x7ffaaaaa11111111}; // Quiet NaN.
17495
17496 uint64_t zd_expected[] = {0, // 0.0
17497 0x8000000000000000, // -0.0
17498 0x3f10000000000000, // 6.10352e-05
17499 0x40e0000000000000, // 32768
17500 0x40f0000000000000, // 65536
17501 0x4190000000000000, // 6.71089e+07
17502 0x41c0000000000000, // 5.36871e+08
17503 0xffe0000000000000, // -1.70141e+38
17504 0x7fe0000000000000, // 1.70141e+38
17505 0x7ffd555511111111,
17506 0x7ffaaaaa11111111};
17507
17508 TestFrecpxHelper(config, kDRegSize, zn_inputs, zd_expected);
17509}
TatWai Chong2cb1b612020-03-04 23:51:21 -080017510
TatWai Chongb4a25f62020-02-27 00:53:57 -080017511template <size_t N, typename T>
17512static void TestFsqrtHelper(Test* config,
17513 int lane_size_in_bits,
17514 T (&zn_inputs)[N],
17515 const T (&zd_expected)[N]) {
17516 TestFPUnaryPredicatedHelper(config,
17517 lane_size_in_bits,
17518 lane_size_in_bits,
17519 zn_inputs,
17520 zd_expected,
17521 &MacroAssembler::Fsqrt, // Merging form.
17522 &MacroAssembler::Fsqrt); // Zeroing form.
17523}
17524
17525TEST_SVE(sve_fsqrt_h) {
17526 uint64_t zn_inputs[] =
17527 {Float16ToRawbits(Float16(0.0)),
17528 Float16ToRawbits(Float16(-0.0)),
17529 Float16ToRawbits(Float16(1.0)),
17530 Float16ToRawbits(Float16(65025.0)),
17531 Float16ToRawbits(kFP16PositiveInfinity),
17532 Float16ToRawbits(kFP16NegativeInfinity),
17533 Float16ToRawbits(Float16(6.10352e-5)), // Min normal positive.
17534 Float16ToRawbits(Float16(65504.0)), // Max normal positive float.
17535 Float16ToRawbits(Float16(6.09756e-5)), // Max subnormal.
17536 Float16ToRawbits(Float16(5.96046e-8)), // Min subnormal positive.
17537 0x7c22, // Signaling NaN
17538 0x7e55}; // Quiet NaN
17539
17540 uint64_t zd_expected[] = {Float16ToRawbits(Float16(0.0)),
17541 Float16ToRawbits(Float16(-0.0)),
17542 Float16ToRawbits(Float16(1.0)),
17543 Float16ToRawbits(Float16(255.0)),
17544 Float16ToRawbits(kFP16PositiveInfinity),
17545 Float16ToRawbits(kFP16DefaultNaN),
17546 0x2000,
17547 0x5bff,
17548 0x1fff,
17549 0x0c00,
17550 0x7e22, // To quiet NaN.
17551 0x7e55};
17552
17553 TestFsqrtHelper(config, kHRegSize, zn_inputs, zd_expected);
17554}
17555
17556TEST_SVE(sve_fsqrt_s) {
17557 uint64_t zn_inputs[] = {FloatToRawbits(0.0f),
17558 FloatToRawbits(-0.0f),
17559 FloatToRawbits(1.0f),
17560 FloatToRawbits(65536.0f),
17561 FloatToRawbits(kFP32PositiveInfinity),
17562 FloatToRawbits(kFP32NegativeInfinity),
17563 0x00800000, // Min normal positive, ~1.17e−38
17564 0x7f7fffff, // Max normal positive, ~3.40e+38
17565 0x00000001, // Min subnormal positive, ~1.40e−45
17566 0x007fffff, // Max subnormal, ~1.17e−38
17567 0x7f951111, // Signalling NaN.
17568 0x7fea1111}; // Quiet NaN.
17569
17570 uint64_t zd_expected[] = {FloatToRawbits(0.0f),
17571 FloatToRawbits(-0.0f),
17572 FloatToRawbits(1.0f),
17573 FloatToRawbits(256.0f),
17574 FloatToRawbits(kFP32PositiveInfinity),
17575 FloatToRawbits(kFP32DefaultNaN),
17576 0x20000000, // ~1.08e-19
17577 0x5f7fffff, // ~1.84e+19
17578 0x1a3504f3, // ~3.74e-23
17579 0x1fffffff, // ~1.08e-19
17580 0x7fd51111, // To quiet NaN.
17581 0x7fea1111};
17582
17583 TestFsqrtHelper(config, kSRegSize, zn_inputs, zd_expected);
17584}
17585
17586TEST_SVE(sve_fsqrt_d) {
17587 uint64_t zn_inputs[] =
17588 {DoubleToRawbits(0.0),
17589 DoubleToRawbits(-0.0),
17590 DoubleToRawbits(1.0),
17591 DoubleToRawbits(65536.0),
17592 DoubleToRawbits(kFP64PositiveInfinity),
17593 DoubleToRawbits(kFP64NegativeInfinity),
17594 0x0010000000000000, // Min normal positive, ~2.22e-308
17595 0x7fefffffffffffff, // Max normal positive, ~1.79e+308
17596 0x0000000000000001, // Min subnormal positive, 5e-324
17597 0x000fffffffffffff, // Max subnormal, ~2.22e-308
17598 0x7ff5555511111111,
17599 0x7ffaaaaa11111111};
17600
17601 uint64_t zd_expected[] = {DoubleToRawbits(0.0),
17602 DoubleToRawbits(-0.0),
17603 DoubleToRawbits(1.0),
17604 DoubleToRawbits(256.0),
17605 DoubleToRawbits(kFP64PositiveInfinity),
17606 DoubleToRawbits(kFP64DefaultNaN),
17607 0x2000000000000000, // ~1.49e-154
17608 0x5fefffffffffffff, // ~1.34e+154
17609 0x1e60000000000000, // ~2.22e-162
17610 0x1fffffffffffffff, // ~1.49e-154
17611 0x7ffd555511111111, // To quiet NaN.
17612 0x7ffaaaaa11111111};
17613
17614 TestFsqrtHelper(config, kDRegSize, zn_inputs, zd_expected);
17615}
17616
Martyn Capewell48522f52020-03-16 15:31:19 +000017617TEST_SVE(sve_adr) {
17618 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
17619 START();
17620
17621 __ Index(z0.VnD(), 0x10000000f0000000, 0x1000);
17622 __ Index(z1.VnD(), 1, 3);
17623 __ Index(z2.VnS(), -1, -1);
17624 __ Adr(z3.VnD(), SVEMemOperand(z0.VnD(), z1.VnD()));
17625 __ Adr(z4.VnD(), SVEMemOperand(z0.VnD(), z1.VnD(), LSL, 1));
17626 __ Adr(z5.VnD(), SVEMemOperand(z0.VnD(), z1.VnD(), LSL, 2));
17627 __ Adr(z6.VnD(), SVEMemOperand(z0.VnD(), z1.VnD(), LSL, 3));
17628 __ Adr(z7.VnD(), SVEMemOperand(z0.VnD(), z2.VnD(), UXTW));
17629 __ Adr(z8.VnD(), SVEMemOperand(z0.VnD(), z2.VnD(), UXTW, 1));
17630 __ Adr(z9.VnD(), SVEMemOperand(z0.VnD(), z2.VnD(), UXTW, 2));
17631 __ Adr(z10.VnD(), SVEMemOperand(z0.VnD(), z2.VnD(), UXTW, 3));
17632 __ Adr(z11.VnD(), SVEMemOperand(z0.VnD(), z2.VnD(), SXTW));
17633 __ Adr(z12.VnD(), SVEMemOperand(z0.VnD(), z2.VnD(), SXTW, 1));
17634 __ Adr(z13.VnD(), SVEMemOperand(z0.VnD(), z2.VnD(), SXTW, 2));
17635 __ Adr(z14.VnD(), SVEMemOperand(z0.VnD(), z2.VnD(), SXTW, 3));
17636 __ Adr(z15.VnS(), SVEMemOperand(z0.VnS(), z2.VnS()));
17637 __ Adr(z16.VnS(), SVEMemOperand(z0.VnS(), z2.VnS(), LSL, 1));
17638 __ Adr(z17.VnS(), SVEMemOperand(z0.VnS(), z2.VnS(), LSL, 2));
17639 __ Adr(z18.VnS(), SVEMemOperand(z0.VnS(), z2.VnS(), LSL, 3));
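
  // A worked example for the lowest 64-bit lane: z0 is 0x10000000f0000000 and
  // the low 32 bits of z2 are 0xffffffff, so
  //   z7  = z0 + UXTW(0xffffffff) = 0x10000001efffffff
  //   z11 = z0 + SXTW(0xffffffff) = z0 - 1 = 0x10000000efffffff
  // which matches the last element of expected_z7 and expected_z11 below.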
17640
17641 END();
17642
17643 if (CAN_RUN()) {
17644 RUN();
17645 uint64_t expected_z3[] = {0x10000000f0001004, 0x10000000f0000001};
17646 uint64_t expected_z4[] = {0x10000000f0001008, 0x10000000f0000002};
17647 uint64_t expected_z5[] = {0x10000000f0001010, 0x10000000f0000004};
17648 uint64_t expected_z6[] = {0x10000000f0001020, 0x10000000f0000008};
17649 uint64_t expected_z7[] = {0x10000001f0000ffd, 0x10000001efffffff};
17650 uint64_t expected_z8[] = {0x10000002f0000ffa, 0x10000002effffffe};
17651 uint64_t expected_z9[] = {0x10000004f0000ff4, 0x10000004effffffc};
17652 uint64_t expected_z10[] = {0x10000008f0000fe8, 0x10000008effffff8};
17653 uint64_t expected_z11[] = {0x10000000f0000ffd, 0x10000000efffffff};
17654 uint64_t expected_z12[] = {0x10000000f0000ffa, 0x10000000effffffe};
17655 uint64_t expected_z13[] = {0x10000000f0000ff4, 0x10000000effffffc};
17656 uint64_t expected_z14[] = {0x10000000f0000fe8, 0x10000000effffff8};
17657 uint64_t expected_z15[] = {0x0ffffffcf0000ffd, 0x0ffffffeefffffff};
17658 uint64_t expected_z16[] = {0x0ffffff8f0000ffa, 0x0ffffffceffffffe};
17659 uint64_t expected_z17[] = {0x0ffffff0f0000ff4, 0x0ffffff8effffffc};
17660 uint64_t expected_z18[] = {0x0fffffe0f0000fe8, 0x0ffffff0effffff8};
17661
17662 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
17663 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
17664 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
17665 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
17666 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
17667 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
17668 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
17669 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
17670 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
17671 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
17672 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
17673 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
17674 ASSERT_EQUAL_SVE(expected_z15, z15.VnD());
17675 ASSERT_EQUAL_SVE(expected_z16, z16.VnD());
17676 ASSERT_EQUAL_SVE(expected_z17, z17.VnD());
17677 ASSERT_EQUAL_SVE(expected_z18, z18.VnD());
17678 }
17679}
17680
TatWai Chong85e15102020-05-04 21:00:40 -070017681// Test loads and broadcast by comparing them with the result of a set of
17682// equivalent scalar loads.
17683template <typename F>
17684static void LoadBcastHelper(Test* config,
17685 unsigned msize_in_bits,
17686 unsigned esize_in_bits,
17687 F sve_ld1,
17688 bool is_signed) {
17689 VIXL_ASSERT((esize_in_bits == kBRegSize) || (esize_in_bits == kHRegSize) ||
17690 (esize_in_bits == kSRegSize) || (esize_in_bits == kDRegSize));
17691 static const unsigned kMaxLaneCount = kZRegMaxSize / kBRegSize;
17692
17693 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
17694 START();
17695
17696 unsigned msize_in_bytes = msize_in_bits / kBitsPerByte;
17697 unsigned esize_in_bytes = esize_in_bits / kBitsPerByte;
17698 int vl = config->sve_vl_in_bytes();
17699
17700 uint64_t offsets[kMaxLaneCount];
17701 uint64_t buffer_size = vl * 64;
17702 uint64_t data = reinterpret_cast<uintptr_t>(malloc(buffer_size));
17703 BufferFillingHelper(data,
17704 buffer_size,
17705 msize_in_bytes,
17706 kMaxLaneCount,
17707 offsets);
17708
17709 for (unsigned i = 0; i < (kMaxLaneCount / 2); i++) {
17710 // Assign encodable offsets into the first part of the offset array so
17711 // that both encodable and unencodable offsets can be tested.
17712 // Note that the immediate offset is encoded in a six-bit field.
17713 offsets[i] = (offsets[i] % (UINT64_C(1) << 6)) * msize_in_bytes;
17714 }
17715
17716 ZRegister zn = z0.WithLaneSize(esize_in_bits);
17717 ZRegister zn_ref = z4.WithLaneSize(esize_in_bits);
17718
17719 PRegisterZ pg = p0.Zeroing();
17720 Initialise(&masm,
17721 pg,
17722 0x9abcdef012345678,
17723 0xabcdef0123456789,
17724 0xf4f3f1f0fefdfcfa,
17725 0xf9f8f6f5f3f2f0ff);
17726
17727 __ Mov(x2, data);
17728 uint64_t encodable_offset = offsets[0];
17729 // Simple check that the operation is correct for a single offset.
17730 (masm.*sve_ld1)(zn, pg, SVEMemOperand(x2, encodable_offset));
17731
17732 // Generate a reference result using scalar loads.
17733 uint64_t address = data + encodable_offset;
17734 uint64_t duplicated_addresses[kMaxLaneCount];
17735 for (unsigned i = 0; i < kMaxLaneCount; i++) {
17736 duplicated_addresses[i] = address;
17737 }
17738
17739 ScalarLoadHelper(&masm,
17740 vl,
17741 duplicated_addresses,
17742 zn_ref,
17743 pg,
17744 esize_in_bits,
17745 msize_in_bits,
17746 is_signed);
17747
17748 ZRegister zn_agg = z10.WithLaneSize(esize_in_bits);
17749 ZRegister zn_agg_ref = z11.WithLaneSize(esize_in_bits);
17750 ZRegister zn_temp = z12.WithLaneSize(esize_in_bits);
17751
17752 __ Dup(zn_agg, 0);
17753 __ Dup(zn_agg_ref, 0);
17754
17755 // Check that the operation is correct for different offsets.
17756 for (unsigned i = 0; i < (vl / esize_in_bytes); i++) {
17757 (masm.*sve_ld1)(zn_temp, pg, SVEMemOperand(x2, offsets[i]));
17758 __ Lastb(x1, pg, zn_temp);
17759 __ Insr(zn_agg, x1);
17760
17761 __ Mov(x3, data + offsets[i]);
17762 ScalarLoadHelper(&masm, x1, x3, msize_in_bits, is_signed);
17763 __ Insr(zn_agg_ref, x1);
17764 }
17765
17766 END();
17767
17768 if (CAN_RUN()) {
17769 RUN();
17770
17771 ASSERT_EQUAL_SVE(zn_ref, zn);
17772 ASSERT_EQUAL_SVE(zn_agg_ref, zn_agg);
17773 }
17774
17775 free(reinterpret_cast<void*>(data));
17776}
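
// A scalar sketch of the load-and-broadcast forms exercised below: each Ld1r*
// loads a single element of the memory size, optionally sign-extends it to the
// element size, and replicates it to every active lane, with inactive lanes
// zeroed by the governing predicate. Roughly:
//   elem = MemRead(base + imm, msize_in_bytes);
//   if (is_signed) elem = SignExtend(elem, msize_in_bits, esize_in_bits);
//   for (i = 0; i < lane_count; i++) zt[i] = pg[i] ? elem : 0;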
17777
17778TEST_SVE(sve_ld1rb) {
17779 LoadBcastHelper(config, kBRegSize, kBRegSize, &MacroAssembler::Ld1rb, false);
17780 LoadBcastHelper(config, kBRegSize, kHRegSize, &MacroAssembler::Ld1rb, false);
17781 LoadBcastHelper(config, kBRegSize, kSRegSize, &MacroAssembler::Ld1rb, false);
17782 LoadBcastHelper(config, kBRegSize, kDRegSize, &MacroAssembler::Ld1rb, false);
17783}
17784
17785TEST_SVE(sve_ld1rh) {
17786 LoadBcastHelper(config, kHRegSize, kHRegSize, &MacroAssembler::Ld1rh, false);
17787 LoadBcastHelper(config, kHRegSize, kSRegSize, &MacroAssembler::Ld1rh, false);
17788 LoadBcastHelper(config, kHRegSize, kDRegSize, &MacroAssembler::Ld1rh, false);
17789}
17790
17791TEST_SVE(sve_ld1rw) {
17792 LoadBcastHelper(config, kSRegSize, kSRegSize, &MacroAssembler::Ld1rw, false);
17793 LoadBcastHelper(config, kSRegSize, kDRegSize, &MacroAssembler::Ld1rw, false);
17794}
17795
17796TEST_SVE(sve_ld1rd) {
17797 LoadBcastHelper(config, kDRegSize, kDRegSize, &MacroAssembler::Ld1rd, false);
17798}
17799
17800TEST_SVE(sve_ld1rsb) {
17801 LoadBcastHelper(config, kBRegSize, kHRegSize, &MacroAssembler::Ld1rsb, true);
17802 LoadBcastHelper(config, kBRegSize, kSRegSize, &MacroAssembler::Ld1rsb, true);
17803 LoadBcastHelper(config, kBRegSize, kDRegSize, &MacroAssembler::Ld1rsb, true);
17804}
17805
17806TEST_SVE(sve_ld1rsh) {
17807 LoadBcastHelper(config, kHRegSize, kSRegSize, &MacroAssembler::Ld1rsh, true);
17808 LoadBcastHelper(config, kHRegSize, kDRegSize, &MacroAssembler::Ld1rsh, true);
17809}
17810
17811TEST_SVE(sve_ld1rsw) {
17812 LoadBcastHelper(config, kSRegSize, kDRegSize, &MacroAssembler::Ld1rsw, true);
17813}
17814
TatWai Chong3db2c492020-03-29 22:20:41 -070017815TEST_SVE(sve_prefetch_offset) {
17816 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
17817
17818 START();
17819
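  // Prefetch instructions are hints with no architecturally visible effect on
  // registers or memory, so this test only checks that each prefetch operation
  // and addressing mode can be encoded and executed.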
17820 __ Prfb(PLDL1KEEP, p5, SVEMemOperand(z30.VnS(), 0));
17821 __ Prfb(PLDL1STRM, p5, SVEMemOperand(x28, -11, SVE_MUL_VL));
17822 __ Prfb(PLDL2KEEP, p6, SVEMemOperand(x30, x31));
17823 __ Prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnS(), UXTW));
17824 __ Prfh(PSTL2KEEP, p6, SVEMemOperand(z0.VnS(), 28));
17825 __ Prfh(PSTL2STRM, p4, SVEMemOperand(x17, -3, SVE_MUL_VL));
17826 __ Prfh(PSTL3KEEP, p3, SVEMemOperand(x0, x0));
17827 __ Prfh(PSTL3STRM, p4, SVEMemOperand(x20, z0.VnD()));
17828 __ Prfw(PLDL1KEEP, p3, SVEMemOperand(z23.VnD(), 5));
17829 __ Prfw(PLDL1STRM, p1, SVEMemOperand(x4, 10, SVE_MUL_VL));
17830 __ Prfw(PLDL2KEEP, p2, SVEMemOperand(x22, x22));
17831 __ Prfw(PLDL2STRM, p1, SVEMemOperand(x2, z6.VnS(), SXTW));
17832 __ Prfd(PLDL3KEEP, p5, SVEMemOperand(z11.VnD(), 9));
17833 __ Prfd(PLDL3STRM, p3, SVEMemOperand(x0, -24, SVE_MUL_VL));
17834 __ Prfd(PSTL1KEEP, p7, SVEMemOperand(x5, x5));
17835 __ Prfd(PSTL1STRM, p1, SVEMemOperand(x19, z18.VnS(), SXTW));
17836
17837 END();
17838 if (CAN_RUN()) {
17839 RUN();
17840 }
17841}
17842
Jacob Bramleyd77a8e42019-02-12 16:52:24 +000017843} // namespace aarch64
17844} // namespace vixl