blob: 46ed675b0ab385cf4d6a7c8d5d941ff4ef7934b7 [file] [log] [blame]
Jacob Bramleyd77a8e42019-02-12 16:52:24 +00001// Copyright 2019, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7// * Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// * Neither the name of ARM Limited nor the names of its contributors may be
13// used to endorse or promote products derived from this software without
14// specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <sys/mman.h>
Jacob Bramley85a9c102019-12-09 17:48:29 +000028#include <unistd.h>
Jacob Bramleyd77a8e42019-02-12 16:52:24 +000029
30#include <cfloat>
31#include <cmath>
32#include <cstdio>
33#include <cstdlib>
34#include <cstring>
35
36#include "test-runner.h"
37#include "test-utils.h"
38#include "aarch64/test-utils-aarch64.h"
39
40#include "aarch64/cpu-aarch64.h"
41#include "aarch64/disasm-aarch64.h"
42#include "aarch64/macro-assembler-aarch64.h"
43#include "aarch64/simulator-aarch64.h"
44#include "test-assembler-aarch64.h"
45
46namespace vixl {
47namespace aarch64 {
48
Jacob Bramleye8289202019-07-31 11:25:23 +010049Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
50 // We never free this memory, but we need it to live for as long as the static
51 // linked list of tests, and this is the easiest way to do it.
52 Test* test = new Test(name, fn);
53 test->set_sve_vl_in_bits(vl);
54 return test;
55}
56
57// The TEST_SVE macro works just like the usual TEST macro, but the resulting
58// function receives a `const Test& config` argument, to allow it to query the
59// vector length.
60#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
61// On the Simulator, run SVE tests with several vector lengths, including the
62// extreme values and an intermediate value that isn't a power of two.
63
64#define TEST_SVE(name) \
65 void Test##name(Test* config); \
66 Test* test_##name##_list[] = \
67 {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name), \
68 MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name), \
69 MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
70 void Test##name(Test* config)
71
72#define SVE_SETUP_WITH_FEATURES(...) \
73 SETUP_WITH_FEATURES(__VA_ARGS__); \
74 simulator.SetVectorLengthInBits(config->sve_vl_in_bits())
75
76#else
77// Otherwise, just use whatever the hardware provides.
78static const int kSVEVectorLengthInBits =
79 CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
80 ? CPU::ReadSVEVectorLengthInBits()
81 : 0;
82
83#define TEST_SVE(name) \
84 void Test##name(Test* config); \
85 Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits, \
86 "AARCH64_ASM_" #name "_vlauto", \
87 &Test##name); \
88 void Test##name(Test* config)
89
90#define SVE_SETUP_WITH_FEATURES(...) \
91 SETUP_WITH_FEATURES(__VA_ARGS__); \
92 USE(config)
93
94#endif
95
Jacob Bramley03c0b512019-02-22 16:42:06 +000096// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
97// is optimised for call-site clarity, not generated code quality, so it doesn't
98// exist in the MacroAssembler itself.
99//
100// Usage:
101//
102// int values[] = { 42, 43, 44 };
103// InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 }
104//
105// The rightmost (highest-indexed) array element maps to the lowest-numbered
106// lane.
107template <typename T, size_t N>
108void InsrHelper(MacroAssembler* masm,
109 const ZRegister& zdn,
110 const T (&values)[N]) {
111 for (size_t i = 0; i < N; i++) {
112 masm->Insr(zdn, values[i]);
113 }
114}
115
Jacob Bramley0ce75842019-07-17 18:12:50 +0100116// Conveniently initialise P registers with scalar bit patterns. The destination
117// lane size is ignored. This is optimised for call-site clarity, not generated
118// code quality.
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100119//
120// Usage:
121//
Jacob Bramley0ce75842019-07-17 18:12:50 +0100122// Initialise(&masm, p0, 0x1234); // Sets p0 = 0b'0001'0010'0011'0100
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100123void Initialise(MacroAssembler* masm,
Jacob Bramley0ce75842019-07-17 18:12:50 +0100124 const PRegister& pd,
125 uint64_t value3,
126 uint64_t value2,
127 uint64_t value1,
128 uint64_t value0) {
129 // Generate a literal pool, as in the array form.
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100130 UseScratchRegisterScope temps(masm);
131 Register temp = temps.AcquireX();
132 Label data;
133 Label done;
134
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100135 masm->Adr(temp, &data);
Jacob Bramley66e66712019-08-02 17:45:32 +0100136 masm->Ldr(pd, SVEMemOperand(temp));
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100137 masm->B(&done);
138 {
139 ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
140 masm->bind(&data);
Jacob Bramley0ce75842019-07-17 18:12:50 +0100141 masm->dc64(value0);
142 masm->dc64(value1);
143 masm->dc64(value2);
144 masm->dc64(value3);
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100145 }
146 masm->Bind(&done);
147}
Jacob Bramley0ce75842019-07-17 18:12:50 +0100148void Initialise(MacroAssembler* masm,
149 const PRegister& pd,
150 uint64_t value2,
151 uint64_t value1,
152 uint64_t value0) {
153 Initialise(masm, pd, 0, value2, value1, value0);
154}
155void Initialise(MacroAssembler* masm,
156 const PRegister& pd,
157 uint64_t value1,
158 uint64_t value0) {
159 Initialise(masm, pd, 0, 0, value1, value0);
160}
161void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
162 Initialise(masm, pd, 0, 0, 0, value0);
163}
164
165// Conveniently initialise P registers by lane. This is optimised for call-site
166// clarity, not generated code quality.
167//
168// Usage:
169//
170// int values[] = { 0x0, 0x1, 0x2 };
171// Initialise(&masm, p0.VnS(), values); // Sets p0 = 0b'0000'0001'0010
172//
173// The rightmost (highest-indexed) array element maps to the lowest-numbered
174// lane. Unspecified lanes are set to 0 (inactive).
175//
176// Each element of the `values` array is mapped onto a lane in `pd`. The
177// architecture only respects the lower bit, and writes zero the upper bits, but
178// other (encodable) values can be specified if required by the test.
179template <typename T, size_t N>
180void Initialise(MacroAssembler* masm,
181 const PRegisterWithLaneSize& pd,
182 const T (&values)[N]) {
183 // Turn the array into 64-bit chunks.
184 uint64_t chunks[4] = {0, 0, 0, 0};
185 VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);
186
187 int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
188 VIXL_ASSERT((64 % p_bits_per_lane) == 0);
189 VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);
190
191 uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);
192
193 VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
194 size_t bit = 0;
195 for (int n = static_cast<int>(N - 1); n >= 0; n--) {
196 VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
197 uint64_t value = values[n] & p_lane_mask;
198 chunks[bit / 64] |= value << (bit % 64);
199 bit += p_bits_per_lane;
200 }
201
202 Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
203}
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100204
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000205// Ensure that basic test infrastructure works.
Jacob Bramleye8289202019-07-31 11:25:23 +0100206TEST_SVE(sve_test_infrastructure_z) {
207 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000208 START();
209
Jacob Bramley03c0b512019-02-22 16:42:06 +0000210 __ Mov(x0, 0x0123456789abcdef);
211
212 // Test basic `Insr` behaviour.
213 __ Insr(z0.VnB(), 1);
214 __ Insr(z0.VnB(), 2);
215 __ Insr(z0.VnB(), x0);
216 __ Insr(z0.VnB(), -42);
217 __ Insr(z0.VnB(), 0);
218
219 // Test array inputs.
220 int z1_inputs[] = {3, 4, 5, -42, 0};
221 InsrHelper(&masm, z1.VnH(), z1_inputs);
222
223 // Test that sign-extension works as intended for various lane sizes.
224 __ Dup(z2.VnD(), 0); // Clear the register first.
225 __ Insr(z2.VnB(), -42); // 0xd6
226 __ Insr(z2.VnB(), 0xfe); // 0xfe
227 __ Insr(z2.VnH(), -42); // 0xffd6
228 __ Insr(z2.VnH(), 0xfedc); // 0xfedc
229 __ Insr(z2.VnS(), -42); // 0xffffffd6
230 __ Insr(z2.VnS(), 0xfedcba98); // 0xfedcba98
231 // Use another register for VnD(), so we can support 128-bit Z registers.
232 __ Insr(z3.VnD(), -42); // 0xffffffffffffffd6
233 __ Insr(z3.VnD(), 0xfedcba9876543210); // 0xfedcba9876543210
234
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000235 END();
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000236
Jacob Bramley119bd212019-04-16 10:13:09 +0100237 if (CAN_RUN()) {
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100238 RUN();
Jacob Bramley03c0b512019-02-22 16:42:06 +0000239
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100240 // Test that array checks work properly on a register initialised
241 // lane-by-lane.
242 int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
243 ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());
Jacob Bramley03c0b512019-02-22 16:42:06 +0000244
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100245 // Test that lane-by-lane checks work properly on a register initialised
246 // by array.
247 for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
248 // The rightmost (highest-indexed) array element maps to the
249 // lowest-numbered lane.
250 int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
251 ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
Jacob Bramley03c0b512019-02-22 16:42:06 +0000252 }
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100253
254 uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
255 ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
256 uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
257 ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
Jacob Bramley119bd212019-04-16 10:13:09 +0100258 }
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000259}
260
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100261// Ensure that basic test infrastructure works.
Jacob Bramleye8289202019-07-31 11:25:23 +0100262TEST_SVE(sve_test_infrastructure_p) {
263 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100264 START();
265
266 // Simple cases: move boolean (0 or 1) values.
267
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100268 int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100269 Initialise(&masm, p0.VnB(), p0_inputs);
270
271 int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
272 Initialise(&masm, p1.VnH(), p1_inputs);
273
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100274 int p2_inputs[] = {1, 1, 0, 1};
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100275 Initialise(&masm, p2.VnS(), p2_inputs);
276
277 int p3_inputs[] = {0, 1};
278 Initialise(&masm, p3.VnD(), p3_inputs);
279
280 // Advanced cases: move numeric value into architecturally-ignored bits.
281
282 // B-sized lanes get one bit in a P register, so there are no ignored bits.
283
284 // H-sized lanes get two bits in a P register.
285 int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
286 Initialise(&masm, p4.VnH(), p4_inputs);
287
288 // S-sized lanes get four bits in a P register.
289 int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
290 Initialise(&masm, p5.VnS(), p5_inputs);
291
292 // D-sized lanes get eight bits in a P register.
293 int p6_inputs[] = {0x81, 0xcc, 0x55};
294 Initialise(&masm, p6.VnD(), p6_inputs);
295
296 // The largest possible P register has 32 bytes.
297 int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
298 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
299 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
300 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
301 Initialise(&masm, p7.VnD(), p7_inputs);
302
303 END();
304
305 if (CAN_RUN()) {
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100306 RUN();
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100307
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100308 // Test that lane-by-lane checks work properly. The rightmost
309 // (highest-indexed) array element maps to the lowest-numbered lane.
310 for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
311 int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
312 ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100313 }
Jacob Bramley9d06c4d2019-05-13 18:15:06 +0100314 for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
315 int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
316 ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
317 }
318 for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
319 int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
320 ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
321 }
322 for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
323 int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
324 ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
325 }
326
327 // Test that array checks work properly on predicates initialised with a
328 // possibly-different lane size.
329 // 0b...11'10'01'00'01'10'11
330 int p4_expected[] = {0x39, 0x1b};
331 ASSERT_EQUAL_SVE(p4_expected, p4.VnD());
332
333 ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());
334
335 // 0b...10000001'11001100'01010101
336 int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
337 ASSERT_EQUAL_SVE(p6_expected, p6.VnH());
338
339 // 0b...10011100'10011101'10011110'10011111
340 int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
341 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
342 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100343 }
344}
345
Jacob Bramley935b15b2019-07-04 14:09:22 +0100346// Test that writes to V registers clear the high bits of the corresponding Z
347// register.
Jacob Bramleye8289202019-07-31 11:25:23 +0100348TEST_SVE(sve_v_write_clear) {
349 SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
350 CPUFeatures::kFP,
351 CPUFeatures::kSVE);
Jacob Bramley935b15b2019-07-04 14:09:22 +0100352 START();
353
354 // The Simulator has two mechansisms for writing V registers:
355 // - Write*Register, calling through to SimRegisterBase::Write.
356 // - LogicVRegister::ClearForWrite followed by one or more lane updates.
357 // Try to cover both variants.
358
359 // Prepare some known inputs.
360 uint8_t data[kQRegSizeInBytes];
361 for (size_t i = 0; i < kQRegSizeInBytes; i++) {
362 data[i] = 42 + i;
363 }
364 __ Mov(x10, reinterpret_cast<uintptr_t>(data));
365 __ Fmov(d30, 42.0);
366
Jacob Bramley199339d2019-08-05 18:49:13 +0100367 // Use Index to label the lane indices, so failures are easy to detect and
Jacob Bramley935b15b2019-07-04 14:09:22 +0100368 // diagnose.
369 __ Index(z0.VnB(), 0, 1);
370 __ Index(z1.VnB(), 0, 1);
371 __ Index(z2.VnB(), 0, 1);
372 __ Index(z3.VnB(), 0, 1);
373 __ Index(z4.VnB(), 0, 1);
374
375 __ Index(z10.VnB(), 0, -1);
376 __ Index(z11.VnB(), 0, -1);
377 __ Index(z12.VnB(), 0, -1);
378 __ Index(z13.VnB(), 0, -1);
379 __ Index(z14.VnB(), 0, -1);
380
381 // Instructions using Write*Register (and SimRegisterBase::Write).
382 __ Ldr(b0, MemOperand(x10));
383 __ Fcvt(h1, d30);
384 __ Fmov(s2, 1.5f);
385 __ Fmov(d3, d30);
386 __ Ldr(q4, MemOperand(x10));
387
388 // Instructions using LogicVRegister::ClearForWrite.
389 // These also (incidentally) test that across-lane instructions correctly
390 // ignore the high-order Z register lanes.
391 __ Sminv(b10, v10.V16B());
392 __ Addv(h11, v11.V4H());
393 __ Saddlv(s12, v12.V8H());
394 __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
395 __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());
396
397 END();
398
399 if (CAN_RUN()) {
400 RUN();
401
402 // Check the Q part first.
403 ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
404 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1); // 42.0 (f16)
405 ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2); // 1.5 (f32)
406 ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3); // 42.0 (f64)
407 ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
408 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10); // -15
409 // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
410 ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
411 // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
412 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
413 ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13); // [-8] x 8
414 // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
415 // + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
416 // -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
417 ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);
418
419 // Check that the upper lanes are all clear.
420 for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
421 ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
422 ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
423 ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
424 ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
425 ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
426 ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
427 ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
428 ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
429 ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
430 ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
431 }
432 }
433}
434
Jacob Bramleye8289202019-07-31 11:25:23 +0100435static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
436 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley22023df2019-05-14 17:55:43 +0100437 START();
438
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100439 int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
Jacob Bramley22023df2019-05-14 17:55:43 +0100440 int za_inputs[] = {-39, 1, -3, 2};
441 int zn_inputs[] = {-5, -20, 9, 8};
442 int zm_inputs[] = {9, -5, 4, 5};
443
444 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
445 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
446 ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
447 ZRegister zm = z3.WithLaneSize(lane_size_in_bits);
448
449 // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100450 InsrHelper(&masm, zd, zd_inputs);
Jacob Bramley22023df2019-05-14 17:55:43 +0100451 InsrHelper(&masm, za, za_inputs);
452 InsrHelper(&masm, zn, zn_inputs);
453 InsrHelper(&masm, zm, zm_inputs);
454
455 int p0_inputs[] = {1, 1, 0, 1};
456 int p1_inputs[] = {1, 0, 1, 1};
457 int p2_inputs[] = {0, 1, 1, 1};
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100458 int p3_inputs[] = {1, 1, 1, 0};
Jacob Bramley22023df2019-05-14 17:55:43 +0100459
460 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
461 Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
462 Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
463 Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);
464
465 // The Mla macro automatically selects between mla, mad and movprfx + mla
466 // based on what registers are aliased.
467 ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
468 ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
469 ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100470 ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);
Jacob Bramley22023df2019-05-14 17:55:43 +0100471
472 __ Mov(mla_da_result, za);
473 __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);
474
475 __ Mov(mla_dn_result, zn);
476 __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);
477
478 __ Mov(mla_dm_result, zm);
479 __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);
480
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100481 __ Mov(mla_d_result, zd);
482 __ Mla(mla_d_result, p3.Merging(), za, zn, zm);
Jacob Bramley22023df2019-05-14 17:55:43 +0100483
484 // The Mls macro automatically selects between mls, msb and movprfx + mls
485 // based on what registers are aliased.
486 ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
487 ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
488 ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100489 ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);
Jacob Bramley22023df2019-05-14 17:55:43 +0100490
491 __ Mov(mls_da_result, za);
492 __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);
493
494 __ Mov(mls_dn_result, zn);
495 __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);
496
497 __ Mov(mls_dm_result, zm);
498 __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);
499
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100500 __ Mov(mls_d_result, zd);
501 __ Mls(mls_d_result, p3.Merging(), za, zn, zm);
Jacob Bramley22023df2019-05-14 17:55:43 +0100502
503 END();
504
505 if (CAN_RUN()) {
506 RUN();
507
508 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
509 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
510 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));
511
512 int mla[] = {-84, 101, 33, 42};
513 int mls[] = {6, -99, -39, -38};
514
515 int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
516 ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);
517
518 int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
519 ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);
520
521 int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
522 ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);
523
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100524 int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
525 ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);
Jacob Bramley22023df2019-05-14 17:55:43 +0100526
527 int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
528 ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);
529
530 int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
531 ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);
532
533 int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
534 ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);
535
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +0100536 int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
537 ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
Jacob Bramley22023df2019-05-14 17:55:43 +0100538 }
539}
540
Jacob Bramleye8289202019-07-31 11:25:23 +0100541TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
542TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
543TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
544TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }
Jacob Bramley22023df2019-05-14 17:55:43 +0100545
Jacob Bramleye8289202019-07-31 11:25:23 +0100546TEST_SVE(sve_bitwise_unpredicate_logical) {
547 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chongcfb94212019-05-16 13:30:09 -0700548 START();
549
550 uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
551 InsrHelper(&masm, z8.VnD(), z8_inputs);
552 uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
553 InsrHelper(&masm, z15.VnD(), z15_inputs);
554
555 __ And(z1.VnD(), z8.VnD(), z15.VnD());
556 __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
557 __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
558 __ Orr(z4.VnD(), z8.VnD(), z15.VnD());
559
560 END();
561
562 if (CAN_RUN()) {
563 RUN();
564 uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
565 uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
566 uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
567 uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};
568
569 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
570 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
571 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
572 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
573 }
TatWai Chongcfb94212019-05-16 13:30:09 -0700574}
575
Martyn Capewellf804b602020-02-24 18:57:18 +0000576TEST_SVE(sve_last_r) {
577 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
578 START();
579
580 __ Pfalse(p1.VnB());
581 int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
582 int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
583 Initialise(&masm, p2.VnB(), p2_inputs);
584 Initialise(&masm, p3.VnB(), p3_inputs);
585 __ Ptrue(p4.VnB());
586
587 __ Index(z0.VnB(), 0x10, 1);
588 __ Lasta(x1, p1, z0.VnB());
589 __ Lastb(x2, p1, z0.VnB());
590 __ Lasta(x3, p2, z0.VnB());
591 __ Lastb(x4, p2, z0.VnB());
592 __ Lasta(x5, p3, z0.VnB());
593 __ Lastb(x6, p3, z0.VnB());
594 __ Lasta(x7, p4, z0.VnB());
595
596 __ Punpklo(p3.VnH(), p3.VnB());
597 __ Index(z0.VnH(), 0x1110, 1);
598 __ Lasta(x9, p1, z0.VnH());
599 __ Lastb(x10, p3, z0.VnH());
600 __ Lasta(x12, p4, z0.VnH());
601
602 __ Index(z0.VnS(), 0x11111110, 1);
603 __ Lastb(x13, p1, z0.VnS());
604 __ Lasta(x14, p2, z0.VnS());
605 __ Lastb(x18, p4, z0.VnS());
606
607 __ Index(z0.VnD(), 0x1111111111111110, 1);
608 __ Lasta(x19, p1, z0.VnD());
609 __ Lastb(x20, p3, z0.VnD());
610 __ Lasta(x21, p3, z0.VnD());
611 END();
612
613 if (CAN_RUN()) {
614 RUN();
615
616 ASSERT_EQUAL_64(0x0000000000000010, x1);
617 ASSERT_EQUAL_64(0x0000000000000011, x3);
618 ASSERT_EQUAL_64(0x0000000000000010, x4);
619 ASSERT_EQUAL_64(0x0000000000000019, x5);
620 ASSERT_EQUAL_64(0x0000000000000018, x6);
621 ASSERT_EQUAL_64(0x0000000000000010, x7);
622 ASSERT_EQUAL_64(0x0000000000001110, x9);
623 ASSERT_EQUAL_64(0x0000000000001110, x12);
624 ASSERT_EQUAL_64(0x0000000011111111, x14);
625 ASSERT_EQUAL_64(0x1111111111111110, x19);
626
627 int vl = core.GetSVELaneCount(kBRegSize) * 8;
628 switch (vl) {
629 case 128:
630 ASSERT_EQUAL_64(0x000000000000001f, x2);
631 ASSERT_EQUAL_64(0x0000000000001116, x10);
632 ASSERT_EQUAL_64(0x0000000011111113, x13);
633 ASSERT_EQUAL_64(0x0000000011111113, x18);
634 ASSERT_EQUAL_64(0x1111111111111111, x20);
635 ASSERT_EQUAL_64(0x1111111111111110, x21);
636 break;
637 case 384:
638 ASSERT_EQUAL_64(0x000000000000003f, x2);
639 ASSERT_EQUAL_64(0x0000000000001118, x10);
640 ASSERT_EQUAL_64(0x000000001111111b, x13);
641 ASSERT_EQUAL_64(0x000000001111111b, x18);
642 ASSERT_EQUAL_64(0x1111111111111112, x20);
643 ASSERT_EQUAL_64(0x1111111111111113, x21);
644 break;
645 case 2048:
646 ASSERT_EQUAL_64(0x000000000000000f, x2);
647 ASSERT_EQUAL_64(0x0000000000001118, x10);
648 ASSERT_EQUAL_64(0x000000001111114f, x13);
649 ASSERT_EQUAL_64(0x000000001111114f, x18);
650 ASSERT_EQUAL_64(0x1111111111111112, x20);
651 ASSERT_EQUAL_64(0x1111111111111113, x21);
652 break;
653 default:
654 printf("WARNING: Some tests skipped due to unexpected VL.\n");
655 break;
656 }
657 }
658}
659
660TEST_SVE(sve_last_v) {
661 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
662 START();
663
664 __ Pfalse(p1.VnB());
665 int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
666 int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
667 Initialise(&masm, p2.VnB(), p2_inputs);
668 Initialise(&masm, p3.VnB(), p3_inputs);
669 __ Ptrue(p4.VnB());
670
671 __ Index(z0.VnB(), 0x10, 1);
672 __ Lasta(b1, p1, z0.VnB());
673 __ Lastb(b2, p1, z0.VnB());
674 __ Lasta(b3, p2, z0.VnB());
675 __ Lastb(b4, p2, z0.VnB());
676 __ Lasta(b5, p3, z0.VnB());
677 __ Lastb(b6, p3, z0.VnB());
678 __ Lasta(b7, p4, z0.VnB());
679
680 __ Punpklo(p3.VnH(), p3.VnB());
681 __ Index(z0.VnH(), 0x1110, 1);
682 __ Lasta(h9, p1, z0.VnH());
683 __ Lastb(h10, p3, z0.VnH());
684 __ Lasta(h12, p4, z0.VnH());
685
686 __ Index(z0.VnS(), 0x11111110, 1);
687 __ Lastb(s13, p1, z0.VnS());
688 __ Lasta(s14, p2, z0.VnS());
689 __ Lastb(s18, p4, z0.VnS());
690
691 __ Index(z0.VnD(), 0x1111111111111110, 1);
692 __ Lasta(d19, p1, z0.VnD());
693 __ Lastb(d20, p3, z0.VnD());
694 __ Lasta(d21, p3, z0.VnD());
695 END();
696
697 if (CAN_RUN()) {
698 RUN();
699
700 ASSERT_EQUAL_128(0, 0x0000000000000010, q1);
701 ASSERT_EQUAL_128(0, 0x0000000000000011, q3);
702 ASSERT_EQUAL_128(0, 0x0000000000000010, q4);
703 ASSERT_EQUAL_128(0, 0x0000000000000019, q5);
704 ASSERT_EQUAL_128(0, 0x0000000000000018, q6);
705 ASSERT_EQUAL_128(0, 0x0000000000000010, q7);
706 ASSERT_EQUAL_128(0, 0x0000000000001110, q9);
707 ASSERT_EQUAL_128(0, 0x0000000000001110, q12);
708 ASSERT_EQUAL_128(0, 0x0000000011111111, q14);
709 ASSERT_EQUAL_128(0, 0x1111111111111110, q19);
710
711 int vl = core.GetSVELaneCount(kBRegSize) * 8;
712 switch (vl) {
713 case 128:
714 ASSERT_EQUAL_128(0, 0x000000000000001f, q2);
715 ASSERT_EQUAL_128(0, 0x0000000000001116, q10);
716 ASSERT_EQUAL_128(0, 0x0000000011111113, q13);
717 ASSERT_EQUAL_128(0, 0x0000000011111113, q18);
718 ASSERT_EQUAL_128(0, 0x1111111111111111, q20);
719 ASSERT_EQUAL_128(0, 0x1111111111111110, q21);
720 break;
721 case 384:
722 ASSERT_EQUAL_128(0, 0x000000000000003f, q2);
723 ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
724 ASSERT_EQUAL_128(0, 0x000000001111111b, q13);
725 ASSERT_EQUAL_128(0, 0x000000001111111b, q18);
726 ASSERT_EQUAL_128(0, 0x1111111111111112, q20);
727 ASSERT_EQUAL_128(0, 0x1111111111111113, q21);
728 break;
729 case 2048:
730 ASSERT_EQUAL_128(0, 0x000000000000000f, q2);
731 ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
732 ASSERT_EQUAL_128(0, 0x000000001111114f, q13);
733 ASSERT_EQUAL_128(0, 0x000000001111114f, q18);
734 ASSERT_EQUAL_128(0, 0x1111111111111112, q20);
735 ASSERT_EQUAL_128(0, 0x1111111111111113, q21);
736 break;
737 default:
738 printf("WARNING: Some tests skipped due to unexpected VL.\n");
739 break;
740 }
741 }
742}
743
744TEST_SVE(sve_clast_r) {
745 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
746 START();
747
748 __ Pfalse(p1.VnB());
749 int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
750 int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
751 Initialise(&masm, p2.VnB(), p2_inputs);
752 Initialise(&masm, p3.VnB(), p3_inputs);
753 __ Ptrue(p4.VnB());
754
755 __ Index(z0.VnB(), 0x10, 1);
756 __ Mov(x1, -1);
757 __ Mov(x2, -1);
758 __ Clasta(x1, p1, x1, z0.VnB());
759 __ Clastb(x2, p1, x2, z0.VnB());
760 __ Clasta(x3, p2, x3, z0.VnB());
761 __ Clastb(x4, p2, x4, z0.VnB());
762 __ Clasta(x5, p3, x5, z0.VnB());
763 __ Clastb(x6, p3, x6, z0.VnB());
764 __ Clasta(x7, p4, x7, z0.VnB());
765
766 __ Punpklo(p3.VnH(), p3.VnB());
767 __ Index(z0.VnH(), 0x1110, 1);
768 __ Mov(x9, -1);
769 __ Clasta(x9, p1, x9, z0.VnH());
770 __ Clastb(x10, p3, x10, z0.VnH());
771 __ Clasta(x12, p4, x12, z0.VnH());
772
773 __ Index(z0.VnS(), 0x11111110, 1);
774 __ Mov(x13, -1);
775 __ Clasta(x13, p1, x13, z0.VnS());
776 __ Clastb(x14, p2, x14, z0.VnS());
777 __ Clasta(x18, p4, x18, z0.VnS());
778
779 __ Index(z0.VnD(), 0x1111111111111110, 1);
780 __ Mov(x19, -1);
781 __ Clasta(x19, p1, x19, z0.VnD());
782 __ Clastb(x20, p2, x20, z0.VnD());
783 __ Clasta(x21, p4, x21, z0.VnD());
784 END();
785
786 if (CAN_RUN()) {
787 RUN();
788 ASSERT_EQUAL_64(0x00000000000000ff, x1);
789 ASSERT_EQUAL_64(0x00000000000000ff, x2);
790 ASSERT_EQUAL_64(0x0000000000000011, x3);
791 ASSERT_EQUAL_64(0x0000000000000010, x4);
792 ASSERT_EQUAL_64(0x0000000000000019, x5);
793 ASSERT_EQUAL_64(0x0000000000000018, x6);
794 ASSERT_EQUAL_64(0x0000000000000010, x7);
795 ASSERT_EQUAL_64(0x000000000000ffff, x9);
796 ASSERT_EQUAL_64(0x0000000000001110, x12);
797 ASSERT_EQUAL_64(0x00000000ffffffff, x13);
798 ASSERT_EQUAL_64(0x0000000011111110, x14);
799 ASSERT_EQUAL_64(0x0000000011111110, x18);
800 ASSERT_EQUAL_64(0xffffffffffffffff, x19);
801 ASSERT_EQUAL_64(0x1111111111111110, x20);
802 ASSERT_EQUAL_64(0x1111111111111110, x21);
803
804 int vl = core.GetSVELaneCount(kBRegSize) * 8;
805 switch (vl) {
806 case 128:
807 ASSERT_EQUAL_64(0x0000000000001116, x10);
808 break;
809 case 384:
810 ASSERT_EQUAL_64(0x0000000000001118, x10);
811 break;
812 case 2048:
813 ASSERT_EQUAL_64(0x0000000000001118, x10);
814 break;
815 default:
816 printf("WARNING: Some tests skipped due to unexpected VL.\n");
817 break;
818 }
819 }
820}
821
822TEST_SVE(sve_clast_v) {
823 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
824 START();
825
826 __ Pfalse(p1.VnB());
827 int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
828 int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
829 Initialise(&masm, p2.VnB(), p2_inputs);
830 Initialise(&masm, p3.VnB(), p3_inputs);
831 __ Ptrue(p4.VnB());
832
833 __ Index(z0.VnB(), 0x10, 1);
834 __ Dup(z1.VnB(), -1);
835 __ Dup(z2.VnB(), -1);
836 __ Clasta(b1, p1, b1, z0.VnB());
837 __ Clastb(b2, p1, b2, z0.VnB());
838 __ Clasta(b3, p2, b3, z0.VnB());
839 __ Clastb(b4, p2, b4, z0.VnB());
840 __ Clasta(b5, p3, b5, z0.VnB());
841 __ Clastb(b6, p3, b6, z0.VnB());
842 __ Clasta(b7, p4, b7, z0.VnB());
843
844 __ Punpklo(p3.VnH(), p3.VnB());
845 __ Index(z0.VnH(), 0x1110, 1);
846 __ Dup(z9.VnB(), -1);
847 __ Clasta(h9, p1, h9, z0.VnH());
848 __ Clastb(h10, p3, h10, z0.VnH());
849 __ Clasta(h12, p4, h12, z0.VnH());
850
851 __ Index(z0.VnS(), 0x11111110, 1);
852 __ Dup(z13.VnB(), -1);
853 __ Clasta(s13, p1, s13, z0.VnS());
854 __ Clastb(s14, p2, s14, z0.VnS());
855 __ Clasta(s18, p4, s18, z0.VnS());
856
857 __ Index(z0.VnD(), 0x1111111111111110, 1);
858 __ Dup(z19.VnB(), -1);
859 __ Clasta(d19, p1, d19, z0.VnD());
860 __ Clastb(d20, p2, d20, z0.VnD());
861 __ Clasta(d21, p4, d21, z0.VnD());
862 END();
863
864 if (CAN_RUN()) {
865 RUN();
866 ASSERT_EQUAL_128(0, 0x00000000000000ff, q1);
867 ASSERT_EQUAL_128(0, 0x00000000000000ff, q2);
868 ASSERT_EQUAL_128(0, 0x0000000000000011, q3);
869 ASSERT_EQUAL_128(0, 0x0000000000000010, q4);
870 ASSERT_EQUAL_128(0, 0x0000000000000019, q5);
871 ASSERT_EQUAL_128(0, 0x0000000000000018, q6);
872 ASSERT_EQUAL_128(0, 0x0000000000000010, q7);
873 ASSERT_EQUAL_128(0, 0x000000000000ffff, q9);
874 ASSERT_EQUAL_128(0, 0x0000000000001110, q12);
875 ASSERT_EQUAL_128(0, 0x00000000ffffffff, q13);
876 ASSERT_EQUAL_128(0, 0x0000000011111110, q14);
877 ASSERT_EQUAL_128(0, 0x0000000011111110, q18);
878 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
879 ASSERT_EQUAL_128(0, 0x1111111111111110, q20);
880 ASSERT_EQUAL_128(0, 0x1111111111111110, q21);
881
882 int vl = core.GetSVELaneCount(kBRegSize) * 8;
883 switch (vl) {
884 case 128:
885 ASSERT_EQUAL_128(0, 0x0000000000001116, q10);
886 break;
887 case 384:
888 ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
889 break;
890 case 2048:
891 ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
892 break;
893 default:
894 printf("WARNING: Some tests skipped due to unexpected VL.\n");
895 break;
896 }
897 }
898}
899
900TEST_SVE(sve_clast_z) {
901 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
902 START();
903
904 __ Pfalse(p1.VnB());
905 int p2_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
906 int p3_inputs[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
907 Initialise(&masm, p2.VnB(), p2_inputs);
908 Initialise(&masm, p3.VnB(), p3_inputs);
909 __ Ptrue(p4.VnB());
910
911 __ Index(z0.VnB(), 0x10, 1);
912 __ Dup(z1.VnB(), 0xff);
913 __ Dup(z2.VnB(), 0xff);
914 __ Clasta(z1.VnB(), p1, z1.VnB(), z0.VnB());
915 __ Clastb(z2.VnB(), p1, z2.VnB(), z0.VnB());
916 __ Clasta(z3.VnB(), p2, z3.VnB(), z0.VnB());
917 __ Clastb(z4.VnB(), p2, z4.VnB(), z0.VnB());
918 __ Clasta(z5.VnB(), p3, z5.VnB(), z0.VnB());
919 __ Clastb(z6.VnB(), p3, z6.VnB(), z0.VnB());
920 __ Clasta(z7.VnB(), p4, z7.VnB(), z0.VnB());
921
922 __ Punpklo(p3.VnH(), p3.VnB());
923 __ Index(z0.VnH(), 0x1110, 1);
924 __ Dup(z9.VnB(), 0xff);
925 __ Clasta(z9.VnH(), p1, z9.VnH(), z0.VnH());
926 __ Clastb(z10.VnH(), p3, z10.VnH(), z0.VnH());
927 __ Clasta(z12.VnH(), p4, z12.VnH(), z0.VnH());
928
929 __ Index(z0.VnS(), 0x11111110, 1);
930 __ Dup(z13.VnB(), 0xff);
931 __ Clasta(z13.VnS(), p1, z13.VnS(), z0.VnS());
932 __ Clastb(z14.VnS(), p2, z14.VnS(), z0.VnS());
933 __ Clasta(z16.VnS(), p4, z16.VnS(), z0.VnS());
934
935 __ Index(z0.VnD(), 0x1111111111111110, 1);
936 __ Dup(z17.VnB(), 0xff);
937 __ Clasta(z17.VnD(), p1, z17.VnD(), z0.VnD());
938 __ Clastb(z18.VnD(), p2, z18.VnD(), z0.VnD());
939 __ Clasta(z20.VnD(), p4, z20.VnD(), z0.VnD());
940 END();
941
942 if (CAN_RUN()) {
943 RUN();
944 uint64_t z1_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
945 uint64_t z2_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
946 uint64_t z3_expected[] = {0x1111111111111111, 0x1111111111111111};
947 uint64_t z4_expected[] = {0x1010101010101010, 0x1010101010101010};
948 uint64_t z5_expected[] = {0x1919191919191919, 0x1919191919191919};
949 uint64_t z6_expected[] = {0x1818181818181818, 0x1818181818181818};
950 uint64_t z7_expected[] = {0x1010101010101010, 0x1010101010101010};
951 uint64_t z9_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
952 uint64_t z12_expected[] = {0x1110111011101110, 0x1110111011101110};
953 uint64_t z13_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
954 uint64_t z14_expected[] = {0x1111111011111110, 0x1111111011111110};
955 uint64_t z16_expected[] = {0x1111111011111110, 0x1111111011111110};
956 uint64_t z17_expected[] = {0xffffffffffffffff, 0xffffffffffffffff};
957 uint64_t z18_expected[] = {0x1111111111111110, 0x1111111111111110};
958 uint64_t z20_expected[] = {0x1111111111111110, 0x1111111111111110};
959
960 uint64_t z10_expected_vl128[] = {0x1116111611161116, 0x1116111611161116};
961 uint64_t z10_expected_vl_long[] = {0x1118111811181118, 0x1118111811181118};
962
963 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
964 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
965 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
966 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
967 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
968 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
969 ASSERT_EQUAL_SVE(z7_expected, z7.VnD());
970 ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
971 ASSERT_EQUAL_SVE(z12_expected, z12.VnD());
972 ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
973 ASSERT_EQUAL_SVE(z14_expected, z14.VnD());
974 ASSERT_EQUAL_SVE(z16_expected, z16.VnD());
975 ASSERT_EQUAL_SVE(z17_expected, z17.VnD());
976 ASSERT_EQUAL_SVE(z18_expected, z18.VnD());
977 ASSERT_EQUAL_SVE(z20_expected, z20.VnD());
978
979 int vl = core.GetSVELaneCount(kBRegSize) * 8;
980 switch (vl) {
981 case 128:
982 ASSERT_EQUAL_SVE(z10_expected_vl128, z10.VnD());
983 break;
984 case 384:
985 case 2048:
986 ASSERT_EQUAL_SVE(z10_expected_vl_long, z10.VnD());
987 break;
988 default:
989 printf("WARNING: Some tests skipped due to unexpected VL.\n");
990 break;
991 }
992 }
993}
994
995TEST_SVE(sve_compact) {
996 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
997 START();
998
999 __ Ptrue(p0.VnB());
1000 __ Pfalse(p1.VnB());
1001 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
1002 __ Zip1(p3.VnS(), p1.VnS(), p0.VnS());
1003 __ Zip1(p4.VnD(), p0.VnD(), p1.VnD());
1004
1005 __ Index(z0.VnS(), 0x11111111, 0x11111111);
1006 __ Mov(q0, q0);
1007 __ Compact(z1.VnS(), p0, z0.VnS());
1008 __ Compact(z2.VnS(), p2, z0.VnS());
1009 __ Compact(z0.VnS(), p3, z0.VnS());
1010
1011 __ Index(z3.VnD(), 0x1111111111111111, 0x1111111111111111);
1012 __ Mov(q3, q3);
1013 __ Compact(z4.VnD(), p0, z3.VnD());
1014 __ Compact(z5.VnD(), p1, z3.VnD());
1015 __ Compact(z6.VnD(), p4, z3.VnD());
1016
1017 END();
1018
1019 if (CAN_RUN()) {
1020 RUN();
1021 uint64_t z1_expected[] = {0x4444444433333333, 0x2222222211111111};
1022 uint64_t z2_expected[] = {0x0000000000000000, 0x3333333311111111};
1023 uint64_t z0_expected[] = {0x0000000000000000, 0x4444444422222222};
1024 uint64_t z4_expected[] = {0x2222222222222222, 0x1111111111111111};
1025 uint64_t z5_expected[] = {0x0000000000000000, 0x0000000000000000};
1026 uint64_t z6_expected[] = {0x0000000000000000, 0x1111111111111111};
1027 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1028 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
1029 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1030 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1031 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1032 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
1033 }
1034}
1035
1036TEST_SVE(sve_splice) {
1037 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1038 START();
1039
1040 __ Ptrue(p0.VnB());
1041 __ Pfalse(p1.VnB());
1042 int p2b_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
1043 int p3b_inputs[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0};
1044 int p4b_inputs[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1045 int p5b_inputs[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0};
1046 int p6b_inputs[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0};
1047 Initialise(&masm, p2.VnB(), p2b_inputs);
1048 Initialise(&masm, p3.VnB(), p3b_inputs);
1049 Initialise(&masm, p4.VnB(), p4b_inputs);
1050 Initialise(&masm, p5.VnB(), p5b_inputs);
1051 Initialise(&masm, p6.VnB(), p6b_inputs);
1052
1053 __ Index(z30.VnB(), 1, 1);
1054
1055 __ Index(z0.VnB(), -1, -1);
1056 __ Splice(z0.VnB(), p0, z0.VnB(), z30.VnB());
1057 __ Index(z1.VnB(), -1, -1);
1058 __ Splice(z1.VnB(), p1, z1.VnB(), z30.VnB());
1059 __ Index(z2.VnB(), -1, -1);
1060 __ Splice(z2.VnB(), p2, z2.VnB(), z30.VnB());
1061 __ Index(z3.VnB(), -1, -1);
1062 __ Splice(z3.VnB(), p3, z3.VnB(), z30.VnB());
1063 __ Index(z4.VnB(), -1, -1);
1064 __ Splice(z4.VnB(), p4, z4.VnB(), z30.VnB());
1065 __ Index(z5.VnB(), -1, -1);
1066 __ Splice(z5.VnB(), p5, z5.VnB(), z30.VnB());
1067 __ Index(z6.VnB(), -1, -1);
1068 __ Splice(z6.VnB(), p6, z6.VnB(), z30.VnB());
1069
1070 int p2h_inputs[] = {0, 0, 0, 0, 0, 0, 1, 0};
1071 int p3h_inputs[] = {0, 0, 1, 0, 0, 0, 1, 0};
1072 Initialise(&masm, p2.VnH(), p2h_inputs);
1073 Initialise(&masm, p3.VnH(), p3h_inputs);
1074
1075 __ Index(z30.VnH(), 1, 1);
1076 __ Index(z29.VnH(), -1, -1);
1077 __ Splice(z7.VnH(), p2, z29.VnH(), z30.VnH());
1078 __ Splice(z8.VnH(), p3, z29.VnH(), z30.VnH());
1079
1080 int p2s_inputs[] = {0, 0, 1, 0};
1081 int p3s_inputs[] = {1, 0, 1, 0};
1082 Initialise(&masm, p2.VnS(), p2s_inputs);
1083 Initialise(&masm, p3.VnS(), p3s_inputs);
1084
1085 __ Index(z30.VnS(), 1, 1);
1086 __ Index(z29.VnS(), -1, -1);
1087 __ Splice(z9.VnS(), p2, z29.VnS(), z30.VnS());
1088 __ Splice(z10.VnS(), p3, z29.VnS(), z30.VnS());
1089
1090 int p2d_inputs[] = {0, 1};
1091 int p3d_inputs[] = {1, 0};
1092 Initialise(&masm, p2.VnD(), p2d_inputs);
1093 Initialise(&masm, p3.VnD(), p3d_inputs);
1094
1095 __ Index(z30.VnD(), 1, 1);
1096 __ Index(z29.VnD(), -1, -1);
1097 __ Splice(z11.VnD(), p2, z29.VnD(), z30.VnD());
1098 __ Splice(z30.VnD(), p3, z29.VnD(), z30.VnD());
1099
1100 END();
1101
1102 if (CAN_RUN()) {
1103 RUN();
1104 uint64_t z0_expected[] = {0xf0f1f2f3f4f5f6f7, 0xf8f9fafbfcfdfeff};
1105 uint64_t z1_expected[] = {0x100f0e0d0c0b0a09, 0x0807060504030201};
1106 uint64_t z2_expected[] = {0x0f0e0d0c0b0a0908, 0x07060504030201ff};
1107 uint64_t z3_expected[] = {0x0f0e0d0c0b0a0908, 0x07060504030201fe};
1108 uint64_t z4_expected[] = {0x0f0e0d0c0b0a0908, 0x07060504030201f0};
1109 uint64_t z5_expected[] = {0x0c0b0a0908070605, 0x04030201f6f7f8f9};
1110 uint64_t z6_expected[] = {0x01f0f1f2f3f4f5f6, 0xf7f8f9fafbfcfdfe};
1111 uint64_t z7_expected[] = {0x0007000600050004, 0x000300020001fffe};
1112 uint64_t z8_expected[] = {0x000300020001fffa, 0xfffbfffcfffdfffe};
1113 uint64_t z9_expected[] = {0x0000000300000002, 0x00000001fffffffe};
1114 uint64_t z10_expected[] = {0x00000001fffffffc, 0xfffffffdfffffffe};
1115 uint64_t z11_expected[] = {0x0000000000000001, 0xffffffffffffffff};
1116 uint64_t z30_expected[] = {0x0000000000000001, 0xfffffffffffffffe};
1117
1118 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1119 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1120 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
1121 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
1122 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1123 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1124 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
1125 ASSERT_EQUAL_SVE(z7_expected, z7.VnD());
1126 ASSERT_EQUAL_SVE(z8_expected, z8.VnD());
1127 ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
1128 ASSERT_EQUAL_SVE(z10_expected, z10.VnD());
1129 ASSERT_EQUAL_SVE(z11_expected, z11.VnD());
1130 ASSERT_EQUAL_SVE(z30_expected, z30.VnD());
1131 }
1132}
1133
Jacob Bramleye8289202019-07-31 11:25:23 +01001134TEST_SVE(sve_predicate_logical) {
1135 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chongf4fa8222019-06-17 12:08:14 -07001136 START();
1137
1138 // 0b...01011010'10110111
1139 int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1}; // Pm
1140 // 0b...11011001'01010010
1141 int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0}; // Pn
1142 // 0b...01010101'10110010
1143 int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0}; // pg
1144
1145 Initialise(&masm, p10.VnB(), p10_inputs);
1146 Initialise(&masm, p11.VnB(), p11_inputs);
1147 Initialise(&masm, p12.VnB(), p12_inputs);
1148
1149 __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
1150 __ Mrs(x0, NZCV);
1151 __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
1152 __ Mrs(x1, NZCV);
1153 __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
1154 __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
1155 __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
1156 __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
1157 __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
1158 __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());
1159
1160 END();
1161
1162 if (CAN_RUN()) {
1163 RUN();
1164
1165 // 0b...01010000'00010010
1166 int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
1167 // 0b...00000001'00000000
1168 int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
1169 // 0b...00000001'10100000
1170 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
1171 // 0b...00000101'10100000
1172 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
1173 // 0b...00000100'00000000
1174 int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1175 // 0b...01010101'00010010
1176 int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
1177 // 0b...01010001'10110010
1178 int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
1179 // 0b...01011011'00010111
1180 int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};
1181
1182 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1183 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
1184 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
1185 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
1186 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1187 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
1188 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
1189 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
1190
TatWai Chong96713fe2019-06-04 16:39:37 -07001191 ASSERT_EQUAL_32(SVEFirstFlag, w0);
1192 ASSERT_EQUAL_32(SVENotLastFlag, w1);
1193 }
1194}
TatWai Chongf4fa8222019-06-17 12:08:14 -07001195
Jacob Bramleye8289202019-07-31 11:25:23 +01001196TEST_SVE(sve_int_compare_vectors) {
1197 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong96713fe2019-06-04 16:39:37 -07001198 START();
1199
1200 int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
1201 int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
1202 int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
1203 InsrHelper(&masm, z10.VnB(), z10_inputs);
1204 InsrHelper(&masm, z11.VnB(), z11_inputs);
1205 Initialise(&masm, p0.VnB(), p0_inputs);
1206
1207 __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
1208 __ Mrs(x6, NZCV);
1209
1210 uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
1211 uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
1212 int p1_inputs[] = {1, 1};
1213 InsrHelper(&masm, z12.VnD(), z12_inputs);
1214 InsrHelper(&masm, z13.VnD(), z13_inputs);
1215 Initialise(&masm, p1.VnD(), p1_inputs);
1216
1217 __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
1218 __ Mrs(x7, NZCV);
1219
1220 int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
1221 int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};
1222
1223 int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
1224 InsrHelper(&masm, z14.VnH(), z14_inputs);
1225 InsrHelper(&masm, z15.VnH(), z15_inputs);
1226 Initialise(&masm, p2.VnH(), p2_inputs);
1227
1228 __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
1229 __ Mrs(x8, NZCV);
1230
1231 __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
1232 __ Mrs(x9, NZCV);
1233
1234 int z16_inputs[] = {0, -1, 0, 0};
1235 int z17_inputs[] = {0, 0, 2147483647, -2147483648};
1236 int p3_inputs[] = {1, 1, 1, 1};
1237 InsrHelper(&masm, z16.VnS(), z16_inputs);
1238 InsrHelper(&masm, z17.VnS(), z17_inputs);
1239 Initialise(&masm, p3.VnS(), p3_inputs);
1240
1241 __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
1242 __ Mrs(x10, NZCV);
1243
1244 __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
1245 __ Mrs(x11, NZCV);
1246
1247 // Architectural aliases testing.
1248 __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB()); // HS
1249 __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD()); // HI
1250 __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH()); // GE
1251 __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS()); // GT
1252
1253 END();
1254
1255 if (CAN_RUN()) {
1256 RUN();
1257
1258 int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
1259 for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
1260 int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
1261 ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
1262 }
1263
1264 int p7_expected[] = {1, 0};
1265 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1266
1267 int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
1268 ASSERT_EQUAL_SVE(p8_expected, p8.VnH());
1269
1270 int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
1271 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1272
1273 int p10_expected[] = {0, 0, 0, 1};
1274 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1275
1276 int p11_expected[] = {0, 1, 1, 1};
1277 ASSERT_EQUAL_SVE(p11_expected, p11.VnS());
1278
1279 // Reuse the expected results to verify the architectural aliases.
1280 ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
1281 ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
1282 ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
1283 ASSERT_EQUAL_SVE(p10_expected, p15.VnS());
1284
1285 ASSERT_EQUAL_32(SVEFirstFlag, w6);
1286 ASSERT_EQUAL_32(NoFlag, w7);
1287 ASSERT_EQUAL_32(NoFlag, w8);
1288 ASSERT_EQUAL_32(NoFlag, w9);
1289 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1290 }
1291}
1292
Jacob Bramleye8289202019-07-31 11:25:23 +01001293TEST_SVE(sve_int_compare_vectors_wide_elements) {
1294 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong96713fe2019-06-04 16:39:37 -07001295 START();
1296
1297 int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
1298 int src2_inputs_1[] = {0, -1};
1299 int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
1300 InsrHelper(&masm, z13.VnB(), src1_inputs_1);
1301 InsrHelper(&masm, z19.VnD(), src2_inputs_1);
1302 Initialise(&masm, p0.VnB(), mask_inputs_1);
1303
1304 __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
1305 __ Mrs(x2, NZCV);
1306 __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
1307 __ Mrs(x3, NZCV);
1308
1309 int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
1310 int src2_inputs_2[] = {0, -32767};
1311 int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
1312 InsrHelper(&masm, z13.VnH(), src1_inputs_2);
1313 InsrHelper(&masm, z19.VnD(), src2_inputs_2);
1314 Initialise(&masm, p0.VnH(), mask_inputs_2);
1315
1316 __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
1317 __ Mrs(x4, NZCV);
1318 __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
1319 __ Mrs(x5, NZCV);
1320
1321 int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
1322 int src2_inputs_3[] = {0, -2147483648};
1323 int mask_inputs_3[] = {1, 1, 1, 1};
1324 InsrHelper(&masm, z13.VnS(), src1_inputs_3);
1325 InsrHelper(&masm, z19.VnD(), src2_inputs_3);
1326 Initialise(&masm, p0.VnS(), mask_inputs_3);
1327
1328 __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
1329 __ Mrs(x6, NZCV);
1330 __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
1331 __ Mrs(x7, NZCV);
1332
1333 int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
1334 int src2_inputs_4[] = {0x00, 0x7f};
1335 int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
1336 InsrHelper(&masm, z13.VnB(), src1_inputs_4);
1337 InsrHelper(&masm, z19.VnD(), src2_inputs_4);
1338 Initialise(&masm, p0.VnB(), mask_inputs_4);
1339
1340 __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
1341 __ Mrs(x8, NZCV);
1342 __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
1343 __ Mrs(x9, NZCV);
1344
1345 int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
1346 int src2_inputs_5[] = {0x8000, 0xffff};
1347 int mask_inputs_5[] = {1, 1, 1, 1};
1348 InsrHelper(&masm, z13.VnS(), src1_inputs_5);
1349 InsrHelper(&masm, z19.VnD(), src2_inputs_5);
1350 Initialise(&masm, p0.VnS(), mask_inputs_5);
1351
1352 __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
1353 __ Mrs(x10, NZCV);
1354 __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
1355 __ Mrs(x11, NZCV);
1356
1357 END();
1358
1359 if (CAN_RUN()) {
1360 RUN();
1361 int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
1362 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
1363
1364 int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
1365 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
1366
1367 int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
1368 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
1369
1370 int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
1371 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1372
1373 int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
1374 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1375
1376 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
1377 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
1378
1379 int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
1380 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1381
1382 int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
1383 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
1384
1385 int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
1386 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1387
1388 int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
1389 ASSERT_EQUAL_SVE(p11_expected, p11.VnS());
1390
1391 ASSERT_EQUAL_32(NoFlag, w2);
1392 ASSERT_EQUAL_32(NoFlag, w3);
1393 ASSERT_EQUAL_32(NoFlag, w4);
1394 ASSERT_EQUAL_32(SVENotLastFlag, w5);
1395 ASSERT_EQUAL_32(SVEFirstFlag, w6);
1396 ASSERT_EQUAL_32(SVENotLastFlag, w7);
1397 ASSERT_EQUAL_32(SVEFirstFlag, w8);
1398 ASSERT_EQUAL_32(SVEFirstFlag, w9);
1399 ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
1400 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
TatWai Chongf4fa8222019-06-17 12:08:14 -07001401 }
TatWai Chongf4fa8222019-06-17 12:08:14 -07001402}
1403
Jacob Bramleye8289202019-07-31 11:25:23 +01001404TEST_SVE(sve_bitwise_imm) {
1405 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chonga1885a52019-04-15 17:19:14 -07001406 START();
1407
1408 // clang-format off
1409 uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
1410 uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
1411 uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
1412 0x0123, 0x4567, 0x89ab, 0xcdef};
1413 uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
1414 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
1415 // clang-format on
1416
1417 InsrHelper(&masm, z1.VnD(), z21_inputs);
1418 InsrHelper(&masm, z2.VnS(), z22_inputs);
1419 InsrHelper(&masm, z3.VnH(), z23_inputs);
1420 InsrHelper(&masm, z4.VnB(), z24_inputs);
1421
1422 __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
1423 __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
1424 __ And(z3.VnH(), z3.VnH(), 0x0ff0);
1425 __ And(z4.VnB(), z4.VnB(), 0x3f);
1426
1427 InsrHelper(&masm, z5.VnD(), z21_inputs);
1428 InsrHelper(&masm, z6.VnS(), z22_inputs);
1429 InsrHelper(&masm, z7.VnH(), z23_inputs);
1430 InsrHelper(&masm, z8.VnB(), z24_inputs);
1431
1432 __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
1433 __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
1434 __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
1435 __ Eor(z8.VnB(), z8.VnB(), 0x3f);
1436
1437 InsrHelper(&masm, z9.VnD(), z21_inputs);
1438 InsrHelper(&masm, z10.VnS(), z22_inputs);
1439 InsrHelper(&masm, z11.VnH(), z23_inputs);
1440 InsrHelper(&masm, z12.VnB(), z24_inputs);
1441
1442 __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
1443 __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
1444 __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
1445 __ Orr(z12.VnB(), z12.VnB(), 0x3f);
1446
Jacob Bramley6069fd42019-06-24 10:20:45 +01001447 {
1448 // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
1449 // so here we test `dupm` directly.
1450 ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
1451 __ dupm(z13.VnD(), 0x7ffffff800000000);
1452 __ dupm(z14.VnS(), 0x7ffc7ffc);
1453 __ dupm(z15.VnH(), 0x3ffc);
1454 __ dupm(z16.VnB(), 0xc3);
1455 }
TatWai Chonga1885a52019-04-15 17:19:14 -07001456
1457 END();
1458
1459 if (CAN_RUN()) {
1460 RUN();
1461
1462 // clang-format off
1463 uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
1464 uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
1465 uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
1466 0x0120, 0x0560, 0x09a0, 0x0de0};
1467 uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
1468 0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};
1469
1470 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1471 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1472 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1473 ASSERT_EQUAL_SVE(z4_expected, z4.VnB());
1474
1475 uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
1476 uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
1477 uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
1478 0x0ed3, 0x4a97, 0x865b, 0xc21f};
1479 uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
1480 0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};
1481
1482 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1483 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1484 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1485 ASSERT_EQUAL_SVE(z8_expected, z8.VnB());
1486
1487 uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
1488 uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
1489 uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
1490 0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
1491 uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
1492 0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};
1493
1494 ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
1495 ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
1496 ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
1497 ASSERT_EQUAL_SVE(z12_expected, z12.VnB());
1498
1499 uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
1500 uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
1501 uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
1502 0x3ffc, 0x3ffc, 0x3ffc ,0x3ffc};
1503 ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
1504 ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
1505 ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
1506 // clang-format on
1507 }
TatWai Chonga1885a52019-04-15 17:19:14 -07001508}
1509
Jacob Bramleye8289202019-07-31 11:25:23 +01001510TEST_SVE(sve_dup_imm) {
Jacob Bramley6069fd42019-06-24 10:20:45 +01001511 // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
1512 // unencodable immediates.
1513
Jacob Bramleye8289202019-07-31 11:25:23 +01001514 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6069fd42019-06-24 10:20:45 +01001515 START();
1516
1517 // Encodable with `dup` (shift 0).
1518 __ Dup(z0.VnD(), -1);
1519 __ Dup(z1.VnS(), 0x7f);
1520 __ Dup(z2.VnH(), -0x80);
1521 __ Dup(z3.VnB(), 42);
1522
1523 // Encodable with `dup` (shift 8).
TatWai Chong6995bfd2019-09-26 10:48:05 +01001524 __ Dup(z4.VnD(), -42 * 256);
1525 __ Dup(z5.VnS(), -0x8000);
1526 __ Dup(z6.VnH(), 0x7f00);
Jacob Bramley6069fd42019-06-24 10:20:45 +01001527 // B-sized lanes cannot take a shift of 8.
1528
1529 // Encodable with `dupm` (but not `dup`).
1530 __ Dup(z10.VnD(), 0x3fc);
1531 __ Dup(z11.VnS(), -516097); // 0xfff81fff, as a signed int.
1532 __ Dup(z12.VnH(), 0x0001);
1533 // All values that fit B-sized lanes are encodable with `dup`.
1534
1535 // Cases that require immediate synthesis.
1536 __ Dup(z20.VnD(), 0x1234);
1537 __ Dup(z21.VnD(), -4242);
1538 __ Dup(z22.VnD(), 0xfedcba9876543210);
1539 __ Dup(z23.VnS(), 0x01020304);
1540 __ Dup(z24.VnS(), -0x01020304);
1541 __ Dup(z25.VnH(), 0x3c38);
1542 // All values that fit B-sized lanes are directly encodable.
1543
1544 END();
1545
1546 if (CAN_RUN()) {
1547 RUN();
1548
1549 ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
1550 ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
1551 ASSERT_EQUAL_SVE(0xff80, z2.VnH());
1552 ASSERT_EQUAL_SVE(0x2a, z3.VnB());
1553
TatWai Chong6995bfd2019-09-26 10:48:05 +01001554 ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
1555 ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
1556 ASSERT_EQUAL_SVE(0x7f00, z6.VnH());
Jacob Bramley6069fd42019-06-24 10:20:45 +01001557
1558 ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
1559 ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
1560 ASSERT_EQUAL_SVE(0x0001, z12.VnH());
1561
1562 ASSERT_EQUAL_SVE(0x1234, z20.VnD());
1563 ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
1564 ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
1565 ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
1566 ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
1567 ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
1568 }
1569}
1570
Jacob Bramleye8289202019-07-31 11:25:23 +01001571TEST_SVE(sve_inc_dec_p_scalar) {
1572 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001573 START();
1574
1575 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1576 Initialise(&masm, p0.VnB(), p0_inputs);
1577
1578 int p0_b_count = 9;
1579 int p0_h_count = 5;
1580 int p0_s_count = 3;
1581 int p0_d_count = 2;
1582
1583 // 64-bit operations preserve their high bits.
1584 __ Mov(x0, 0x123456780000002a);
1585 __ Decp(x0, p0.VnB());
1586
1587 __ Mov(x1, 0x123456780000002a);
1588 __ Incp(x1, p0.VnH());
1589
1590 // Check that saturation does not occur.
1591 __ Mov(x10, 1);
1592 __ Decp(x10, p0.VnS());
1593
1594 __ Mov(x11, UINT64_MAX);
1595 __ Incp(x11, p0.VnD());
1596
1597 __ Mov(x12, INT64_MAX);
1598 __ Incp(x12, p0.VnB());
1599
1600 // With an all-true predicate, these instructions increment or decrement by
1601 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001602 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001603
1604 __ Mov(x20, 0x4000000000000000);
1605 __ Decp(x20, p15.VnB());
1606
1607 __ Mov(x21, 0x4000000000000000);
1608 __ Incp(x21, p15.VnH());
1609
1610 END();
1611 if (CAN_RUN()) {
1612 RUN();
1613
1614 ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
1615 ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);
1616
1617 ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
1618 ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
1619 ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);
1620
1621 ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
1622 ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
1623 }
1624}
1625
Jacob Bramleye8289202019-07-31 11:25:23 +01001626TEST_SVE(sve_sqinc_sqdec_p_scalar) {
1627 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001628 START();
1629
1630 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1631 Initialise(&masm, p0.VnB(), p0_inputs);
1632
1633 int p0_b_count = 9;
1634 int p0_h_count = 5;
1635 int p0_s_count = 3;
1636 int p0_d_count = 2;
1637
1638 uint64_t dummy_high = 0x1234567800000000;
1639
1640 // 64-bit operations preserve their high bits.
1641 __ Mov(x0, dummy_high + 42);
1642 __ Sqdecp(x0, p0.VnB());
1643
1644 __ Mov(x1, dummy_high + 42);
1645 __ Sqincp(x1, p0.VnH());
1646
1647 // 32-bit operations sign-extend into their high bits.
1648 __ Mov(x2, dummy_high + 42);
1649 __ Sqdecp(x2, p0.VnS(), w2);
1650
1651 __ Mov(x3, dummy_high + 42);
1652 __ Sqincp(x3, p0.VnD(), w3);
1653
1654 __ Mov(x4, dummy_high + 1);
1655 __ Sqdecp(x4, p0.VnS(), w4);
1656
1657 __ Mov(x5, dummy_high - 1);
1658 __ Sqincp(x5, p0.VnD(), w5);
1659
1660 // Check that saturation behaves correctly.
1661 __ Mov(x10, 0x8000000000000001); // INT64_MIN + 1
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001662 __ Sqdecp(x10, p0.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001663
1664 __ Mov(x11, dummy_high + 0x80000001); // INT32_MIN + 1
1665 __ Sqdecp(x11, p0.VnH(), w11);
1666
1667 __ Mov(x12, 1);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001668 __ Sqdecp(x12, p0.VnS());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001669
1670 __ Mov(x13, dummy_high + 1);
1671 __ Sqdecp(x13, p0.VnD(), w13);
1672
1673 __ Mov(x14, 0x7ffffffffffffffe); // INT64_MAX - 1
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001674 __ Sqincp(x14, p0.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001675
1676 __ Mov(x15, dummy_high + 0x7ffffffe); // INT32_MAX - 1
1677 __ Sqincp(x15, p0.VnH(), w15);
1678
1679 // Don't use x16 and x17 since they are scratch registers by default.
1680
1681 __ Mov(x18, 0xffffffffffffffff);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001682 __ Sqincp(x18, p0.VnS());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001683
1684 __ Mov(x19, dummy_high + 0xffffffff);
1685 __ Sqincp(x19, p0.VnD(), w19);
1686
1687 __ Mov(x20, dummy_high + 0xffffffff);
1688 __ Sqdecp(x20, p0.VnB(), w20);
1689
1690 // With an all-true predicate, these instructions increment or decrement by
1691 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001692 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001693
1694 __ Mov(x21, 0);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001695 __ Sqdecp(x21, p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001696
1697 __ Mov(x22, 0);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00001698 __ Sqincp(x22, p15.VnH());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001699
1700 __ Mov(x23, dummy_high);
1701 __ Sqdecp(x23, p15.VnS(), w23);
1702
1703 __ Mov(x24, dummy_high);
1704 __ Sqincp(x24, p15.VnD(), w24);
1705
1706 END();
1707 if (CAN_RUN()) {
1708 RUN();
1709
1710 // 64-bit operations preserve their high bits.
1711 ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
1712 ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);
1713
1714 // 32-bit operations sign-extend into their high bits.
1715 ASSERT_EQUAL_64(42 - p0_s_count, x2);
1716 ASSERT_EQUAL_64(42 + p0_d_count, x3);
1717 ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
1718 ASSERT_EQUAL_64(p0_d_count - 1, x5);
1719
1720 // Check that saturation behaves correctly.
1721 ASSERT_EQUAL_64(INT64_MIN, x10);
1722 ASSERT_EQUAL_64(INT32_MIN, x11);
1723 ASSERT_EQUAL_64(1 - p0_s_count, x12);
1724 ASSERT_EQUAL_64(1 - p0_d_count, x13);
1725 ASSERT_EQUAL_64(INT64_MAX, x14);
1726 ASSERT_EQUAL_64(INT32_MAX, x15);
1727 ASSERT_EQUAL_64(p0_s_count - 1, x18);
1728 ASSERT_EQUAL_64(p0_d_count - 1, x19);
1729 ASSERT_EQUAL_64(-1 - p0_b_count, x20);
1730
1731 // Check all-true predicates.
1732 ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
1733 ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
1734 ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
1735 ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
1736 }
1737}
1738
Jacob Bramleye8289202019-07-31 11:25:23 +01001739TEST_SVE(sve_uqinc_uqdec_p_scalar) {
1740 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001741 START();
1742
1743 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1744 Initialise(&masm, p0.VnB(), p0_inputs);
1745
1746 int p0_b_count = 9;
1747 int p0_h_count = 5;
1748 int p0_s_count = 3;
1749 int p0_d_count = 2;
1750
1751 uint64_t dummy_high = 0x1234567800000000;
1752
1753 // 64-bit operations preserve their high bits.
1754 __ Mov(x0, dummy_high + 42);
1755 __ Uqdecp(x0, p0.VnB());
1756
1757 __ Mov(x1, dummy_high + 42);
1758 __ Uqincp(x1, p0.VnH());
1759
1760 // 32-bit operations zero-extend into their high bits.
1761 __ Mov(x2, dummy_high + 42);
1762 __ Uqdecp(x2, p0.VnS(), w2);
1763
1764 __ Mov(x3, dummy_high + 42);
1765 __ Uqincp(x3, p0.VnD(), w3);
1766
1767 __ Mov(x4, dummy_high + 0x80000001);
1768 __ Uqdecp(x4, p0.VnS(), w4);
1769
1770 __ Mov(x5, dummy_high + 0x7fffffff);
1771 __ Uqincp(x5, p0.VnD(), w5);
1772
1773 // Check that saturation behaves correctly.
1774 __ Mov(x10, 1);
1775 __ Uqdecp(x10, p0.VnB(), x10);
1776
1777 __ Mov(x11, dummy_high + 1);
1778 __ Uqdecp(x11, p0.VnH(), w11);
1779
1780 __ Mov(x12, 0x8000000000000000); // INT64_MAX + 1
1781 __ Uqdecp(x12, p0.VnS(), x12);
1782
1783 __ Mov(x13, dummy_high + 0x80000000); // INT32_MAX + 1
1784 __ Uqdecp(x13, p0.VnD(), w13);
1785
1786 __ Mov(x14, 0xfffffffffffffffe); // UINT64_MAX - 1
1787 __ Uqincp(x14, p0.VnB(), x14);
1788
1789 __ Mov(x15, dummy_high + 0xfffffffe); // UINT32_MAX - 1
1790 __ Uqincp(x15, p0.VnH(), w15);
1791
1792 // Don't use x16 and x17 since they are scratch registers by default.
1793
1794 __ Mov(x18, 0x7ffffffffffffffe); // INT64_MAX - 1
1795 __ Uqincp(x18, p0.VnS(), x18);
1796
1797 __ Mov(x19, dummy_high + 0x7ffffffe); // INT32_MAX - 1
1798 __ Uqincp(x19, p0.VnD(), w19);
1799
1800 // With an all-true predicate, these instructions increment or decrement by
1801 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001802 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001803
1804 __ Mov(x20, 0x4000000000000000);
1805 __ Uqdecp(x20, p15.VnB(), x20);
1806
1807 __ Mov(x21, 0x4000000000000000);
1808 __ Uqincp(x21, p15.VnH(), x21);
1809
1810 __ Mov(x22, dummy_high + 0x40000000);
1811 __ Uqdecp(x22, p15.VnS(), w22);
1812
1813 __ Mov(x23, dummy_high + 0x40000000);
1814 __ Uqincp(x23, p15.VnD(), w23);
1815
1816 END();
1817 if (CAN_RUN()) {
1818 RUN();
1819
1820 // 64-bit operations preserve their high bits.
1821 ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
1822 ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);
1823
1824 // 32-bit operations zero-extend into their high bits.
1825 ASSERT_EQUAL_64(42 - p0_s_count, x2);
1826 ASSERT_EQUAL_64(42 + p0_d_count, x3);
1827 ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
1828 ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);
1829
1830 // Check that saturation behaves correctly.
1831 ASSERT_EQUAL_64(0, x10);
1832 ASSERT_EQUAL_64(0, x11);
1833 ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
1834 ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
1835 ASSERT_EQUAL_64(UINT64_MAX, x14);
1836 ASSERT_EQUAL_64(UINT32_MAX, x15);
1837 ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
1838 ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);
1839
1840 // Check all-true predicates.
1841 ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
1842 ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
1843 ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
1844 ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
1845 }
1846}
1847
Jacob Bramleye8289202019-07-31 11:25:23 +01001848TEST_SVE(sve_inc_dec_p_vector) {
1849 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001850 START();
1851
1852 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1853 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1854 Initialise(&masm, p0.VnB(), p0_inputs);
1855
1856 // Check that saturation does not occur.
1857
1858 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1859 InsrHelper(&masm, z0.VnD(), z0_inputs);
1860
1861 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1862 InsrHelper(&masm, z1.VnD(), z1_inputs);
1863
1864 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1865 InsrHelper(&masm, z2.VnS(), z2_inputs);
1866
1867 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1868 InsrHelper(&masm, z3.VnH(), z3_inputs);
1869
1870 // The MacroAssembler implements non-destructive operations using movprfx.
1871 __ Decp(z10.VnD(), p0, z0.VnD());
1872 __ Decp(z11.VnD(), p0, z1.VnD());
1873 __ Decp(z12.VnS(), p0, z2.VnS());
1874 __ Decp(z13.VnH(), p0, z3.VnH());
1875
1876 __ Incp(z14.VnD(), p0, z0.VnD());
1877 __ Incp(z15.VnD(), p0, z1.VnD());
1878 __ Incp(z16.VnS(), p0, z2.VnS());
1879 __ Incp(z17.VnH(), p0, z3.VnH());
1880
1881 // Also test destructive forms.
1882 __ Mov(z4, z0);
1883 __ Mov(z5, z1);
1884 __ Mov(z6, z2);
1885 __ Mov(z7, z3);
1886
1887 __ Decp(z0.VnD(), p0);
1888 __ Decp(z1.VnD(), p0);
1889 __ Decp(z2.VnS(), p0);
1890 __ Decp(z3.VnH(), p0);
1891
1892 __ Incp(z4.VnD(), p0);
1893 __ Incp(z5.VnD(), p0);
1894 __ Incp(z6.VnS(), p0);
1895 __ Incp(z7.VnH(), p0);
1896
1897 END();
1898 if (CAN_RUN()) {
1899 RUN();
1900
1901 // z0_inputs[...] - number of active D lanes (2)
1902 int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
1903 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1904
1905 // z1_inputs[...] - number of active D lanes (2)
1906 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1907 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1908
1909 // z2_inputs[...] - number of active S lanes (3)
1910 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
1911 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1912
1913 // z3_inputs[...] - number of active H lanes (5)
1914 int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
1915 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1916
1917 // z0_inputs[...] + number of active D lanes (2)
1918 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1919 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1920
1921 // z1_inputs[...] + number of active D lanes (2)
1922 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
1923 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1924
1925 // z2_inputs[...] + number of active S lanes (3)
1926 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
1927 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1928
1929 // z3_inputs[...] + number of active H lanes (5)
1930 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
1931 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1932
1933 // Check that the non-destructive macros produced the same results.
1934 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1935 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1936 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1937 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1938 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1939 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1940 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1941 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1942 }
1943}
1944
Jacob Bramleye8289202019-07-31 11:25:23 +01001945TEST_SVE(sve_inc_dec_ptrue_vector) {
1946 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001947 START();
1948
1949 // With an all-true predicate, these instructions increment or decrement by
1950 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001951 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001952
1953 __ Dup(z0.VnD(), 0);
1954 __ Decp(z0.VnD(), p15);
1955
1956 __ Dup(z1.VnS(), 0);
1957 __ Decp(z1.VnS(), p15);
1958
1959 __ Dup(z2.VnH(), 0);
1960 __ Decp(z2.VnH(), p15);
1961
1962 __ Dup(z3.VnD(), 0);
1963 __ Incp(z3.VnD(), p15);
1964
1965 __ Dup(z4.VnS(), 0);
1966 __ Incp(z4.VnS(), p15);
1967
1968 __ Dup(z5.VnH(), 0);
1969 __ Incp(z5.VnH(), p15);
1970
1971 END();
1972 if (CAN_RUN()) {
1973 RUN();
1974
1975 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1976 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1977 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1978
1979 for (int i = 0; i < d_lane_count; i++) {
1980 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1981 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1982 }
1983
1984 for (int i = 0; i < s_lane_count; i++) {
1985 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1986 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1987 }
1988
1989 for (int i = 0; i < h_lane_count; i++) {
1990 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1991 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1992 }
1993 }
1994}
1995
Jacob Bramleye8289202019-07-31 11:25:23 +01001996TEST_SVE(sve_sqinc_sqdec_p_vector) {
1997 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001998 START();
1999
2000 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
2001 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
2002 Initialise(&masm, p0.VnB(), p0_inputs);
2003
2004 // Check that saturation behaves correctly.
2005
2006 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
2007 InsrHelper(&masm, z0.VnD(), z0_inputs);
2008
2009 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
2010 InsrHelper(&masm, z1.VnD(), z1_inputs);
2011
2012 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
2013 InsrHelper(&masm, z2.VnS(), z2_inputs);
2014
2015 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
2016 InsrHelper(&masm, z3.VnH(), z3_inputs);
2017
2018 // The MacroAssembler implements non-destructive operations using movprfx.
2019 __ Sqdecp(z10.VnD(), p0, z0.VnD());
2020 __ Sqdecp(z11.VnD(), p0, z1.VnD());
2021 __ Sqdecp(z12.VnS(), p0, z2.VnS());
2022 __ Sqdecp(z13.VnH(), p0, z3.VnH());
2023
2024 __ Sqincp(z14.VnD(), p0, z0.VnD());
2025 __ Sqincp(z15.VnD(), p0, z1.VnD());
2026 __ Sqincp(z16.VnS(), p0, z2.VnS());
2027 __ Sqincp(z17.VnH(), p0, z3.VnH());
2028
2029 // Also test destructive forms.
2030 __ Mov(z4, z0);
2031 __ Mov(z5, z1);
2032 __ Mov(z6, z2);
2033 __ Mov(z7, z3);
2034
2035 __ Sqdecp(z0.VnD(), p0);
2036 __ Sqdecp(z1.VnD(), p0);
2037 __ Sqdecp(z2.VnS(), p0);
2038 __ Sqdecp(z3.VnH(), p0);
2039
2040 __ Sqincp(z4.VnD(), p0);
2041 __ Sqincp(z5.VnD(), p0);
2042 __ Sqincp(z6.VnS(), p0);
2043 __ Sqincp(z7.VnH(), p0);
2044
2045 END();
2046 if (CAN_RUN()) {
2047 RUN();
2048
2049 // z0_inputs[...] - number of active D lanes (2)
2050 int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
2051 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
2052
2053 // z1_inputs[...] - number of active D lanes (2)
2054 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
2055 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
2056
2057 // z2_inputs[...] - number of active S lanes (3)
2058 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
2059 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
2060
2061 // z3_inputs[...] - number of active H lanes (5)
2062 int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
2063 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
2064
2065 // z0_inputs[...] + number of active D lanes (2)
2066 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
2067 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
2068
2069 // z1_inputs[...] + number of active D lanes (2)
2070 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
2071 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
2072
2073 // z2_inputs[...] + number of active S lanes (3)
2074 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
2075 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
2076
2077 // z3_inputs[...] + number of active H lanes (5)
2078 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
2079 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
2080
2081 // Check that the non-destructive macros produced the same results.
2082 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
2083 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
2084 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
2085 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
2086 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
2087 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
2088 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
2089 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
2090 }
2091}
2092
Jacob Bramleye8289202019-07-31 11:25:23 +01002093TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
2094 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002095 START();
2096
2097 // With an all-true predicate, these instructions increment or decrement by
2098 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01002099 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002100
2101 __ Dup(z0.VnD(), 0);
2102 __ Sqdecp(z0.VnD(), p15);
2103
2104 __ Dup(z1.VnS(), 0);
2105 __ Sqdecp(z1.VnS(), p15);
2106
2107 __ Dup(z2.VnH(), 0);
2108 __ Sqdecp(z2.VnH(), p15);
2109
2110 __ Dup(z3.VnD(), 0);
2111 __ Sqincp(z3.VnD(), p15);
2112
2113 __ Dup(z4.VnS(), 0);
2114 __ Sqincp(z4.VnS(), p15);
2115
2116 __ Dup(z5.VnH(), 0);
2117 __ Sqincp(z5.VnH(), p15);
2118
2119 END();
2120 if (CAN_RUN()) {
2121 RUN();
2122
2123 int d_lane_count = core.GetSVELaneCount(kDRegSize);
2124 int s_lane_count = core.GetSVELaneCount(kSRegSize);
2125 int h_lane_count = core.GetSVELaneCount(kHRegSize);
2126
2127 for (int i = 0; i < d_lane_count; i++) {
2128 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
2129 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
2130 }
2131
2132 for (int i = 0; i < s_lane_count; i++) {
2133 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
2134 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
2135 }
2136
2137 for (int i = 0; i < h_lane_count; i++) {
2138 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
2139 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
2140 }
2141 }
2142}
2143
Jacob Bramleye8289202019-07-31 11:25:23 +01002144TEST_SVE(sve_uqinc_uqdec_p_vector) {
2145 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002146 START();
2147
2148 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
2149 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
2150 Initialise(&masm, p0.VnB(), p0_inputs);
2151
2152 // Check that saturation behaves correctly.
2153
2154 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
2155 InsrHelper(&masm, z0.VnD(), z0_inputs);
2156
2157 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
2158 InsrHelper(&masm, z1.VnD(), z1_inputs);
2159
2160 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
2161 InsrHelper(&masm, z2.VnS(), z2_inputs);
2162
2163 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
2164 InsrHelper(&masm, z3.VnH(), z3_inputs);
2165
2166 // The MacroAssembler implements non-destructive operations using movprfx.
2167 __ Uqdecp(z10.VnD(), p0, z0.VnD());
2168 __ Uqdecp(z11.VnD(), p0, z1.VnD());
2169 __ Uqdecp(z12.VnS(), p0, z2.VnS());
2170 __ Uqdecp(z13.VnH(), p0, z3.VnH());
2171
2172 __ Uqincp(z14.VnD(), p0, z0.VnD());
2173 __ Uqincp(z15.VnD(), p0, z1.VnD());
2174 __ Uqincp(z16.VnS(), p0, z2.VnS());
2175 __ Uqincp(z17.VnH(), p0, z3.VnH());
2176
2177 // Also test destructive forms.
2178 __ Mov(z4, z0);
2179 __ Mov(z5, z1);
2180 __ Mov(z6, z2);
2181 __ Mov(z7, z3);
2182
2183 __ Uqdecp(z0.VnD(), p0);
2184 __ Uqdecp(z1.VnD(), p0);
2185 __ Uqdecp(z2.VnS(), p0);
2186 __ Uqdecp(z3.VnH(), p0);
2187
2188 __ Uqincp(z4.VnD(), p0);
2189 __ Uqincp(z5.VnD(), p0);
2190 __ Uqincp(z6.VnS(), p0);
2191 __ Uqincp(z7.VnH(), p0);
2192
2193 END();
2194 if (CAN_RUN()) {
2195 RUN();
2196
2197 // z0_inputs[...] - number of active D lanes (2)
2198 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
2199 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
2200
2201 // z1_inputs[...] - number of active D lanes (2)
2202 uint64_t z1_expected[] = {0x12345678ffffff28,
2203 0,
2204 0xfffffffffffffffd,
2205 0x7ffffffffffffffd};
2206 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
2207
2208 // z2_inputs[...] - number of active S lanes (3)
2209 uint32_t z2_expected[] =
2210 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
2211 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
2212
2213 // z3_inputs[...] - number of active H lanes (5)
2214 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
2215 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
2216
2217 // z0_inputs[...] + number of active D lanes (2)
2218 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
2219 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
2220
2221 // z1_inputs[...] + number of active D lanes (2)
2222 uint64_t z5_expected[] = {0x12345678ffffff2c,
2223 2,
2224 UINT64_MAX,
2225 0x8000000000000001};
2226 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
2227
2228 // z2_inputs[...] + number of active S lanes (3)
2229 uint32_t z6_expected[] =
2230 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
2231 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
2232
2233 // z3_inputs[...] + number of active H lanes (5)
2234 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
2235 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
2236
2237 // Check that the non-destructive macros produced the same results.
2238 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
2239 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
2240 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
2241 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
2242 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
2243 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
2244 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
2245 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
2246 }
2247}
2248
Jacob Bramleye8289202019-07-31 11:25:23 +01002249TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
2250 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002251 START();
2252
2253 // With an all-true predicate, these instructions increment or decrement by
2254 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01002255 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01002256
2257 __ Mov(x0, 0x1234567800000000);
2258 __ Mov(x1, 0x12340000);
2259 __ Mov(x2, 0x1200);
2260
2261 __ Dup(z0.VnD(), x0);
2262 __ Uqdecp(z0.VnD(), p15);
2263
2264 __ Dup(z1.VnS(), x1);
2265 __ Uqdecp(z1.VnS(), p15);
2266
2267 __ Dup(z2.VnH(), x2);
2268 __ Uqdecp(z2.VnH(), p15);
2269
2270 __ Dup(z3.VnD(), x0);
2271 __ Uqincp(z3.VnD(), p15);
2272
2273 __ Dup(z4.VnS(), x1);
2274 __ Uqincp(z4.VnS(), p15);
2275
2276 __ Dup(z5.VnH(), x2);
2277 __ Uqincp(z5.VnH(), p15);
2278
2279 END();
2280 if (CAN_RUN()) {
2281 RUN();
2282
2283 int d_lane_count = core.GetSVELaneCount(kDRegSize);
2284 int s_lane_count = core.GetSVELaneCount(kSRegSize);
2285 int h_lane_count = core.GetSVELaneCount(kHRegSize);
2286
2287 for (int i = 0; i < d_lane_count; i++) {
2288 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
2289 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
2290 }
2291
2292 for (int i = 0; i < s_lane_count; i++) {
2293 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
2294 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
2295 }
2296
2297 for (int i = 0; i < h_lane_count; i++) {
2298 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
2299 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
2300 }
2301 }
2302}
2303
Jacob Bramleye8289202019-07-31 11:25:23 +01002304TEST_SVE(sve_index) {
2305 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleycd8148c2019-07-11 18:43:20 +01002306 START();
2307
2308 // Simple cases.
2309 __ Index(z0.VnB(), 0, 1);
2310 __ Index(z1.VnH(), 1, 1);
2311 __ Index(z2.VnS(), 2, 1);
2312 __ Index(z3.VnD(), 3, 1);
2313
2314 // Synthesised immediates.
2315 __ Index(z4.VnB(), 42, -1);
2316 __ Index(z5.VnH(), -1, 42);
2317 __ Index(z6.VnS(), 42, 42);
2318
2319 // Register arguments.
2320 __ Mov(x0, 42);
2321 __ Mov(x1, -3);
2322 __ Index(z10.VnD(), x0, x1);
2323 __ Index(z11.VnB(), w0, w1);
2324 // The register size should correspond to the lane size, but VIXL allows any
2325 // register at least as big as the lane size.
2326 __ Index(z12.VnB(), x0, x1);
2327 __ Index(z13.VnH(), w0, x1);
2328 __ Index(z14.VnS(), x0, w1);
2329
2330 // Integer overflow.
2331 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
2332 __ Index(z21.VnH(), 7, -3);
2333 __ Index(z22.VnS(), INT32_MAX - 2, 1);
2334 __ Index(z23.VnD(), INT64_MIN + 6, -7);
2335
2336 END();
2337
2338 if (CAN_RUN()) {
2339 RUN();
2340
2341 int b_lane_count = core.GetSVELaneCount(kBRegSize);
2342 int h_lane_count = core.GetSVELaneCount(kHRegSize);
2343 int s_lane_count = core.GetSVELaneCount(kSRegSize);
2344 int d_lane_count = core.GetSVELaneCount(kDRegSize);
2345
2346 uint64_t b_mask = GetUintMask(kBRegSize);
2347 uint64_t h_mask = GetUintMask(kHRegSize);
2348 uint64_t s_mask = GetUintMask(kSRegSize);
2349 uint64_t d_mask = GetUintMask(kDRegSize);
2350
2351 // Simple cases.
2352 for (int i = 0; i < b_lane_count; i++) {
2353 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
2354 }
2355 for (int i = 0; i < h_lane_count; i++) {
2356 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
2357 }
2358 for (int i = 0; i < s_lane_count; i++) {
2359 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
2360 }
2361 for (int i = 0; i < d_lane_count; i++) {
2362 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
2363 }
2364
2365 // Synthesised immediates.
2366 for (int i = 0; i < b_lane_count; i++) {
2367 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
2368 }
2369 for (int i = 0; i < h_lane_count; i++) {
2370 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
2371 }
2372 for (int i = 0; i < s_lane_count; i++) {
2373 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
2374 }
2375
2376 // Register arguments.
2377 for (int i = 0; i < d_lane_count; i++) {
2378 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
2379 }
2380 for (int i = 0; i < b_lane_count; i++) {
2381 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
2382 }
2383 for (int i = 0; i < b_lane_count; i++) {
2384 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
2385 }
2386 for (int i = 0; i < h_lane_count; i++) {
2387 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
2388 }
2389 for (int i = 0; i < s_lane_count; i++) {
2390 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
2391 }
2392
2393 // Integer overflow.
2394 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
2395 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
2396 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
2397 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
2398 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
2399 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
2400 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
2401 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
2402 }
2403}
2404
TatWai Chongc844bb22019-06-10 15:32:53 -07002405TEST(sve_int_compare_count_and_limit_scalars) {
2406 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2407 START();
2408
2409 __ Mov(w20, 0xfffffffd);
2410 __ Mov(w21, 0xffffffff);
2411
2412 __ Whilele(p0.VnB(), w20, w21);
2413 __ Mrs(x0, NZCV);
2414 __ Whilele(p1.VnH(), w20, w21);
2415 __ Mrs(x1, NZCV);
2416
2417 __ Mov(w20, 0xffffffff);
2418 __ Mov(w21, 0x00000000);
2419
2420 __ Whilelt(p2.VnS(), w20, w21);
2421 __ Mrs(x2, NZCV);
2422 __ Whilelt(p3.VnD(), w20, w21);
2423 __ Mrs(x3, NZCV);
2424
2425 __ Mov(w20, 0xfffffffd);
2426 __ Mov(w21, 0xffffffff);
2427
2428 __ Whilels(p4.VnB(), w20, w21);
2429 __ Mrs(x4, NZCV);
2430 __ Whilels(p5.VnH(), w20, w21);
2431 __ Mrs(x5, NZCV);
2432
2433 __ Mov(w20, 0xffffffff);
2434 __ Mov(w21, 0x00000000);
2435
2436 __ Whilelo(p6.VnS(), w20, w21);
2437 __ Mrs(x6, NZCV);
2438 __ Whilelo(p7.VnD(), w20, w21);
2439 __ Mrs(x7, NZCV);
2440
2441 __ Mov(x20, 0xfffffffffffffffd);
2442 __ Mov(x21, 0xffffffffffffffff);
2443
2444 __ Whilele(p8.VnB(), x20, x21);
2445 __ Mrs(x8, NZCV);
2446 __ Whilele(p9.VnH(), x20, x21);
2447 __ Mrs(x9, NZCV);
2448
2449 __ Mov(x20, 0xffffffffffffffff);
2450 __ Mov(x21, 0x0000000000000000);
2451
2452 __ Whilelt(p10.VnS(), x20, x21);
2453 __ Mrs(x10, NZCV);
2454 __ Whilelt(p11.VnD(), x20, x21);
2455 __ Mrs(x11, NZCV);
2456
2457 __ Mov(x20, 0xfffffffffffffffd);
2458 __ Mov(x21, 0xffffffffffffffff);
2459
2460 __ Whilels(p12.VnB(), x20, x21);
2461 __ Mrs(x12, NZCV);
2462 __ Whilels(p13.VnH(), x20, x21);
2463 __ Mrs(x13, NZCV);
2464
2465 __ Mov(x20, 0xffffffffffffffff);
2466 __ Mov(x21, 0x0000000000000000);
2467
2468 __ Whilelo(p14.VnS(), x20, x21);
2469 __ Mrs(x14, NZCV);
2470 __ Whilelo(p15.VnD(), x20, x21);
2471 __ Mrs(x15, NZCV);
2472
2473 END();
2474
2475 if (CAN_RUN()) {
2476 RUN();
2477
2478 // 0b...00000000'00000111
2479 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
2480 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
2481
2482 // 0b...00000000'00010101
2483 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
2484 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
2485
2486 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
2487 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
2488
2489 int p3_expected[] = {0x00, 0x01};
2490 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
2491
2492 // 0b...11111111'11111111
2493 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
2494 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
2495
2496 // 0b...01010101'01010101
2497 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
2498 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2499
2500 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
2501 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2502
2503 int p7_expected[] = {0x00, 0x00};
2504 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
2505
2506 // 0b...00000000'00000111
2507 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
2508 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
2509
2510 // 0b...00000000'00010101
2511 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
2512 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
2513
2514 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
2515 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
2516
2517 int p11_expected[] = {0x00, 0x01};
2518 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2519
2520 // 0b...11111111'11111111
2521 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
2522 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
2523
2524 // 0b...01010101'01010101
2525 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
2526 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
2527
2528 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
2529 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
2530
2531 int p15_expected[] = {0x00, 0x00};
2532 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
2533
2534 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
2535 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
2536 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
2537 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
2538 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2539 ASSERT_EQUAL_32(SVEFirstFlag, w5);
2540 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
2541 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
2542 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
2543 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
2544 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
2545 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
2546 ASSERT_EQUAL_32(SVEFirstFlag, w12);
2547 ASSERT_EQUAL_32(SVEFirstFlag, w13);
2548 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
2549 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
2550 }
2551}
2552
TatWai Chong302729c2019-06-14 16:18:51 -07002553TEST(sve_int_compare_vectors_signed_imm) {
2554 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2555 START();
2556
2557 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
2558 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2559 InsrHelper(&masm, z13.VnB(), z13_inputs);
2560 Initialise(&masm, p0.VnB(), mask_inputs1);
2561
2562 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2563 __ Mrs(x2, NZCV);
2564 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2565
2566 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2567 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2568 InsrHelper(&masm, z14.VnH(), z14_inputs);
2569 Initialise(&masm, p0.VnH(), mask_inputs2);
2570
2571 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2572 __ Mrs(x4, NZCV);
2573 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2574
2575 int z15_inputs[] = {0, 1, -1, INT_MIN};
2576 int mask_inputs3[] = {0, 1, 1, 1};
2577 InsrHelper(&masm, z15.VnS(), z15_inputs);
2578 Initialise(&masm, p0.VnS(), mask_inputs3);
2579
2580 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2581 __ Mrs(x6, NZCV);
2582 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2583
2584 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2585 __ Mrs(x8, NZCV);
2586 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2587
2588 int64_t z16_inputs[] = {0, -1};
2589 int mask_inputs4[] = {1, 1};
2590 InsrHelper(&masm, z16.VnD(), z16_inputs);
2591 Initialise(&masm, p0.VnD(), mask_inputs4);
2592
2593 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2594 __ Mrs(x10, NZCV);
2595 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2596
2597 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2598 __ Mrs(x12, NZCV);
2599 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2600
2601 END();
2602
2603 if (CAN_RUN()) {
2604 RUN();
2605
2606 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2607 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2608
2609 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2610 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2611
2612 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2613 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2614
2615 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2616 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2617
2618 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2619 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2620
2621 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2622 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2623
2624 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2625 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2626
2627 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2628 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2629
2630 int p10_expected[] = {0x00, 0x01};
2631 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2632
2633 int p11_expected[] = {0x00, 0x00};
2634 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2635
2636 int p12_expected[] = {0x01, 0x00};
2637 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2638
2639 int p13_expected[] = {0x01, 0x01};
2640 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2641
2642 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2643 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2644 ASSERT_EQUAL_32(NoFlag, w6);
2645 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2646 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2647 ASSERT_EQUAL_32(NoFlag, w12);
2648 }
2649}
2650
2651TEST(sve_int_compare_vectors_unsigned_imm) {
2652 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2653 START();
2654
2655 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2656 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2657 InsrHelper(&masm, z13.VnB(), src1_inputs);
2658 Initialise(&masm, p0.VnB(), mask_inputs1);
2659
2660 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2661 __ Mrs(x2, NZCV);
2662 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2663
2664 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2665 int mask_inputs2[] = {1, 1, 1, 1, 0};
2666 InsrHelper(&masm, z13.VnH(), src2_inputs);
2667 Initialise(&masm, p0.VnH(), mask_inputs2);
2668
2669 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2670 __ Mrs(x4, NZCV);
2671 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2672
2673 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2674 int mask_inputs3[] = {1, 1, 1, 1};
2675 InsrHelper(&masm, z13.VnS(), src3_inputs);
2676 Initialise(&masm, p0.VnS(), mask_inputs3);
2677
2678 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2679 __ Mrs(x6, NZCV);
2680 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2681
2682 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2683 int mask_inputs4[] = {1, 1};
2684 InsrHelper(&masm, z13.VnD(), src4_inputs);
2685 Initialise(&masm, p0.VnD(), mask_inputs4);
2686
2687 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2688 __ Mrs(x8, NZCV);
2689 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2690
2691 END();
2692
2693 if (CAN_RUN()) {
2694 RUN();
2695
2696 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2697 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2698
2699 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2700 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2701
2702 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2703 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2704
2705 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2706 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2707
2708 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2709 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2710
2711 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2712 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2713
2714 int p8_expected[] = {0x00, 0x01};
2715 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2716
2717 int p9_expected[] = {0x00, 0x01};
2718 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2719
2720 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2721 ASSERT_EQUAL_32(NoFlag, w4);
2722 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2723 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2724 }
2725}
2726
TatWai Chongc844bb22019-06-10 15:32:53 -07002727TEST(sve_int_compare_conditionally_terminate_scalars) {
2728 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2729 START();
2730
2731 __ Mov(x0, 0xfedcba9887654321);
2732 __ Mov(x1, 0x1000100010001000);
2733
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002734 // Initialise Z and C. These are preserved by cterm*, and the V flag is set to
2735 // !C if the condition does not hold.
2736 __ Mov(x10, NoFlag);
2737 __ Msr(NZCV, x10);
2738
TatWai Chongc844bb22019-06-10 15:32:53 -07002739 __ Ctermeq(w0, w0);
2740 __ Mrs(x2, NZCV);
2741 __ Ctermeq(x0, x1);
2742 __ Mrs(x3, NZCV);
2743 __ Ctermne(x0, x0);
2744 __ Mrs(x4, NZCV);
2745 __ Ctermne(w0, w1);
2746 __ Mrs(x5, NZCV);
2747
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002748 // As above, but with all flags initially set.
2749 __ Mov(x10, NZCVFlag);
2750 __ Msr(NZCV, x10);
2751
2752 __ Ctermeq(w0, w0);
2753 __ Mrs(x6, NZCV);
2754 __ Ctermeq(x0, x1);
2755 __ Mrs(x7, NZCV);
2756 __ Ctermne(x0, x0);
2757 __ Mrs(x8, NZCV);
2758 __ Ctermne(w0, w1);
2759 __ Mrs(x9, NZCV);
2760
TatWai Chongc844bb22019-06-10 15:32:53 -07002761 END();
2762
2763 if (CAN_RUN()) {
2764 RUN();
2765
2766 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2767 ASSERT_EQUAL_32(VFlag, w3);
2768 ASSERT_EQUAL_32(VFlag, w4);
2769 ASSERT_EQUAL_32(SVEFirstFlag, w5);
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002770
2771 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w6);
2772 ASSERT_EQUAL_32(ZCFlag, w7);
2773 ASSERT_EQUAL_32(ZCFlag, w8);
2774 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w9);
TatWai Chongc844bb22019-06-10 15:32:53 -07002775 }
2776}
2777
Jacob Bramley0ce75842019-07-17 18:12:50 +01002778// Work out what the architectural `PredTest` pseudocode should produce for the
2779// given result and governing predicate.
2780template <typename Tg, typename Td, int N>
2781static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2782 const Tg (&pg)[N],
2783 int vl) {
2784 int first = -1;
2785 int last = -1;
2786 bool any_active = false;
2787
2788 // Only consider potentially-active lanes.
2789 int start = (N > vl) ? (N - vl) : 0;
2790 for (int i = start; i < N; i++) {
2791 if ((pg[i] & 1) == 1) {
2792 // Look for the first and last active lanes.
2793 // Note that the 'first' lane is the one with the highest index.
2794 if (last < 0) last = i;
2795 first = i;
2796 // Look for any active lanes that are also active in pd.
2797 if ((pd[i] & 1) == 1) any_active = true;
2798 }
2799 }
2800
2801 uint32_t flags = 0;
2802 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2803 if (!any_active) flags |= SVENoneFlag;
2804 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2805 return static_cast<StatusFlags>(flags);
2806}
2807
2808typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2809 const PRegister& pg,
2810 const PRegisterWithLaneSize& pn);
2811template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002812static void PfirstPnextHelper(Test* config,
2813 PfirstPnextFn macro,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002814 unsigned lane_size_in_bits,
2815 const Tg& pg_inputs,
2816 const Tn& pn_inputs,
2817 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002818 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002819 START();
2820
2821 PRegister pg = p15;
2822 PRegister pn = p14;
2823 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2824 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2825
2826 // Initialise NZCV to an impossible value, to check that we actually write it.
2827 __ Mov(x10, NZCVFlag);
2828
2829 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2830 // the Assembler.
2831 __ Msr(NZCV, x10);
2832 __ Mov(p0, pn);
2833 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2834 pg,
2835 p0.WithLaneSize(lane_size_in_bits));
2836 __ Mrs(x0, NZCV);
2837
2838 // The MacroAssembler supports non-destructive use.
2839 __ Msr(NZCV, x10);
2840 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2841 pg,
2842 pn.WithLaneSize(lane_size_in_bits));
2843 __ Mrs(x1, NZCV);
2844
2845 // If pd.Aliases(pg) the macro requires a scratch register.
2846 {
2847 UseScratchRegisterScope temps(&masm);
2848 temps.Include(p13);
2849 __ Msr(NZCV, x10);
2850 __ Mov(p2, p15);
2851 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2852 p2,
2853 pn.WithLaneSize(lane_size_in_bits));
2854 __ Mrs(x2, NZCV);
2855 }
2856
2857 END();
2858
2859 if (CAN_RUN()) {
2860 RUN();
2861
2862 // Check that the inputs weren't modified.
2863 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2864 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2865
2866 // Check the primary operation.
2867 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2868 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2869 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2870
2871 // Check that the flags were properly set.
2872 StatusFlags nzcv_expected =
2873 GetPredTestFlags(pd_expected,
2874 pg_inputs,
2875 core.GetSVELaneCount(kBRegSize));
2876 ASSERT_EQUAL_64(nzcv_expected, x0);
2877 ASSERT_EQUAL_64(nzcv_expected, x1);
2878 ASSERT_EQUAL_64(nzcv_expected, x2);
2879 }
2880}
2881
2882template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002883static void PfirstHelper(Test* config,
2884 const Tg& pg_inputs,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002885 const Tn& pn_inputs,
2886 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002887 PfirstPnextHelper(config,
2888 &MacroAssembler::Pfirst,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002889 kBRegSize, // pfirst only accepts B-sized lanes.
2890 pg_inputs,
2891 pn_inputs,
2892 pd_expected);
2893}
2894
2895template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002896static void PnextHelper(Test* config,
2897 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002898 const Tg& pg_inputs,
2899 const Tn& pn_inputs,
2900 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002901 PfirstPnextHelper(config,
2902 &MacroAssembler::Pnext,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002903 lane_size_in_bits,
2904 pg_inputs,
2905 pn_inputs,
2906 pd_expected);
2907}
2908
Jacob Bramleye8289202019-07-31 11:25:23 +01002909TEST_SVE(sve_pfirst) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002910 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2911 // large VL), but few enough to make the test easy to read.
2912 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2913 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2914 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2915 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2916 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2917 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2918
2919 // Pfirst finds the first active lane in pg, and activates the corresponding
2920 // lane in pn (if it isn't already active).
2921
2922 // The first active lane in in1 is here. |
2923 // v
2924 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2925 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2926 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2927 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002928 PfirstHelper(config, in1, in0, exp10);
2929 PfirstHelper(config, in1, in2, exp12);
2930 PfirstHelper(config, in1, in3, exp13);
2931 PfirstHelper(config, in1, in4, exp14);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002932
2933 // The first active lane in in2 is here. |
2934 // v
2935 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2936 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2937 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2938 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002939 PfirstHelper(config, in2, in0, exp20);
2940 PfirstHelper(config, in2, in1, exp21);
2941 PfirstHelper(config, in2, in3, exp23);
2942 PfirstHelper(config, in2, in4, exp24);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002943
2944 // The first active lane in in3 is here. |
2945 // v
2946 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2947 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2948 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2949 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002950 PfirstHelper(config, in3, in0, exp30);
2951 PfirstHelper(config, in3, in1, exp31);
2952 PfirstHelper(config, in3, in2, exp32);
2953 PfirstHelper(config, in3, in4, exp34);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002954
2955 // | The first active lane in in4 is here.
2956 // v
2957 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2958 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2959 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2960 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002961 PfirstHelper(config, in4, in0, exp40);
2962 PfirstHelper(config, in4, in1, exp41);
2963 PfirstHelper(config, in4, in2, exp42);
2964 PfirstHelper(config, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002965
2966 // If pg is all inactive, the input is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002967 PfirstHelper(config, in0, in0, in0);
2968 PfirstHelper(config, in0, in1, in1);
2969 PfirstHelper(config, in0, in2, in2);
2970 PfirstHelper(config, in0, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002971
2972 // If the values of pg and pn match, the value is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002973 PfirstHelper(config, in0, in0, in0);
2974 PfirstHelper(config, in1, in1, in1);
2975 PfirstHelper(config, in2, in2, in2);
2976 PfirstHelper(config, in3, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002977}
2978
Jacob Bramleye8289202019-07-31 11:25:23 +01002979TEST_SVE(sve_pfirst_alias) {
2980 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002981 START();
2982
2983 // Check that the Simulator behaves correctly when all arguments are aliased.
2984 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2985 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2986 int in_s[] = {0, 1, 1, 0};
2987 int in_d[] = {1, 1};
2988
2989 Initialise(&masm, p0.VnB(), in_b);
2990 Initialise(&masm, p1.VnH(), in_h);
2991 Initialise(&masm, p2.VnS(), in_s);
2992 Initialise(&masm, p3.VnD(), in_d);
2993
2994 // Initialise NZCV to an impossible value, to check that we actually write it.
2995 __ Mov(x10, NZCVFlag);
2996
2997 __ Msr(NZCV, x10);
2998 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2999 __ Mrs(x0, NZCV);
3000
3001 __ Msr(NZCV, x10);
3002 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
3003 __ Mrs(x1, NZCV);
3004
3005 __ Msr(NZCV, x10);
3006 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
3007 __ Mrs(x2, NZCV);
3008
3009 __ Msr(NZCV, x10);
3010 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
3011 __ Mrs(x3, NZCV);
3012
3013 END();
3014
3015 if (CAN_RUN()) {
3016 RUN();
3017
3018 // The first lane from pg is already active in pdn, so the P register should
3019 // be unchanged.
3020 ASSERT_EQUAL_SVE(in_b, p0.VnB());
3021 ASSERT_EQUAL_SVE(in_h, p1.VnH());
3022 ASSERT_EQUAL_SVE(in_s, p2.VnS());
3023 ASSERT_EQUAL_SVE(in_d, p3.VnD());
3024
3025 ASSERT_EQUAL_64(SVEFirstFlag, x0);
3026 ASSERT_EQUAL_64(SVEFirstFlag, x1);
3027 ASSERT_EQUAL_64(SVEFirstFlag, x2);
3028 ASSERT_EQUAL_64(SVEFirstFlag, x3);
3029 }
3030}
3031
Jacob Bramleye8289202019-07-31 11:25:23 +01003032TEST_SVE(sve_pnext_b) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01003033 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
3034 // (to check propagation if we have a large VL), but few enough to make the
3035 // test easy to read.
3036 // For now, we just use kPRegMinSize so that the test works anywhere.
3037 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3038 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
3039 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
3040 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
3041 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3042
3043 // Pnext activates the next element that is true in pg, after the last-active
3044 // element in pn. If all pn elements are false (as in in0), it starts looking
3045 // at element 0.
3046
3047 // There are no active lanes in in0, so the result is simply the first active
3048 // lane from pg.
3049 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3050 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
3051 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
3052 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
3053 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3054
3055 // The last active lane in in1 is here. |
3056 // v
3057 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3058 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3059 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3060 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3061 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3062
3063 // | The last active lane in in2 is here.
3064 // v
3065 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3066 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3067 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3068 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3069 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3070
3071 // | The last active lane in in3 is here.
3072 // v
3073 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3074 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3075 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3076 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3077 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3078
3079 // | The last active lane in in4 is here.
3080 // v
3081 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3082 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3083 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3084 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3085 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3086
Jacob Bramleye8289202019-07-31 11:25:23 +01003087 PnextHelper(config, kBRegSize, in0, in0, exp00);
3088 PnextHelper(config, kBRegSize, in1, in0, exp10);
3089 PnextHelper(config, kBRegSize, in2, in0, exp20);
3090 PnextHelper(config, kBRegSize, in3, in0, exp30);
3091 PnextHelper(config, kBRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003092
Jacob Bramleye8289202019-07-31 11:25:23 +01003093 PnextHelper(config, kBRegSize, in0, in1, exp01);
3094 PnextHelper(config, kBRegSize, in1, in1, exp11);
3095 PnextHelper(config, kBRegSize, in2, in1, exp21);
3096 PnextHelper(config, kBRegSize, in3, in1, exp31);
3097 PnextHelper(config, kBRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003098
Jacob Bramleye8289202019-07-31 11:25:23 +01003099 PnextHelper(config, kBRegSize, in0, in2, exp02);
3100 PnextHelper(config, kBRegSize, in1, in2, exp12);
3101 PnextHelper(config, kBRegSize, in2, in2, exp22);
3102 PnextHelper(config, kBRegSize, in3, in2, exp32);
3103 PnextHelper(config, kBRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003104
Jacob Bramleye8289202019-07-31 11:25:23 +01003105 PnextHelper(config, kBRegSize, in0, in3, exp03);
3106 PnextHelper(config, kBRegSize, in1, in3, exp13);
3107 PnextHelper(config, kBRegSize, in2, in3, exp23);
3108 PnextHelper(config, kBRegSize, in3, in3, exp33);
3109 PnextHelper(config, kBRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003110
Jacob Bramleye8289202019-07-31 11:25:23 +01003111 PnextHelper(config, kBRegSize, in0, in4, exp04);
3112 PnextHelper(config, kBRegSize, in1, in4, exp14);
3113 PnextHelper(config, kBRegSize, in2, in4, exp24);
3114 PnextHelper(config, kBRegSize, in3, in4, exp34);
3115 PnextHelper(config, kBRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003116}
3117
Jacob Bramleye8289202019-07-31 11:25:23 +01003118TEST_SVE(sve_pnext_h) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01003119 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
3120 // (to check propagation if we have a large VL), but few enough to make the
3121 // test easy to read.
3122 // For now, we just use kPRegMinSize so that the test works anywhere.
3123 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
3124 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
3125 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
3126 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
3127 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
3128
3129 // Pnext activates the next element that is true in pg, after the last-active
3130 // element in pn. If all pn elements are false (as in in0), it starts looking
3131 // at element 0.
3132 //
3133 // As for other SVE instructions, elements are only considered to be active if
3134 // the _first_ bit in each field is one. Other bits are ignored.
3135
3136 // There are no active lanes in in0, so the result is simply the first active
3137 // lane from pg.
3138 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
3139 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
3140 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
3141 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
3142 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
3143
3144 // | The last active lane in in1 is here.
3145 // v
3146 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
3147 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
3148 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
3149 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
3150 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
3151
3152 // | The last active lane in in2 is here.
3153 // v
3154 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
3155 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
3156 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
3157 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
3158 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
3159
3160 // | The last active lane in in3 is here.
3161 // v
3162 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
3163 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
3164 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
3165 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
3166 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
3167
3168 // | The last active lane in in4 is here.
3169 // v
3170 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
3171 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
3172 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
3173 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
3174 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
3175
Jacob Bramleye8289202019-07-31 11:25:23 +01003176 PnextHelper(config, kHRegSize, in0, in0, exp00);
3177 PnextHelper(config, kHRegSize, in1, in0, exp10);
3178 PnextHelper(config, kHRegSize, in2, in0, exp20);
3179 PnextHelper(config, kHRegSize, in3, in0, exp30);
3180 PnextHelper(config, kHRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003181
Jacob Bramleye8289202019-07-31 11:25:23 +01003182 PnextHelper(config, kHRegSize, in0, in1, exp01);
3183 PnextHelper(config, kHRegSize, in1, in1, exp11);
3184 PnextHelper(config, kHRegSize, in2, in1, exp21);
3185 PnextHelper(config, kHRegSize, in3, in1, exp31);
3186 PnextHelper(config, kHRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003187
Jacob Bramleye8289202019-07-31 11:25:23 +01003188 PnextHelper(config, kHRegSize, in0, in2, exp02);
3189 PnextHelper(config, kHRegSize, in1, in2, exp12);
3190 PnextHelper(config, kHRegSize, in2, in2, exp22);
3191 PnextHelper(config, kHRegSize, in3, in2, exp32);
3192 PnextHelper(config, kHRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003193
Jacob Bramleye8289202019-07-31 11:25:23 +01003194 PnextHelper(config, kHRegSize, in0, in3, exp03);
3195 PnextHelper(config, kHRegSize, in1, in3, exp13);
3196 PnextHelper(config, kHRegSize, in2, in3, exp23);
3197 PnextHelper(config, kHRegSize, in3, in3, exp33);
3198 PnextHelper(config, kHRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003199
Jacob Bramleye8289202019-07-31 11:25:23 +01003200 PnextHelper(config, kHRegSize, in0, in4, exp04);
3201 PnextHelper(config, kHRegSize, in1, in4, exp14);
3202 PnextHelper(config, kHRegSize, in2, in4, exp24);
3203 PnextHelper(config, kHRegSize, in3, in4, exp34);
3204 PnextHelper(config, kHRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003205}
3206
Jacob Bramleye8289202019-07-31 11:25:23 +01003207TEST_SVE(sve_pnext_s) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01003208 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
3209 // (to check propagation if we have a large VL), but few enough to make the
3210 // test easy to read.
3211 // For now, we just use kPRegMinSize so that the test works anywhere.
3212 int in0[] = {0xe, 0xc, 0x8, 0x0};
3213 int in1[] = {0x0, 0x2, 0x0, 0x1};
3214 int in2[] = {0x0, 0x1, 0xf, 0x0};
3215 int in3[] = {0xf, 0x0, 0x0, 0x0};
3216
3217 // Pnext activates the next element that is true in pg, after the last-active
3218 // element in pn. If all pn elements are false (as in in0), it starts looking
3219 // at element 0.
3220 //
3221 // As for other SVE instructions, elements are only considered to be active if
3222 // the _first_ bit in each field is one. Other bits are ignored.
3223
3224 // There are no active lanes in in0, so the result is simply the first active
3225 // lane from pg.
3226 int exp00[] = {0, 0, 0, 0};
3227 int exp10[] = {0, 0, 0, 1};
3228 int exp20[] = {0, 0, 1, 0};
3229 int exp30[] = {1, 0, 0, 0};
3230
3231 // | The last active lane in in1 is here.
3232 // v
3233 int exp01[] = {0, 0, 0, 0};
3234 int exp11[] = {0, 0, 0, 0};
3235 int exp21[] = {0, 0, 1, 0};
3236 int exp31[] = {1, 0, 0, 0};
3237
3238 // | The last active lane in in2 is here.
3239 // v
3240 int exp02[] = {0, 0, 0, 0};
3241 int exp12[] = {0, 0, 0, 0};
3242 int exp22[] = {0, 0, 0, 0};
3243 int exp32[] = {1, 0, 0, 0};
3244
3245 // | The last active lane in in3 is here.
3246 // v
3247 int exp03[] = {0, 0, 0, 0};
3248 int exp13[] = {0, 0, 0, 0};
3249 int exp23[] = {0, 0, 0, 0};
3250 int exp33[] = {0, 0, 0, 0};
3251
Jacob Bramleye8289202019-07-31 11:25:23 +01003252 PnextHelper(config, kSRegSize, in0, in0, exp00);
3253 PnextHelper(config, kSRegSize, in1, in0, exp10);
3254 PnextHelper(config, kSRegSize, in2, in0, exp20);
3255 PnextHelper(config, kSRegSize, in3, in0, exp30);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003256
Jacob Bramleye8289202019-07-31 11:25:23 +01003257 PnextHelper(config, kSRegSize, in0, in1, exp01);
3258 PnextHelper(config, kSRegSize, in1, in1, exp11);
3259 PnextHelper(config, kSRegSize, in2, in1, exp21);
3260 PnextHelper(config, kSRegSize, in3, in1, exp31);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003261
Jacob Bramleye8289202019-07-31 11:25:23 +01003262 PnextHelper(config, kSRegSize, in0, in2, exp02);
3263 PnextHelper(config, kSRegSize, in1, in2, exp12);
3264 PnextHelper(config, kSRegSize, in2, in2, exp22);
3265 PnextHelper(config, kSRegSize, in3, in2, exp32);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003266
Jacob Bramleye8289202019-07-31 11:25:23 +01003267 PnextHelper(config, kSRegSize, in0, in3, exp03);
3268 PnextHelper(config, kSRegSize, in1, in3, exp13);
3269 PnextHelper(config, kSRegSize, in2, in3, exp23);
3270 PnextHelper(config, kSRegSize, in3, in3, exp33);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003271}
3272
Jacob Bramleye8289202019-07-31 11:25:23 +01003273TEST_SVE(sve_pnext_d) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01003274 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
3275 // (to check propagation if we have a large VL), but few enough to make the
3276 // test easy to read.
3277 // For now, we just use kPRegMinSize so that the test works anywhere.
3278 int in0[] = {0xfe, 0xf0};
3279 int in1[] = {0x00, 0x55};
3280 int in2[] = {0x33, 0xff};
3281
3282 // Pnext activates the next element that is true in pg, after the last-active
3283 // element in pn. If all pn elements are false (as in in0), it starts looking
3284 // at element 0.
3285 //
3286 // As for other SVE instructions, elements are only considered to be active if
3287 // the _first_ bit in each field is one. Other bits are ignored.
3288
3289 // There are no active lanes in in0, so the result is simply the first active
3290 // lane from pg.
3291 int exp00[] = {0, 0};
3292 int exp10[] = {0, 1};
3293 int exp20[] = {0, 1};
3294
3295 // | The last active lane in in1 is here.
3296 // v
3297 int exp01[] = {0, 0};
3298 int exp11[] = {0, 0};
3299 int exp21[] = {1, 0};
3300
3301 // | The last active lane in in2 is here.
3302 // v
3303 int exp02[] = {0, 0};
3304 int exp12[] = {0, 0};
3305 int exp22[] = {0, 0};
3306
Jacob Bramleye8289202019-07-31 11:25:23 +01003307 PnextHelper(config, kDRegSize, in0, in0, exp00);
3308 PnextHelper(config, kDRegSize, in1, in0, exp10);
3309 PnextHelper(config, kDRegSize, in2, in0, exp20);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003310
Jacob Bramleye8289202019-07-31 11:25:23 +01003311 PnextHelper(config, kDRegSize, in0, in1, exp01);
3312 PnextHelper(config, kDRegSize, in1, in1, exp11);
3313 PnextHelper(config, kDRegSize, in2, in1, exp21);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003314
Jacob Bramleye8289202019-07-31 11:25:23 +01003315 PnextHelper(config, kDRegSize, in0, in2, exp02);
3316 PnextHelper(config, kDRegSize, in1, in2, exp12);
3317 PnextHelper(config, kDRegSize, in2, in2, exp22);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003318}
3319
Jacob Bramleye8289202019-07-31 11:25:23 +01003320TEST_SVE(sve_pnext_alias) {
3321 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003322 START();
3323
3324 // Check that the Simulator behaves correctly when all arguments are aliased.
3325 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
3326 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
3327 int in_s[] = {0, 1, 1, 0};
3328 int in_d[] = {1, 1};
3329
3330 Initialise(&masm, p0.VnB(), in_b);
3331 Initialise(&masm, p1.VnH(), in_h);
3332 Initialise(&masm, p2.VnS(), in_s);
3333 Initialise(&masm, p3.VnD(), in_d);
3334
3335 // Initialise NZCV to an impossible value, to check that we actually write it.
3336 __ Mov(x10, NZCVFlag);
3337
3338 __ Msr(NZCV, x10);
3339 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
3340 __ Mrs(x0, NZCV);
3341
3342 __ Msr(NZCV, x10);
3343 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
3344 __ Mrs(x1, NZCV);
3345
3346 __ Msr(NZCV, x10);
3347 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
3348 __ Mrs(x2, NZCV);
3349
3350 __ Msr(NZCV, x10);
3351 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
3352 __ Mrs(x3, NZCV);
3353
3354 END();
3355
3356 if (CAN_RUN()) {
3357 RUN();
3358
3359 // Since pg.Is(pdn), there can be no active lanes in pg above the last
3360 // active lane in pdn, so the result should always be zero.
3361 ASSERT_EQUAL_SVE(0, p0.VnB());
3362 ASSERT_EQUAL_SVE(0, p1.VnH());
3363 ASSERT_EQUAL_SVE(0, p2.VnS());
3364 ASSERT_EQUAL_SVE(0, p3.VnD());
3365
3366 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
3367 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
3368 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
3369 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
3370 }
3371}
3372
Jacob Bramleye8289202019-07-31 11:25:23 +01003373static void PtrueHelper(Test* config,
3374 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01003375 FlagsUpdate s = LeaveFlags) {
Jacob Bramleye8289202019-07-31 11:25:23 +01003376 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003377 START();
3378
3379 PRegisterWithLaneSize p[kNumberOfPRegisters];
3380 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
3381 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
3382 }
3383
3384 // Initialise NZCV to an impossible value, to check that we actually write it.
3385 StatusFlags nzcv_unmodified = NZCVFlag;
3386 __ Mov(x20, nzcv_unmodified);
3387
3388 // We don't have enough registers to conveniently test every pattern, so take
3389 // samples from each group.
3390 __ Msr(NZCV, x20);
3391 __ Ptrue(p[0], SVE_POW2, s);
3392 __ Mrs(x0, NZCV);
3393
3394 __ Msr(NZCV, x20);
3395 __ Ptrue(p[1], SVE_VL1, s);
3396 __ Mrs(x1, NZCV);
3397
3398 __ Msr(NZCV, x20);
3399 __ Ptrue(p[2], SVE_VL2, s);
3400 __ Mrs(x2, NZCV);
3401
3402 __ Msr(NZCV, x20);
3403 __ Ptrue(p[3], SVE_VL5, s);
3404 __ Mrs(x3, NZCV);
3405
3406 __ Msr(NZCV, x20);
3407 __ Ptrue(p[4], SVE_VL6, s);
3408 __ Mrs(x4, NZCV);
3409
3410 __ Msr(NZCV, x20);
3411 __ Ptrue(p[5], SVE_VL8, s);
3412 __ Mrs(x5, NZCV);
3413
3414 __ Msr(NZCV, x20);
3415 __ Ptrue(p[6], SVE_VL16, s);
3416 __ Mrs(x6, NZCV);
3417
3418 __ Msr(NZCV, x20);
3419 __ Ptrue(p[7], SVE_VL64, s);
3420 __ Mrs(x7, NZCV);
3421
3422 __ Msr(NZCV, x20);
3423 __ Ptrue(p[8], SVE_VL256, s);
3424 __ Mrs(x8, NZCV);
3425
3426 {
3427 // We have to use the Assembler to use values not defined by
3428 // SVEPredicateConstraint, so call `ptrues` directly..
3429 typedef void (
3430 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
3431 int pattern);
3432 AssemblePtrueFn assemble =
3433 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
3434
3435 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
3436 __ msr(NZCV, x20);
3437 (masm.*assemble)(p[9], 0xe);
3438 __ mrs(x9, NZCV);
3439
3440 __ msr(NZCV, x20);
3441 (masm.*assemble)(p[10], 0x16);
3442 __ mrs(x10, NZCV);
3443
3444 __ msr(NZCV, x20);
3445 (masm.*assemble)(p[11], 0x1a);
3446 __ mrs(x11, NZCV);
3447
3448 __ msr(NZCV, x20);
3449 (masm.*assemble)(p[12], 0x1c);
3450 __ mrs(x12, NZCV);
3451 }
3452
3453 __ Msr(NZCV, x20);
3454 __ Ptrue(p[13], SVE_MUL4, s);
3455 __ Mrs(x13, NZCV);
3456
3457 __ Msr(NZCV, x20);
3458 __ Ptrue(p[14], SVE_MUL3, s);
3459 __ Mrs(x14, NZCV);
3460
3461 __ Msr(NZCV, x20);
3462 __ Ptrue(p[15], SVE_ALL, s);
3463 __ Mrs(x15, NZCV);
3464
3465 END();
3466
3467 if (CAN_RUN()) {
3468 RUN();
3469
3470 int all = core.GetSVELaneCount(lane_size_in_bits);
3471 int pow2 = 1 << HighestSetBitPosition(all);
3472 int mul4 = all - (all % 4);
3473 int mul3 = all - (all % 3);
3474
3475 // Check P register results.
3476 for (int i = 0; i < all; i++) {
3477 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
3478 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
3479 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
3480 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
3481 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
3482 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
3483 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
3484 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
3485 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
3486 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
3487 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
3488 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
3489 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
3490 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
3491 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
3492 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
3493 }
3494
3495 // Check NZCV results.
3496 if (s == LeaveFlags) {
3497 // No flags should have been updated.
3498 for (int i = 0; i <= 15; i++) {
3499 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
3500 }
3501 } else {
3502 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
3503 StatusFlags nonzero = SVEFirstFlag;
3504
3505 // POW2
3506 ASSERT_EQUAL_64(nonzero, x0);
3507 // VL*
3508 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
3509 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
3510 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
3511 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
3512 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
3513 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
3514 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
3515 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
3516 // #uimm5
3517 ASSERT_EQUAL_64(zero, x9);
3518 ASSERT_EQUAL_64(zero, x10);
3519 ASSERT_EQUAL_64(zero, x11);
3520 ASSERT_EQUAL_64(zero, x12);
3521 // MUL*
3522 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
3523 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
3524 // ALL
3525 ASSERT_EQUAL_64(nonzero, x15);
3526 }
3527 }
3528}
3529
Jacob Bramleye8289202019-07-31 11:25:23 +01003530TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
3531TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
3532TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
3533TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01003534
Jacob Bramleye8289202019-07-31 11:25:23 +01003535TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
3536TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
3537TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
3538TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01003539
Jacob Bramleye8289202019-07-31 11:25:23 +01003540TEST_SVE(sve_pfalse) {
3541 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003542 START();
3543
3544 // Initialise non-zero inputs.
3545 __ Ptrue(p0.VnB());
3546 __ Ptrue(p1.VnH());
3547 __ Ptrue(p2.VnS());
3548 __ Ptrue(p3.VnD());
3549
3550 // The instruction only supports B-sized lanes, but the lane size has no
3551 // logical effect, so the MacroAssembler accepts anything.
3552 __ Pfalse(p0.VnB());
3553 __ Pfalse(p1.VnH());
3554 __ Pfalse(p2.VnS());
3555 __ Pfalse(p3.VnD());
3556
3557 END();
3558
3559 if (CAN_RUN()) {
3560 RUN();
3561
3562 ASSERT_EQUAL_SVE(0, p0.VnB());
3563 ASSERT_EQUAL_SVE(0, p1.VnB());
3564 ASSERT_EQUAL_SVE(0, p2.VnB());
3565 ASSERT_EQUAL_SVE(0, p3.VnB());
3566 }
3567}
3568
Jacob Bramleye8289202019-07-31 11:25:23 +01003569TEST_SVE(sve_ptest) {
3570 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003571 START();
3572
3573 // Initialise NZCV to a known (impossible) value.
3574 StatusFlags nzcv_unmodified = NZCVFlag;
3575 __ Mov(x0, nzcv_unmodified);
3576 __ Msr(NZCV, x0);
3577
3578 // Construct some test inputs.
3579 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3580 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3581 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3582 __ Pfalse(p0.VnB());
3583 __ Ptrue(p1.VnB());
3584 Initialise(&masm, p2.VnB(), in2);
3585 Initialise(&masm, p3.VnB(), in3);
3586 Initialise(&masm, p4.VnB(), in4);
3587
3588 // All-inactive pg.
3589 __ Ptest(p0, p0.VnB());
3590 __ Mrs(x0, NZCV);
3591 __ Ptest(p0, p1.VnB());
3592 __ Mrs(x1, NZCV);
3593 __ Ptest(p0, p2.VnB());
3594 __ Mrs(x2, NZCV);
3595 __ Ptest(p0, p3.VnB());
3596 __ Mrs(x3, NZCV);
3597 __ Ptest(p0, p4.VnB());
3598 __ Mrs(x4, NZCV);
3599
3600 // All-active pg.
3601 __ Ptest(p1, p0.VnB());
3602 __ Mrs(x5, NZCV);
3603 __ Ptest(p1, p1.VnB());
3604 __ Mrs(x6, NZCV);
3605 __ Ptest(p1, p2.VnB());
3606 __ Mrs(x7, NZCV);
3607 __ Ptest(p1, p3.VnB());
3608 __ Mrs(x8, NZCV);
3609 __ Ptest(p1, p4.VnB());
3610 __ Mrs(x9, NZCV);
3611
3612 // Combinations of other inputs.
3613 __ Ptest(p2, p2.VnB());
3614 __ Mrs(x20, NZCV);
3615 __ Ptest(p2, p3.VnB());
3616 __ Mrs(x21, NZCV);
3617 __ Ptest(p2, p4.VnB());
3618 __ Mrs(x22, NZCV);
3619 __ Ptest(p3, p2.VnB());
3620 __ Mrs(x23, NZCV);
3621 __ Ptest(p3, p3.VnB());
3622 __ Mrs(x24, NZCV);
3623 __ Ptest(p3, p4.VnB());
3624 __ Mrs(x25, NZCV);
3625 __ Ptest(p4, p2.VnB());
3626 __ Mrs(x26, NZCV);
3627 __ Ptest(p4, p3.VnB());
3628 __ Mrs(x27, NZCV);
3629 __ Ptest(p4, p4.VnB());
3630 __ Mrs(x28, NZCV);
3631
3632 END();
3633
3634 if (CAN_RUN()) {
3635 RUN();
3636
3637 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
3638
3639 // If pg is all inactive, the value of pn is irrelevant.
3640 ASSERT_EQUAL_64(zero, x0);
3641 ASSERT_EQUAL_64(zero, x1);
3642 ASSERT_EQUAL_64(zero, x2);
3643 ASSERT_EQUAL_64(zero, x3);
3644 ASSERT_EQUAL_64(zero, x4);
3645
3646 // All-active pg.
3647 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3648 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3649 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3650 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3651 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3652 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3653
3654 // Other inputs.
3655 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3656 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3657 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3658 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3659 x23); // pg: in3, pn: in2
3660 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3661 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3662 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3663 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3664 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3665 }
3666}
3667
Jacob Bramleye8289202019-07-31 11:25:23 +01003668TEST_SVE(sve_cntp) {
3669 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01003670 START();
3671
3672 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3673 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
3674 Initialise(&masm, p0.VnB(), p0_inputs);
3675
3676 // With an all-true predicate, these instructions measure the vector length.
3677 __ Ptrue(p10.VnB());
3678 __ Ptrue(p11.VnH());
3679 __ Ptrue(p12.VnS());
3680 __ Ptrue(p13.VnD());
3681
3682 // `ptrue p10.b` provides an all-active pg.
3683 __ Cntp(x10, p10, p10.VnB());
3684 __ Cntp(x11, p10, p11.VnH());
3685 __ Cntp(x12, p10, p12.VnS());
3686 __ Cntp(x13, p10, p13.VnD());
3687
3688 // Check that the predicate mask is applied properly.
3689 __ Cntp(x14, p10, p10.VnB());
3690 __ Cntp(x15, p11, p10.VnB());
3691 __ Cntp(x16, p12, p10.VnB());
3692 __ Cntp(x17, p13, p10.VnB());
3693
3694 // Check other patterns (including some ignored bits).
3695 __ Cntp(x0, p10, p0.VnB());
3696 __ Cntp(x1, p10, p0.VnH());
3697 __ Cntp(x2, p10, p0.VnS());
3698 __ Cntp(x3, p10, p0.VnD());
3699 __ Cntp(x4, p0, p10.VnB());
3700 __ Cntp(x5, p0, p10.VnH());
3701 __ Cntp(x6, p0, p10.VnS());
3702 __ Cntp(x7, p0, p10.VnD());
3703
3704 END();
3705
3706 if (CAN_RUN()) {
3707 RUN();
3708
3709 int vl_b = core.GetSVELaneCount(kBRegSize);
3710 int vl_h = core.GetSVELaneCount(kHRegSize);
3711 int vl_s = core.GetSVELaneCount(kSRegSize);
3712 int vl_d = core.GetSVELaneCount(kDRegSize);
3713
3714 // Check all-active predicates in various combinations.
3715 ASSERT_EQUAL_64(vl_b, x10);
3716 ASSERT_EQUAL_64(vl_h, x11);
3717 ASSERT_EQUAL_64(vl_s, x12);
3718 ASSERT_EQUAL_64(vl_d, x13);
3719
3720 ASSERT_EQUAL_64(vl_b, x14);
3721 ASSERT_EQUAL_64(vl_h, x15);
3722 ASSERT_EQUAL_64(vl_s, x16);
3723 ASSERT_EQUAL_64(vl_d, x17);
3724
3725 // Check that irrelevant bits are properly ignored.
3726 ASSERT_EQUAL_64(7, x0);
3727 ASSERT_EQUAL_64(5, x1);
3728 ASSERT_EQUAL_64(2, x2);
3729 ASSERT_EQUAL_64(1, x3);
3730
3731 ASSERT_EQUAL_64(7, x4);
3732 ASSERT_EQUAL_64(5, x5);
3733 ASSERT_EQUAL_64(2, x6);
3734 ASSERT_EQUAL_64(1, x7);
3735 }
3736}
3737
Martyn Capewell74f84f62019-10-30 15:30:44 +00003738typedef void (MacroAssembler::*CntFn)(const Register& dst,
3739 int pattern,
3740 int multiplier);
3741
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003742template <typename T>
3743void GenerateCntSequence(MacroAssembler* masm,
3744 CntFn cnt,
3745 T acc_value,
3746 int multiplier) {
3747 // Initialise accumulators.
3748 masm->Mov(x0, acc_value);
3749 masm->Mov(x1, acc_value);
3750 masm->Mov(x2, acc_value);
3751 masm->Mov(x3, acc_value);
3752 masm->Mov(x4, acc_value);
3753 masm->Mov(x5, acc_value);
3754 masm->Mov(x6, acc_value);
3755 masm->Mov(x7, acc_value);
3756 masm->Mov(x8, acc_value);
3757 masm->Mov(x9, acc_value);
3758 masm->Mov(x10, acc_value);
3759 masm->Mov(x11, acc_value);
3760 masm->Mov(x12, acc_value);
3761 masm->Mov(x13, acc_value);
3762 masm->Mov(x14, acc_value);
3763 masm->Mov(x15, acc_value);
3764 masm->Mov(x18, acc_value);
3765 masm->Mov(x19, acc_value);
3766 masm->Mov(x20, acc_value);
3767 masm->Mov(x21, acc_value);
3768
3769 (masm->*cnt)(Register(0, sizeof(T) * kBitsPerByte), SVE_POW2, multiplier);
3770 (masm->*cnt)(Register(1, sizeof(T) * kBitsPerByte), SVE_VL1, multiplier);
3771 (masm->*cnt)(Register(2, sizeof(T) * kBitsPerByte), SVE_VL2, multiplier);
3772 (masm->*cnt)(Register(3, sizeof(T) * kBitsPerByte), SVE_VL3, multiplier);
3773 (masm->*cnt)(Register(4, sizeof(T) * kBitsPerByte), SVE_VL4, multiplier);
3774 (masm->*cnt)(Register(5, sizeof(T) * kBitsPerByte), SVE_VL5, multiplier);
3775 (masm->*cnt)(Register(6, sizeof(T) * kBitsPerByte), SVE_VL6, multiplier);
3776 (masm->*cnt)(Register(7, sizeof(T) * kBitsPerByte), SVE_VL7, multiplier);
3777 (masm->*cnt)(Register(8, sizeof(T) * kBitsPerByte), SVE_VL8, multiplier);
3778 (masm->*cnt)(Register(9, sizeof(T) * kBitsPerByte), SVE_VL16, multiplier);
3779 (masm->*cnt)(Register(10, sizeof(T) * kBitsPerByte), SVE_VL32, multiplier);
3780 (masm->*cnt)(Register(11, sizeof(T) * kBitsPerByte), SVE_VL64, multiplier);
3781 (masm->*cnt)(Register(12, sizeof(T) * kBitsPerByte), SVE_VL128, multiplier);
3782 (masm->*cnt)(Register(13, sizeof(T) * kBitsPerByte), SVE_VL256, multiplier);
3783 (masm->*cnt)(Register(14, sizeof(T) * kBitsPerByte), 16, multiplier);
3784 (masm->*cnt)(Register(15, sizeof(T) * kBitsPerByte), 23, multiplier);
3785 (masm->*cnt)(Register(18, sizeof(T) * kBitsPerByte), 28, multiplier);
3786 (masm->*cnt)(Register(19, sizeof(T) * kBitsPerByte), SVE_MUL4, multiplier);
3787 (masm->*cnt)(Register(20, sizeof(T) * kBitsPerByte), SVE_MUL3, multiplier);
3788 (masm->*cnt)(Register(21, sizeof(T) * kBitsPerByte), SVE_ALL, multiplier);
3789}
3790
3791int FixedVL(int fixed, int length) {
3792 VIXL_ASSERT(((fixed >= 1) && (fixed <= 8)) || (fixed == 16) ||
3793 (fixed == 32) || (fixed == 64) || (fixed == 128) ||
3794 (fixed = 256));
3795 return (length >= fixed) ? fixed : 0;
3796}
3797
Martyn Capewell74f84f62019-10-30 15:30:44 +00003798static void CntHelper(Test* config,
3799 CntFn cnt,
3800 int multiplier,
Martyn Capewell579c92d2019-10-30 17:48:52 +00003801 int lane_size_in_bits,
3802 int64_t acc_value = 0,
3803 bool is_increment = true) {
Martyn Capewell74f84f62019-10-30 15:30:44 +00003804 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3805 START();
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003806 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003807 END();
3808
3809 if (CAN_RUN()) {
3810 RUN();
3811
3812 int all = core.GetSVELaneCount(lane_size_in_bits);
3813 int pow2 = 1 << HighestSetBitPosition(all);
3814 int mul4 = all - (all % 4);
3815 int mul3 = all - (all % 3);
3816
Martyn Capewell579c92d2019-10-30 17:48:52 +00003817 multiplier = is_increment ? multiplier : -multiplier;
3818
3819 ASSERT_EQUAL_64(acc_value + (multiplier * pow2), x0);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003820 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(1, all)), x1);
3821 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(2, all)), x2);
3822 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(3, all)), x3);
3823 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(4, all)), x4);
3824 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(5, all)), x5);
3825 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(6, all)), x6);
3826 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(7, all)), x7);
3827 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(8, all)), x8);
3828 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(16, all)), x9);
3829 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(32, all)), x10);
3830 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(64, all)), x11);
3831 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(128, all)), x12);
3832 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(256, all)), x13);
Martyn Capewell579c92d2019-10-30 17:48:52 +00003833 ASSERT_EQUAL_64(acc_value, x14);
3834 ASSERT_EQUAL_64(acc_value, x15);
3835 ASSERT_EQUAL_64(acc_value, x18);
3836 ASSERT_EQUAL_64(acc_value + (multiplier * mul4), x19);
3837 ASSERT_EQUAL_64(acc_value + (multiplier * mul3), x20);
3838 ASSERT_EQUAL_64(acc_value + (multiplier * all), x21);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003839 }
3840}
3841
Martyn Capewell579c92d2019-10-30 17:48:52 +00003842static void IncHelper(Test* config,
3843 CntFn cnt,
3844 int multiplier,
3845 int lane_size_in_bits,
3846 int64_t acc_value) {
3847 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3848}
3849
3850static void DecHelper(Test* config,
3851 CntFn cnt,
3852 int multiplier,
3853 int lane_size_in_bits,
3854 int64_t acc_value) {
3855 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3856}
3857
Martyn Capewell74f84f62019-10-30 15:30:44 +00003858TEST_SVE(sve_cntb) {
3859 CntHelper(config, &MacroAssembler::Cntb, 1, kBRegSize);
3860 CntHelper(config, &MacroAssembler::Cntb, 2, kBRegSize);
3861 CntHelper(config, &MacroAssembler::Cntb, 15, kBRegSize);
3862 CntHelper(config, &MacroAssembler::Cntb, 16, kBRegSize);
3863}
3864
3865TEST_SVE(sve_cnth) {
3866 CntHelper(config, &MacroAssembler::Cnth, 1, kHRegSize);
3867 CntHelper(config, &MacroAssembler::Cnth, 2, kHRegSize);
3868 CntHelper(config, &MacroAssembler::Cnth, 15, kHRegSize);
3869 CntHelper(config, &MacroAssembler::Cnth, 16, kHRegSize);
3870}
3871
3872TEST_SVE(sve_cntw) {
3873 CntHelper(config, &MacroAssembler::Cntw, 1, kWRegSize);
3874 CntHelper(config, &MacroAssembler::Cntw, 2, kWRegSize);
3875 CntHelper(config, &MacroAssembler::Cntw, 15, kWRegSize);
3876 CntHelper(config, &MacroAssembler::Cntw, 16, kWRegSize);
3877}
3878
3879TEST_SVE(sve_cntd) {
3880 CntHelper(config, &MacroAssembler::Cntd, 1, kDRegSize);
3881 CntHelper(config, &MacroAssembler::Cntd, 2, kDRegSize);
3882 CntHelper(config, &MacroAssembler::Cntd, 15, kDRegSize);
3883 CntHelper(config, &MacroAssembler::Cntd, 16, kDRegSize);
3884}
3885
Martyn Capewell579c92d2019-10-30 17:48:52 +00003886TEST_SVE(sve_decb) {
3887 DecHelper(config, &MacroAssembler::Decb, 1, kBRegSize, 42);
3888 DecHelper(config, &MacroAssembler::Decb, 2, kBRegSize, -1);
3889 DecHelper(config, &MacroAssembler::Decb, 15, kBRegSize, INT64_MIN);
3890 DecHelper(config, &MacroAssembler::Decb, 16, kBRegSize, -42);
3891}
3892
3893TEST_SVE(sve_dech) {
3894 DecHelper(config, &MacroAssembler::Dech, 1, kHRegSize, 42);
3895 DecHelper(config, &MacroAssembler::Dech, 2, kHRegSize, -1);
3896 DecHelper(config, &MacroAssembler::Dech, 15, kHRegSize, INT64_MIN);
3897 DecHelper(config, &MacroAssembler::Dech, 16, kHRegSize, -42);
3898}
3899
3900TEST_SVE(sve_decw) {
3901 DecHelper(config, &MacroAssembler::Decw, 1, kWRegSize, 42);
3902 DecHelper(config, &MacroAssembler::Decw, 2, kWRegSize, -1);
3903 DecHelper(config, &MacroAssembler::Decw, 15, kWRegSize, INT64_MIN);
3904 DecHelper(config, &MacroAssembler::Decw, 16, kWRegSize, -42);
3905}
3906
3907TEST_SVE(sve_decd) {
3908 DecHelper(config, &MacroAssembler::Decd, 1, kDRegSize, 42);
3909 DecHelper(config, &MacroAssembler::Decd, 2, kDRegSize, -1);
3910 DecHelper(config, &MacroAssembler::Decd, 15, kDRegSize, INT64_MIN);
3911 DecHelper(config, &MacroAssembler::Decd, 16, kDRegSize, -42);
3912}
3913
3914TEST_SVE(sve_incb) {
3915 IncHelper(config, &MacroAssembler::Incb, 1, kBRegSize, 42);
3916 IncHelper(config, &MacroAssembler::Incb, 2, kBRegSize, -1);
3917 IncHelper(config, &MacroAssembler::Incb, 15, kBRegSize, INT64_MAX);
3918 IncHelper(config, &MacroAssembler::Incb, 16, kBRegSize, -42);
3919}
3920
3921TEST_SVE(sve_inch) {
3922 IncHelper(config, &MacroAssembler::Inch, 1, kHRegSize, 42);
3923 IncHelper(config, &MacroAssembler::Inch, 2, kHRegSize, -1);
3924 IncHelper(config, &MacroAssembler::Inch, 15, kHRegSize, INT64_MAX);
3925 IncHelper(config, &MacroAssembler::Inch, 16, kHRegSize, -42);
3926}
3927
3928TEST_SVE(sve_incw) {
3929 IncHelper(config, &MacroAssembler::Incw, 1, kWRegSize, 42);
3930 IncHelper(config, &MacroAssembler::Incw, 2, kWRegSize, -1);
3931 IncHelper(config, &MacroAssembler::Incw, 15, kWRegSize, INT64_MAX);
3932 IncHelper(config, &MacroAssembler::Incw, 16, kWRegSize, -42);
3933}
3934
3935TEST_SVE(sve_incd) {
3936 IncHelper(config, &MacroAssembler::Incd, 1, kDRegSize, 42);
3937 IncHelper(config, &MacroAssembler::Incd, 2, kDRegSize, -1);
3938 IncHelper(config, &MacroAssembler::Incd, 15, kDRegSize, INT64_MAX);
3939 IncHelper(config, &MacroAssembler::Incd, 16, kDRegSize, -42);
3940}
3941
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003942template <typename T>
3943static T QAdd(T x, int y) {
3944 VIXL_ASSERT(y > INT_MIN);
3945 T result;
3946 T min = std::numeric_limits<T>::min();
3947 T max = std::numeric_limits<T>::max();
3948 if ((x >= 0) && (y >= 0)) {
3949 // For positive a and b, saturate at max.
3950 result = (max - x) < static_cast<T>(y) ? max : x + y;
3951 } else if ((y < 0) && ((x < 0) || (min == 0))) {
3952 // For negative b, where either a negative or T unsigned.
3953 result = (x - min) < static_cast<T>(-y) ? min : x + y;
3954 } else {
3955 result = x + y;
3956 }
3957 return result;
3958}
3959
3960template <typename T>
3961static void QIncDecHelper(Test* config,
3962 CntFn cnt,
3963 int multiplier,
3964 int lane_size_in_bits,
3965 T acc_value,
3966 bool is_increment) {
3967 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3968 START();
3969 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
3970 END();
3971
3972 if (CAN_RUN()) {
3973 RUN();
3974
3975 int all = core.GetSVELaneCount(lane_size_in_bits);
3976 int pow2 = 1 << HighestSetBitPosition(all);
3977 int mul4 = all - (all % 4);
3978 int mul3 = all - (all % 3);
3979
3980 multiplier = is_increment ? multiplier : -multiplier;
3981
3982 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
3983 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
3984 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
3985 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
3986 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
3987 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
3988 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
3989 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
3990 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
3991 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
3992 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
3993 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
3994 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
3995 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
3996 ASSERT_EQUAL_64(acc_value, x14);
3997 ASSERT_EQUAL_64(acc_value, x15);
3998 ASSERT_EQUAL_64(acc_value, x18);
3999 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
4000 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
4001 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
4002 }
4003}
4004
4005template <typename T>
4006static void QIncHelper(Test* config,
4007 CntFn cnt,
4008 int multiplier,
4009 int lane_size_in_bits,
4010 T acc_value) {
4011 QIncDecHelper<T>(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
4012}
4013
4014template <typename T>
4015static void QDecHelper(Test* config,
4016 CntFn cnt,
4017 int multiplier,
4018 int lane_size_in_bits,
4019 T acc_value) {
4020 QIncDecHelper<T>(config,
4021 cnt,
4022 multiplier,
4023 lane_size_in_bits,
4024 acc_value,
4025 false);
4026}
4027
4028TEST_SVE(sve_sqdecb) {
4029 int64_t bigneg = INT64_MIN + 42;
4030 int64_t bigpos = INT64_MAX - 42;
4031 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
4032 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 2, kBRegSize, bigneg);
4033 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
4034 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 16, kBRegSize, bigpos);
4035}
4036
4037TEST_SVE(sve_sqdech) {
4038 int64_t bigneg = INT64_MIN + 42;
4039 int64_t bigpos = INT64_MAX - 42;
4040 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
4041 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 2, kHRegSize, bigneg);
4042 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
4043 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 16, kHRegSize, bigpos);
4044}
4045
4046TEST_SVE(sve_sqdecw) {
4047 int64_t bigneg = INT64_MIN + 42;
4048 int64_t bigpos = INT64_MAX - 42;
4049 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
4050 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 2, kWRegSize, bigneg);
4051 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
4052 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 16, kWRegSize, bigpos);
4053}
4054
4055TEST_SVE(sve_sqdecd) {
4056 int64_t bigneg = INT64_MIN + 42;
4057 int64_t bigpos = INT64_MAX - 42;
4058 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
4059 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 2, kDRegSize, bigneg);
4060 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
4061 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 16, kDRegSize, bigpos);
4062}
4063
4064TEST_SVE(sve_sqincb) {
4065 int64_t bigneg = INT64_MIN + 42;
4066 int64_t bigpos = INT64_MAX - 42;
4067 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
4068 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 2, kBRegSize, bigneg);
4069 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
4070 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 16, kBRegSize, bigpos);
4071}
4072
4073TEST_SVE(sve_sqinch) {
4074 int64_t bigneg = INT64_MIN + 42;
4075 int64_t bigpos = INT64_MAX - 42;
4076 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
4077 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 2, kHRegSize, bigneg);
4078 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
4079 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 16, kHRegSize, bigpos);
4080}
4081
4082TEST_SVE(sve_sqincw) {
4083 int64_t bigneg = INT64_MIN + 42;
4084 int64_t bigpos = INT64_MAX - 42;
4085 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
4086 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 2, kWRegSize, bigneg);
4087 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
4088 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 16, kWRegSize, bigpos);
4089}
4090
4091TEST_SVE(sve_sqincd) {
4092 int64_t bigneg = INT64_MIN + 42;
4093 int64_t bigpos = INT64_MAX - 42;
4094 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
4095 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 2, kDRegSize, bigneg);
4096 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
4097 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 16, kDRegSize, bigpos);
4098}
4099
4100TEST_SVE(sve_uqdecb) {
4101 int32_t big32 = UINT32_MAX - 42;
4102 int64_t big64 = UINT64_MAX - 42;
4103 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
4104 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
4105 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
4106 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big32);
4107 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
4108 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
4109 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
4110 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big64);
4111}
4112
4113TEST_SVE(sve_uqdech) {
4114 int32_t big32 = UINT32_MAX - 42;
4115 int64_t big64 = UINT64_MAX - 42;
4116 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
4117 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
4118 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
4119 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big32);
4120 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
4121 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
4122 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
4123 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big64);
4124}
4125
4126TEST_SVE(sve_uqdecw) {
4127 int32_t big32 = UINT32_MAX - 42;
4128 int64_t big64 = UINT64_MAX - 42;
4129 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
4130 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
4131 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
4132 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big32);
4133 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
4134 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
4135 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
4136 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big64);
4137}
4138
4139TEST_SVE(sve_uqdecd) {
4140 int32_t big32 = UINT32_MAX - 42;
4141 int64_t big64 = UINT64_MAX - 42;
4142 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
4143 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
4144 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
4145 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big32);
4146 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
4147 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
4148 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
4149 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big64);
4150}
4151
4152TEST_SVE(sve_uqincb) {
4153 int32_t big32 = UINT32_MAX - 42;
4154 int64_t big64 = UINT64_MAX - 42;
4155 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
4156 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
4157 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
4158 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big32);
4159 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
4160 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
4161 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
4162 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big64);
4163}
4164
4165TEST_SVE(sve_uqinch) {
4166 int32_t big32 = UINT32_MAX - 42;
4167 int64_t big64 = UINT64_MAX - 42;
4168 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
4169 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
4170 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
4171 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big32);
4172 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
4173 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
4174 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
4175 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big64);
4176}
4177
4178TEST_SVE(sve_uqincw) {
4179 int32_t big32 = UINT32_MAX - 42;
4180 int64_t big64 = UINT64_MAX - 42;
4181 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
4182 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
4183 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
4184 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big32);
4185 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
4186 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
4187 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
4188 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big64);
4189}
4190
4191TEST_SVE(sve_uqincd) {
4192 int32_t big32 = UINT32_MAX - 42;
4193 int64_t big64 = UINT64_MAX - 42;
4194 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
4195 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
4196 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
4197 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big32);
4198 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
4199 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
4200 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
4201 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big64);
4202}
4203
4204typedef void (MacroAssembler::*QIncDecXWFn)(const Register& dst,
4205 const Register& src,
4206 int pattern,
4207 int multiplier);
4208
4209static void QIncDecXWHelper(Test* config,
4210 QIncDecXWFn cnt,
4211 int multiplier,
4212 int lane_size_in_bits,
4213 int32_t acc_value,
4214 bool is_increment) {
4215 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4216 START();
4217
4218 // Initialise accumulators.
4219 __ Mov(x0, acc_value);
4220 __ Mov(x1, acc_value);
4221 __ Mov(x2, acc_value);
4222 __ Mov(x3, acc_value);
4223 __ Mov(x4, acc_value);
4224 __ Mov(x5, acc_value);
4225 __ Mov(x6, acc_value);
4226 __ Mov(x7, acc_value);
4227 __ Mov(x8, acc_value);
4228 __ Mov(x9, acc_value);
4229 __ Mov(x10, acc_value);
4230 __ Mov(x11, acc_value);
4231 __ Mov(x12, acc_value);
4232 __ Mov(x13, acc_value);
4233 __ Mov(x14, acc_value);
4234 __ Mov(x15, acc_value);
4235 __ Mov(x18, acc_value);
4236 __ Mov(x19, acc_value);
4237 __ Mov(x20, acc_value);
4238 __ Mov(x21, acc_value);
4239
4240 (masm.*cnt)(x0, w0, SVE_POW2, multiplier);
4241 (masm.*cnt)(x1, w1, SVE_VL1, multiplier);
4242 (masm.*cnt)(x2, w2, SVE_VL2, multiplier);
4243 (masm.*cnt)(x3, w3, SVE_VL3, multiplier);
4244 (masm.*cnt)(x4, w4, SVE_VL4, multiplier);
4245 (masm.*cnt)(x5, w5, SVE_VL5, multiplier);
4246 (masm.*cnt)(x6, w6, SVE_VL6, multiplier);
4247 (masm.*cnt)(x7, w7, SVE_VL7, multiplier);
4248 (masm.*cnt)(x8, w8, SVE_VL8, multiplier);
4249 (masm.*cnt)(x9, w9, SVE_VL16, multiplier);
4250 (masm.*cnt)(x10, w10, SVE_VL32, multiplier);
4251 (masm.*cnt)(x11, w11, SVE_VL64, multiplier);
4252 (masm.*cnt)(x12, w12, SVE_VL128, multiplier);
4253 (masm.*cnt)(x13, w13, SVE_VL256, multiplier);
4254 (masm.*cnt)(x14, w14, 16, multiplier);
4255 (masm.*cnt)(x15, w15, 23, multiplier);
4256 (masm.*cnt)(x18, w18, 28, multiplier);
4257 (masm.*cnt)(x19, w19, SVE_MUL4, multiplier);
4258 (masm.*cnt)(x20, w20, SVE_MUL3, multiplier);
4259 (masm.*cnt)(x21, w21, SVE_ALL, multiplier);
4260
4261 END();
4262
4263 if (CAN_RUN()) {
4264 RUN();
4265
4266 int all = core.GetSVELaneCount(lane_size_in_bits);
4267 int pow2 = 1 << HighestSetBitPosition(all);
4268 int mul4 = all - (all % 4);
4269 int mul3 = all - (all % 3);
4270
4271 multiplier = is_increment ? multiplier : -multiplier;
4272
4273 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
4274 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
4275 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
4276 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
4277 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
4278 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
4279 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
4280 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
4281 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
4282 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
4283 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
4284 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
4285 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
4286 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
4287 ASSERT_EQUAL_64(acc_value, x14);
4288 ASSERT_EQUAL_64(acc_value, x15);
4289 ASSERT_EQUAL_64(acc_value, x18);
4290 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
4291 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
4292 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
4293 }
4294}
4295
4296static void QIncXWHelper(Test* config,
4297 QIncDecXWFn cnt,
4298 int multiplier,
4299 int lane_size_in_bits,
4300 int32_t acc_value) {
4301 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
4302}
4303
4304static void QDecXWHelper(Test* config,
4305 QIncDecXWFn cnt,
4306 int multiplier,
4307 int lane_size_in_bits,
4308 int32_t acc_value) {
4309 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
4310}
4311
4312TEST_SVE(sve_sqdecb_xw) {
4313 QDecXWHelper(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
4314 QDecXWHelper(config, &MacroAssembler::Sqdecb, 2, kBRegSize, INT32_MIN + 42);
4315 QDecXWHelper(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
4316 QDecXWHelper(config, &MacroAssembler::Sqdecb, 16, kBRegSize, INT32_MAX - 42);
4317}
4318
4319TEST_SVE(sve_sqdech_xw) {
4320 QDecXWHelper(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
4321 QDecXWHelper(config, &MacroAssembler::Sqdech, 2, kHRegSize, INT32_MIN + 42);
4322 QDecXWHelper(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
4323 QDecXWHelper(config, &MacroAssembler::Sqdech, 16, kHRegSize, INT32_MAX - 42);
4324}
4325
4326TEST_SVE(sve_sqdecw_xw) {
4327 QDecXWHelper(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
4328 QDecXWHelper(config, &MacroAssembler::Sqdecw, 2, kWRegSize, INT32_MIN + 42);
4329 QDecXWHelper(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
4330 QDecXWHelper(config, &MacroAssembler::Sqdecw, 16, kWRegSize, INT32_MAX - 42);
4331}
4332
4333TEST_SVE(sve_sqdecd_xw) {
4334 QDecXWHelper(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
4335 QDecXWHelper(config, &MacroAssembler::Sqdecd, 2, kDRegSize, INT32_MIN + 42);
4336 QDecXWHelper(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
4337 QDecXWHelper(config, &MacroAssembler::Sqdecd, 16, kDRegSize, INT32_MAX - 42);
4338}
4339
4340TEST_SVE(sve_sqincb_xw) {
4341 QIncXWHelper(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
4342 QIncXWHelper(config, &MacroAssembler::Sqincb, 2, kBRegSize, INT32_MIN + 42);
4343 QIncXWHelper(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
4344 QIncXWHelper(config, &MacroAssembler::Sqincb, 16, kBRegSize, INT32_MAX - 42);
4345}
4346
4347TEST_SVE(sve_sqinch_xw) {
4348 QIncXWHelper(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
4349 QIncXWHelper(config, &MacroAssembler::Sqinch, 2, kHRegSize, INT32_MIN + 42);
4350 QIncXWHelper(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
4351 QIncXWHelper(config, &MacroAssembler::Sqinch, 16, kHRegSize, INT32_MAX - 42);
4352}
4353
4354TEST_SVE(sve_sqincw_xw) {
4355 QIncXWHelper(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
4356 QIncXWHelper(config, &MacroAssembler::Sqincw, 2, kWRegSize, INT32_MIN + 42);
4357 QIncXWHelper(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
4358 QIncXWHelper(config, &MacroAssembler::Sqincw, 16, kWRegSize, INT32_MAX - 42);
4359}
4360
4361TEST_SVE(sve_sqincd_xw) {
4362 QIncXWHelper(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
4363 QIncXWHelper(config, &MacroAssembler::Sqincd, 2, kDRegSize, INT32_MIN + 42);
4364 QIncXWHelper(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
4365 QIncXWHelper(config, &MacroAssembler::Sqincd, 16, kDRegSize, INT32_MAX - 42);
4366}
4367
Martyn Capewell8188ddf2019-11-21 17:09:34 +00004368typedef void (MacroAssembler::*IncDecZFn)(const ZRegister& dst,
4369 int pattern,
4370 int multiplier);
4371typedef void (MacroAssembler::*AddSubFn)(const ZRegister& dst,
4372 const ZRegister& src1,
4373 const ZRegister& src2);
4374
4375static void IncDecZHelper(Test* config,
4376 IncDecZFn fn,
4377 CntFn cnt,
4378 AddSubFn addsub,
4379 int multiplier,
4380 int lane_size_in_bits) {
4381 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4382 START();
4383
4384 uint64_t acc_inputs[] = {0x7766554433221100,
4385 0xffffffffffffffff,
4386 0x0000000000000000,
4387 0xffffffff0000ffff,
4388 0x7fffffffffffffff,
4389 0x8000000000000000,
4390 0x7fffffff7fff7fff,
4391 0x8000000080008000};
4392
4393 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
4394 for (int j = 0; j < 4; j++) {
4395 InsrHelper(&masm, ZRegister(i, kDRegSize), acc_inputs);
4396 }
4397 }
4398 for (unsigned i = 0; i < 15; i++) {
4399 __ Mov(XRegister(i), 0);
4400 }
4401
4402 (masm.*fn)(z16.WithLaneSize(lane_size_in_bits), SVE_POW2, multiplier);
4403 (masm.*fn)(z17.WithLaneSize(lane_size_in_bits), SVE_VL1, multiplier);
4404 (masm.*fn)(z18.WithLaneSize(lane_size_in_bits), SVE_VL2, multiplier);
4405 (masm.*fn)(z19.WithLaneSize(lane_size_in_bits), SVE_VL3, multiplier);
4406 (masm.*fn)(z20.WithLaneSize(lane_size_in_bits), SVE_VL4, multiplier);
4407 (masm.*fn)(z21.WithLaneSize(lane_size_in_bits), SVE_VL7, multiplier);
4408 (masm.*fn)(z22.WithLaneSize(lane_size_in_bits), SVE_VL8, multiplier);
4409 (masm.*fn)(z23.WithLaneSize(lane_size_in_bits), SVE_VL16, multiplier);
4410 (masm.*fn)(z24.WithLaneSize(lane_size_in_bits), SVE_VL64, multiplier);
4411 (masm.*fn)(z25.WithLaneSize(lane_size_in_bits), SVE_VL256, multiplier);
4412 (masm.*fn)(z26.WithLaneSize(lane_size_in_bits), 16, multiplier);
4413 (masm.*fn)(z27.WithLaneSize(lane_size_in_bits), 28, multiplier);
4414 (masm.*fn)(z28.WithLaneSize(lane_size_in_bits), SVE_MUL3, multiplier);
4415 (masm.*fn)(z29.WithLaneSize(lane_size_in_bits), SVE_MUL4, multiplier);
4416 (masm.*fn)(z30.WithLaneSize(lane_size_in_bits), SVE_ALL, multiplier);
4417
4418 // Perform computation using alternative instructions.
4419 (masm.*cnt)(x0, SVE_POW2, multiplier);
4420 (masm.*cnt)(x1, SVE_VL1, multiplier);
4421 (masm.*cnt)(x2, SVE_VL2, multiplier);
4422 (masm.*cnt)(x3, SVE_VL3, multiplier);
4423 (masm.*cnt)(x4, SVE_VL4, multiplier);
4424 (masm.*cnt)(x5, SVE_VL7, multiplier);
4425 (masm.*cnt)(x6, SVE_VL8, multiplier);
4426 (masm.*cnt)(x7, SVE_VL16, multiplier);
4427 (masm.*cnt)(x8, SVE_VL64, multiplier);
4428 (masm.*cnt)(x9, SVE_VL256, multiplier);
4429 (masm.*cnt)(x10, 16, multiplier);
4430 (masm.*cnt)(x11, 28, multiplier);
4431 (masm.*cnt)(x12, SVE_MUL3, multiplier);
4432 (masm.*cnt)(x13, SVE_MUL4, multiplier);
4433 (masm.*cnt)(x14, SVE_ALL, multiplier);
4434
4435 ZRegister zscratch = z15.WithLaneSize(lane_size_in_bits);
4436 for (unsigned i = 0; i < 15; i++) {
4437 ZRegister zsrcdst = ZRegister(i, lane_size_in_bits);
4438 Register x = Register(i, kXRegSize);
4439 __ Dup(zscratch, x);
4440 (masm.*addsub)(zsrcdst, zsrcdst, zscratch);
4441 }
4442
4443 END();
4444
4445 if (CAN_RUN()) {
4446 RUN();
4447
4448 ASSERT_EQUAL_SVE(z0, z16);
4449 ASSERT_EQUAL_SVE(z1, z17);
4450 ASSERT_EQUAL_SVE(z2, z18);
4451 ASSERT_EQUAL_SVE(z3, z19);
4452 ASSERT_EQUAL_SVE(z4, z20);
4453 ASSERT_EQUAL_SVE(z5, z21);
4454 ASSERT_EQUAL_SVE(z6, z22);
4455 ASSERT_EQUAL_SVE(z7, z23);
4456 ASSERT_EQUAL_SVE(z8, z24);
4457 ASSERT_EQUAL_SVE(z9, z25);
4458 ASSERT_EQUAL_SVE(z10, z26);
4459 ASSERT_EQUAL_SVE(z11, z27);
4460 ASSERT_EQUAL_SVE(z12, z28);
4461 ASSERT_EQUAL_SVE(z13, z29);
4462 ASSERT_EQUAL_SVE(z14, z30);
4463 }
4464}
4465
4466TEST_SVE(sve_inc_dec_vec) {
4467 CntFn cnth = &MacroAssembler::Cnth;
4468 CntFn cntw = &MacroAssembler::Cntw;
4469 CntFn cntd = &MacroAssembler::Cntd;
4470 AddSubFn sub = &MacroAssembler::Sub;
4471 AddSubFn add = &MacroAssembler::Add;
4472 for (int mult = 1; mult <= 16; mult += 5) {
4473 IncDecZHelper(config, &MacroAssembler::Dech, cnth, sub, mult, kHRegSize);
4474 IncDecZHelper(config, &MacroAssembler::Decw, cntw, sub, mult, kSRegSize);
4475 IncDecZHelper(config, &MacroAssembler::Decd, cntd, sub, mult, kDRegSize);
4476 IncDecZHelper(config, &MacroAssembler::Inch, cnth, add, mult, kHRegSize);
4477 IncDecZHelper(config, &MacroAssembler::Incw, cntw, add, mult, kSRegSize);
4478 IncDecZHelper(config, &MacroAssembler::Incd, cntd, add, mult, kDRegSize);
4479 }
4480}
4481
4482TEST_SVE(sve_unsigned_sat_inc_dec_vec) {
4483 CntFn cnth = &MacroAssembler::Cnth;
4484 CntFn cntw = &MacroAssembler::Cntw;
4485 CntFn cntd = &MacroAssembler::Cntd;
4486 AddSubFn sub = &MacroAssembler::Uqsub;
4487 AddSubFn add = &MacroAssembler::Uqadd;
4488 for (int mult = 1; mult <= 16; mult += 5) {
4489 IncDecZHelper(config, &MacroAssembler::Uqdech, cnth, sub, mult, kHRegSize);
4490 IncDecZHelper(config, &MacroAssembler::Uqdecw, cntw, sub, mult, kSRegSize);
4491 IncDecZHelper(config, &MacroAssembler::Uqdecd, cntd, sub, mult, kDRegSize);
4492 IncDecZHelper(config, &MacroAssembler::Uqinch, cnth, add, mult, kHRegSize);
4493 IncDecZHelper(config, &MacroAssembler::Uqincw, cntw, add, mult, kSRegSize);
4494 IncDecZHelper(config, &MacroAssembler::Uqincd, cntd, add, mult, kDRegSize);
4495 }
4496}
4497
4498TEST_SVE(sve_signed_sat_inc_dec_vec) {
4499 CntFn cnth = &MacroAssembler::Cnth;
4500 CntFn cntw = &MacroAssembler::Cntw;
4501 CntFn cntd = &MacroAssembler::Cntd;
4502 AddSubFn sub = &MacroAssembler::Sqsub;
4503 AddSubFn add = &MacroAssembler::Sqadd;
4504 for (int mult = 1; mult <= 16; mult += 5) {
4505 IncDecZHelper(config, &MacroAssembler::Sqdech, cnth, sub, mult, kHRegSize);
4506 IncDecZHelper(config, &MacroAssembler::Sqdecw, cntw, sub, mult, kSRegSize);
4507 IncDecZHelper(config, &MacroAssembler::Sqdecd, cntd, sub, mult, kDRegSize);
4508 IncDecZHelper(config, &MacroAssembler::Sqinch, cnth, add, mult, kHRegSize);
4509 IncDecZHelper(config, &MacroAssembler::Sqincw, cntw, add, mult, kSRegSize);
4510 IncDecZHelper(config, &MacroAssembler::Sqincd, cntd, add, mult, kDRegSize);
4511 }
4512}
4513
TatWai Chong7a0d3672019-10-23 17:35:18 -07004514typedef void (MacroAssembler::*ArithPredicatedFn)(const ZRegister& zd,
4515 const PRegisterM& pg,
4516 const ZRegister& zn,
4517 const ZRegister& zm);
TatWai Chong13634762019-07-16 16:20:45 -07004518
4519template <typename Td, typename Tg, typename Tn>
4520static void IntBinArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07004521 ArithPredicatedFn macro,
TatWai Chong13634762019-07-16 16:20:45 -07004522 unsigned lane_size_in_bits,
4523 const Tg& pg_inputs,
4524 const Tn& zn_inputs,
4525 const Tn& zm_inputs,
4526 const Td& zd_expected) {
4527 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4528 START();
4529
4530 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
4531 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
4532 InsrHelper(&masm, src_a, zn_inputs);
4533 InsrHelper(&masm, src_b, zm_inputs);
4534
4535 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
4536
4537 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
4538 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
4539 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
4540
4541 // `instr` zd(dst), zd(src_a), zn(src_b)
4542 __ Mov(zd_1, src_a);
4543 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
4544
4545 // `instr` zd(dst), zm(src_a), zd(src_b)
4546 // Based on whether zd and zm registers are aliased, the macro of instructions
4547 // (`Instr`) swaps the order of operands if it has the commutative property,
4548 // otherwise, transfer to the reversed `Instr`, such as subr and divr.
4549 __ Mov(zd_2, src_b);
4550 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
4551
4552 // `instr` zd(dst), zm(src_a), zn(src_b)
4553 // The macro of instructions (`Instr`) automatically selects between `instr`
4554 // and movprfx + `instr` based on whether zd and zn registers are aliased.
TatWai Chongd316c5e2019-10-16 12:22:10 -07004555 // A generated movprfx instruction is predicated that using the same
TatWai Chong13634762019-07-16 16:20:45 -07004556 // governing predicate register. In order to keep the result constant,
4557 // initialize the destination register first.
4558 __ Mov(zd_3, src_a);
4559 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
4560
4561 END();
4562
4563 if (CAN_RUN()) {
4564 RUN();
4565 ASSERT_EQUAL_SVE(zd_expected, zd_1);
4566
4567 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
4568 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
4569 if (!core.HasSVELane(zd_1, lane)) break;
TatWai Chongd316c5e2019-10-16 12:22:10 -07004570 if ((pg_inputs[i] & 1) != 0) {
TatWai Chong13634762019-07-16 16:20:45 -07004571 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_1, lane);
4572 } else {
4573 ASSERT_EQUAL_SVE_LANE(zn_inputs[i], zd_1, lane);
4574 }
4575 }
4576
4577 ASSERT_EQUAL_SVE(zd_expected, zd_3);
4578 }
4579}
4580
4581TEST_SVE(sve_binary_arithmetic_predicated_add) {
4582 // clang-format off
4583 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
4584
4585 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
4586
4587 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
4588
4589 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
4590
4591 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
4592 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
4593
4594 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
4595 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
4596
4597 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
4598 0x1010101010101010, 0x8181818181818181,
4599 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
4600 0x0101010101010101, 0x7f7f7f7fffffffff};
4601
4602 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
4603 0x1010101010101010, 0x0000000000000000,
4604 0x8181818181818181, 0x8080808080808080,
4605 0xffffffffffffffff, 0xffffffffffffffff};
4606
4607 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4608 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4609 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4610 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4611
4612 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
4613
4614 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
4615 0x8180, 0x8f8f, 0x0101, 0x7f7e};
4616
4617 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
4618 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
4619
4620 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
4621 0x2020202020202020, 0x8181818181818181,
4622 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
4623 0x0101010101010100, 0x7f7f7f7ffffffffe};
4624
TatWai Chong7a0d3672019-10-23 17:35:18 -07004625 ArithPredicatedFn fn = &MacroAssembler::Add;
TatWai Chong13634762019-07-16 16:20:45 -07004626 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
4627 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
4628 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
4629 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
4630
4631 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
4632
4633 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
4634 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
4635
4636 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
4637 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
4638
4639 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
4640 0x0000000000000000, 0x8181818181818181,
4641 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
4642 0x0101010101010102, 0x7f7f7f8000000000};
4643
4644 fn = &MacroAssembler::Sub;
4645 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
4646 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
4647 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
4648 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
4649 // clang-format on
4650}
4651
4652TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
4653 // clang-format off
4654 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
4655
4656 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
4657
4658 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
4659 0xff00, 0xba98, 0x5555, 0x4567};
4660
4661 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
4662 0xfe00, 0xabab, 0xcdcd, 0x5678};
4663
4664 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
4665 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
4666
4667 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
4668 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
4669
4670 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
4671 0x5555555555555555, 0x0000000001234567};
4672
4673 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
4674 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4675
4676 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4677 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4678 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4679 int pg_d[] = {1, 0, 1, 1};
4680
4681 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
4682
4683 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
4684 0xff00, 0xba98, 0x5555, 0x5678};
4685
4686 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
4687 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
4688
4689 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4690 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4691
TatWai Chong7a0d3672019-10-23 17:35:18 -07004692 ArithPredicatedFn fn = &MacroAssembler::Umax;
TatWai Chong13634762019-07-16 16:20:45 -07004693 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
4694 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
4695 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
4696 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
4697
4698 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
4699
4700 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
4701 0xfe00, 0xabab, 0x5555, 0x4567};
4702
4703 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
4704 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
4705
4706 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
4707 0x5555555555555555, 0x0000000001234567};
4708 fn = &MacroAssembler::Umin;
4709 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
4710 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
4711 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
4712 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
4713
4714 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
4715
4716 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
4717 0x0100, 0x0eed, 0x5555, 0x1111};
4718
4719 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
4720 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
4721
4722 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4723 0x7878787878787878, 0x0000000011111111};
4724
4725 fn = &MacroAssembler::Uabd;
4726 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
4727 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
4728 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
4729 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
4730 // clang-format on
4731}
4732
4733TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
4734 // clang-format off
4735 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
4736
4737 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
4738
4739 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
4740 INT16_MIN, INT16_MAX, INT16_MAX, 1};
4741
4742 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
4743 INT16_MAX, INT16_MAX - 1, -1, 0};
4744
4745 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
4746 INT32_MIN, INT32_MAX, INT32_MAX, 1};
4747
4748 int zm_s[] = {-1, 0, -1, -INT32_MAX,
4749 INT32_MAX, INT32_MAX - 1, -1, 0};
4750
4751 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4752 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4753
4754 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
4755 INT64_MAX, INT64_MAX - 1, -1, 0};
4756
4757 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4758 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4759 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4760 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4761
4762 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
4763
4764 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
4765 INT16_MAX, INT16_MAX, INT16_MAX, 1};
4766
4767 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
4768 INT32_MAX, INT32_MAX, INT32_MAX, 1};
4769
4770 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
4771 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4772
TatWai Chong7a0d3672019-10-23 17:35:18 -07004773 ArithPredicatedFn fn = &MacroAssembler::Smax;
TatWai Chong13634762019-07-16 16:20:45 -07004774 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
4775 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
4776 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
4777 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
4778
4779 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
4780
4781 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
4782 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
4783
4784 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
4785 INT32_MIN, INT32_MAX, -1, 0};
4786
4787 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4788 INT64_MIN, INT64_MAX - 1, -1, 0};
4789
4790 fn = &MacroAssembler::Smin;
4791 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
4792 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
4793 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
4794 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
4795
4796 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
4797
4798 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
4799
4800 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
4801 0xffffffff, 0x7fffffff, 0x80000000, 1};
4802
4803 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
4804 0x8000000000000000, 1, 0x8000000000000000, 1};
4805
4806 fn = &MacroAssembler::Sabd;
4807 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
4808 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
4809 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
4810 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
4811 // clang-format on
4812}
4813
4814TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
4815 // clang-format off
4816 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4817
4818 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4819
4820 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
4821 0x8000, 0xff00, 0x5555, 0xaaaa};
4822
4823 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
4824 0x5555, 0xaaaa, 0x0001, 0x1234};
4825
4826 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4827 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
4828
4829 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4830 0x12345678, 0x22223333, 0x55556666, 0x77778888};
4831
4832 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
4833 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
4834
4835 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
4836 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
4837
4838 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4839 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4840 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4841 int pg_d[] = {1, 1, 0, 1};
4842
4843 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
4844
4845 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
4846 0x8000, 0xff00, 0x5555, 0x9e88};
4847
4848 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
4849 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
4850
4851 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
4852 0xffffffffffffffff, 0x38e38e38e38e38e4};
4853
TatWai Chong7a0d3672019-10-23 17:35:18 -07004854 ArithPredicatedFn fn = &MacroAssembler::Mul;
TatWai Chong13634762019-07-16 16:20:45 -07004855 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
4856 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
4857 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
4858 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
4859
4860 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
4861
4862 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
4863 0x2aaa, 0xff00, 0x0000, 0x0c22};
4864
4865 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
4866 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
4867
4868 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
4869 0xffffffffffffffff, 0x71c71c71c71c71c6};
4870
4871 fn = &MacroAssembler::Umulh;
4872 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
4873 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
4874 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
4875 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
4876 // clang-format on
4877}
4878
4879TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
4880 // clang-format off
4881 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
4882
4883 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
4884
4885 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
4886
4887 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
4888
4889 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
4890
4891 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
4892
4893 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
4894
4895 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
4896
4897 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4898 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4899 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4900 int pg_d[] = {1, 1, 0, 1};
4901
4902 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
4903
4904 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
4905
4906 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
4907
4908 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
4909
TatWai Chong7a0d3672019-10-23 17:35:18 -07004910 ArithPredicatedFn fn = &MacroAssembler::Smulh;
TatWai Chong13634762019-07-16 16:20:45 -07004911 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
4912 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
4913 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4914 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4915 // clang-format on
4916}
4917
4918TEST_SVE(sve_binary_arithmetic_predicated_logical) {
4919 // clang-format off
4920 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4921 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4922
4923 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
4924 0x8000, 0xffff, 0x5555, 0xaaaa};
4925 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
4926 0x5555, 0xaaaa, 0x0000, 0x0800};
4927
4928 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
4929 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
4930
4931 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
4932 0x0001200880ff55aa, 0x0022446688aaccee};
4933 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
4934 0x7fcd80ff55aa0008, 0x1133557799bbddff};
4935
4936 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4937 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4938 int pg_s[] = {1, 1, 1, 0};
4939 int pg_d[] = {1, 1, 0, 1};
4940
4941 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
4942
4943 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
4944 0x0000, 0xffff, 0x0000, 0x0800};
4945
4946 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
4947
4948 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
4949 0x0001200880ff55aa, 0x0022446688aaccee};
4950
TatWai Chong7a0d3672019-10-23 17:35:18 -07004951 ArithPredicatedFn fn = &MacroAssembler::And;
TatWai Chong13634762019-07-16 16:20:45 -07004952 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
4953 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
4954 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
4955 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
4956
4957 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
4958
4959 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
4960 0x8000, 0xffff, 0x5555, 0xa2aa};
4961
4962 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
4963
4964 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
4965 0x0001200880ff55aa, 0x0000000000000000};
4966
4967 fn = &MacroAssembler::Bic;
4968 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
4969 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
4970 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
4971 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
4972
4973 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
4974
4975 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
4976 0xd555, 0xffff, 0x5555, 0xa2aa};
4977
4978 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
4979
4980 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
4981 0x0001200880ff55aa, 0x1111111111111111};
4982
4983 fn = &MacroAssembler::Eor;
4984 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
4985 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
4986 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
4987 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
4988
4989 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
4990
4991 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
4992 0xd555, 0xffff, 0x5555, 0xaaaa};
4993
4994 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
4995
4996 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
4997 0x0001200880ff55aa, 0x1133557799bbddff};
4998
4999 fn = &MacroAssembler::Orr;
5000 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
5001 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
5002 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
5003 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
5004 // clang-format on
5005}
5006
5007TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
5008 // clang-format off
5009 int zn_s[] = {0, 1, -1, 2468,
5010 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
5011 -11111111, 87654321, 0, 0};
5012
5013 int zm_s[] = {1, -1, 1, 1234,
5014 -1, INT32_MIN, 1, -1,
5015 22222222, 80000000, -1, 0};
5016
5017 int64_t zn_d[] = {0, 1, -1, 2468,
5018 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
5019 -11111111, 87654321, 0, 0};
5020
5021 int64_t zm_d[] = {1, -1, 1, 1234,
5022 -1, INT64_MIN, 1, -1,
5023 22222222, 80000000, -1, 0};
5024
5025 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
5026 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
5027
5028 int exp_s[] = {0, 1, -1, 2,
5029 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
5030 0, 1, 0, 0};
5031
5032 int64_t exp_d[] = {0, -1, -1, 2,
5033 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
5034 0, 1, 0, 0};
5035
TatWai Chong7a0d3672019-10-23 17:35:18 -07005036 ArithPredicatedFn fn = &MacroAssembler::Sdiv;
TatWai Chong13634762019-07-16 16:20:45 -07005037 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
5038 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
5039 // clang-format on
5040}
5041
5042TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
5043 // clang-format off
5044 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
5045 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
5046
5047 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
5048 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
5049
5050 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
5051 0xffffffffffffffff, 0x8000000000000000,
5052 0xffffffffffffffff, 0x8000000000000000,
5053 0xffffffffffffffff, 0xf0000000f0000000};
5054
5055 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
5056 0x8000000000000000, 0x0000000000000002,
5057 0x8888888888888888, 0x0000000000000001,
5058 0x0000000080000000, 0x00000000f0000000};
5059
5060 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
5061 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
5062
5063 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
5064 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
5065
5066 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
5067 0x0000000000000001, 0x4000000000000000,
5068 0x0000000000000001, 0x8000000000000000,
5069 0xffffffffffffffff, 0x0000000100000001};
5070
TatWai Chong7a0d3672019-10-23 17:35:18 -07005071 ArithPredicatedFn fn = &MacroAssembler::Udiv;
TatWai Chong13634762019-07-16 16:20:45 -07005072 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
5073 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
5074 // clang-format on
5075}
5076
TatWai Chong7a0d3672019-10-23 17:35:18 -07005077typedef void (MacroAssembler::*ArithFn)(const ZRegister& zd,
5078 const ZRegister& zn,
5079 const ZRegister& zm);
TatWai Chong845246b2019-08-08 00:01:58 -07005080
5081template <typename T>
5082static void IntArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07005083 ArithFn macro,
TatWai Chong845246b2019-08-08 00:01:58 -07005084 unsigned lane_size_in_bits,
5085 const T& zn_inputs,
5086 const T& zm_inputs,
5087 const T& zd_expected) {
5088 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5089 START();
5090
5091 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
5092 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
5093 InsrHelper(&masm, zn, zn_inputs);
5094 InsrHelper(&masm, zm, zm_inputs);
5095
5096 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
5097 (masm.*macro)(zd, zn, zm);
5098
5099 END();
5100
5101 if (CAN_RUN()) {
5102 RUN();
5103 ASSERT_EQUAL_SVE(zd_expected, zd);
5104 }
5105}
5106
5107TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
5108 // clang-format off
TatWai Chong6995bfd2019-09-26 10:48:05 +01005109 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
5110 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
5111 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
5112 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
TatWai Chong845246b2019-08-08 00:01:58 -07005113 0x1000000010001010, 0xf0000000f000f0f0};
5114
TatWai Chong7a0d3672019-10-23 17:35:18 -07005115 ArithFn fn = &MacroAssembler::Add;
TatWai Chong845246b2019-08-08 00:01:58 -07005116
5117 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
5118 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
5119 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
5120 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
5121 0x2000000020002020, 0xe0000001e001e1e0};
5122
TatWai Chong6995bfd2019-09-26 10:48:05 +01005123 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
5124 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
5125 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
5126 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07005127
5128 fn = &MacroAssembler::Sqadd;
5129
5130 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
5131 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
5132 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
5133 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
5134 0x2000000020002020, 0xe0000001e001e1e0};
5135
TatWai Chong6995bfd2019-09-26 10:48:05 +01005136 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
5137 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
5138 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
5139 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07005140
5141 fn = &MacroAssembler::Uqadd;
5142
5143 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
5144 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
5145 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
5146 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
5147 0x2000000020002020, 0xffffffffffffffff};
5148
TatWai Chong6995bfd2019-09-26 10:48:05 +01005149 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
5150 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
5151 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
5152 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07005153 // clang-format on
5154}
5155
5156TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
5157 // clang-format off
5158
5159 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
5160 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
5161
5162 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
5163 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
5164
5165 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
5166 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
5167
5168 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
5169 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
5170 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
5171 0xf0000000f000f0f0, 0x5555555555555555};
5172
TatWai Chong7a0d3672019-10-23 17:35:18 -07005173 ArithFn fn = &MacroAssembler::Sub;
TatWai Chong845246b2019-08-08 00:01:58 -07005174
5175 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
5176 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
5177 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
5178 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
5179 0x8eeeeeed8eed8d8e, 0x5555555555555555};
5180
5181 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
5182 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
5183 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
5184 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
5185
5186 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
5187 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
5188 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
5189 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
5190 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
5191
5192 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
5193 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
5194 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
5195 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
5196
5197 fn = &MacroAssembler::Sqsub;
5198
5199 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
5200 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
5201 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
5202 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
5203 0x7fffffffffffffff, 0x8000000000000000};
5204
5205 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
5206 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
5207 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
5208 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
5209
5210 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
5211 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
5212 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
5213 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
5214 0x8000000000000000, 0x7fffffffffffffff};
5215
5216 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
5217 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
5218 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
5219 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
5220
5221 fn = &MacroAssembler::Uqsub;
5222
5223 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
5224 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
5225 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
5226 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
5227 0x0000000000000000, 0x5555555555555555};
5228
5229 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
5230 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
5231 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
5232 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
5233
5234 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
5235 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
5236 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
5237 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
5238 0x7111111271127272, 0x0000000000000000};
5239
5240 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
5241 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
5242 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
5243 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
5244 // clang-format on
5245}
5246
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01005247TEST_SVE(sve_rdvl) {
5248 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5249 START();
5250
5251 // Encodable multipliers.
5252 __ Rdvl(x0, 0);
5253 __ Rdvl(x1, 1);
5254 __ Rdvl(x2, 2);
5255 __ Rdvl(x3, 31);
5256 __ Rdvl(x4, -1);
5257 __ Rdvl(x5, -2);
5258 __ Rdvl(x6, -32);
5259
5260 // For unencodable multipliers, the MacroAssembler uses a sequence of
5261 // instructions.
5262 __ Rdvl(x10, 32);
5263 __ Rdvl(x11, -33);
5264 __ Rdvl(x12, 42);
5265 __ Rdvl(x13, -42);
5266
5267 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
5268 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
5269 // occurs in the macro.
5270 __ Rdvl(x14, 0x007fffffffffffff);
5271 __ Rdvl(x15, -0x0080000000000000);
5272
5273 END();
5274
5275 if (CAN_RUN()) {
5276 RUN();
5277
5278 uint64_t vl = config->sve_vl_in_bytes();
5279
5280 ASSERT_EQUAL_64(vl * 0, x0);
5281 ASSERT_EQUAL_64(vl * 1, x1);
5282 ASSERT_EQUAL_64(vl * 2, x2);
5283 ASSERT_EQUAL_64(vl * 31, x3);
5284 ASSERT_EQUAL_64(vl * -1, x4);
5285 ASSERT_EQUAL_64(vl * -2, x5);
5286 ASSERT_EQUAL_64(vl * -32, x6);
5287
5288 ASSERT_EQUAL_64(vl * 32, x10);
5289 ASSERT_EQUAL_64(vl * -33, x11);
5290 ASSERT_EQUAL_64(vl * 42, x12);
5291 ASSERT_EQUAL_64(vl * -42, x13);
5292
5293 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
5294 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
5295 }
5296}
5297
5298TEST_SVE(sve_rdpl) {
5299 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5300 START();
5301
5302 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
5303 // Addpl(xd, xzr, ...).
5304
5305 // Encodable multipliers (as `addvl`).
5306 __ Rdpl(x0, 0);
5307 __ Rdpl(x1, 8);
5308 __ Rdpl(x2, 248);
5309 __ Rdpl(x3, -8);
5310 __ Rdpl(x4, -256);
5311
5312 // Encodable multipliers (as `movz` + `addpl`).
5313 __ Rdpl(x7, 31);
Jacob Bramley889984c2019-10-28 17:28:48 +00005314 __ Rdpl(x8, -31);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01005315
5316 // For unencodable multipliers, the MacroAssembler uses a sequence of
5317 // instructions.
5318 __ Rdpl(x10, 42);
5319 __ Rdpl(x11, -42);
5320
5321 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
5322 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
5323 // occurs in the macro.
5324 __ Rdpl(x12, 0x007fffffffffffff);
5325 __ Rdpl(x13, -0x0080000000000000);
5326
5327 END();
5328
5329 if (CAN_RUN()) {
5330 RUN();
5331
5332 uint64_t vl = config->sve_vl_in_bytes();
5333 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5334 uint64_t pl = vl / kZRegBitsPerPRegBit;
5335
5336 ASSERT_EQUAL_64(pl * 0, x0);
5337 ASSERT_EQUAL_64(pl * 8, x1);
5338 ASSERT_EQUAL_64(pl * 248, x2);
5339 ASSERT_EQUAL_64(pl * -8, x3);
5340 ASSERT_EQUAL_64(pl * -256, x4);
5341
5342 ASSERT_EQUAL_64(pl * 31, x7);
Jacob Bramley889984c2019-10-28 17:28:48 +00005343 ASSERT_EQUAL_64(pl * -31, x8);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01005344
5345 ASSERT_EQUAL_64(pl * 42, x10);
5346 ASSERT_EQUAL_64(pl * -42, x11);
5347
5348 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
5349 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
5350 }
5351}
5352
5353TEST_SVE(sve_addvl) {
5354 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5355 START();
5356
5357 uint64_t base = 0x1234567800000000;
5358 __ Mov(x30, base);
5359
5360 // Encodable multipliers.
5361 __ Addvl(x0, x30, 0);
5362 __ Addvl(x1, x30, 1);
5363 __ Addvl(x2, x30, 31);
5364 __ Addvl(x3, x30, -1);
5365 __ Addvl(x4, x30, -32);
5366
5367 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
5368 __ Addvl(x5, x30, 32);
5369 __ Addvl(x6, x30, -33);
5370
5371 // Test the limits of the multiplier supported by the `Rdvl` macro.
5372 __ Addvl(x7, x30, 0x007fffffffffffff);
5373 __ Addvl(x8, x30, -0x0080000000000000);
5374
5375 // Check that xzr behaves correctly.
5376 __ Addvl(x9, xzr, 8);
5377 __ Addvl(x10, xzr, 42);
5378
5379 // Check that sp behaves correctly with encodable and unencodable multipliers.
5380 __ Addvl(sp, sp, -5);
5381 __ Addvl(sp, sp, -37);
5382 __ Addvl(x11, sp, -2);
5383 __ Addvl(sp, x11, 2);
5384 __ Addvl(x12, sp, -42);
5385
5386 // Restore the value of sp.
5387 __ Addvl(sp, x11, 39);
5388 __ Addvl(sp, sp, 5);
5389
5390 // Adjust x11 and x12 to make the test sp-agnostic.
5391 __ Sub(x11, sp, x11);
5392 __ Sub(x12, sp, x12);
5393
5394 // Check cases where xd.Is(xn). This stresses scratch register allocation.
5395 __ Mov(x20, x30);
5396 __ Mov(x21, x30);
5397 __ Mov(x22, x30);
5398 __ Addvl(x20, x20, 4);
5399 __ Addvl(x21, x21, 42);
5400 __ Addvl(x22, x22, -0x0080000000000000);
5401
5402 END();
5403
5404 if (CAN_RUN()) {
5405 RUN();
5406
5407 uint64_t vl = config->sve_vl_in_bytes();
5408
5409 ASSERT_EQUAL_64(base + (vl * 0), x0);
5410 ASSERT_EQUAL_64(base + (vl * 1), x1);
5411 ASSERT_EQUAL_64(base + (vl * 31), x2);
5412 ASSERT_EQUAL_64(base + (vl * -1), x3);
5413 ASSERT_EQUAL_64(base + (vl * -32), x4);
5414
5415 ASSERT_EQUAL_64(base + (vl * 32), x5);
5416 ASSERT_EQUAL_64(base + (vl * -33), x6);
5417
5418 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
5419 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
5420
5421 ASSERT_EQUAL_64(vl * 8, x9);
5422 ASSERT_EQUAL_64(vl * 42, x10);
5423
5424 ASSERT_EQUAL_64(vl * 44, x11);
5425 ASSERT_EQUAL_64(vl * 84, x12);
5426
5427 ASSERT_EQUAL_64(base + (vl * 4), x20);
5428 ASSERT_EQUAL_64(base + (vl * 42), x21);
5429 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
5430
5431 ASSERT_EQUAL_64(base, x30);
5432 }
5433}
5434
5435TEST_SVE(sve_addpl) {
5436 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5437 START();
5438
5439 uint64_t base = 0x1234567800000000;
5440 __ Mov(x30, base);
5441
5442 // Encodable multipliers.
5443 __ Addpl(x0, x30, 0);
5444 __ Addpl(x1, x30, 1);
5445 __ Addpl(x2, x30, 31);
5446 __ Addpl(x3, x30, -1);
5447 __ Addpl(x4, x30, -32);
5448
5449 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
5450 // it falls back to `Rdvl` and `Add`.
5451 __ Addpl(x5, x30, 32);
5452 __ Addpl(x6, x30, -33);
5453
5454 // Test the limits of the multiplier supported by the `Rdvl` macro.
5455 __ Addpl(x7, x30, 0x007fffffffffffff);
5456 __ Addpl(x8, x30, -0x0080000000000000);
5457
5458 // Check that xzr behaves correctly.
5459 __ Addpl(x9, xzr, 8);
5460 __ Addpl(x10, xzr, 42);
5461
5462 // Check that sp behaves correctly with encodable and unencodable multipliers.
5463 __ Addpl(sp, sp, -5);
5464 __ Addpl(sp, sp, -37);
5465 __ Addpl(x11, sp, -2);
5466 __ Addpl(sp, x11, 2);
5467 __ Addpl(x12, sp, -42);
5468
5469 // Restore the value of sp.
5470 __ Addpl(sp, x11, 39);
5471 __ Addpl(sp, sp, 5);
5472
5473 // Adjust x11 and x12 to make the test sp-agnostic.
5474 __ Sub(x11, sp, x11);
5475 __ Sub(x12, sp, x12);
5476
5477 // Check cases where xd.Is(xn). This stresses scratch register allocation.
5478 __ Mov(x20, x30);
5479 __ Mov(x21, x30);
5480 __ Mov(x22, x30);
5481 __ Addpl(x20, x20, 4);
5482 __ Addpl(x21, x21, 42);
5483 __ Addpl(x22, x22, -0x0080000000000000);
5484
5485 END();
5486
5487 if (CAN_RUN()) {
5488 RUN();
5489
5490 uint64_t vl = config->sve_vl_in_bytes();
5491 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5492 uint64_t pl = vl / kZRegBitsPerPRegBit;
5493
5494 ASSERT_EQUAL_64(base + (pl * 0), x0);
5495 ASSERT_EQUAL_64(base + (pl * 1), x1);
5496 ASSERT_EQUAL_64(base + (pl * 31), x2);
5497 ASSERT_EQUAL_64(base + (pl * -1), x3);
5498 ASSERT_EQUAL_64(base + (pl * -32), x4);
5499
5500 ASSERT_EQUAL_64(base + (pl * 32), x5);
5501 ASSERT_EQUAL_64(base + (pl * -33), x6);
5502
5503 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
5504 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
5505
5506 ASSERT_EQUAL_64(pl * 8, x9);
5507 ASSERT_EQUAL_64(pl * 42, x10);
5508
5509 ASSERT_EQUAL_64(pl * 44, x11);
5510 ASSERT_EQUAL_64(pl * 84, x12);
5511
5512 ASSERT_EQUAL_64(base + (pl * 4), x20);
5513 ASSERT_EQUAL_64(base + (pl * 42), x21);
5514 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
5515
5516 ASSERT_EQUAL_64(base, x30);
5517 }
5518}
5519
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005520TEST_SVE(sve_calculate_sve_address) {
5521 // Shadow the `MacroAssembler` type so that the test macros work without
5522 // modification.
5523 typedef CalculateSVEAddressMacroAssembler MacroAssembler;
5524
Jacob Bramley1314c462019-08-08 10:54:16 +01005525 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005526 START(); // NOLINT(clang-diagnostic-local-type-template-args)
Jacob Bramley1314c462019-08-08 10:54:16 +01005527
5528 uint64_t base = 0x1234567800000000;
5529 __ Mov(x28, base);
5530 __ Mov(x29, 48);
5531 __ Mov(x30, -48);
5532
5533 // Simple scalar (or equivalent) cases.
5534
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005535 __ CalculateSVEAddress(x0, SVEMemOperand(x28));
5536 __ CalculateSVEAddress(x1, SVEMemOperand(x28, 0));
5537 __ CalculateSVEAddress(x2, SVEMemOperand(x28, 0, SVE_MUL_VL));
5538 __ CalculateSVEAddress(x3, SVEMemOperand(x28, 0, SVE_MUL_VL), 3);
5539 __ CalculateSVEAddress(x4, SVEMemOperand(x28, xzr));
5540 __ CalculateSVEAddress(x5, SVEMemOperand(x28, xzr, LSL, 42));
Jacob Bramley1314c462019-08-08 10:54:16 +01005541
5542 // scalar-plus-immediate
5543
5544 // Unscaled immediates, handled with `Add`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005545 __ CalculateSVEAddress(x6, SVEMemOperand(x28, 42));
5546 __ CalculateSVEAddress(x7, SVEMemOperand(x28, -42));
Jacob Bramley1314c462019-08-08 10:54:16 +01005547 // Scaled immediates, handled with `Addvl` or `Addpl`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005548 __ CalculateSVEAddress(x8, SVEMemOperand(x28, 31, SVE_MUL_VL), 0);
5549 __ CalculateSVEAddress(x9, SVEMemOperand(x28, -32, SVE_MUL_VL), 0);
Jacob Bramley1314c462019-08-08 10:54:16 +01005550 // Out of `addvl` or `addpl` range.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005551 __ CalculateSVEAddress(x10, SVEMemOperand(x28, 42, SVE_MUL_VL), 0);
5552 __ CalculateSVEAddress(x11, SVEMemOperand(x28, -42, SVE_MUL_VL), 0);
5553 // As above, for VL-based accesses smaller than a Z register.
5554 VIXL_STATIC_ASSERT(kZRegBitsPerPRegBitLog2 == 3);
5555 __ CalculateSVEAddress(x12, SVEMemOperand(x28, -32 * 8, SVE_MUL_VL), 3);
5556 __ CalculateSVEAddress(x13, SVEMemOperand(x28, -42 * 8, SVE_MUL_VL), 3);
5557 __ CalculateSVEAddress(x14, SVEMemOperand(x28, -32 * 4, SVE_MUL_VL), 2);
5558 __ CalculateSVEAddress(x15, SVEMemOperand(x28, -42 * 4, SVE_MUL_VL), 2);
5559 __ CalculateSVEAddress(x18, SVEMemOperand(x28, -32 * 2, SVE_MUL_VL), 1);
5560 __ CalculateSVEAddress(x19, SVEMemOperand(x28, -42 * 2, SVE_MUL_VL), 1);
Jacob Bramley1314c462019-08-08 10:54:16 +01005561
5562 // scalar-plus-scalar
5563
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005564 __ CalculateSVEAddress(x20, SVEMemOperand(x28, x29));
5565 __ CalculateSVEAddress(x21, SVEMemOperand(x28, x30));
5566 __ CalculateSVEAddress(x22, SVEMemOperand(x28, x29, LSL, 8));
5567 __ CalculateSVEAddress(x23, SVEMemOperand(x28, x30, LSL, 8));
Jacob Bramley1314c462019-08-08 10:54:16 +01005568
5569 // In-place updates, to stress scratch register allocation.
5570
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005571 __ Mov(x24, 0xabcd000000000000);
5572 __ Mov(x25, 0xabcd101100000000);
5573 __ Mov(x26, 0xabcd202200000000);
5574 __ Mov(x27, 0xabcd303300000000);
5575 __ Mov(x28, 0xabcd404400000000);
5576 __ Mov(x29, 0xabcd505500000000);
Jacob Bramley1314c462019-08-08 10:54:16 +01005577
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005578 __ CalculateSVEAddress(x24, SVEMemOperand(x24));
5579 __ CalculateSVEAddress(x25, SVEMemOperand(x25, 0x42));
5580 __ CalculateSVEAddress(x26, SVEMemOperand(x26, 3, SVE_MUL_VL), 0);
5581 __ CalculateSVEAddress(x27, SVEMemOperand(x27, 0x42, SVE_MUL_VL), 3);
5582 __ CalculateSVEAddress(x28, SVEMemOperand(x28, x30));
5583 __ CalculateSVEAddress(x29, SVEMemOperand(x29, x30, LSL, 4));
Jacob Bramley1314c462019-08-08 10:54:16 +01005584
5585 END();
5586
5587 if (CAN_RUN()) {
5588 RUN();
5589
5590 uint64_t vl = config->sve_vl_in_bytes();
5591 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5592 uint64_t pl = vl / kZRegBitsPerPRegBit;
5593
5594 // Simple scalar (or equivalent) cases.
5595 ASSERT_EQUAL_64(base, x0);
5596 ASSERT_EQUAL_64(base, x1);
5597 ASSERT_EQUAL_64(base, x2);
5598 ASSERT_EQUAL_64(base, x3);
5599 ASSERT_EQUAL_64(base, x4);
5600 ASSERT_EQUAL_64(base, x5);
5601
5602 // scalar-plus-immediate
5603 ASSERT_EQUAL_64(base + 42, x6);
5604 ASSERT_EQUAL_64(base - 42, x7);
5605 ASSERT_EQUAL_64(base + (31 * vl), x8);
5606 ASSERT_EQUAL_64(base - (32 * vl), x9);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005607 ASSERT_EQUAL_64(base + (42 * vl), x10);
5608 ASSERT_EQUAL_64(base - (42 * vl), x11);
5609 ASSERT_EQUAL_64(base - (32 * vl), x12);
Jacob Bramley1314c462019-08-08 10:54:16 +01005610 ASSERT_EQUAL_64(base - (42 * vl), x13);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005611 ASSERT_EQUAL_64(base - (32 * vl), x14);
5612 ASSERT_EQUAL_64(base - (42 * vl), x15);
5613 ASSERT_EQUAL_64(base - (32 * vl), x18);
5614 ASSERT_EQUAL_64(base - (42 * vl), x19);
Jacob Bramley1314c462019-08-08 10:54:16 +01005615
5616 // scalar-plus-scalar
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005617 ASSERT_EQUAL_64(base + 48, x20);
5618 ASSERT_EQUAL_64(base - 48, x21);
5619 ASSERT_EQUAL_64(base + (48 << 8), x22);
5620 ASSERT_EQUAL_64(base - (48 << 8), x23);
Jacob Bramley1314c462019-08-08 10:54:16 +01005621
5622 // In-place updates.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005623 ASSERT_EQUAL_64(0xabcd000000000000, x24);
5624 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x25);
5625 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x26);
5626 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x27);
5627 ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28);
5628 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29);
Jacob Bramley1314c462019-08-08 10:54:16 +01005629 }
5630}
5631
TatWai Chong4f28df72019-08-14 17:50:30 -07005632TEST_SVE(sve_permute_vector_unpredicated) {
5633 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5634 START();
5635
Jacob Bramleye4983d42019-10-08 10:56:15 +01005636 // Initialise registers with known values first.
5637 __ Dup(z1.VnB(), 0x11);
5638 __ Dup(z2.VnB(), 0x22);
5639 __ Dup(z3.VnB(), 0x33);
5640 __ Dup(z4.VnB(), 0x44);
5641
TatWai Chong4f28df72019-08-14 17:50:30 -07005642 __ Mov(x0, 0x0123456789abcdef);
5643 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
5644 __ Insr(z1.VnS(), w0);
5645 __ Insr(z2.VnD(), x0);
5646 __ Insr(z3.VnH(), h0);
5647 __ Insr(z4.VnD(), d0);
5648
5649 uint64_t inputs[] = {0xfedcba9876543210,
5650 0x0123456789abcdef,
5651 0x8f8e8d8c8b8a8988,
5652 0x8786858483828180};
5653
5654 // Initialize a distinguishable value throughout the register first.
5655 __ Dup(z9.VnB(), 0xff);
5656 InsrHelper(&masm, z9.VnD(), inputs);
5657
5658 __ Rev(z5.VnB(), z9.VnB());
5659 __ Rev(z6.VnH(), z9.VnH());
5660 __ Rev(z7.VnS(), z9.VnS());
5661 __ Rev(z8.VnD(), z9.VnD());
5662
5663 int index[7] = {22, 7, 7, 3, 1, 1, 63};
5664 // Broadcasting an data within the input array.
5665 __ Dup(z10.VnB(), z9.VnB(), index[0]);
5666 __ Dup(z11.VnH(), z9.VnH(), index[1]);
5667 __ Dup(z12.VnS(), z9.VnS(), index[2]);
5668 __ Dup(z13.VnD(), z9.VnD(), index[3]);
5669 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
5670 // Test dst == src
5671 __ Mov(z15, z9);
5672 __ Dup(z15.VnS(), z15.VnS(), index[5]);
5673 // Selecting an data beyond the input array.
5674 __ Dup(z16.VnB(), z9.VnB(), index[6]);
5675
5676 END();
5677
5678 if (CAN_RUN()) {
5679 RUN();
5680
5681 // Insr
Jacob Bramleye4983d42019-10-08 10:56:15 +01005682 uint64_t z1_expected[] = {0x1111111111111111, 0x1111111189abcdef};
5683 uint64_t z2_expected[] = {0x2222222222222222, 0x0123456789abcdef};
5684 uint64_t z3_expected[] = {0x3333333333333333, 0x3333333333333456};
5685 uint64_t z4_expected[] = {0x4444444444444444, 0x7ffaaaaa22223456};
TatWai Chong4f28df72019-08-14 17:50:30 -07005686 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
5687 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
5688 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
5689 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
5690
5691 // Rev
5692 int lane_count = core.GetSVELaneCount(kBRegSize);
5693 for (int i = 0; i < lane_count; i++) {
5694 uint64_t expected =
5695 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
5696 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
5697 ASSERT_EQUAL_64(expected, input);
5698 }
5699
5700 lane_count = core.GetSVELaneCount(kHRegSize);
5701 for (int i = 0; i < lane_count; i++) {
5702 uint64_t expected =
5703 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
5704 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
5705 ASSERT_EQUAL_64(expected, input);
5706 }
5707
5708 lane_count = core.GetSVELaneCount(kSRegSize);
5709 for (int i = 0; i < lane_count; i++) {
5710 uint64_t expected =
5711 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
5712 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
5713 ASSERT_EQUAL_64(expected, input);
5714 }
5715
5716 lane_count = core.GetSVELaneCount(kDRegSize);
5717 for (int i = 0; i < lane_count; i++) {
5718 uint64_t expected =
5719 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
5720 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
5721 ASSERT_EQUAL_64(expected, input);
5722 }
5723
5724 // Dup
5725 unsigned vl = config->sve_vl_in_bits();
5726 lane_count = core.GetSVELaneCount(kBRegSize);
5727 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
5728 for (int i = 0; i < lane_count; i++) {
5729 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
5730 }
5731
5732 lane_count = core.GetSVELaneCount(kHRegSize);
5733 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
5734 for (int i = 0; i < lane_count; i++) {
5735 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
5736 }
5737
5738 lane_count = core.GetSVELaneCount(kSRegSize);
5739 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
5740 for (int i = 0; i < lane_count; i++) {
5741 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
5742 }
5743
5744 lane_count = core.GetSVELaneCount(kDRegSize);
5745 uint64_t expected_z13 =
5746 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
5747 for (int i = 0; i < lane_count; i++) {
5748 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
5749 }
5750
5751 lane_count = core.GetSVELaneCount(kDRegSize);
5752 uint64_t expected_z14_lo = 0;
5753 uint64_t expected_z14_hi = 0;
5754 if (vl > (index[4] * kQRegSize)) {
5755 expected_z14_lo = 0x0123456789abcdef;
5756 expected_z14_hi = 0xfedcba9876543210;
5757 }
5758 for (int i = 0; i < lane_count; i += 2) {
5759 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
5760 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
5761 }
5762
5763 lane_count = core.GetSVELaneCount(kSRegSize);
5764 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
5765 for (int i = 0; i < lane_count; i++) {
5766 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
5767 }
5768
5769 lane_count = core.GetSVELaneCount(kBRegSize);
5770 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
5771 for (int i = 0; i < lane_count; i++) {
5772 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
5773 }
5774 }
5775}
5776
Martyn Capewell2e954292020-01-14 14:56:42 +00005777TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
TatWai Chong4f28df72019-08-14 17:50:30 -07005778 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5779 START();
5780
5781 uint64_t z9_inputs[] = {0xfedcba9876543210,
5782 0x0123456789abcdef,
5783 0x8f8e8d8c8b8a8988,
5784 0x8786858483828180};
5785 InsrHelper(&masm, z9.VnD(), z9_inputs);
5786
5787 __ Sunpkhi(z10.VnH(), z9.VnB());
5788 __ Sunpkhi(z11.VnS(), z9.VnH());
5789 __ Sunpkhi(z12.VnD(), z9.VnS());
5790
5791 __ Sunpklo(z13.VnH(), z9.VnB());
5792 __ Sunpklo(z14.VnS(), z9.VnH());
5793 __ Sunpklo(z15.VnD(), z9.VnS());
5794
5795 __ Uunpkhi(z16.VnH(), z9.VnB());
5796 __ Uunpkhi(z17.VnS(), z9.VnH());
5797 __ Uunpkhi(z18.VnD(), z9.VnS());
5798
5799 __ Uunpklo(z19.VnH(), z9.VnB());
5800 __ Uunpklo(z20.VnS(), z9.VnH());
5801 __ Uunpklo(z21.VnD(), z9.VnS());
5802
Martyn Capewell2e954292020-01-14 14:56:42 +00005803 // Test unpacking with same source and destination.
5804 __ Mov(z22, z9);
5805 __ Sunpklo(z22.VnH(), z22.VnB());
5806 __ Mov(z23, z9);
5807 __ Uunpklo(z23.VnH(), z23.VnB());
5808
TatWai Chong4f28df72019-08-14 17:50:30 -07005809 END();
5810
5811 if (CAN_RUN()) {
5812 RUN();
5813
5814 // Suunpkhi
5815 int lane_count = core.GetSVELaneCount(kHRegSize);
5816 for (int i = lane_count - 1; i >= 0; i--) {
5817 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
5818 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5819 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5820 ASSERT_EQUAL_64(expected, input);
5821 }
5822
5823 lane_count = core.GetSVELaneCount(kSRegSize);
5824 for (int i = lane_count - 1; i >= 0; i--) {
5825 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
5826 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5827 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5828 ASSERT_EQUAL_64(expected, input);
5829 }
5830
5831 lane_count = core.GetSVELaneCount(kDRegSize);
5832 for (int i = lane_count - 1; i >= 0; i--) {
5833 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
5834 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5835 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5836 ASSERT_EQUAL_64(expected, input);
5837 }
5838
5839 // Suunpklo
5840 lane_count = core.GetSVELaneCount(kHRegSize);
5841 for (int i = lane_count - 1; i >= 0; i--) {
5842 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
5843 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5844 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5845 ASSERT_EQUAL_64(expected, input);
5846 }
5847
5848 lane_count = core.GetSVELaneCount(kSRegSize);
5849 for (int i = lane_count - 1; i >= 0; i--) {
5850 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
5851 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5852 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5853 ASSERT_EQUAL_64(expected, input);
5854 }
5855
5856 lane_count = core.GetSVELaneCount(kDRegSize);
5857 for (int i = lane_count - 1; i >= 0; i--) {
5858 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
5859 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5860 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5861 ASSERT_EQUAL_64(expected, input);
5862 }
5863
5864 // Uuunpkhi
5865 lane_count = core.GetSVELaneCount(kHRegSize);
5866 for (int i = lane_count - 1; i >= 0; i--) {
5867 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
5868 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5869 ASSERT_EQUAL_64(expected, input);
5870 }
5871
5872 lane_count = core.GetSVELaneCount(kSRegSize);
5873 for (int i = lane_count - 1; i >= 0; i--) {
5874 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
5875 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5876 ASSERT_EQUAL_64(expected, input);
5877 }
5878
5879 lane_count = core.GetSVELaneCount(kDRegSize);
5880 for (int i = lane_count - 1; i >= 0; i--) {
5881 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
5882 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5883 ASSERT_EQUAL_64(expected, input);
5884 }
5885
5886 // Uuunpklo
5887 lane_count = core.GetSVELaneCount(kHRegSize);
5888 for (int i = lane_count - 1; i >= 0; i--) {
5889 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
5890 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5891 ASSERT_EQUAL_64(expected, input);
5892 }
5893
5894 lane_count = core.GetSVELaneCount(kSRegSize);
5895 for (int i = lane_count - 1; i >= 0; i--) {
5896 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
5897 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5898 ASSERT_EQUAL_64(expected, input);
5899 }
5900
5901 lane_count = core.GetSVELaneCount(kDRegSize);
5902 for (int i = lane_count - 1; i >= 0; i--) {
5903 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
5904 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5905 ASSERT_EQUAL_64(expected, input);
5906 }
Martyn Capewell2e954292020-01-14 14:56:42 +00005907
5908 ASSERT_EQUAL_SVE(z13, z22);
5909 ASSERT_EQUAL_SVE(z19, z23);
TatWai Chong4f28df72019-08-14 17:50:30 -07005910 }
5911}
5912
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005913TEST_SVE(sve_cnot_not) {
5914 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5915 START();
5916
5917 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
5918
5919 // For simplicity, we re-use the same pg for various lane sizes.
5920 // For D lanes: 1, 1, 0
5921 // For S lanes: 1, 1, 1, 0, 0
5922 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5923 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5924 Initialise(&masm, p0.VnB(), pg_in);
5925 PRegisterM pg = p0.Merging();
5926
5927 // These are merging operations, so we have to initialise the result register.
5928 // We use a mixture of constructive and destructive operations.
5929
5930 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005931 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005932 __ Mov(z30, z31);
5933
5934 // For constructive operations, use a different initial result value.
5935 __ Index(z29.VnB(), 0, -1);
5936
5937 __ Mov(z0, z31);
5938 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
5939 __ Mov(z1, z29);
5940 __ Cnot(z1.VnH(), pg, z31.VnH());
5941 __ Mov(z2, z31);
5942 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
5943 __ Mov(z3, z29);
5944 __ Cnot(z3.VnD(), pg, z31.VnD());
5945
5946 __ Mov(z4, z29);
5947 __ Not(z4.VnB(), pg, z31.VnB());
5948 __ Mov(z5, z31);
5949 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
5950 __ Mov(z6, z29);
5951 __ Not(z6.VnS(), pg, z31.VnS());
5952 __ Mov(z7, z31);
5953 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
5954
5955 END();
5956
5957 if (CAN_RUN()) {
5958 RUN();
5959
5960 // Check that constructive operations preserve their inputs.
5961 ASSERT_EQUAL_SVE(z30, z31);
5962
5963 // clang-format off
5964
5965 // Cnot (B) destructive
5966 uint64_t expected_z0[] =
5967 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5968 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
5969 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5970
5971 // Cnot (H)
5972 uint64_t expected_z1[] =
5973 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5974 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
5975 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5976
5977 // Cnot (S) destructive
5978 uint64_t expected_z2[] =
5979 // pg: 0 1 1 1 0 0
5980 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
5981 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5982
5983 // Cnot (D)
5984 uint64_t expected_z3[] =
5985 // pg: 1 1 0
5986 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
5987 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5988
5989 // Not (B)
5990 uint64_t expected_z4[] =
5991 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5992 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
5993 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5994
5995 // Not (H) destructive
5996 uint64_t expected_z5[] =
5997 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5998 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
5999 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6000
6001 // Not (S)
6002 uint64_t expected_z6[] =
6003 // pg: 0 1 1 1 0 0
6004 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
6005 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6006
6007 // Not (D) destructive
6008 uint64_t expected_z7[] =
6009 // pg: 1 1 0
6010 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
6011 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6012
6013 // clang-format on
6014 }
6015}
6016
6017TEST_SVE(sve_fabs_fneg) {
6018 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6019 START();
6020
6021 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
6022 // NaNs, but fabs and fneg do not.
6023 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
6024 0xfff00000ff80fc01, // Signalling NaNs.
6025 0x123456789abcdef0};
6026
6027 // For simplicity, we re-use the same pg for various lane sizes.
6028 // For D lanes: 1, 1, 0
6029 // For S lanes: 1, 1, 1, 0, 0
6030 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6031 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6032 Initialise(&masm, p0.VnB(), pg_in);
6033 PRegisterM pg = p0.Merging();
6034
6035 // These are merging operations, so we have to initialise the result register.
6036 // We use a mixture of constructive and destructive operations.
6037
6038 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006039 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006040 __ Mov(z30, z31);
6041
6042 // For constructive operations, use a different initial result value.
6043 __ Index(z29.VnB(), 0, -1);
6044
6045 __ Mov(z0, z29);
6046 __ Fabs(z0.VnH(), pg, z31.VnH());
6047 __ Mov(z1, z31);
6048 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
6049 __ Mov(z2, z29);
6050 __ Fabs(z2.VnD(), pg, z31.VnD());
6051
6052 __ Mov(z3, z31);
6053 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
6054 __ Mov(z4, z29);
6055 __ Fneg(z4.VnS(), pg, z31.VnS());
6056 __ Mov(z5, z31);
6057 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
6058
6059 END();
6060
6061 if (CAN_RUN()) {
6062 RUN();
6063
6064 // Check that constructive operations preserve their inputs.
6065 ASSERT_EQUAL_SVE(z30, z31);
6066
6067 // clang-format off
6068
6069 // Fabs (H)
6070 uint64_t expected_z0[] =
6071 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6072 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
6073 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6074
6075 // Fabs (S) destructive
6076 uint64_t expected_z1[] =
6077 // pg: 0 1 1 1 0 0
6078 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
6079 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6080
6081 // Fabs (D)
6082 uint64_t expected_z2[] =
6083 // pg: 1 1 0
6084 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
6085 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6086
6087 // Fneg (H) destructive
6088 uint64_t expected_z3[] =
6089 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6090 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
6091 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6092
6093 // Fneg (S)
6094 uint64_t expected_z4[] =
6095 // pg: 0 1 1 1 0 0
6096 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
6097 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6098
6099 // Fneg (D) destructive
6100 uint64_t expected_z5[] =
6101 // pg: 1 1 0
6102 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
6103 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6104
6105 // clang-format on
6106 }
6107}
6108
6109TEST_SVE(sve_cls_clz_cnt) {
6110 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6111 START();
6112
6113 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
6114
6115 // For simplicity, we re-use the same pg for various lane sizes.
6116 // For D lanes: 1, 1, 0
6117 // For S lanes: 1, 1, 1, 0, 0
6118 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6119 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6120 Initialise(&masm, p0.VnB(), pg_in);
6121 PRegisterM pg = p0.Merging();
6122
6123 // These are merging operations, so we have to initialise the result register.
6124 // We use a mixture of constructive and destructive operations.
6125
6126 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006127 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006128 __ Mov(z30, z31);
6129
6130 // For constructive operations, use a different initial result value.
6131 __ Index(z29.VnB(), 0, -1);
6132
6133 __ Mov(z0, z29);
6134 __ Cls(z0.VnB(), pg, z31.VnB());
6135 __ Mov(z1, z31);
6136 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
6137 __ Mov(z2, z29);
6138 __ Cnt(z2.VnS(), pg, z31.VnS());
6139 __ Mov(z3, z31);
6140 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
6141
6142 END();
6143
6144 if (CAN_RUN()) {
6145 RUN();
6146 // Check that non-destructive operations preserve their inputs.
6147 ASSERT_EQUAL_SVE(z30, z31);
6148
6149 // clang-format off
6150
6151 // cls (B)
6152 uint8_t expected_z0[] =
6153 // pg: 0 0 0 0 1 0 1 1
6154 // pg: 1 0 0 1 0 1 1 1
6155 // pg: 0 0 1 0 1 1 1 0
6156 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
6157 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
6158 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
6159 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
6160
6161 // clz (H) destructive
6162 uint16_t expected_z1[] =
6163 // pg: 0 0 0 1
6164 // pg: 0 1 1 1
6165 // pg: 0 0 1 0
6166 {0x0000, 0x0000, 0x0000, 16,
6167 0xfefc, 0, 0, 0,
6168 0x1234, 0x5678, 0, 0xdef0};
6169 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
6170
6171 // cnt (S)
6172 uint32_t expected_z2[] =
6173 // pg: 0 1
6174 // pg: 1 1
6175 // pg: 0 0
6176 {0xe9eaebec, 0,
6177 22, 16,
6178 0xf9fafbfc, 0xfdfeff00};
6179 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
6180
6181 // cnt (D) destructive
6182 uint64_t expected_z3[] =
6183 // pg: 1 1 0
6184 { 0, 38, 0x123456789abcdef0};
6185 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6186
6187 // clang-format on
6188 }
6189}
6190
6191TEST_SVE(sve_sxt) {
6192 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6193 START();
6194
6195 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
6196
6197 // For simplicity, we re-use the same pg for various lane sizes.
6198 // For D lanes: 1, 1, 0
6199 // For S lanes: 1, 1, 1, 0, 0
6200 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6201 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6202 Initialise(&masm, p0.VnB(), pg_in);
6203 PRegisterM pg = p0.Merging();
6204
6205 // These are merging operations, so we have to initialise the result register.
6206 // We use a mixture of constructive and destructive operations.
6207
6208 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006209 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006210 __ Mov(z30, z31);
6211
6212 // For constructive operations, use a different initial result value.
6213 __ Index(z29.VnB(), 0, -1);
6214
6215 __ Mov(z0, z31);
6216 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
6217 __ Mov(z1, z29);
6218 __ Sxtb(z1.VnS(), pg, z31.VnS());
6219 __ Mov(z2, z31);
6220 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
6221 __ Mov(z3, z29);
6222 __ Sxth(z3.VnS(), pg, z31.VnS());
6223 __ Mov(z4, z31);
6224 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
6225 __ Mov(z5, z29);
6226 __ Sxtw(z5.VnD(), pg, z31.VnD());
6227
6228 END();
6229
6230 if (CAN_RUN()) {
6231 RUN();
6232 // Check that constructive operations preserve their inputs.
6233 ASSERT_EQUAL_SVE(z30, z31);
6234
6235 // clang-format off
6236
6237 // Sxtb (H) destructive
6238 uint64_t expected_z0[] =
6239 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6240 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
6241 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6242
6243 // Sxtb (S)
6244 uint64_t expected_z1[] =
6245 // pg: 0 1 1 1 0 0
6246 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
6247 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6248
6249 // Sxtb (D) destructive
6250 uint64_t expected_z2[] =
6251 // pg: 1 1 0
6252 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
6253 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6254
6255 // Sxth (S)
6256 uint64_t expected_z3[] =
6257 // pg: 0 1 1 1 0 0
6258 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
6259 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6260
6261 // Sxth (D) destructive
6262 uint64_t expected_z4[] =
6263 // pg: 1 1 0
6264 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
6265 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6266
6267 // Sxtw (D)
6268 uint64_t expected_z5[] =
6269 // pg: 1 1 0
6270 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
6271 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6272
6273 // clang-format on
6274 }
6275}
6276
6277TEST_SVE(sve_uxt) {
6278 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6279 START();
6280
6281 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
6282
6283 // For simplicity, we re-use the same pg for various lane sizes.
6284 // For D lanes: 1, 1, 0
6285 // For S lanes: 1, 1, 1, 0, 0
6286 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6287 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6288 Initialise(&masm, p0.VnB(), pg_in);
6289 PRegisterM pg = p0.Merging();
6290
6291 // These are merging operations, so we have to initialise the result register.
6292 // We use a mixture of constructive and destructive operations.
6293
6294 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006295 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006296 __ Mov(z30, z31);
6297
6298 // For constructive operations, use a different initial result value.
6299 __ Index(z29.VnB(), 0, -1);
6300
6301 __ Mov(z0, z29);
6302 __ Uxtb(z0.VnH(), pg, z31.VnH());
6303 __ Mov(z1, z31);
6304 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
6305 __ Mov(z2, z29);
6306 __ Uxtb(z2.VnD(), pg, z31.VnD());
6307 __ Mov(z3, z31);
6308 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
6309 __ Mov(z4, z29);
6310 __ Uxth(z4.VnD(), pg, z31.VnD());
6311 __ Mov(z5, z31);
6312 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
6313
6314 END();
6315
6316 if (CAN_RUN()) {
6317 RUN();
6318 // clang-format off
6319
6320 // Uxtb (H)
6321 uint64_t expected_z0[] =
6322 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6323 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
6324 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6325
6326 // Uxtb (S) destructive
6327 uint64_t expected_z1[] =
6328 // pg: 0 1 1 1 0 0
6329 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
6330 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6331
6332 // Uxtb (D)
6333 uint64_t expected_z2[] =
6334 // pg: 1 1 0
6335 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
6336 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6337
6338 // Uxth (S) destructive
6339 uint64_t expected_z3[] =
6340 // pg: 0 1 1 1 0 0
6341 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
6342 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6343
6344 // Uxth (D)
6345 uint64_t expected_z4[] =
6346 // pg: 1 1 0
6347 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
6348 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6349
6350 // Uxtw (D) destructive
6351 uint64_t expected_z5[] =
6352 // pg: 1 1 0
6353 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
6354 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6355
6356 // clang-format on
6357 }
6358}
6359
6360TEST_SVE(sve_abs_neg) {
6361 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6362 START();
6363
6364 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
6365
6366 // For simplicity, we re-use the same pg for various lane sizes.
6367 // For D lanes: 1, 1, 0
6368 // For S lanes: 1, 1, 1, 0, 0
6369 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6370 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6371 Initialise(&masm, p0.VnB(), pg_in);
6372 PRegisterM pg = p0.Merging();
6373
6374 InsrHelper(&masm, z31.VnD(), in);
6375
6376 // These are merging operations, so we have to initialise the result register.
6377 // We use a mixture of constructive and destructive operations.
6378
6379 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006380 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006381 __ Mov(z30, z31);
6382
6383 // For constructive operations, use a different initial result value.
6384 __ Index(z29.VnB(), 0, -1);
6385
6386 __ Mov(z0, z31);
6387 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
6388 __ Mov(z1, z29);
6389 __ Abs(z1.VnB(), pg, z31.VnB());
6390
6391 __ Mov(z2, z31);
6392 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
6393 __ Mov(z3, z29);
6394 __ Neg(z3.VnS(), pg, z31.VnS());
6395
Jacob Bramleyc0066272019-09-30 16:30:47 +01006396 // The unpredicated form of `Neg` is implemented using `subr`.
6397 __ Mov(z4, z31);
6398 __ Neg(z4.VnB(), z4.VnB()); // destructive
6399 __ Mov(z5, z29);
6400 __ Neg(z5.VnD(), z31.VnD());
6401
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006402 END();
6403
6404 if (CAN_RUN()) {
6405 RUN();
Jacob Bramleyc0066272019-09-30 16:30:47 +01006406
6407 ASSERT_EQUAL_SVE(z30, z31);
6408
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006409 // clang-format off
6410
6411 // Abs (D) destructive
6412 uint64_t expected_z0[] =
6413 // pg: 1 1 0
6414 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
6415 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6416
6417 // Abs (B)
6418 uint64_t expected_z1[] =
6419 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
6420 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
6421 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6422
6423 // Neg (H) destructive
6424 uint64_t expected_z2[] =
6425 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
6426 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
6427 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6428
6429 // Neg (S)
6430 uint64_t expected_z3[] =
6431 // pg: 0 1 1 1 0 0
6432 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
6433 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6434
Jacob Bramleyc0066272019-09-30 16:30:47 +01006435 // Neg (B) destructive, unpredicated
6436 uint64_t expected_z4[] =
6437 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
6438 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6439
6440 // Neg (D) unpredicated
6441 uint64_t expected_z5[] =
6442 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
6443 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6444
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01006445 // clang-format on
6446 }
6447}
6448
Jacob Bramley0093bb92019-10-04 15:54:10 +01006449TEST_SVE(sve_cpy) {
6450 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
6451 START();
6452
6453 // For simplicity, we re-use the same pg for various lane sizes.
6454 // For D lanes: 0, 1, 1
6455 // For S lanes: 0, 1, 1, 0, 1
6456 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6457 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6458
6459 PRegisterM pg = p7.Merging();
6460 Initialise(&masm, pg.VnB(), pg_in);
6461
6462 // These are merging operations, so we have to initialise the result registers
6463 // for each operation.
6464 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6465 __ Index(ZRegister(i, kBRegSize), 0, -1);
6466 }
6467
6468 // Recognisable values to copy.
6469 __ Mov(x0, 0xdeadbeefdeadbe42);
6470 __ Mov(x1, 0xdeadbeefdead8421);
6471 __ Mov(x2, 0xdeadbeef80042001);
6472 __ Mov(x3, 0x8000000420000001);
6473
6474 // Use NEON moves, to avoid testing SVE `cpy` against itself.
6475 __ Dup(v28.V2D(), x0);
6476 __ Dup(v29.V2D(), x1);
6477 __ Dup(v30.V2D(), x2);
6478 __ Dup(v31.V2D(), x3);
6479
6480 // Register forms (CPY_z_p_r)
6481 __ Cpy(z0.VnB(), pg, w0);
6482 __ Cpy(z1.VnH(), pg, x1); // X registers are accepted for small lanes.
6483 __ Cpy(z2.VnS(), pg, w2);
6484 __ Cpy(z3.VnD(), pg, x3);
6485
6486 // VRegister forms (CPY_z_p_v)
6487 __ Cpy(z4.VnB(), pg, b28);
6488 __ Cpy(z5.VnH(), pg, h29);
6489 __ Cpy(z6.VnS(), pg, s30);
6490 __ Cpy(z7.VnD(), pg, d31);
6491
6492 // Check that we can copy the stack pointer.
6493 __ Mov(x10, sp);
6494 __ Mov(sp, 0xabcabcabcabcabca); // Set sp to a known value.
6495 __ Cpy(z16.VnB(), pg, sp);
6496 __ Cpy(z17.VnH(), pg, wsp);
6497 __ Cpy(z18.VnS(), pg, wsp);
6498 __ Cpy(z19.VnD(), pg, sp);
6499 __ Mov(sp, x10); // Restore sp.
6500
6501 END();
6502
6503 if (CAN_RUN()) {
6504 RUN();
6505 // clang-format off
6506
6507 uint64_t expected_b[] =
6508 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6509 {0xe9eaebec424242f0, 0x42f2f34242f64242, 0xf942fbfcfdfeff42};
6510 ASSERT_EQUAL_SVE(expected_b, z0.VnD());
6511 ASSERT_EQUAL_SVE(expected_b, z4.VnD());
6512
6513 uint64_t expected_h[] =
6514 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6515 {0xe9eaebec8421eff0, 0xf1f28421f5f68421, 0x8421fbfcfdfe8421};
6516 ASSERT_EQUAL_SVE(expected_h, z1.VnD());
6517 ASSERT_EQUAL_SVE(expected_h, z5.VnD());
6518
6519 uint64_t expected_s[] =
6520 // pg: 0 0 1 1 0 1
6521 {0xe9eaebecedeeeff0, 0x8004200180042001, 0xf9fafbfc80042001};
6522 ASSERT_EQUAL_SVE(expected_s, z2.VnD());
6523 ASSERT_EQUAL_SVE(expected_s, z6.VnD());
6524
6525 uint64_t expected_d[] =
6526 // pg: 0 1 1
6527 {0xe9eaebecedeeeff0, 0x8000000420000001, 0x8000000420000001};
6528 ASSERT_EQUAL_SVE(expected_d, z3.VnD());
6529 ASSERT_EQUAL_SVE(expected_d, z7.VnD());
6530
6531
6532 uint64_t expected_b_sp[] =
6533 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6534 {0xe9eaebeccacacaf0, 0xcaf2f3cacaf6caca, 0xf9cafbfcfdfeffca};
6535 ASSERT_EQUAL_SVE(expected_b_sp, z16.VnD());
6536
6537 uint64_t expected_h_sp[] =
6538 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6539 {0xe9eaebecabcaeff0, 0xf1f2abcaf5f6abca, 0xabcafbfcfdfeabca};
6540 ASSERT_EQUAL_SVE(expected_h_sp, z17.VnD());
6541
6542 uint64_t expected_s_sp[] =
6543 // pg: 0 0 1 1 0 1
6544 {0xe9eaebecedeeeff0, 0xcabcabcacabcabca, 0xf9fafbfccabcabca};
6545 ASSERT_EQUAL_SVE(expected_s_sp, z18.VnD());
6546
6547 uint64_t expected_d_sp[] =
6548 // pg: 0 1 1
6549 {0xe9eaebecedeeeff0, 0xabcabcabcabcabca, 0xabcabcabcabcabca};
6550 ASSERT_EQUAL_SVE(expected_d_sp, z19.VnD());
6551
6552 // clang-format on
6553 }
6554}
6555
Jacob Bramley0f62eab2019-10-23 17:07:47 +01006556TEST_SVE(sve_cpy_imm) {
6557 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6558 START();
6559
6560 // For simplicity, we re-use the same pg for various lane sizes.
6561 // For D lanes: 0, 1, 1
6562 // For S lanes: 0, 1, 1, 0, 1
6563 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6564 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6565
6566 PRegister pg = p7;
6567 Initialise(&masm, pg.VnB(), pg_in);
6568
6569 // These are (mostly) merging operations, so we have to initialise the result
6570 // registers for each operation.
6571 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6572 __ Index(ZRegister(i, kBRegSize), 0, -1);
6573 }
6574
6575 // Encodable integer forms (CPY_z_p_i)
6576 __ Cpy(z0.VnB(), pg.Merging(), 0);
6577 __ Cpy(z1.VnB(), pg.Zeroing(), 42);
6578 __ Cpy(z2.VnB(), pg.Merging(), -42);
6579 __ Cpy(z3.VnB(), pg.Zeroing(), 0xff);
6580 __ Cpy(z4.VnH(), pg.Merging(), 127);
6581 __ Cpy(z5.VnS(), pg.Zeroing(), -128);
6582 __ Cpy(z6.VnD(), pg.Merging(), -1);
6583
6584 // Forms encodable using fcpy.
6585 __ Cpy(z7.VnH(), pg.Merging(), Float16ToRawbits(Float16(-31.0)));
6586 __ Cpy(z8.VnS(), pg.Zeroing(), FloatToRawbits(2.0f));
6587 __ Cpy(z9.VnD(), pg.Merging(), DoubleToRawbits(-4.0));
6588
6589 // Other forms use a scratch register.
6590 __ Cpy(z10.VnH(), pg.Merging(), 0xff);
6591 __ Cpy(z11.VnD(), pg.Zeroing(), 0x0123456789abcdef);
6592
6593 END();
6594
6595 if (CAN_RUN()) {
6596 RUN();
6597 // clang-format off
6598
6599 uint64_t expected_z0[] =
6600 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6601 {0xe9eaebec000000f0, 0x00f2f30000f60000, 0xf900fbfcfdfeff00};
6602 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6603
6604 uint64_t expected_z1[] =
6605 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6606 {0x000000002a2a2a00, 0x2a00002a2a002a2a, 0x002a00000000002a};
6607 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6608
6609 uint64_t expected_z2[] =
6610 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6611 {0xe9eaebecd6d6d6f0, 0xd6f2f3d6d6f6d6d6, 0xf9d6fbfcfdfeffd6};
6612 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6613
6614 uint64_t expected_z3[] =
6615 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6616 {0x00000000ffffff00, 0xff0000ffff00ffff, 0x00ff0000000000ff};
6617 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6618
6619 uint64_t expected_z4[] =
6620 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6621 {0xe9eaebec007feff0, 0xf1f2007ff5f6007f, 0x007ffbfcfdfe007f};
6622 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6623
6624 uint64_t expected_z5[] =
6625 // pg: 0 0 1 1 0 1
6626 {0x0000000000000000, 0xffffff80ffffff80, 0x00000000ffffff80};
6627 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6628
6629 uint64_t expected_z6[] =
6630 // pg: 0 1 1
6631 {0xe9eaebecedeeeff0, 0xffffffffffffffff, 0xffffffffffffffff};
6632 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6633
6634 uint64_t expected_z7[] =
6635 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6636 {0xe9eaebeccfc0eff0, 0xf1f2cfc0f5f6cfc0, 0xcfc0fbfcfdfecfc0};
6637 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6638
6639 uint64_t expected_z8[] =
6640 // pg: 0 0 1 1 0 1
6641 {0x0000000000000000, 0x4000000040000000, 0x0000000040000000};
6642 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6643
6644 uint64_t expected_z9[] =
6645 // pg: 0 1 1
6646 {0xe9eaebecedeeeff0, 0xc010000000000000, 0xc010000000000000};
6647 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6648
6649 uint64_t expected_z10[] =
6650 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6651 {0xe9eaebec00ffeff0, 0xf1f200fff5f600ff, 0x00fffbfcfdfe00ff};
6652 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6653
6654 uint64_t expected_z11[] =
6655 // pg: 0 1 1
6656 {0x0000000000000000, 0x0123456789abcdef, 0x0123456789abcdef};
6657 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6658
6659 // clang-format on
6660 }
6661}
6662
6663TEST_SVE(sve_fcpy_imm) {
6664 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6665 START();
6666
6667 // For simplicity, we re-use the same pg for various lane sizes.
6668 // For D lanes: 0, 1, 1
6669 // For S lanes: 0, 1, 1, 0, 1
6670 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6671 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6672
6673 PRegister pg = p7;
6674 Initialise(&masm, pg.VnB(), pg_in);
6675
6676 // These are (mostly) merging operations, so we have to initialise the result
6677 // registers for each operation.
6678 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6679 __ Index(ZRegister(i, kBRegSize), 0, -1);
6680 }
6681
6682 // Encodable floating-point forms (FCPY_z_p_i)
6683 __ Fcpy(z1.VnH(), pg.Merging(), Float16(1.0));
6684 __ Fcpy(z2.VnH(), pg.Merging(), -2.0f);
6685 __ Fcpy(z3.VnH(), pg.Merging(), 3.0);
6686 __ Fcpy(z4.VnS(), pg.Merging(), Float16(-4.0));
6687 __ Fcpy(z5.VnS(), pg.Merging(), 5.0f);
6688 __ Fcpy(z6.VnS(), pg.Merging(), 6.0);
6689 __ Fcpy(z7.VnD(), pg.Merging(), Float16(7.0));
6690 __ Fcpy(z8.VnD(), pg.Merging(), 8.0f);
6691 __ Fcpy(z9.VnD(), pg.Merging(), -9.0);
6692
6693 // Unencodable immediates.
6694 __ Fcpy(z10.VnS(), pg.Merging(), 0.0);
6695 __ Fcpy(z11.VnH(), pg.Merging(), Float16(42.0));
6696 __ Fcpy(z12.VnD(), pg.Merging(), RawbitsToDouble(0x7ff0000012340000)); // NaN
6697 __ Fcpy(z13.VnH(), pg.Merging(), kFP64NegativeInfinity);
6698
6699 END();
6700
6701 if (CAN_RUN()) {
6702 RUN();
6703 // clang-format off
6704
6705 // 1.0 as FP16: 0x3c00
6706 uint64_t expected_z1[] =
6707 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6708 {0xe9eaebec3c00eff0, 0xf1f23c00f5f63c00, 0x3c00fbfcfdfe3c00};
6709 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6710
6711 // -2.0 as FP16: 0xc000
6712 uint64_t expected_z2[] =
6713 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6714 {0xe9eaebecc000eff0, 0xf1f2c000f5f6c000, 0xc000fbfcfdfec000};
6715 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6716
6717 // 3.0 as FP16: 0x4200
6718 uint64_t expected_z3[] =
6719 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6720 {0xe9eaebec4200eff0, 0xf1f24200f5f64200, 0x4200fbfcfdfe4200};
6721 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6722
6723 // -4.0 as FP32: 0xc0800000
6724 uint64_t expected_z4[] =
6725 // pg: 0 0 1 1 0 1
6726 {0xe9eaebecedeeeff0, 0xc0800000c0800000, 0xf9fafbfcc0800000};
6727 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6728
6729 // 5.0 as FP32: 0x40a00000
6730 uint64_t expected_z5[] =
6731 // pg: 0 0 1 1 0 1
6732 {0xe9eaebecedeeeff0, 0x40a0000040a00000, 0xf9fafbfc40a00000};
6733 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6734
6735 // 6.0 as FP32: 0x40c00000
6736 uint64_t expected_z6[] =
6737 // pg: 0 0 1 1 0 1
6738 {0xe9eaebecedeeeff0, 0x40c0000040c00000, 0xf9fafbfc40c00000};
6739 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6740
6741 // 7.0 as FP64: 0x401c000000000000
6742 uint64_t expected_z7[] =
6743 // pg: 0 1 1
6744 {0xe9eaebecedeeeff0, 0x401c000000000000, 0x401c000000000000};
6745 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6746
6747 // 8.0 as FP64: 0x4020000000000000
6748 uint64_t expected_z8[] =
6749 // pg: 0 1 1
6750 {0xe9eaebecedeeeff0, 0x4020000000000000, 0x4020000000000000};
6751 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6752
6753 // -9.0 as FP64: 0xc022000000000000
6754 uint64_t expected_z9[] =
6755 // pg: 0 1 1
6756 {0xe9eaebecedeeeff0, 0xc022000000000000, 0xc022000000000000};
6757 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6758
6759 // 0.0 as FP32: 0x00000000
6760 uint64_t expected_z10[] =
6761 // pg: 0 0 1 1 0 1
6762 {0xe9eaebecedeeeff0, 0x0000000000000000, 0xf9fafbfc00000000};
6763 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6764
6765 // 42.0 as FP16: 0x5140
6766 uint64_t expected_z11[] =
6767 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6768 {0xe9eaebec5140eff0, 0xf1f25140f5f65140, 0x5140fbfcfdfe5140};
6769 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6770
6771 // Signalling NaN (with payload): 0x7ff0000012340000
6772 uint64_t expected_z12[] =
6773 // pg: 0 1 1
6774 {0xe9eaebecedeeeff0, 0x7ff0000012340000, 0x7ff0000012340000};
6775 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
6776
6777 // -infinity as FP16: 0xfc00
6778 uint64_t expected_z13[] =
6779 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6780 {0xe9eaebecfc00eff0, 0xf1f2fc00f5f6fc00, 0xfc00fbfcfdfefc00};
6781 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
6782
6783 // clang-format on
6784 }
6785}
6786
TatWai Chong4f28df72019-08-14 17:50:30 -07006787TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
6788 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6789 START();
6790
6791 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
6792
6793 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
6794
6795 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
6796
6797 int index_s[] = {1, 3, 2, 31, -1};
6798
6799 int index_d[] = {31, 1};
6800
6801 // Initialize the register with a value that doesn't existed in the table.
6802 __ Dup(z9.VnB(), 0x1f);
6803 InsrHelper(&masm, z9.VnD(), table_inputs);
6804
6805 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
6806 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
6807 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
6808 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
6809
6810 InsrHelper(&masm, ind_b, index_b);
6811 InsrHelper(&masm, ind_h, index_h);
6812 InsrHelper(&masm, ind_s, index_s);
6813 InsrHelper(&masm, ind_d, index_d);
6814
6815 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
6816
6817 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
6818
6819 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
6820
6821 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
6822
6823 END();
6824
6825 if (CAN_RUN()) {
6826 RUN();
6827
6828 // clang-format off
6829 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
6830 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
6831
6832 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
6833 0x5544, 0x7766, 0xddcc, 0x9988};
6834
6835 unsigned z28_expected[] =
6836 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
6837
6838 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
6839 // clang-format on
6840
6841 unsigned vl = config->sve_vl_in_bits();
6842 for (size_t i = 0; i < ArrayLength(index_b); i++) {
6843 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
6844 if (!core.HasSVELane(z26.VnB(), lane)) break;
6845 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
6846 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
6847 }
6848
6849 for (size_t i = 0; i < ArrayLength(index_h); i++) {
6850 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
6851 if (!core.HasSVELane(z27.VnH(), lane)) break;
6852 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
6853 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
6854 }
6855
6856 for (size_t i = 0; i < ArrayLength(index_s); i++) {
6857 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
6858 if (!core.HasSVELane(z28.VnS(), lane)) break;
6859 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
6860 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
6861 }
6862
6863 for (size_t i = 0; i < ArrayLength(index_d); i++) {
6864 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
6865 if (!core.HasSVELane(z29.VnD(), lane)) break;
6866 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
6867 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
6868 }
6869 }
6870}
6871
Jacob Bramley199339d2019-08-05 18:49:13 +01006872TEST_SVE(ldr_str_z_bi) {
6873 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6874 START();
6875
6876 int vl = config->sve_vl_in_bytes();
6877
6878 // The immediate can address [-256, 255] times the VL, so allocate enough
6879 // space to exceed that in both directions.
6880 int data_size = vl * 1024;
6881
6882 uint8_t* data = new uint8_t[data_size];
6883 memset(data, 0, data_size);
6884
6885 // Set the base half-way through the buffer so we can use negative indices.
6886 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6887
6888 __ Index(z1.VnB(), 1, 3);
6889 __ Index(z2.VnB(), 2, 5);
6890 __ Index(z3.VnB(), 3, 7);
6891 __ Index(z4.VnB(), 4, 11);
6892 __ Index(z5.VnB(), 5, 13);
6893 __ Index(z6.VnB(), 6, 2);
6894 __ Index(z7.VnB(), 7, 3);
6895 __ Index(z8.VnB(), 8, 5);
6896 __ Index(z9.VnB(), 9, 7);
6897
6898 // Encodable cases.
6899 __ Str(z1, SVEMemOperand(x0));
6900 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
6901 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
6902 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
6903 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
6904
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006905 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01006906 __ Str(z6, SVEMemOperand(x0, 6 * vl));
6907 __ Str(z7, SVEMemOperand(x0, -7 * vl));
6908 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
6909 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
6910
6911 // Corresponding loads.
6912 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
6913 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
6914 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
6915 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
6916 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
6917
6918 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
6919 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
6920 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
6921 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
6922
6923 END();
6924
6925 if (CAN_RUN()) {
6926 RUN();
6927
6928 uint8_t* expected = new uint8_t[data_size];
6929 memset(expected, 0, data_size);
6930 uint8_t* middle = &expected[data_size / 2];
6931
6932 for (int i = 0; i < vl; i++) {
6933 middle[i] = (1 + (3 * i)) & 0xff; // z1
6934 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
6935 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
6936 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
6937 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
6938 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
6939 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
6940 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
6941 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
6942 }
6943
Jacob Bramley33c99f92019-10-08 15:24:12 +01006944 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01006945
6946 ASSERT_EQUAL_SVE(z1, z11);
6947 ASSERT_EQUAL_SVE(z2, z12);
6948 ASSERT_EQUAL_SVE(z3, z13);
6949 ASSERT_EQUAL_SVE(z4, z14);
6950 ASSERT_EQUAL_SVE(z5, z15);
6951 ASSERT_EQUAL_SVE(z6, z16);
6952 ASSERT_EQUAL_SVE(z7, z17);
6953 ASSERT_EQUAL_SVE(z8, z18);
6954 ASSERT_EQUAL_SVE(z9, z19);
6955
6956 delete[] expected;
6957 }
6958 delete[] data;
6959}
6960
6961TEST_SVE(ldr_str_p_bi) {
6962 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6963 START();
6964
6965 int vl = config->sve_vl_in_bytes();
6966 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
6967 int pl = vl / kZRegBitsPerPRegBit;
6968
6969 // The immediate can address [-256, 255] times the PL, so allocate enough
6970 // space to exceed that in both directions.
6971 int data_size = pl * 1024;
6972
6973 uint8_t* data = new uint8_t[data_size];
6974 memset(data, 0, data_size);
6975
6976 // Set the base half-way through the buffer so we can use negative indices.
6977 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6978
6979 uint64_t pattern[4] = {0x1010101011101111,
6980 0x0010111011000101,
6981 0x1001101110010110,
6982 0x1010110101100011};
6983 for (int i = 8; i <= 15; i++) {
6984 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
6985 Initialise(&masm,
6986 PRegister(i),
6987 pattern[3] * i,
6988 pattern[2] * i,
6989 pattern[1] * i,
6990 pattern[0] * i);
6991 }
6992
6993 // Encodable cases.
6994 __ Str(p8, SVEMemOperand(x0));
6995 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
6996 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
6997 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
6998
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006999 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01007000 __ Str(p12, SVEMemOperand(x0, 6 * pl));
7001 __ Str(p13, SVEMemOperand(x0, -7 * pl));
7002 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
7003 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
7004
7005 // Corresponding loads.
7006 __ Ldr(p0, SVEMemOperand(x0));
7007 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
7008 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
7009 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
7010
7011 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
7012 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
7013 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
7014 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
7015
7016 END();
7017
7018 if (CAN_RUN()) {
7019 RUN();
7020
7021 uint8_t* expected = new uint8_t[data_size];
7022 memset(expected, 0, data_size);
7023 uint8_t* middle = &expected[data_size / 2];
7024
7025 for (int i = 0; i < pl; i++) {
7026 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
7027 size_t index = i / sizeof(pattern[0]);
7028 VIXL_ASSERT(index < ArrayLength(pattern));
7029 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
7030 // Each byte of `pattern` can be multiplied by 15 without carry.
7031 VIXL_ASSERT((byte * 15) <= 0xff);
7032
7033 middle[i] = byte * 8; // p8
7034 middle[(2 * pl) + i] = byte * 9; // p9
7035 middle[(-3 * pl) + i] = byte * 10; // p10
7036 middle[(255 * pl) + i] = byte * 11; // p11
7037 middle[(6 * pl) + i] = byte * 12; // p12
7038 middle[(-7 * pl) + i] = byte * 13; // p13
7039 middle[(314 * pl) + i] = byte * 14; // p14
7040 middle[(-314 * pl) + i] = byte * 15; // p15
7041 }
7042
Jacob Bramley33c99f92019-10-08 15:24:12 +01007043 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01007044
7045 ASSERT_EQUAL_SVE(p0, p8);
7046 ASSERT_EQUAL_SVE(p1, p9);
7047 ASSERT_EQUAL_SVE(p2, p10);
7048 ASSERT_EQUAL_SVE(p3, p11);
7049 ASSERT_EQUAL_SVE(p4, p12);
7050 ASSERT_EQUAL_SVE(p5, p13);
7051 ASSERT_EQUAL_SVE(p6, p14);
7052 ASSERT_EQUAL_SVE(p7, p15);
7053
7054 delete[] expected;
7055 }
7056 delete[] data;
7057}
7058
Jacob Bramleye668b202019-08-14 17:57:34 +01007059template <typename T>
7060static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
7061 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
7062}
7063
7064TEST_SVE(sve_ld1_st1_contiguous) {
7065 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7066 START();
7067
7068 int vl = config->sve_vl_in_bytes();
7069
7070 // The immediate can address [-8, 7] times the VL, so allocate enough space to
7071 // exceed that in both directions.
7072 int data_size = vl * 128;
7073
7074 uint8_t* data = new uint8_t[data_size];
7075 memset(data, 0, data_size);
7076
7077 // Set the base half-way through the buffer so we can use negative indeces.
7078 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7079
Jacob Bramleye668b202019-08-14 17:57:34 +01007080 // Encodable scalar-plus-immediate cases.
7081 __ Index(z1.VnB(), 1, -3);
7082 __ Ptrue(p1.VnB());
7083 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
7084
7085 __ Index(z2.VnH(), -2, 5);
7086 __ Ptrue(p2.VnH(), SVE_MUL3);
7087 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
7088
7089 __ Index(z3.VnS(), 3, -7);
7090 __ Ptrue(p3.VnS(), SVE_POW2);
7091 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
7092
7093 // Encodable scalar-plus-scalar cases.
7094 __ Index(z4.VnD(), -4, 11);
7095 __ Ptrue(p4.VnD(), SVE_VL3);
7096 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
7097 __ Mov(x2, 17);
7098 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
7099
7100 __ Index(z5.VnD(), 6, -2);
7101 __ Ptrue(p5.VnD(), SVE_VL16);
TatWai Chong6205eb42019-09-24 10:07:20 +01007102 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
7103 __ Mov(x4, 6);
7104 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
Jacob Bramleye668b202019-08-14 17:57:34 +01007105
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007106 // Unencodable cases fall back on `CalculateSVEAddress`.
Jacob Bramleye668b202019-08-14 17:57:34 +01007107 __ Index(z6.VnS(), -7, 3);
7108 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
7109 // predicate bits when handling larger lanes.
7110 __ Ptrue(p6.VnB(), SVE_ALL);
7111 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
7112
TatWai Chong6205eb42019-09-24 10:07:20 +01007113 __ Index(z7.VnD(), 32, -11);
7114 __ Ptrue(p7.VnD(), SVE_MUL4);
7115 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
Jacob Bramleye668b202019-08-14 17:57:34 +01007116
TatWai Chong6205eb42019-09-24 10:07:20 +01007117 // Corresponding loads.
7118 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
7119 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
7120 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
7121 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
7122 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
7123 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
7124
7125 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
7126 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
7127 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
7128 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
7129
7130 // We can test ld1 by comparing the value loaded with the value stored. In
7131 // most cases, there are two complications:
7132 // - Loads have zeroing predication, so we have to clear the inactive
7133 // elements on our reference.
7134 // - We have to replicate any sign- or zero-extension.
7135
7136 // Ld1b(z8.VnB(), ...)
7137 __ Dup(z18.VnB(), 0);
7138 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
7139
7140 // Ld1b(z9.VnH(), ...)
7141 __ Dup(z19.VnH(), 0);
7142 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
7143
7144 // Ld1h(z10.VnS(), ...)
7145 __ Dup(z20.VnS(), 0);
7146 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
7147
7148 // Ld1b(z11.VnD(), ...)
7149 __ Dup(z21.VnD(), 0);
7150 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
7151
7152 // Ld1d(z12.VnD(), ...)
7153 __ Dup(z22.VnD(), 0);
7154 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
7155
7156 // Ld1w(z13.VnS(), ...)
7157 __ Dup(z23.VnS(), 0);
7158 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
7159
7160 // Ld1sb(z14.VnH(), ...)
7161 __ Dup(z24.VnH(), 0);
7162 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
7163
7164 // Ld1sh(z15.VnS(), ...)
7165 __ Dup(z25.VnS(), 0);
7166 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
7167
7168 // Ld1sb(z16.VnD(), ...)
7169 __ Dup(z26.VnD(), 0);
7170 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
7171
7172 // Ld1sw(z17.VnD(), ...)
7173 __ Dup(z27.VnD(), 0);
7174 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
Jacob Bramleye668b202019-08-14 17:57:34 +01007175
7176 END();
7177
7178 if (CAN_RUN()) {
7179 RUN();
7180
7181 uint8_t* expected = new uint8_t[data_size];
7182 memset(expected, 0, data_size);
7183 uint8_t* middle = &expected[data_size / 2];
7184
7185 int vl_b = vl / kBRegSizeInBytes;
7186 int vl_h = vl / kHRegSizeInBytes;
7187 int vl_s = vl / kSRegSizeInBytes;
7188 int vl_d = vl / kDRegSizeInBytes;
7189
7190 // Encodable cases.
7191
7192 // st1b { z1.b }, SVE_ALL
7193 for (int i = 0; i < vl_b; i++) {
7194 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
7195 }
7196
7197 // st1b { z2.h }, SVE_MUL3
7198 int vl_h_mul3 = vl_h - (vl_h % 3);
7199 for (int i = 0; i < vl_h_mul3; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007200 int64_t offset = 7 * static_cast<int>(vl / (kHRegSize / kBRegSize));
7201 MemoryWrite(middle, offset, i, static_cast<uint8_t>(-2 + (5 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01007202 }
7203
7204 // st1h { z3.s }, SVE_POW2
7205 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7206 for (int i = 0; i < vl_s_pow2; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007207 int64_t offset = -8 * static_cast<int>(vl / (kSRegSize / kHRegSize));
7208 MemoryWrite(middle, offset, i, static_cast<uint16_t>(3 - (7 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01007209 }
7210
7211 // st1b { z4.d }, SVE_VL3
7212 if (vl_d >= 3) {
7213 for (int i = 0; i < 3; i++) {
7214 MemoryWrite(middle,
7215 (8 * vl) + 17,
7216 i,
7217 static_cast<uint8_t>(-4 + (11 * i)));
7218 }
7219 }
7220
7221 // st1d { z5.d }, SVE_VL16
7222 if (vl_d >= 16) {
7223 for (int i = 0; i < 16; i++) {
7224 MemoryWrite(middle,
7225 (10 * vl) + (6 * kDRegSizeInBytes),
7226 i,
7227 static_cast<uint64_t>(6 - (2 * i)));
7228 }
7229 }
7230
7231 // Unencodable cases.
7232
7233 // st1w { z6.s }, SVE_ALL
7234 for (int i = 0; i < vl_s; i++) {
7235 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
7236 }
7237
TatWai Chong6205eb42019-09-24 10:07:20 +01007238 // st1w { z7.d }, SVE_MUL4
7239 int vl_d_mul4 = vl_d - (vl_d % 4);
7240 for (int i = 0; i < vl_d_mul4; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01007241 int64_t offset = 22 * static_cast<int>(vl / (kDRegSize / kWRegSize));
7242 MemoryWrite(middle, offset, i, static_cast<uint32_t>(32 + (-11 * i)));
TatWai Chong6205eb42019-09-24 10:07:20 +01007243 }
7244
Jacob Bramley33c99f92019-10-08 15:24:12 +01007245 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramleye668b202019-08-14 17:57:34 +01007246
TatWai Chong6205eb42019-09-24 10:07:20 +01007247 // Check that we loaded back the expected values.
7248
7249 ASSERT_EQUAL_SVE(z18, z8);
7250 ASSERT_EQUAL_SVE(z19, z9);
7251 ASSERT_EQUAL_SVE(z20, z10);
7252 ASSERT_EQUAL_SVE(z21, z11);
7253 ASSERT_EQUAL_SVE(z22, z12);
7254 ASSERT_EQUAL_SVE(z23, z13);
7255 ASSERT_EQUAL_SVE(z24, z14);
7256 ASSERT_EQUAL_SVE(z25, z15);
7257 ASSERT_EQUAL_SVE(z26, z16);
7258 ASSERT_EQUAL_SVE(z27, z17);
7259
Jacob Bramleye668b202019-08-14 17:57:34 +01007260 delete[] expected;
7261 }
7262 delete[] data;
7263}
7264
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007265TEST_SVE(sve_ld2_st2_scalar_plus_imm) {
7266 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7267 START();
7268
7269 int vl = config->sve_vl_in_bytes();
7270
7271 // The immediate can address [-16, 14] times the VL, so allocate enough space
7272 // to exceed that in both directions.
7273 int data_size = vl * 128;
7274
7275 uint8_t* data = new uint8_t[data_size];
7276 memset(data, 0, data_size);
7277
7278 // Set the base half-way through the buffer so we can use negative indeces.
7279 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7280
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007281 __ Index(z14.VnB(), 1, -3);
7282 __ Index(z15.VnB(), 2, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007283 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007284 __ St2b(z14.VnB(), z15.VnB(), p0, SVEMemOperand(x0));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007285
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007286 __ Index(z16.VnH(), -2, 5);
7287 __ Index(z17.VnH(), -3, 5);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007288 __ Ptrue(p1.VnH(), SVE_MUL3);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007289 __ St2h(z16.VnH(), z17.VnH(), p1, SVEMemOperand(x0, 8, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007290
7291 // Wrap around from z31 to z0.
7292 __ Index(z31.VnS(), 3, -7);
7293 __ Index(z0.VnS(), 4, -7);
7294 __ Ptrue(p2.VnS(), SVE_POW2);
7295 __ St2w(z31.VnS(), z0.VnS(), p2, SVEMemOperand(x0, -12, SVE_MUL_VL));
7296
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007297 __ Index(z18.VnD(), -7, 3);
7298 __ Index(z19.VnD(), -8, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007299 // Sparse predication, including some irrelevant bits (0xe). To make the
7300 // results easy to check, activate each lane <n> where n is a multiple of 5.
7301 Initialise(&masm,
7302 p3,
7303 0xeee10000000001ee,
7304 0xeeeeeee100000000,
7305 0x01eeeeeeeee10000,
7306 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007307 __ St2d(z18.VnD(), z19.VnD(), p3, SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007308
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007309 // We can test ld2 by comparing the values loaded with the values stored.
7310 // There are two complications:
7311 // - Loads have zeroing predication, so we have to clear the inactive
7312 // elements on our reference.
7313 // - We want to test both loads and stores that span { z31, z0 }, so we have
7314 // to move some values around.
7315 //
7316 // Registers z4-z11 will hold as-stored values (with inactive elements
7317 // cleared). Registers z20-z27 will hold the values that were loaded.
7318
7319 // Ld2b(z14.VnB(), z15.VnB(), ...)
7320 __ Dup(z4.VnB(), 0);
7321 __ Dup(z5.VnB(), 0);
7322 __ Mov(z4.VnB(), p0.Merging(), z14.VnB());
7323 __ Mov(z5.VnB(), p0.Merging(), z15.VnB());
7324
7325 // Ld2h(z16.VnH(), z17.VnH(), ...)
7326 __ Dup(z6.VnH(), 0);
7327 __ Dup(z7.VnH(), 0);
7328 __ Mov(z6.VnH(), p1.Merging(), z16.VnH());
7329 __ Mov(z7.VnH(), p1.Merging(), z17.VnH());
7330
7331 // Ld2w(z31.VnS(), z0.VnS(), ...)
7332 __ Dup(z8.VnS(), 0);
7333 __ Dup(z9.VnS(), 0);
7334 __ Mov(z8.VnS(), p2.Merging(), z31.VnS());
7335 __ Mov(z9.VnS(), p2.Merging(), z0.VnS());
7336
7337 // Ld2d(z18.VnD(), z19.VnD(), ...)
7338 __ Dup(z10.VnD(), 0);
7339 __ Dup(z11.VnD(), 0);
7340 __ Mov(z10.VnD(), p3.Merging(), z18.VnD());
7341 __ Mov(z11.VnD(), p3.Merging(), z19.VnD());
7342
7343 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7344 __ Ld2b(z31.VnB(), z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));
7345 __ Mov(z20, z31);
7346 __ Mov(z21, z0);
7347
7348 __ Ld2h(z22.VnH(), z23.VnH(), p1.Zeroing(), SVEMemOperand(x0, 8, SVE_MUL_VL));
7349 __ Ld2w(z24.VnS(),
7350 z25.VnS(),
7351 p2.Zeroing(),
7352 SVEMemOperand(x0, -12, SVE_MUL_VL));
7353 __ Ld2d(z26.VnD(),
7354 z27.VnD(),
7355 p3.Zeroing(),
7356 SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007357
7358 END();
7359
7360 if (CAN_RUN()) {
7361 RUN();
7362
7363 uint8_t* expected = new uint8_t[data_size];
7364 memset(expected, 0, data_size);
7365 uint8_t* middle = &expected[data_size / 2];
7366
7367 int vl_b = vl / kBRegSizeInBytes;
7368 int vl_h = vl / kHRegSizeInBytes;
7369 int vl_s = vl / kSRegSizeInBytes;
7370 int vl_d = vl / kDRegSizeInBytes;
7371
7372 int reg_count = 2;
7373
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007374 // st2b { z14.b, z15.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007375 for (int i = 0; i < vl_b; i++) {
7376 uint8_t lane0 = 1 - (3 * i);
7377 uint8_t lane1 = 2 - (3 * i);
7378 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7379 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7380 }
7381
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007382 // st2h { z16.h, z17.h }, SVE_MUL3
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007383 int vl_h_mul3 = vl_h - (vl_h % 3);
7384 for (int i = 0; i < vl_h_mul3; i++) {
7385 int64_t offset = 8 * vl;
7386 uint16_t lane0 = -2 + (5 * i);
7387 uint16_t lane1 = -3 + (5 * i);
7388 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7389 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7390 }
7391
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007392 // st2w { z31.s, z0.s }, SVE_POW2
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007393 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7394 for (int i = 0; i < vl_s_pow2; i++) {
7395 int64_t offset = -12 * vl;
7396 uint32_t lane0 = 3 - (7 * i);
7397 uint32_t lane1 = 4 - (7 * i);
7398 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7399 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7400 }
7401
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007402 // st2d { z18.d, z19.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007403 for (int i = 0; i < vl_d; i++) {
7404 if ((i % 5) == 0) {
7405 int64_t offset = 14 * vl;
7406 uint64_t lane0 = -7 + (3 * i);
7407 uint64_t lane1 = -8 + (3 * i);
7408 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7409 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7410 }
7411 }
7412
7413 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7414
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007415 // Check that we loaded back the expected values.
7416
7417 // st2b/ld2b
7418 ASSERT_EQUAL_SVE(z4, z20);
7419 ASSERT_EQUAL_SVE(z5, z21);
7420
7421 // st2h/ld2h
7422 ASSERT_EQUAL_SVE(z6, z22);
7423 ASSERT_EQUAL_SVE(z7, z23);
7424
7425 // st2w/ld2w
7426 ASSERT_EQUAL_SVE(z8, z24);
7427 ASSERT_EQUAL_SVE(z9, z25);
7428
7429 // st2d/ld2d
7430 ASSERT_EQUAL_SVE(z10, z26);
7431 ASSERT_EQUAL_SVE(z11, z27);
7432
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007433 delete[] expected;
7434 }
7435 delete[] data;
7436}
7437
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007438TEST_SVE(sve_ld2_st2_scalar_plus_scalar) {
7439 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7440 START();
7441
7442 int vl = config->sve_vl_in_bytes();
7443
7444 // Allocate plenty of space to enable indexing in both directions.
7445 int data_size = vl * 128;
7446
7447 uint8_t* data = new uint8_t[data_size];
7448 memset(data, 0, data_size);
7449
7450 // Set the base half-way through the buffer so we can use negative indeces.
7451 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7452
Jacob Bramleye483ce52019-11-05 16:52:29 +00007453 __ Index(z10.VnB(), -4, 11);
7454 __ Index(z11.VnB(), -5, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007455 __ Ptrue(p7.VnB(), SVE_MUL4);
7456 __ Mov(x1, 0);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007457 __ St2b(z10.VnB(), z11.VnB(), p7, SVEMemOperand(x0, x1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007458
Jacob Bramleye483ce52019-11-05 16:52:29 +00007459 __ Index(z12.VnH(), 6, -2);
7460 __ Index(z13.VnH(), 7, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007461 __ Ptrue(p6.VnH(), SVE_VL16);
7462 __ Rdvl(x2, 3); // Make offsets VL-dependent so we can avoid overlap.
Jacob Bramleye483ce52019-11-05 16:52:29 +00007463 __ St2h(z12.VnH(), z13.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007464
Jacob Bramleye483ce52019-11-05 16:52:29 +00007465 __ Index(z14.VnS(), -7, 3);
7466 __ Index(z15.VnS(), -8, 3);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007467 // Sparse predication, including some irrelevant bits (0xe). To make the
7468 // results easy to check, activate each lane <n> where n is a multiple of 5.
7469 Initialise(&masm,
7470 p5,
7471 0xeee1000010000100,
7472 0x001eeee100001000,
7473 0x0100001eeee10000,
7474 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007475 __ Rdvl(x3, -3);
7476 __ St2w(z14.VnS(), z15.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007477
7478 // Wrap around from z31 to z0.
7479 __ Index(z31.VnD(), 32, -11);
7480 __ Index(z0.VnD(), 33, -11);
7481 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007482 __ Rdvl(x4, 1);
7483 __ St2d(z31.VnD(), z0.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007484
Jacob Bramleye483ce52019-11-05 16:52:29 +00007485 // We can test ld2 by comparing the values loaded with the values stored.
7486 // There are two complications:
7487 // - Loads have zeroing predication, so we have to clear the inactive
7488 // elements on our reference.
7489 // - We want to test both loads and stores that span { z31, z0 }, so we have
7490 // to move some values around.
7491 //
7492 // Registers z4-z11 will hold as-stored values (with inactive elements
7493 // cleared). Registers z20-z27 will hold the values that were loaded.
7494
7495 // Ld2b(z20.VnB(), z21.VnB(), ...)
7496 __ Dup(z4.VnB(), 0);
7497 __ Dup(z5.VnB(), 0);
7498 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
7499 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
7500
7501 // Ld2h(z22.VnH(), z23.VnH(), ...)
7502 __ Dup(z6.VnH(), 0);
7503 __ Dup(z7.VnH(), 0);
7504 __ Mov(z6.VnH(), p6.Merging(), z12.VnH());
7505 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
7506
7507 // Ld2w(z24.VnS(), z25.VnS(), ...)
7508 __ Dup(z8.VnS(), 0);
7509 __ Dup(z9.VnS(), 0);
7510 __ Mov(z8.VnS(), p5.Merging(), z14.VnS());
7511 __ Mov(z9.VnS(), p5.Merging(), z15.VnS());
7512
7513 // Ld2d(z31.VnD(), z0.VnD(), ...)
7514 __ Dup(z10.VnD(), 0);
7515 __ Dup(z11.VnD(), 0);
7516 __ Mov(z10.VnD(), p4.Merging(), z31.VnD());
7517 __ Mov(z11.VnD(), p4.Merging(), z0.VnD());
7518
7519 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7520 __ Ld2b(z31.VnB(), z0.VnB(), p7.Zeroing(), SVEMemOperand(x0, x1));
7521 __ Mov(z20, z31);
7522 __ Mov(z21, z0);
7523
7524 __ Ld2h(z22.VnH(), z23.VnH(), p6.Zeroing(), SVEMemOperand(x0, x2, LSL, 1));
7525 __ Ld2w(z24.VnS(), z25.VnS(), p5.Zeroing(), SVEMemOperand(x0, x3, LSL, 2));
7526 __ Ld2d(z26.VnD(), z27.VnD(), p4.Zeroing(), SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007527
7528 END();
7529
7530 if (CAN_RUN()) {
7531 RUN();
7532
7533 uint8_t* expected = new uint8_t[data_size];
7534 memset(expected, 0, data_size);
7535 uint8_t* middle = &expected[data_size / 2];
7536
7537 int vl_b = vl / kBRegSizeInBytes;
7538 int vl_h = vl / kHRegSizeInBytes;
7539 int vl_s = vl / kSRegSizeInBytes;
7540 int vl_d = vl / kDRegSizeInBytes;
7541
7542 int reg_count = 2;
7543
Jacob Bramleye483ce52019-11-05 16:52:29 +00007544 // st2b { z10.b, z11.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007545 int vl_b_mul4 = vl_b - (vl_b % 4);
7546 for (int i = 0; i < vl_b_mul4; i++) {
7547 uint8_t lane0 = -4 + (11 * i);
7548 uint8_t lane1 = -5 + (11 * i);
7549 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7550 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7551 }
7552
Jacob Bramleye483ce52019-11-05 16:52:29 +00007553 // st2h { z12.h, z13.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007554 if (vl_h >= 16) {
7555 for (int i = 0; i < 16; i++) {
7556 int64_t offset = (3 << kHRegSizeInBytesLog2) * vl;
7557 uint16_t lane0 = 6 - (2 * i);
7558 uint16_t lane1 = 7 - (2 * i);
7559 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7560 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7561 }
7562 }
7563
Jacob Bramleye483ce52019-11-05 16:52:29 +00007564 // st2w { z14.s, z15.s }, ((i % 5) == 0)
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007565 for (int i = 0; i < vl_s; i++) {
7566 if ((i % 5) == 0) {
7567 int64_t offset = -(3 << kSRegSizeInBytesLog2) * vl;
7568 uint32_t lane0 = -7 + (3 * i);
7569 uint32_t lane1 = -8 + (3 * i);
7570 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7571 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7572 }
7573 }
7574
7575 // st2d { z31.b, z0.b }, SVE_MUL3
7576 int vl_d_mul3 = vl_d - (vl_d % 3);
7577 for (int i = 0; i < vl_d_mul3; i++) {
7578 int64_t offset = (1 << kDRegSizeInBytesLog2) * vl;
7579 uint64_t lane0 = 32 - (11 * i);
7580 uint64_t lane1 = 33 - (11 * i);
7581 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7582 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7583 }
7584
7585 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7586
Jacob Bramleye483ce52019-11-05 16:52:29 +00007587 // Check that we loaded back the expected values.
7588
7589 // st2b/ld2b
7590 ASSERT_EQUAL_SVE(z4, z20);
7591 ASSERT_EQUAL_SVE(z5, z21);
7592
7593 // st2h/ld2h
7594 ASSERT_EQUAL_SVE(z6, z22);
7595 ASSERT_EQUAL_SVE(z7, z23);
7596
7597 // st2w/ld2w
7598 ASSERT_EQUAL_SVE(z8, z24);
7599 ASSERT_EQUAL_SVE(z9, z25);
7600
7601 // st2d/ld2d
7602 ASSERT_EQUAL_SVE(z10, z26);
7603 ASSERT_EQUAL_SVE(z11, z27);
7604
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007605 delete[] expected;
7606 }
7607 delete[] data;
7608}
7609
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007610TEST_SVE(sve_ld3_st3_scalar_plus_imm) {
7611 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7612 START();
7613
7614 int vl = config->sve_vl_in_bytes();
7615
7616 // The immediate can address [-24, 21] times the VL, so allocate enough space
7617 // to exceed that in both directions.
7618 int data_size = vl * 128;
7619
7620 uint8_t* data = new uint8_t[data_size];
7621 memset(data, 0, data_size);
7622
7623 // Set the base half-way through the buffer so we can use negative indeces.
7624 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7625
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007626 // We can test ld3 by comparing the values loaded with the values stored.
7627 // There are two complications:
7628 // - Loads have zeroing predication, so we have to clear the inactive
7629 // elements on our reference.
7630 // - We want to test both loads and stores that span { z31, z0 }, so we have
7631 // to move some values around.
7632 //
7633 // Registers z4-z15 will hold as-stored values (with inactive elements
7634 // cleared). Registers z16-z27 will hold the values that were loaded.
7635
7636 __ Index(z10.VnB(), 1, -3);
7637 __ Index(z11.VnB(), 2, -3);
7638 __ Index(z12.VnB(), 3, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007639 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007640 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p0, SVEMemOperand(x0));
7641 // Save the stored values for ld3 tests.
7642 __ Dup(z4.VnB(), 0);
7643 __ Dup(z5.VnB(), 0);
7644 __ Dup(z6.VnB(), 0);
7645 __ Mov(z4.VnB(), p0.Merging(), z10.VnB());
7646 __ Mov(z5.VnB(), p0.Merging(), z11.VnB());
7647 __ Mov(z6.VnB(), p0.Merging(), z12.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007648
7649 // Wrap around from z31 to z0.
7650 __ Index(z31.VnH(), -2, 5);
7651 __ Index(z0.VnH(), -3, 5);
7652 __ Index(z1.VnH(), -4, 5);
7653 __ Ptrue(p1.VnH(), SVE_MUL3);
7654 __ St3h(z31.VnH(), z0.VnH(), z1.VnH(), p1, SVEMemOperand(x0, 9, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007655 // Save the stored values for ld3 tests.
7656 __ Dup(z7.VnH(), 0);
7657 __ Dup(z8.VnH(), 0);
7658 __ Dup(z9.VnH(), 0);
7659 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
7660 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
7661 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007662
7663 __ Index(z30.VnS(), 3, -7);
7664 __ Index(z31.VnS(), 4, -7);
7665 __ Index(z0.VnS(), 5, -7);
7666 __ Ptrue(p2.VnS(), SVE_POW2);
7667 __ St3w(z30.VnS(),
7668 z31.VnS(),
7669 z0.VnS(),
7670 p2,
7671 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007672 // Save the stored values for ld3 tests.
7673 __ Dup(z10.VnS(), 0);
7674 __ Dup(z11.VnS(), 0);
7675 __ Dup(z12.VnS(), 0);
7676 __ Mov(z10.VnS(), p2.Merging(), z30.VnS());
7677 __ Mov(z11.VnS(), p2.Merging(), z31.VnS());
7678 __ Mov(z12.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007679
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007680 __ Index(z0.VnD(), -7, 3);
7681 __ Index(z1.VnD(), -8, 3);
7682 __ Index(z2.VnD(), -9, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007683 // Sparse predication, including some irrelevant bits (0xee). To make the
7684 // results easy to check, activate each lane <n> where n is a multiple of 5.
7685 Initialise(&masm,
7686 p3,
7687 0xeee10000000001ee,
7688 0xeeeeeee100000000,
7689 0x01eeeeeeeee10000,
7690 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007691 __ St3d(z0.VnD(), z1.VnD(), z2.VnD(), p3, SVEMemOperand(x0, 15, SVE_MUL_VL));
7692 // Save the stored values for ld3 tests.
7693 __ Dup(z13.VnD(), 0);
7694 __ Dup(z14.VnD(), 0);
7695 __ Dup(z15.VnD(), 0);
7696 __ Mov(z13.VnD(), p3.Merging(), z0.VnD());
7697 __ Mov(z14.VnD(), p3.Merging(), z1.VnD());
7698 __ Mov(z15.VnD(), p3.Merging(), z2.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007699
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007700 // Corresponding loads.
7701 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7702 __ Ld3b(z31.VnB(), z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(x0));
7703 __ Mov(z16, z31);
7704 __ Mov(z17, z0);
7705 __ Mov(z18, z1);
7706 __ Ld3h(z30.VnH(),
7707 z31.VnH(),
7708 z0.VnH(),
7709 p1.Zeroing(),
7710 SVEMemOperand(x0, 9, SVE_MUL_VL));
7711 __ Mov(z19, z30);
7712 __ Mov(z20, z31);
7713 __ Mov(z21, z0);
7714 __ Ld3w(z22.VnS(),
7715 z23.VnS(),
7716 z24.VnS(),
7717 p2.Zeroing(),
7718 SVEMemOperand(x0, -12, SVE_MUL_VL));
7719 __ Ld3d(z25.VnD(),
7720 z26.VnD(),
7721 z27.VnD(),
7722 p3.Zeroing(),
7723 SVEMemOperand(x0, 15, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007724
7725 END();
7726
7727 if (CAN_RUN()) {
7728 RUN();
7729
7730 uint8_t* expected = new uint8_t[data_size];
7731 memset(expected, 0, data_size);
7732 uint8_t* middle = &expected[data_size / 2];
7733
7734 int vl_b = vl / kBRegSizeInBytes;
7735 int vl_h = vl / kHRegSizeInBytes;
7736 int vl_s = vl / kSRegSizeInBytes;
7737 int vl_d = vl / kDRegSizeInBytes;
7738
7739 int reg_count = 3;
7740
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007741 // st3b { z10.b, z11.b, z12.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007742 for (int i = 0; i < vl_b; i++) {
7743 uint8_t lane0 = 1 - (3 * i);
7744 uint8_t lane1 = 2 - (3 * i);
7745 uint8_t lane2 = 3 - (3 * i);
7746 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7747 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7748 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
7749 }
7750
7751 // st3h { z31.h, z0.h, z1.h }, SVE_MUL3
7752 int vl_h_mul3 = vl_h - (vl_h % 3);
7753 for (int i = 0; i < vl_h_mul3; i++) {
7754 int64_t offset = 9 * vl;
7755 uint16_t lane0 = -2 + (5 * i);
7756 uint16_t lane1 = -3 + (5 * i);
7757 uint16_t lane2 = -4 + (5 * i);
7758 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7759 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7760 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7761 }
7762
7763 // st3w { z30.s, z31.s, z0.s }, SVE_POW2
7764 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7765 for (int i = 0; i < vl_s_pow2; i++) {
7766 int64_t offset = -12 * vl;
7767 uint32_t lane0 = 3 - (7 * i);
7768 uint32_t lane1 = 4 - (7 * i);
7769 uint32_t lane2 = 5 - (7 * i);
7770 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7771 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7772 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7773 }
7774
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007775 // st3d { z0.d, z1.d, z2.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007776 for (int i = 0; i < vl_d; i++) {
7777 if ((i % 5) == 0) {
7778 int64_t offset = 15 * vl;
7779 uint64_t lane0 = -7 + (3 * i);
7780 uint64_t lane1 = -8 + (3 * i);
7781 uint64_t lane2 = -9 + (3 * i);
7782 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7783 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7784 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7785 }
7786 }
7787
7788 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7789
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007790 // Check that we loaded back the expected values.
7791
7792 // st3b/ld3b
7793 ASSERT_EQUAL_SVE(z4, z16);
7794 ASSERT_EQUAL_SVE(z5, z17);
7795 ASSERT_EQUAL_SVE(z6, z18);
7796
7797 // st3h/ld3h
7798 ASSERT_EQUAL_SVE(z7, z19);
7799 ASSERT_EQUAL_SVE(z8, z20);
7800 ASSERT_EQUAL_SVE(z9, z21);
7801
7802 // st3w/ld3w
7803 ASSERT_EQUAL_SVE(z10, z22);
7804 ASSERT_EQUAL_SVE(z11, z23);
7805 ASSERT_EQUAL_SVE(z12, z24);
7806
7807 // st3d/ld3d
7808 ASSERT_EQUAL_SVE(z13, z25);
7809 ASSERT_EQUAL_SVE(z14, z26);
7810 ASSERT_EQUAL_SVE(z15, z27);
7811
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007812 delete[] expected;
7813 }
7814 delete[] data;
7815}
7816
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007817TEST_SVE(sve_ld3_st3_scalar_plus_scalar) {
7818 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7819 START();
7820
7821 int vl = config->sve_vl_in_bytes();
7822
7823 // Allocate plenty of space to enable indexing in both directions.
7824 int data_size = vl * 128;
7825
7826 uint8_t* data = new uint8_t[data_size];
7827 memset(data, 0, data_size);
7828
7829 // Set the base half-way through the buffer so we can use negative indeces.
7830 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7831
Jacob Bramleye483ce52019-11-05 16:52:29 +00007832 // We can test ld3 by comparing the values loaded with the values stored.
7833 // There are two complications:
7834 // - Loads have zeroing predication, so we have to clear the inactive
7835 // elements on our reference.
7836 // - We want to test both loads and stores that span { z31, z0 }, so we have
7837 // to move some values around.
7838 //
7839 // Registers z4-z15 will hold as-stored values (with inactive elements
7840 // cleared). Registers z16-z27 will hold the values that were loaded.
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007841
Jacob Bramleye483ce52019-11-05 16:52:29 +00007842 __ Index(z10.VnB(), -4, 11);
7843 __ Index(z11.VnB(), -5, 11);
7844 __ Index(z12.VnB(), -6, 11);
7845 __ Ptrue(p7.VnB(), SVE_MUL4);
7846 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
7847 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p7, SVEMemOperand(x0, x1, LSL, 0));
7848 // Save the stored values for ld3 tests.
7849 __ Dup(z4.VnB(), 0);
7850 __ Dup(z5.VnB(), 0);
7851 __ Dup(z6.VnB(), 0);
7852 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
7853 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
7854 __ Mov(z6.VnB(), p7.Merging(), z12.VnB());
7855
7856 __ Index(z13.VnH(), 6, -2);
7857 __ Index(z14.VnH(), 7, -2);
7858 __ Index(z15.VnH(), 8, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007859 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007860 __ Rdvl(x2, 5); // (5 * vl) << 1 = 10 * vl
7861 __ St3h(z13.VnH(), z14.VnH(), z15.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
7862 // Save the stored values for ld3 tests.
7863 __ Dup(z7.VnH(), 0);
7864 __ Dup(z8.VnH(), 0);
7865 __ Dup(z9.VnH(), 0);
7866 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
7867 __ Mov(z8.VnH(), p6.Merging(), z14.VnH());
7868 __ Mov(z9.VnH(), p6.Merging(), z15.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007869
7870 // Wrap around from z31 to z0.
7871 __ Index(z30.VnS(), -7, 3);
7872 __ Index(z31.VnS(), -8, 3);
7873 __ Index(z0.VnS(), -9, 3);
7874 // Sparse predication, including some irrelevant bits (0xe). To make the
7875 // results easy to check, activate each lane <n> where n is a multiple of 5.
7876 Initialise(&masm,
7877 p5,
7878 0xeee1000010000100,
7879 0x001eeee100001000,
7880 0x0100001eeee10000,
7881 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007882 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
7883 __ St3w(z30.VnS(), z31.VnS(), z0.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
7884 // Save the stored values for ld3 tests.
7885 __ Dup(z10.VnS(), 0);
7886 __ Dup(z11.VnS(), 0);
7887 __ Dup(z12.VnS(), 0);
7888 __ Mov(z10.VnS(), p5.Merging(), z30.VnS());
7889 __ Mov(z11.VnS(), p5.Merging(), z31.VnS());
7890 __ Mov(z12.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007891
7892 __ Index(z31.VnD(), 32, -11);
7893 __ Index(z0.VnD(), 33, -11);
7894 __ Index(z1.VnD(), 34, -11);
7895 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007896 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
7897 __ St3d(z31.VnD(), z0.VnD(), z1.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
7898 // Save the stored values for ld3 tests.
7899 __ Dup(z13.VnD(), 0);
7900 __ Dup(z14.VnD(), 0);
7901 __ Dup(z15.VnD(), 0);
7902 __ Mov(z13.VnD(), p4.Merging(), z31.VnD());
7903 __ Mov(z14.VnD(), p4.Merging(), z0.VnD());
7904 __ Mov(z15.VnD(), p4.Merging(), z1.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007905
Jacob Bramleye483ce52019-11-05 16:52:29 +00007906 // Corresponding loads.
7907 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7908 __ Ld3b(z31.VnB(),
7909 z0.VnB(),
7910 z1.VnB(),
7911 p7.Zeroing(),
7912 SVEMemOperand(x0, x1, LSL, 0));
7913 __ Mov(z16, z31);
7914 __ Mov(z17, z0);
7915 __ Mov(z18, z1);
7916 __ Ld3h(z30.VnH(),
7917 z31.VnH(),
7918 z0.VnH(),
7919 p6.Zeroing(),
7920 SVEMemOperand(x0, x2, LSL, 1));
7921 __ Mov(z19, z30);
7922 __ Mov(z20, z31);
7923 __ Mov(z21, z0);
7924 __ Ld3w(z22.VnS(),
7925 z23.VnS(),
7926 z24.VnS(),
7927 p5.Zeroing(),
7928 SVEMemOperand(x0, x3, LSL, 2));
7929 __ Ld3d(z25.VnD(),
7930 z26.VnD(),
7931 z27.VnD(),
7932 p4.Zeroing(),
7933 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007934
7935 END();
7936
7937 if (CAN_RUN()) {
7938 RUN();
7939
7940 uint8_t* expected = new uint8_t[data_size];
7941 memset(expected, 0, data_size);
7942 uint8_t* middle = &expected[data_size / 2];
7943
7944 int vl_b = vl / kBRegSizeInBytes;
7945 int vl_h = vl / kHRegSizeInBytes;
7946 int vl_s = vl / kSRegSizeInBytes;
7947 int vl_d = vl / kDRegSizeInBytes;
7948
7949 int reg_count = 3;
7950
Jacob Bramleye483ce52019-11-05 16:52:29 +00007951 // st3b { z10.b, z11.b, z12.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007952 int vl_b_mul4 = vl_b - (vl_b % 4);
7953 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007954 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007955 uint8_t lane0 = -4 + (11 * i);
7956 uint8_t lane1 = -5 + (11 * i);
7957 uint8_t lane2 = -6 + (11 * i);
7958 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7959 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7960 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7961 }
7962
Jacob Bramleye483ce52019-11-05 16:52:29 +00007963 // st3h { z13.h, z14.h, z15.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007964 if (vl_h >= 16) {
7965 for (int i = 0; i < 16; i++) {
7966 int64_t offset = (5 << kHRegSizeInBytesLog2) * vl;
7967 uint16_t lane0 = 6 - (2 * i);
7968 uint16_t lane1 = 7 - (2 * i);
7969 uint16_t lane2 = 8 - (2 * i);
7970 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7971 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7972 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7973 }
7974 }
7975
7976 // st3w { z30.s, z31.s, z0.s }, ((i % 5) == 0)
7977 for (int i = 0; i < vl_s; i++) {
7978 if ((i % 5) == 0) {
7979 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
7980 uint32_t lane0 = -7 + (3 * i);
7981 uint32_t lane1 = -8 + (3 * i);
7982 uint32_t lane2 = -9 + (3 * i);
7983 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7984 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7985 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7986 }
7987 }
7988
7989 // st3d { z31.d, z0.d, z1.d }, SVE_MUL3
7990 int vl_d_mul3 = vl_d - (vl_d % 3);
7991 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007992 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007993 uint64_t lane0 = 32 - (11 * i);
7994 uint64_t lane1 = 33 - (11 * i);
7995 uint64_t lane2 = 34 - (11 * i);
7996 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7997 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7998 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7999 }
8000
8001 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
8002
Jacob Bramleye483ce52019-11-05 16:52:29 +00008003 // Check that we loaded back the expected values.
8004
8005 // st3b/ld3b
8006 ASSERT_EQUAL_SVE(z4, z16);
8007 ASSERT_EQUAL_SVE(z5, z17);
8008 ASSERT_EQUAL_SVE(z6, z18);
8009
8010 // st3h/ld3h
8011 ASSERT_EQUAL_SVE(z7, z19);
8012 ASSERT_EQUAL_SVE(z8, z20);
8013 ASSERT_EQUAL_SVE(z9, z21);
8014
8015 // st3w/ld3w
8016 ASSERT_EQUAL_SVE(z10, z22);
8017 ASSERT_EQUAL_SVE(z11, z23);
8018 ASSERT_EQUAL_SVE(z12, z24);
8019
8020 // st3d/ld3d
8021 ASSERT_EQUAL_SVE(z13, z25);
8022 ASSERT_EQUAL_SVE(z14, z26);
8023 ASSERT_EQUAL_SVE(z15, z27);
8024
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008025 delete[] expected;
8026 }
8027 delete[] data;
8028}
8029
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008030TEST_SVE(sve_ld4_st4_scalar_plus_imm) {
8031 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8032 START();
8033
8034 int vl = config->sve_vl_in_bytes();
8035
8036 // The immediate can address [-24, 21] times the VL, so allocate enough space
8037 // to exceed that in both directions.
8038 int data_size = vl * 128;
8039
8040 uint8_t* data = new uint8_t[data_size];
8041 memset(data, 0, data_size);
8042
8043 // Set the base half-way through the buffer so we can use negative indeces.
8044 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
8045
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008046 // We can test ld4 by comparing the values loaded with the values stored.
8047 // There are two complications:
8048 // - Loads have zeroing predication, so we have to clear the inactive
8049 // elements on our reference.
8050 // - We want to test both loads and stores that span { z31, z0 }, so we have
8051 // to move some values around.
8052 //
8053 // Registers z3-z18 will hold as-stored values (with inactive elements
8054 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
8055 // loaded.
8056
8057 __ Index(z10.VnB(), 1, -7);
8058 __ Index(z11.VnB(), 2, -7);
8059 __ Index(z12.VnB(), 3, -7);
8060 __ Index(z13.VnB(), 4, -7);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008061 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008062 __ St4b(z10.VnB(), z11.VnB(), z12.VnB(), z13.VnB(), p0, SVEMemOperand(x0));
8063 // Save the stored values for ld4 tests.
8064 __ Dup(z3.VnB(), 0);
8065 __ Dup(z4.VnB(), 0);
8066 __ Dup(z5.VnB(), 0);
8067 __ Dup(z6.VnB(), 0);
8068 __ Mov(z3.VnB(), p0.Merging(), z10.VnB());
8069 __ Mov(z4.VnB(), p0.Merging(), z11.VnB());
8070 __ Mov(z5.VnB(), p0.Merging(), z12.VnB());
8071 __ Mov(z6.VnB(), p0.Merging(), z13.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008072
8073 // Wrap around from z31 to z0.
8074 __ Index(z31.VnH(), -2, 5);
8075 __ Index(z0.VnH(), -3, 5);
8076 __ Index(z1.VnH(), -4, 5);
8077 __ Index(z2.VnH(), -5, 5);
8078 __ Ptrue(p1.VnH(), SVE_MUL3);
8079 __ St4h(z31.VnH(),
8080 z0.VnH(),
8081 z1.VnH(),
8082 z2.VnH(),
8083 p1,
8084 SVEMemOperand(x0, 4, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008085 // Save the stored values for ld4 tests.
8086 __ Dup(z7.VnH(), 0);
8087 __ Dup(z8.VnH(), 0);
8088 __ Dup(z9.VnH(), 0);
8089 __ Dup(z10.VnH(), 0);
8090 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
8091 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
8092 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
8093 __ Mov(z10.VnH(), p1.Merging(), z2.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008094
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008095 // Wrap around from z31 to z0.
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008096 __ Index(z29.VnS(), 2, -7);
8097 __ Index(z30.VnS(), 3, -7);
8098 __ Index(z31.VnS(), 4, -7);
8099 __ Index(z0.VnS(), 5, -7);
8100 __ Ptrue(p2.VnS(), SVE_POW2);
8101 __ St4w(z29.VnS(),
8102 z30.VnS(),
8103 z31.VnS(),
8104 z0.VnS(),
8105 p2,
8106 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008107 // Save the stored values for ld4 tests.
8108 __ Dup(z11.VnS(), 0);
8109 __ Dup(z12.VnS(), 0);
8110 __ Dup(z13.VnS(), 0);
8111 __ Dup(z14.VnS(), 0);
8112 __ Mov(z11.VnS(), p2.Merging(), z29.VnS());
8113 __ Mov(z12.VnS(), p2.Merging(), z30.VnS());
8114 __ Mov(z13.VnS(), p2.Merging(), z31.VnS());
8115 __ Mov(z14.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008116
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008117 __ Index(z20.VnD(), -7, 8);
8118 __ Index(z21.VnD(), -8, 8);
8119 __ Index(z22.VnD(), -9, 8);
8120 __ Index(z23.VnD(), -10, 8);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008121 // Sparse predication, including some irrelevant bits (0xee). To make the
8122 // results easy to check, activate each lane <n> where n is a multiple of 5.
8123 Initialise(&masm,
8124 p3,
8125 0xeee10000000001ee,
8126 0xeeeeeee100000000,
8127 0x01eeeeeeeee10000,
8128 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008129 __ St4d(z20.VnD(),
8130 z21.VnD(),
8131 z22.VnD(),
8132 z23.VnD(),
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008133 p3,
8134 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008135 // Save the stored values for ld4 tests.
8136 __ Dup(z15.VnD(), 0);
8137 __ Dup(z16.VnD(), 0);
8138 __ Dup(z17.VnD(), 0);
8139 __ Dup(z18.VnD(), 0);
8140 __ Mov(z15.VnD(), p3.Merging(), z20.VnD());
8141 __ Mov(z16.VnD(), p3.Merging(), z21.VnD());
8142 __ Mov(z17.VnD(), p3.Merging(), z22.VnD());
8143 __ Mov(z18.VnD(), p3.Merging(), z23.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008144
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008145 // Corresponding loads.
8146 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
8147 __ Ld4b(z31.VnB(),
8148 z0.VnB(),
8149 z1.VnB(),
8150 z2.VnB(),
8151 p0.Zeroing(),
8152 SVEMemOperand(x0));
8153 __ Mov(z19, z31);
8154 __ Mov(z20, z0);
8155 __ Mov(z21, z1);
8156 __ Mov(z22, z2);
8157 __ Ld4h(z23.VnH(),
8158 z24.VnH(),
8159 z25.VnH(),
8160 z26.VnH(),
8161 p1.Zeroing(),
8162 SVEMemOperand(x0, 4, SVE_MUL_VL));
8163 __ Ld4w(z27.VnS(),
8164 z28.VnS(),
8165 z29.VnS(),
8166 z30.VnS(),
8167 p2.Zeroing(),
8168 SVEMemOperand(x0, -12, SVE_MUL_VL));
8169 // Wrap around from z31 to z0.
8170 __ Ld4d(z31.VnD(),
8171 z0.VnD(),
8172 z1.VnD(),
8173 z2.VnD(),
8174 p3.Zeroing(),
8175 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008176
8177 END();
8178
8179 if (CAN_RUN()) {
8180 RUN();
8181
8182 uint8_t* expected = new uint8_t[data_size];
8183 memset(expected, 0, data_size);
8184 uint8_t* middle = &expected[data_size / 2];
8185
8186 int vl_b = vl / kBRegSizeInBytes;
8187 int vl_h = vl / kHRegSizeInBytes;
8188 int vl_s = vl / kSRegSizeInBytes;
8189 int vl_d = vl / kDRegSizeInBytes;
8190
8191 int reg_count = 4;
8192
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008193 // st2b { z10.b, z11.b, z12.b, z13.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008194 for (int i = 0; i < vl_b; i++) {
8195 uint8_t lane0 = 1 - (7 * i);
8196 uint8_t lane1 = 2 - (7 * i);
8197 uint8_t lane2 = 3 - (7 * i);
8198 uint8_t lane3 = 4 - (7 * i);
8199 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
8200 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
8201 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
8202 MemoryWrite(middle, 0, (i * reg_count) + 3, lane3);
8203 }
8204
8205 // st4h { z31.h, z0.h, z1.h, z2.h }, SVE_MUL3
8206 int vl_h_mul3 = vl_h - (vl_h % 3);
8207 for (int i = 0; i < vl_h_mul3; i++) {
8208 int64_t offset = 4 * vl;
8209 uint16_t lane0 = -2 + (5 * i);
8210 uint16_t lane1 = -3 + (5 * i);
8211 uint16_t lane2 = -4 + (5 * i);
8212 uint16_t lane3 = -5 + (5 * i);
8213 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8214 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8215 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8216 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8217 }
8218
8219 // st4w { z29.s, z30.s, z31.s, z0.s }, SVE_POW2
8220 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
8221 for (int i = 0; i < vl_s_pow2; i++) {
8222 int64_t offset = -12 * vl;
8223 uint32_t lane0 = 2 - (7 * i);
8224 uint32_t lane1 = 3 - (7 * i);
8225 uint32_t lane2 = 4 - (7 * i);
8226 uint32_t lane3 = 5 - (7 * i);
8227 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8228 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8229 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8230 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8231 }
8232
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008233 // st4d { z20.d, z21.d, z22.d, z23.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008234 for (int i = 0; i < vl_d; i++) {
8235 if ((i % 5) == 0) {
8236 int64_t offset = 16 * vl;
8237 uint64_t lane0 = -7 + (8 * i);
8238 uint64_t lane1 = -8 + (8 * i);
8239 uint64_t lane2 = -9 + (8 * i);
8240 uint64_t lane3 = -10 + (8 * i);
8241 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8242 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8243 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8244 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8245 }
8246 }
8247
8248 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
8249
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008250 // Check that we loaded back the expected values.
8251
8252 // st4b/ld4b
8253 ASSERT_EQUAL_SVE(z3, z19);
8254 ASSERT_EQUAL_SVE(z4, z20);
8255 ASSERT_EQUAL_SVE(z5, z21);
8256 ASSERT_EQUAL_SVE(z6, z22);
8257
8258 // st4h/ld4h
8259 ASSERT_EQUAL_SVE(z7, z23);
8260 ASSERT_EQUAL_SVE(z8, z24);
8261 ASSERT_EQUAL_SVE(z9, z25);
8262 ASSERT_EQUAL_SVE(z10, z26);
8263
8264 // st4w/ld4w
8265 ASSERT_EQUAL_SVE(z11, z27);
8266 ASSERT_EQUAL_SVE(z12, z28);
8267 ASSERT_EQUAL_SVE(z13, z29);
8268 ASSERT_EQUAL_SVE(z14, z30);
8269
8270 // st4d/ld4d
8271 ASSERT_EQUAL_SVE(z15, z31);
8272 ASSERT_EQUAL_SVE(z16, z0);
8273 ASSERT_EQUAL_SVE(z17, z1);
8274 ASSERT_EQUAL_SVE(z18, z2);
8275
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008276 delete[] expected;
8277 }
8278 delete[] data;
8279}
8280
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008281TEST_SVE(sve_ld4_st4_scalar_plus_scalar) {
8282 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8283 START();
8284
8285 int vl = config->sve_vl_in_bytes();
8286
8287 // Allocate plenty of space to enable indexing in both directions.
8288 int data_size = vl * 128;
8289
8290 uint8_t* data = new uint8_t[data_size];
8291 memset(data, 0, data_size);
8292
8293 // Set the base half-way through the buffer so we can use negative indeces.
8294 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
8295
Jacob Bramleye483ce52019-11-05 16:52:29 +00008296 // We can test ld4 by comparing the values loaded with the values stored.
8297 // There are two complications:
8298 // - Loads have zeroing predication, so we have to clear the inactive
8299 // elements on our reference.
8300 // - We want to test both loads and stores that span { z31, z0 }, so we have
8301 // to move some values around.
8302 //
8303 // Registers z3-z18 will hold as-stored values (with inactive elements
8304 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
8305 // loaded.
8306
8307 __ Index(z19.VnB(), -4, 11);
8308 __ Index(z20.VnB(), -5, 11);
8309 __ Index(z21.VnB(), -6, 11);
8310 __ Index(z22.VnB(), -7, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008311 __ Ptrue(p7.VnB(), SVE_MUL4);
Jacob Bramleye483ce52019-11-05 16:52:29 +00008312 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
8313 __ St4b(z19.VnB(),
8314 z20.VnB(),
8315 z21.VnB(),
8316 z22.VnB(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008317 p7,
8318 SVEMemOperand(x0, x1, LSL, 0));
Jacob Bramleye483ce52019-11-05 16:52:29 +00008319 // Save the stored values for ld4 tests.
8320 __ Dup(z3.VnB(), 0);
8321 __ Dup(z4.VnB(), 0);
8322 __ Dup(z5.VnB(), 0);
8323 __ Dup(z6.VnB(), 0);
8324 __ Mov(z3.VnB(), p7.Merging(), z19.VnB());
8325 __ Mov(z4.VnB(), p7.Merging(), z20.VnB());
8326 __ Mov(z5.VnB(), p7.Merging(), z21.VnB());
8327 __ Mov(z6.VnB(), p7.Merging(), z22.VnB());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008328
Jacob Bramleye483ce52019-11-05 16:52:29 +00008329 __ Index(z23.VnH(), 6, -2);
8330 __ Index(z24.VnH(), 7, -2);
8331 __ Index(z25.VnH(), 8, -2);
8332 __ Index(z26.VnH(), 9, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008333 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00008334 __ Rdvl(x2, 7); // (7 * vl) << 1 = 14 * vl
8335 __ St4h(z23.VnH(),
8336 z24.VnH(),
8337 z25.VnH(),
8338 z26.VnH(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008339 p6,
8340 SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleye483ce52019-11-05 16:52:29 +00008341 // Save the stored values for ld4 tests.
8342 __ Dup(z7.VnH(), 0);
8343 __ Dup(z8.VnH(), 0);
8344 __ Dup(z9.VnH(), 0);
8345 __ Dup(z10.VnH(), 0);
8346 __ Mov(z7.VnH(), p6.Merging(), z23.VnH());
8347 __ Mov(z8.VnH(), p6.Merging(), z24.VnH());
8348 __ Mov(z9.VnH(), p6.Merging(), z25.VnH());
8349 __ Mov(z10.VnH(), p6.Merging(), z26.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008350
8351 // Wrap around from z31 to z0.
8352 __ Index(z29.VnS(), -6, 7);
8353 __ Index(z30.VnS(), -7, 7);
8354 __ Index(z31.VnS(), -8, 7);
8355 __ Index(z0.VnS(), -9, 7);
8356 // Sparse predication, including some irrelevant bits (0xe). To make the
8357 // results easy to check, activate each lane <n> where n is a multiple of 5.
8358 Initialise(&masm,
8359 p5,
8360 0xeee1000010000100,
8361 0x001eeee100001000,
8362 0x0100001eeee10000,
8363 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00008364 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008365 __ St4w(z29.VnS(),
8366 z30.VnS(),
8367 z31.VnS(),
8368 z0.VnS(),
8369 p5,
Jacob Bramleye483ce52019-11-05 16:52:29 +00008370 SVEMemOperand(x0, x3, LSL, 2));
8371 // Save the stored values for ld4 tests.
8372 __ Dup(z11.VnS(), 0);
8373 __ Dup(z12.VnS(), 0);
8374 __ Dup(z13.VnS(), 0);
8375 __ Dup(z14.VnS(), 0);
8376 __ Mov(z11.VnS(), p5.Merging(), z29.VnS());
8377 __ Mov(z12.VnS(), p5.Merging(), z30.VnS());
8378 __ Mov(z13.VnS(), p5.Merging(), z31.VnS());
8379 __ Mov(z14.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008380
8381 __ Index(z31.VnD(), 32, -11);
8382 __ Index(z0.VnD(), 33, -11);
8383 __ Index(z1.VnD(), 34, -11);
8384 __ Index(z2.VnD(), 35, -11);
8385 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00008386 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 *vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008387 __ St4d(z31.VnD(),
8388 z0.VnD(),
8389 z1.VnD(),
8390 z2.VnD(),
8391 p4,
Jacob Bramleye483ce52019-11-05 16:52:29 +00008392 SVEMemOperand(x0, x4, LSL, 3));
8393 // Save the stored values for ld4 tests.
8394 __ Dup(z15.VnD(), 0);
8395 __ Dup(z16.VnD(), 0);
8396 __ Dup(z17.VnD(), 0);
8397 __ Dup(z18.VnD(), 0);
8398 __ Mov(z15.VnD(), p4.Merging(), z31.VnD());
8399 __ Mov(z16.VnD(), p4.Merging(), z0.VnD());
8400 __ Mov(z17.VnD(), p4.Merging(), z1.VnD());
8401 __ Mov(z18.VnD(), p4.Merging(), z2.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008402
Jacob Bramleye483ce52019-11-05 16:52:29 +00008403 // Corresponding loads.
8404 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
8405 __ Ld4b(z31.VnB(),
8406 z0.VnB(),
8407 z1.VnB(),
8408 z2.VnB(),
8409 p7.Zeroing(),
8410 SVEMemOperand(x0, x1, LSL, 0));
8411 __ Mov(z19, z31);
8412 __ Mov(z20, z0);
8413 __ Mov(z21, z1);
8414 __ Mov(z22, z2);
8415 __ Ld4h(z23.VnH(),
8416 z24.VnH(),
8417 z25.VnH(),
8418 z26.VnH(),
8419 p6.Zeroing(),
8420 SVEMemOperand(x0, x2, LSL, 1));
8421 __ Ld4w(z27.VnS(),
8422 z28.VnS(),
8423 z29.VnS(),
8424 z30.VnS(),
8425 p5.Zeroing(),
8426 SVEMemOperand(x0, x3, LSL, 2));
8427 // Wrap around from z31 to z0.
8428 __ Ld4d(z31.VnD(),
8429 z0.VnD(),
8430 z1.VnD(),
8431 z2.VnD(),
8432 p4.Zeroing(),
8433 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008434
8435 END();
8436
8437 if (CAN_RUN()) {
8438 RUN();
8439
8440 uint8_t* expected = new uint8_t[data_size];
8441 memset(expected, 0, data_size);
8442 uint8_t* middle = &expected[data_size / 2];
8443
8444 int vl_b = vl / kBRegSizeInBytes;
8445 int vl_h = vl / kHRegSizeInBytes;
8446 int vl_s = vl / kSRegSizeInBytes;
8447 int vl_d = vl / kDRegSizeInBytes;
8448
8449 int reg_count = 4;
8450
Jacob Bramleye483ce52019-11-05 16:52:29 +00008451 // st4b { z19.b, z20.b, z21.b, z22.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008452 int vl_b_mul4 = vl_b - (vl_b % 4);
8453 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00008454 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008455 uint8_t lane0 = -4 + (11 * i);
8456 uint8_t lane1 = -5 + (11 * i);
8457 uint8_t lane2 = -6 + (11 * i);
8458 uint8_t lane3 = -7 + (11 * i);
8459 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8460 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8461 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8462 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8463 }
8464
Jacob Bramleye483ce52019-11-05 16:52:29 +00008465 // st4h { z22.h, z23.h, z24.h, z25.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008466 if (vl_h >= 16) {
8467 for (int i = 0; i < 16; i++) {
8468 int64_t offset = (7 << kHRegSizeInBytesLog2) * vl;
8469 uint16_t lane0 = 6 - (2 * i);
8470 uint16_t lane1 = 7 - (2 * i);
8471 uint16_t lane2 = 8 - (2 * i);
8472 uint16_t lane3 = 9 - (2 * i);
8473 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8474 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8475 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8476 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8477 }
8478 }
8479
8480 // st4w { z29.s, z30.s, z31.s, z0.s }, ((i % 5) == 0)
8481 for (int i = 0; i < vl_s; i++) {
8482 if ((i % 5) == 0) {
8483 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
8484 uint32_t lane0 = -6 + (7 * i);
8485 uint32_t lane1 = -7 + (7 * i);
8486 uint32_t lane2 = -8 + (7 * i);
8487 uint32_t lane3 = -9 + (7 * i);
8488 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8489 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8490 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8491 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8492 }
8493 }
8494
8495 // st4d { z31.d, z0.d, z1.d, z2.d }, SVE_MUL3
8496 int vl_d_mul3 = vl_d - (vl_d % 3);
8497 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00008498 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008499 uint64_t lane0 = 32 - (11 * i);
8500 uint64_t lane1 = 33 - (11 * i);
8501 uint64_t lane2 = 34 - (11 * i);
8502 uint64_t lane3 = 35 - (11 * i);
8503 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
8504 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
8505 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
8506 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
8507 }
8508
8509 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
8510
Jacob Bramleye483ce52019-11-05 16:52:29 +00008511 // Check that we loaded back the expected values.
8512
8513 // st4b/ld4b
8514 ASSERT_EQUAL_SVE(z3, z19);
8515 ASSERT_EQUAL_SVE(z4, z20);
8516 ASSERT_EQUAL_SVE(z5, z21);
8517 ASSERT_EQUAL_SVE(z6, z22);
8518
8519 // st4h/ld4h
8520 ASSERT_EQUAL_SVE(z7, z23);
8521 ASSERT_EQUAL_SVE(z8, z24);
8522 ASSERT_EQUAL_SVE(z9, z25);
8523 ASSERT_EQUAL_SVE(z10, z26);
8524
8525 // st4w/ld4w
8526 ASSERT_EQUAL_SVE(z11, z27);
8527 ASSERT_EQUAL_SVE(z12, z28);
8528 ASSERT_EQUAL_SVE(z13, z29);
8529 ASSERT_EQUAL_SVE(z14, z30);
8530
8531 // st4d/ld4d
8532 ASSERT_EQUAL_SVE(z15, z31);
8533 ASSERT_EQUAL_SVE(z16, z0);
8534 ASSERT_EQUAL_SVE(z17, z1);
8535 ASSERT_EQUAL_SVE(z18, z2);
8536
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008537 delete[] expected;
8538 }
8539 delete[] data;
8540}
8541
8542TEST_SVE(sve_ld234_st234_scalar_plus_scalar_sp) {
8543 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8544 START();
8545
8546 // Check that the simulator correctly interprets rn == 31 as sp.
8547 // The indexing logic is the same regardless so we just check one load and
8548 // store of each type.
8549
8550 // There are no pre- or post-indexing modes, so reserve space first.
8551 __ ClaimVL(2 + 3 + 4);
8552
8553 __ Index(z0.VnB(), 42, 2);
8554 __ Index(z1.VnB(), 43, 2);
8555 __ Ptrue(p0.VnB(), SVE_VL7);
8556 __ Rdvl(x0, 0);
8557 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, x0));
8558
8559 __ Index(z4.VnH(), 42, 3);
8560 __ Index(z5.VnH(), 43, 3);
8561 __ Index(z6.VnH(), 44, 3);
8562 __ Ptrue(p1.VnH(), SVE_POW2);
8563 __ Rdvl(x1, 2);
8564 __ Lsr(x1, x1, 1);
8565 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, x1, LSL, 1));
8566
8567 __ Index(z8.VnS(), 42, 4);
8568 __ Index(z9.VnS(), 43, 4);
8569 __ Index(z10.VnS(), 44, 4);
8570 __ Index(z11.VnS(), 45, 4);
8571 __ Ptrue(p2.VnS());
8572 __ Rdvl(x2, 2 + 3);
8573 __ Lsr(x2, x2, 2);
8574 __ St4w(z8.VnS(),
8575 z9.VnS(),
8576 z10.VnS(),
8577 z11.VnS(),
8578 p2,
8579 SVEMemOperand(sp, x2, LSL, 2));
8580
Jacob Bramleye483ce52019-11-05 16:52:29 +00008581 // Corresponding loads.
8582 // We have to explicitly zero inactive lanes in the reference values because
8583 // loads have zeroing predication.
8584 __ Dup(z12.VnB(), 0);
8585 __ Dup(z13.VnB(), 0);
8586 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8587 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8588 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, x0));
8589
8590 __ Dup(z16.VnH(), 0);
8591 __ Dup(z17.VnH(), 0);
8592 __ Dup(z18.VnH(), 0);
8593 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8594 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8595 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8596 __ Ld3h(z4.VnH(),
8597 z5.VnH(),
8598 z6.VnH(),
8599 p1.Zeroing(),
8600 SVEMemOperand(sp, x1, LSL, 1));
8601
8602 __ Dup(z20.VnS(), 0);
8603 __ Dup(z21.VnS(), 0);
8604 __ Dup(z22.VnS(), 0);
8605 __ Dup(z23.VnS(), 0);
8606 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8607 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8608 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8609 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8610 __ Ld4w(z8.VnS(),
8611 z9.VnS(),
8612 z10.VnS(),
8613 z11.VnS(),
8614 p2.Zeroing(),
8615 SVEMemOperand(sp, x2, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008616
8617 __ DropVL(2 + 3 + 4);
8618
8619 END();
8620
8621 if (CAN_RUN()) {
8622 RUN();
8623
8624 // The most likely failure mode is the that simulator reads sp as xzr and
8625 // crashes on execution. We already test the address calculations separately
8626 // and sp doesn't change this, so just test that we load the values we
8627 // stored.
Jacob Bramleye483ce52019-11-05 16:52:29 +00008628
8629 // st2b/ld2b
8630 ASSERT_EQUAL_SVE(z0, z12);
8631 ASSERT_EQUAL_SVE(z1, z13);
8632
8633 // st3h/ld3h
8634 ASSERT_EQUAL_SVE(z4, z16);
8635 ASSERT_EQUAL_SVE(z5, z17);
8636 ASSERT_EQUAL_SVE(z6, z18);
8637
8638 // st4h/ld4h
8639 ASSERT_EQUAL_SVE(z8, z20);
8640 ASSERT_EQUAL_SVE(z9, z21);
8641 ASSERT_EQUAL_SVE(z10, z22);
8642 ASSERT_EQUAL_SVE(z11, z23);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008643 }
8644}
8645
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008646TEST_SVE(sve_ld234_st234_scalar_plus_imm_sp) {
8647 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8648 START();
8649
8650 // Check that the simulator correctly interprets rn == 31 as sp.
8651 // The indexing logic is the same regardless so we just check one load and
8652 // store of each type.
8653
8654 // There are no pre- or post-indexing modes, so reserve space first.
8655 // Note that the stores fill in an order that allows each immediate to be a
8656 // multiple of the number of registers.
8657 __ ClaimVL(4 + 2 + 3);
8658
8659 __ Index(z0.VnB(), 42, 2);
8660 __ Index(z1.VnB(), 43, 2);
8661 __ Ptrue(p0.VnB(), SVE_POW2);
8662 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, 4, SVE_MUL_VL));
8663
8664 __ Index(z4.VnH(), 42, 3);
8665 __ Index(z5.VnH(), 43, 3);
8666 __ Index(z6.VnH(), 44, 3);
8667 __ Ptrue(p1.VnH(), SVE_VL7);
8668 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, 6, SVE_MUL_VL));
8669
8670 __ Index(z8.VnS(), 42, 4);
8671 __ Index(z9.VnS(), 43, 4);
8672 __ Index(z10.VnS(), 44, 4);
8673 __ Index(z11.VnS(), 45, 4);
8674 __ Ptrue(p2.VnS());
8675 __ St4w(z8.VnS(), z9.VnS(), z10.VnS(), z11.VnS(), p2, SVEMemOperand(sp));
8676
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008677 // Corresponding loads.
8678 // We have to explicitly zero inactive lanes in the reference values because
8679 // loads have zeroing predication.
8680 __ Dup(z12.VnB(), 0);
8681 __ Dup(z13.VnB(), 0);
8682 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8683 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8684 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, 4, SVE_MUL_VL));
8685
8686 __ Dup(z16.VnH(), 0);
8687 __ Dup(z17.VnH(), 0);
8688 __ Dup(z18.VnH(), 0);
8689 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8690 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8691 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8692 __ Ld3h(z4.VnH(),
8693 z5.VnH(),
8694 z6.VnH(),
8695 p1.Zeroing(),
8696 SVEMemOperand(sp, 6, SVE_MUL_VL));
8697
8698 __ Dup(z20.VnS(), 0);
8699 __ Dup(z21.VnS(), 0);
8700 __ Dup(z22.VnS(), 0);
8701 __ Dup(z23.VnS(), 0);
8702 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8703 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8704 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8705 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8706 __ Ld4w(z8.VnS(),
8707 z9.VnS(),
8708 z10.VnS(),
8709 z11.VnS(),
8710 p2.Zeroing(),
8711 SVEMemOperand(sp));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008712
8713 __ DropVL(4 + 2 + 3);
8714
8715 END();
8716
8717 if (CAN_RUN()) {
8718 RUN();
8719
8720 // The most likely failure mode is the that simulator reads sp as xzr and
8721 // crashes on execution. We already test the address calculations separately
8722 // and sp doesn't change this, so just test that we load the values we
8723 // stored.
8724 // TODO: Actually do this, once loads are implemented.
8725 }
8726}
8727
Jacob Bramley85a9c102019-12-09 17:48:29 +00008728TEST_SVE(sve_ldff1_scalar_plus_scalar) {
8729 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8730 START();
8731
8732 int vl = config->sve_vl_in_bytes();
8733 size_t page_size = sysconf(_SC_PAGE_SIZE);
8734 VIXL_ASSERT(page_size > static_cast<size_t>(vl));
8735
8736 // Allocate two pages, then mprotect the second one to make it inaccessible.
8737 uintptr_t data = reinterpret_cast<uintptr_t>(mmap(NULL,
8738 page_size * 2,
8739 PROT_READ | PROT_WRITE,
8740 MAP_PRIVATE | MAP_ANONYMOUS,
8741 -1,
8742 0));
8743 mprotect(reinterpret_cast<void*>(data + page_size), page_size, PROT_NONE);
8744
8745 // Fill the accessible page with arbitrary data.
8746 for (size_t i = 0; i < page_size; i++) {
8747 // Reverse bits so we get a mixture of positive and negative values.
8748 uint8_t byte = ReverseBits(static_cast<uint8_t>(i));
8749 memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
8750 }
8751
8752 __ Mov(x20, data);
8753
8754 PRegister all = p7;
8755 __ Ptrue(all.VnB());
8756
8757 size_t offset_modifier = 0;
8758
8759 // The highest adddress at which a load stopped. Every FF load should fault at
8760 // `data + page_size`, so this value should not exceed that value. However,
8761 // the architecture allows fault-tolerant loads to fault arbitrarily, so the
8762 // real value may be lower.
8763 //
8764 // This is used to check that the `mprotect` above really does make the second
8765 // page inaccessible, and that the resulting FFR from each load reflects that.
8766 Register limit = x22;
8767 __ Mov(limit, 0);
8768
8769 // If the FFR grows unexpectedly, we increment this register by the
8770 // difference. FFR should never grow, except when explicitly set.
8771 Register ffr_grow_count = x23;
8772 __ Mov(ffr_grow_count, 0);
8773
8774#define VIXL_EMIT_LDFF1_TEST(LDFF1, M_SIZE, Zt, E_SIZE, LD1, ZtRef) \
8775 do { \
8776 /* Set the offset so that the load is guaranteed to start in the */ \
8777 /* accessible page, but end in the inaccessible one. */ \
8778 VIXL_ASSERT((page_size % k##M_SIZE##RegSizeInBytes) == 0); \
8779 VIXL_ASSERT((vl % k##M_SIZE##RegSizeInBytes) == 0); \
8780 size_t elements_per_page = page_size / k##M_SIZE##RegSizeInBytes; \
8781 size_t elements_per_access = vl / k##E_SIZE##RegSizeInBytes; \
8782 size_t min_offset = (elements_per_page - elements_per_access) + 1; \
8783 size_t max_offset = elements_per_page - 1; \
8784 size_t offset = \
8785 min_offset + (offset_modifier % (max_offset - min_offset + 1)); \
8786 offset_modifier++; \
8787 __ Mov(x21, offset); \
8788 __ Setffr(); \
8789 __ LDFF1(Zt.Vn##E_SIZE(), \
8790 all.Zeroing(), \
8791 SVEMemOperand(x20, x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8792 __ Rdffrs(p0.VnB(), all.Zeroing()); \
8793 /* Execute another LDFF1 with no offset, so that every element could be */ \
8794 /* read. It should respect FFR, and load no more than we loaded the */ \
8795 /* first time. */ \
8796 __ LDFF1(ZtRef.Vn##E_SIZE(), all.Zeroing(), SVEMemOperand(x20)); \
8797 __ Rdffrs(p1.VnB(), all.Zeroing()); \
8798 __ Cntp(x0, all, p1.VnB()); \
8799 __ Uqdecp(x0, p0.VnB()); \
8800 __ Add(ffr_grow_count, ffr_grow_count, x0); \
8801 /* Use the FFR to predicate the normal load. If it wasn't properly set, */ \
8802 /* the normal load will abort. */ \
8803 __ LD1(ZtRef.Vn##E_SIZE(), \
8804 p0.Zeroing(), \
8805 SVEMemOperand(x20, x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8806 /* Work out the address after the one that was just accessed. */ \
8807 __ Incp(x21, p0.Vn##E_SIZE()); \
8808 __ Add(x0, x20, Operand(x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8809 __ Cmp(limit, x0); \
8810 __ Csel(limit, limit, x0, hs); \
8811 /* Clear lanes inactive in FFR. These have an undefined result. */ \
8812 /* TODO: Use the 'Not' and 'Mov' aliases once they are implemented. */ \
8813 __ Eor(p0.Vn##E_SIZE(), all.Zeroing(), p0.Vn##E_SIZE(), all.Vn##E_SIZE()); \
8814 __ Cpy(Zt.Vn##E_SIZE(), p0.Merging(), 0); \
8815 } while (0)
8816
8817 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z0, B, Ld1b, z16);
8818 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z1, H, Ld1b, z17);
8819 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z2, S, Ld1b, z18);
8820 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z3, D, Ld1b, z19);
8821
8822 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z4, H, Ld1h, z20);
8823 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z5, S, Ld1h, z21);
8824 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z6, D, Ld1h, z22);
8825
8826 VIXL_EMIT_LDFF1_TEST(Ldff1w, S, z7, S, Ld1w, z23);
8827 VIXL_EMIT_LDFF1_TEST(Ldff1w, S, z8, D, Ld1w, z24);
8828
8829 VIXL_EMIT_LDFF1_TEST(Ldff1d, D, z9, D, Ld1d, z25);
8830
8831 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z10, H, Ld1sb, z26);
8832 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z11, S, Ld1sb, z27);
8833 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z12, D, Ld1sb, z28);
8834
8835 VIXL_EMIT_LDFF1_TEST(Ldff1sh, H, z13, S, Ld1sh, z29);
8836 VIXL_EMIT_LDFF1_TEST(Ldff1sh, H, z14, D, Ld1sh, z30);
8837
8838 VIXL_EMIT_LDFF1_TEST(Ldff1sw, S, z15, D, Ld1sw, z31);
8839
8840#undef VIXL_EMIT_LDFF1_TEST
8841
8842 END();
8843
8844 if (CAN_RUN()) {
8845 RUN();
8846
8847 uintptr_t expected_limit = data + page_size;
8848 uintptr_t measured_limit = core.xreg(limit.GetCode());
8849 VIXL_CHECK(measured_limit <= expected_limit);
8850 if (measured_limit < expected_limit) {
8851 // We can't fail the test for this case, but a warning is helpful for
8852 // manually-run tests.
8853 printf(
8854 "WARNING: All fault-tolerant loads detected faults before the\n"
8855 "expected limit. This is architecturally possible, but improbable,\n"
8856 "and could be a symptom of another problem.\n");
8857 }
8858
8859 ASSERT_EQUAL_64(0, ffr_grow_count);
8860
8861 // Ldff1b
8862 ASSERT_EQUAL_SVE(z0.VnB(), z16.VnB());
8863 ASSERT_EQUAL_SVE(z1.VnH(), z17.VnH());
8864 ASSERT_EQUAL_SVE(z2.VnS(), z18.VnS());
8865 ASSERT_EQUAL_SVE(z3.VnD(), z19.VnD());
8866
8867 // Ldff1h
8868 ASSERT_EQUAL_SVE(z4.VnH(), z20.VnH());
8869 ASSERT_EQUAL_SVE(z5.VnS(), z21.VnS());
8870 ASSERT_EQUAL_SVE(z6.VnD(), z22.VnD());
8871
8872 // Ldff1w
8873 ASSERT_EQUAL_SVE(z7.VnS(), z23.VnS());
8874 ASSERT_EQUAL_SVE(z8.VnD(), z24.VnD());
8875
8876 // Ldff1d
8877 ASSERT_EQUAL_SVE(z9.VnD(), z25.VnD());
8878
8879 // Ldff1sb
8880 ASSERT_EQUAL_SVE(z10.VnH(), z26.VnH());
8881 ASSERT_EQUAL_SVE(z11.VnS(), z27.VnS());
8882 ASSERT_EQUAL_SVE(z12.VnD(), z28.VnD());
8883
8884 // Ldff1sh
8885 ASSERT_EQUAL_SVE(z13.VnS(), z29.VnS());
8886 ASSERT_EQUAL_SVE(z14.VnD(), z30.VnD());
8887
8888 // Ldff1sw
8889 ASSERT_EQUAL_SVE(z15.VnD(), z31.VnD());
8890 }
8891
8892 munmap(reinterpret_cast<void*>(data), page_size * 2);
8893}
8894
Jacob Bramleydcdbd752020-01-20 11:47:36 +00008895// Test gather loads by comparing them with the result of a set of equivalent
8896// scalar loads.
8897template <typename F>
8898static void GatherLoadHelper(Test* config,
8899 unsigned msize_in_bits,
8900 unsigned esize_in_bits,
8901 F sve_ld1,
8902 bool is_signed) {
8903 // SVE supports 32- and 64-bit addressing for gather loads.
8904 VIXL_ASSERT((esize_in_bits == kSRegSize) || (esize_in_bits == kDRegSize));
8905 static const unsigned kMaxLaneCount = kZRegMaxSize / kSRegSize;
8906
8907 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8908 START();
8909
8910 unsigned msize_in_bytes = msize_in_bits / kBitsPerByte;
8911 unsigned esize_in_bytes = esize_in_bits / kBitsPerByte;
8912 int vl = config->sve_vl_in_bytes();
8913
8914 // Use a fixed seed for nrand48() so that test runs are reproducible.
8915 unsigned short seed[3] = {1, 2, 3}; // NOLINT(runtime/int)
8916
8917 // Fill a buffer with arbitrary data.
8918 size_t buffer_size = vl * 64;
8919 uint64_t data = reinterpret_cast<uintptr_t>(malloc(buffer_size));
8920 for (size_t i = 0; i < buffer_size; i++) {
8921 uint8_t byte = nrand48(seed) & 0xff;
8922 memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
8923 }
8924
8925 // Vectors of random addresses and offsets into the buffer.
8926 uint64_t addresses[kMaxLaneCount];
8927 uint64_t offsets[kMaxLaneCount];
8928 uint64_t max_address = 0;
8929 for (unsigned i = 0; i < kMaxLaneCount; i++) {
8930 uint64_t rnd = nrand48(seed);
8931 // Limit the range to the set of completely-accessible elements in memory.
8932 offsets[i] = rnd % (buffer_size - msize_in_bytes);
8933 addresses[i] = data + offsets[i];
8934 max_address = std::max(max_address, addresses[i]);
8935 }
8936
8937 // Maximised offsets, to ensure that the address calculation is modulo-2^64,
8938 // and that the vector addresses are not sign-extended.
8939 uint64_t uint_e_max = (esize_in_bits == kDRegSize) ? UINT64_MAX : UINT32_MAX;
8940 uint64_t maxed_offsets[kMaxLaneCount];
8941 uint64_t maxed_offsets_imm = max_address - uint_e_max;
8942 for (unsigned i = 0; i < kMaxLaneCount; i++) {
8943 maxed_offsets[i] = addresses[i] - maxed_offsets_imm;
8944 }
8945
8946 ZRegister zn = z0.WithLaneSize(esize_in_bits);
8947 ZRegister zt_addresses = z1.WithLaneSize(esize_in_bits);
8948 ZRegister zt_offsets = z2.WithLaneSize(esize_in_bits);
8949 ZRegister zt_maxed = z3.WithLaneSize(esize_in_bits);
8950 ZRegister zt_ref = z4.WithLaneSize(esize_in_bits);
8951
8952 PRegisterZ pg = p0.Zeroing();
8953 Initialise(&masm,
8954 pg,
8955 0x9abcdef012345678,
8956 0xabcdef0123456789,
8957 0xf4f3f1f0fefdfcfa,
8958 0xf9f8f6f5f3f2f0ff);
8959
8960 // Execute each load.
8961
8962 if (esize_in_bits == kDRegSize) {
8963 // Only test `addresses` if we can use 64-bit pointers. InsrHelper will fail
8964 // if any value won't fit in a lane of zn.
8965 InsrHelper(&masm, zn, addresses);
8966 (masm.*sve_ld1)(zt_addresses, pg, SVEMemOperand(zn));
8967 }
8968
8969 InsrHelper(&masm, zn, offsets);
8970 (masm.*sve_ld1)(zt_offsets, pg, SVEMemOperand(zn, data));
8971
8972 InsrHelper(&masm, zn, maxed_offsets);
8973 (masm.*sve_ld1)(zt_maxed, pg, SVEMemOperand(zn, maxed_offsets_imm));
8974
8975 // TODO: Also test scalar-plus-vector SVEMemOperands.
8976 // TODO: Also test first-fault loads.
8977
8978 // Generate a reference result using scalar loads.
8979
8980 ZRegister lane_numbers = z10.WithLaneSize(esize_in_bits);
8981 __ Index(lane_numbers, 0, 1);
8982 __ Dup(zt_ref, 0);
8983 for (unsigned i = 0; i < (vl / esize_in_bytes); i++) {
8984 __ Mov(x0, addresses[ArrayLength(addresses) - i - 1]);
8985 Register rt(0, esize_in_bits);
8986 if (is_signed) {
8987 switch (msize_in_bits) {
8988 case kBRegSize:
8989 __ Ldrsb(rt, MemOperand(x0));
8990 break;
8991 case kHRegSize:
8992 __ Ldrsh(rt, MemOperand(x0));
8993 break;
8994 case kWRegSize:
8995 __ Ldrsw(rt, MemOperand(x0));
8996 break;
8997 }
8998 } else {
8999 switch (msize_in_bits) {
9000 case kBRegSize:
9001 __ Ldrb(rt, MemOperand(x0));
9002 break;
9003 case kHRegSize:
9004 __ Ldrh(rt, MemOperand(x0));
9005 break;
9006 case kWRegSize:
9007 __ Ldr(rt.W(), MemOperand(x0));
9008 break;
9009 case kXRegSize:
9010 __ Ldr(rt, MemOperand(x0));
9011 break;
9012 }
9013 }
9014
9015 // Emulate predication.
9016 __ Cmpeq(p7.WithLaneSize(esize_in_bits), pg, lane_numbers, i);
9017 __ Cpy(zt_ref, p7.Merging(), rt);
9018 }
9019
9020 END();
9021
9022 if (CAN_RUN()) {
9023 RUN();
9024
9025 if (esize_in_bits == kDRegSize) {
9026 ASSERT_EQUAL_SVE(zt_ref, zt_addresses);
9027 }
9028 ASSERT_EQUAL_SVE(zt_ref, zt_offsets);
9029 ASSERT_EQUAL_SVE(zt_ref, zt_maxed);
9030 }
9031
9032 free(reinterpret_cast<void*>(data));
9033}
9034
9035TEST_SVE(sve_ld1b_64bit_vector_plus_immediate) {
9036 GatherLoadHelper(config, kBRegSize, kDRegSize, &MacroAssembler::Ld1b, false);
9037}
9038
9039TEST_SVE(sve_ld1h_64bit_vector_plus_immediate) {
9040 GatherLoadHelper(config, kHRegSize, kDRegSize, &MacroAssembler::Ld1h, false);
9041}
9042
9043TEST_SVE(sve_ld1w_64bit_vector_plus_immediate) {
9044 GatherLoadHelper(config, kSRegSize, kDRegSize, &MacroAssembler::Ld1w, false);
9045}
9046
9047TEST_SVE(sve_ld1d_64bit_vector_plus_immediate) {
9048 GatherLoadHelper(config, kDRegSize, kDRegSize, &MacroAssembler::Ld1d, false);
9049}
9050
9051TEST_SVE(sve_ld1sb_64bit_vector_plus_immediate) {
9052 GatherLoadHelper(config, kBRegSize, kDRegSize, &MacroAssembler::Ld1sb, true);
9053}
9054
9055TEST_SVE(sve_ld1sh_64bit_vector_plus_immediate) {
9056 GatherLoadHelper(config, kHRegSize, kDRegSize, &MacroAssembler::Ld1sh, true);
9057}
9058
9059TEST_SVE(sve_ld1sw_64bit_vector_plus_immediate) {
9060 GatherLoadHelper(config, kSRegSize, kDRegSize, &MacroAssembler::Ld1sw, true);
9061}
9062
9063TEST_SVE(sve_ld1b_32bit_vector_plus_immediate) {
9064 GatherLoadHelper(config, kBRegSize, kSRegSize, &MacroAssembler::Ld1b, false);
9065}
9066
9067TEST_SVE(sve_ld1h_32bit_vector_plus_immediate) {
9068 GatherLoadHelper(config, kHRegSize, kSRegSize, &MacroAssembler::Ld1h, false);
9069}
9070
9071TEST_SVE(sve_ld1w_32bit_vector_plus_immediate) {
9072 GatherLoadHelper(config, kSRegSize, kSRegSize, &MacroAssembler::Ld1w, false);
9073}
9074
9075TEST_SVE(sve_ld1sb_32bit_vector_plus_immediate) {
9076 GatherLoadHelper(config, kBRegSize, kSRegSize, &MacroAssembler::Ld1sb, true);
9077}
9078
9079TEST_SVE(sve_ld1sh_32bit_vector_plus_immediate) {
9080 GatherLoadHelper(config, kHRegSize, kSRegSize, &MacroAssembler::Ld1sh, true);
9081}
9082
TatWai Chong6995bfd2019-09-26 10:48:05 +01009083typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
9084 const ZRegister& zn,
9085 const IntegerOperand imm);
9086
9087template <typename F, typename Td, typename Tn>
9088static void IntWideImmHelper(Test* config,
9089 F macro,
9090 unsigned lane_size_in_bits,
9091 const Tn& zn_inputs,
9092 IntegerOperand imm,
9093 const Td& zd_expected) {
9094 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9095 START();
9096
9097 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
9098 InsrHelper(&masm, zd1, zn_inputs);
9099
9100 // Also test with a different zn, to test the movprfx case.
9101 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
9102 InsrHelper(&masm, zn, zn_inputs);
9103 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
9104 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
9105
9106 // Make a copy so we can check that constructive operations preserve zn.
9107 __ Mov(zn_copy, zn);
9108
9109 {
9110 UseScratchRegisterScope temps(&masm);
9111 // The MacroAssembler needs a P scratch register for some of these macros,
9112 // and it doesn't have one by default.
9113 temps.Include(p3);
9114
9115 (masm.*macro)(zd1, zd1, imm);
9116 (masm.*macro)(zd2, zn, imm);
9117 }
9118
9119 END();
9120
9121 if (CAN_RUN()) {
9122 RUN();
9123
9124 ASSERT_EQUAL_SVE(zd_expected, zd1);
9125
9126 // Check the result from `instr` with movprfx is the same as
9127 // the immediate version.
9128 ASSERT_EQUAL_SVE(zd_expected, zd2);
9129
9130 ASSERT_EQUAL_SVE(zn_copy, zn);
9131 }
9132}
9133
9134TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
9135 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
9136 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
9137 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
9138 int64_t in_d[] = {1, 10, 10000, 1000000};
9139
9140 IntWideImmFn fn = &MacroAssembler::Smax;
9141
9142 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
9143 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
9144 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
9145 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
9146
9147 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
9148 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
9149 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
9150 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
9151
9152 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
9153 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
9154 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
9155
9156 // The immediate is in the range [-128, 127], but the macro is able to
9157 // synthesise unencodable immediates.
9158 // B-sized lanes cannot take an immediate out of the range [-128, 127].
9159 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
9160 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
9161 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
9162}
9163
9164TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
9165 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
9166 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
9167 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
9168 int64_t in_d[] = {1, 10, 10000, 1000000};
9169
9170 IntWideImmFn fn = &MacroAssembler::Smin;
9171
9172 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
9173 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
9174 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
9175 int64_t exp_d_1[] = {1, 10, 99, 99};
9176
9177 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
9178 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
9179 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
9180 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
9181
9182 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
9183 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
9184 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
9185
9186 // The immediate is in the range [-128, 127], but the macro is able to
9187 // synthesise unencodable immediates.
9188 // B-sized lanes cannot take an immediate out of the range [-128, 127].
9189 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
9190 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
9191 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
9192}
9193
9194TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
9195 int in_b[] = {0, 255, 127, 0x80, 1, 55};
9196 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
9197 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
9198 int64_t in_d[] = {1, 10, 10000, 1000000};
9199
9200 IntWideImmFn fn = &MacroAssembler::Umax;
9201
9202 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
9203 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
9204 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
9205 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
9206
9207 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
9208 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
9209 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
9210 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
9211
9212 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
9213 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
9214 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
9215
9216 // The immediate is in the range [0, 255], but the macro is able to
9217 // synthesise unencodable immediates.
9218 // B-sized lanes cannot take an immediate out of the range [0, 255].
9219 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
9220 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
9221 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
9222}
9223
9224TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
9225 int in_b[] = {0, 255, 127, 0x80, 1, 55};
9226 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
9227 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
9228 int64_t in_d[] = {1, 10, 10000, 1000000};
9229
9230 IntWideImmFn fn = &MacroAssembler::Umin;
9231
9232 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
9233 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
9234 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
9235 int64_t exp_d_1[] = {1, 10, 99, 99};
9236
9237 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
9238 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
9239 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
9240 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
9241
9242 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
9243 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
9244 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
9245
9246 // The immediate is in the range [0, 255], but the macro is able to
9247 // synthesise unencodable immediates.
9248 // B-sized lanes cannot take an immediate out of the range [0, 255].
9249 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
9250 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
9251 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
9252}
9253
9254TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
9255 int in_b[] = {11, -1, 7, -3};
9256 int in_h[] = {111, -1, 17, -123};
9257 int in_s[] = {11111, -1, 117, -12345};
9258 int64_t in_d[] = {0x7fffffff, 0x80000000};
9259
9260 IntWideImmFn fn = &MacroAssembler::Mul;
9261
9262 int exp_b_1[] = {66, -6, 42, -18};
9263 int exp_h_1[] = {-14208, 128, -2176, 15744};
9264 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
9265 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
9266
9267 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
9268 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
9269 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
9270 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
9271
9272 int exp_h_2[] = {-28305, 255, -4335, 31365};
9273 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
9274 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
9275
9276 // The immediate is in the range [-128, 127], but the macro is able to
9277 // synthesise unencodable immediates.
9278 // B-sized lanes cannot take an immediate out of the range [0, 255].
9279 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
9280 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
9281 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
9282
9283 // Integer overflow on multiplication.
9284 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
9285
9286 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
9287}
9288
9289TEST_SVE(sve_int_wide_imm_unpredicated_add) {
9290 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
9291 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
9292 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
9293 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
9294
9295 IntWideImmFn fn = &MacroAssembler::Add;
9296
9297 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
9298 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
9299 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
9300 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
9301
9302 // Encodable with `add` (shift 0).
9303 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
9304 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
9305 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
9306 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
9307
9308 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
9309 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
9310 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
9311
9312 // Encodable with `add` (shift 8).
9313 // B-sized lanes cannot take a shift of 8.
9314 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
9315 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
9316 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
9317
9318 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
9319
9320 // The macro is able to synthesise unencodable immediates.
9321 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01009322
9323 unsigned exp_b_4[] = {0x61, 0x5f, 0xf0, 0xdf};
9324 unsigned exp_h_4[] = {0x6181, 0x5f7f, 0xf010, 0x8aaa};
9325 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
9326 uint64_t exp_d_4[] = {0x8000000180018180, 0x7fffffff7fff7f7e};
9327
9328 // Negative immediates use `sub`.
9329 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
9330 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
9331 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
9332 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009333}
9334
9335TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
9336 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
9337 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
9338 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
9339 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
9340
9341 IntWideImmFn fn = &MacroAssembler::Sqadd;
9342
Jacob Bramleyb28f6172019-10-02 12:12:35 +01009343 unsigned exp_b_1[] = {0x02, 0x7f, 0x7f, 0x7f};
TatWai Chong6995bfd2019-09-26 10:48:05 +01009344 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
9345 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
9346 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
9347
9348 // Encodable with `sqadd` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01009349 // Note that encodable immediates are unsigned, even for signed saturation.
9350 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009351 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
9352 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01009353 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009354
9355 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
9356 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
9357 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
9358
9359 // Encodable with `sqadd` (shift 8).
9360 // B-sized lanes cannot take a shift of 8.
9361 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
9362 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
9363 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009364}
9365
9366TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
9367 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
9368 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
9369 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
9370 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
9371
9372 IntWideImmFn fn = &MacroAssembler::Uqadd;
9373
9374 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
9375 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
9376 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
9377 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
9378
9379 // Encodable with `uqadd` (shift 0).
9380 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
9381 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
9382 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
9383 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
9384
9385 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
9386 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
9387 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
9388
9389 // Encodable with `uqadd` (shift 8).
9390 // B-sized lanes cannot take a shift of 8.
9391 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
9392 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
9393 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009394}
9395
9396TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
9397 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
9398 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
9399 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
9400 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
9401
9402 IntWideImmFn fn = &MacroAssembler::Sub;
9403
9404 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
9405 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
9406 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
9407 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
9408
9409 // Encodable with `sub` (shift 0).
9410 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
9411 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
9412 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
9413 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
9414
9415 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
9416 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
9417 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
9418
9419 // Encodable with `sub` (shift 8).
9420 // B-sized lanes cannot take a shift of 8.
9421 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
9422 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
9423 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
9424
9425 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
9426
9427 // The macro is able to synthesise unencodable immediates.
9428 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01009429
9430 unsigned exp_b_4[] = {0xa1, 0x9f, 0x30, 0x1f};
9431 unsigned exp_h_4[] = {0xa181, 0x9f7f, 0x3010, 0xcaaa};
9432 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
9433 uint64_t exp_d_4[] = {0x8000000180018182, 0x7fffffff7fff7f80};
9434
9435 // Negative immediates use `add`.
9436 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
9437 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
9438 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
9439 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009440}
9441
9442TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
9443 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
9444 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
9445 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
9446 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
9447
9448 IntWideImmFn fn = &MacroAssembler::Sqsub;
9449
Jacob Bramleyb28f6172019-10-02 12:12:35 +01009450 unsigned exp_b_1[] = {0x80, 0xfe, 0x8f, 0x80};
TatWai Chong6995bfd2019-09-26 10:48:05 +01009451 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
9452 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
9453 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
9454
9455 // Encodable with `sqsub` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01009456 // Note that encodable immediates are unsigned, even for signed saturation.
9457 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009458 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
9459 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01009460 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009461
9462 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
9463 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
9464 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
9465
9466 // Encodable with `sqsub` (shift 8).
9467 // B-sized lanes cannot take a shift of 8.
9468 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
9469 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
9470 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009471}
9472
9473TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
9474 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
9475 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
9476 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
9477 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
9478
9479 IntWideImmFn fn = &MacroAssembler::Uqsub;
9480
9481 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
9482 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
9483 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
9484 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
9485
9486 // Encodable with `uqsub` (shift 0).
9487 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
9488 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
9489 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
9490 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
9491
9492 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
9493 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
9494 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
9495
9496 // Encodable with `uqsub` (shift 8).
9497 // B-sized lanes cannot take a shift of 8.
9498 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
9499 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
9500 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01009501}
9502
9503TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
9504 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9505 START();
9506
9507 // Encodable with `subr` (shift 0).
9508 __ Index(z0.VnD(), 1, 1);
9509 __ Sub(z0.VnD(), 100, z0.VnD());
9510 __ Index(z1.VnS(), 0x7f, 1);
9511 __ Sub(z1.VnS(), 0xf7, z1.VnS());
9512 __ Index(z2.VnH(), 0xaaaa, 0x2222);
9513 __ Sub(z2.VnH(), 0x80, z2.VnH());
9514 __ Index(z3.VnB(), 133, 1);
9515 __ Sub(z3.VnB(), 255, z3.VnB());
9516
9517 // Encodable with `subr` (shift 8).
9518 __ Index(z4.VnD(), 256, -1);
9519 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
9520 __ Index(z5.VnS(), 0x7878, 1);
9521 __ Sub(z5.VnS(), 0x8000, z5.VnS());
9522 __ Index(z6.VnH(), 0x30f0, -1);
9523 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
9524 // B-sized lanes cannot take a shift of 8.
9525
9526 // Select with movprfx.
9527 __ Index(z31.VnD(), 256, 4001);
9528 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
9529
9530 // Out of immediate encodable range of `sub`.
9531 __ Index(z30.VnS(), 0x11223344, 1);
9532 __ Sub(z8.VnS(), 0x88776655, z30.VnS());
9533
9534 END();
9535
9536 if (CAN_RUN()) {
9537 RUN();
9538
9539 int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
9540 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
9541
9542 int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
9543 ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
9544
9545 int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
9546 ASSERT_EQUAL_SVE(expected_z2, z2.VnH());
9547
9548 int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
9549 ASSERT_EQUAL_SVE(expected_z3, z3.VnB());
9550
9551 int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
9552 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
9553
9554 int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
9555 ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
9556
9557 int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
9558 ASSERT_EQUAL_SVE(expected_z6, z6.VnH());
9559
9560 int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
9561 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
9562
9563 int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
9564 ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
9565 }
9566}
9567
9568TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
9569 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9570 START();
9571
9572 // Immediates which can be encoded in the instructions.
9573 __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
9574 __ Fdup(z1.VnS(), Float16(2.0));
9575 __ Fdup(z2.VnD(), Float16(3.875));
9576 __ Fdup(z3.VnH(), 8.0f);
9577 __ Fdup(z4.VnS(), -4.75f);
9578 __ Fdup(z5.VnD(), 0.5f);
9579 __ Fdup(z6.VnH(), 1.0);
9580 __ Fdup(z7.VnS(), 2.125);
9581 __ Fdup(z8.VnD(), -13.0);
9582
9583 // Immediates which cannot be encoded in the instructions.
9584 __ Fdup(z10.VnH(), Float16(0.0));
9585 __ Fdup(z11.VnH(), kFP16PositiveInfinity);
9586 __ Fdup(z12.VnS(), 255.0f);
9587 __ Fdup(z13.VnS(), kFP32NegativeInfinity);
9588 __ Fdup(z14.VnD(), 12.3456);
9589 __ Fdup(z15.VnD(), kFP64PositiveInfinity);
9590
9591 END();
9592
9593 if (CAN_RUN()) {
9594 RUN();
9595
9596 ASSERT_EQUAL_SVE(0xc500, z0.VnH());
9597 ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
9598 ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
9599 ASSERT_EQUAL_SVE(0x4800, z3.VnH());
9600 ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
9601 ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
9602 ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
9603 ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
9604 ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());
9605
9606 ASSERT_EQUAL_SVE(0x0000, z10.VnH());
9607 ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
9608 ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
9609 ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
9610 ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
9611 ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
9612 }
9613}
9614
TatWai Chong6f111bc2019-10-07 09:20:37 +01009615TEST_SVE(sve_andv_eorv_orv) {
9616 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9617 START();
9618
9619 uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
9620 InsrHelper(&masm, z31.VnD(), in);
9621
9622 // For simplicity, we re-use the same pg for various lane sizes.
9623 // For D lanes: 1, 1, 0
9624 // For S lanes: 1, 1, 1, 0, 0
9625 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9626 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9627 Initialise(&masm, p0.VnB(), pg_in);
9628
9629 // Make a copy so we can check that constructive operations preserve zn.
9630 __ Mov(z0, z31);
9631 __ Andv(b0, p0, z0.VnB()); // destructive
9632 __ Andv(h1, p0, z31.VnH());
9633 __ Mov(z2, z31);
9634 __ Andv(s2, p0, z2.VnS()); // destructive
9635 __ Andv(d3, p0, z31.VnD());
9636
9637 __ Eorv(b4, p0, z31.VnB());
9638 __ Mov(z5, z31);
9639 __ Eorv(h5, p0, z5.VnH()); // destructive
9640 __ Eorv(s6, p0, z31.VnS());
9641 __ Mov(z7, z31);
9642 __ Eorv(d7, p0, z7.VnD()); // destructive
9643
9644 __ Mov(z8, z31);
9645 __ Orv(b8, p0, z8.VnB()); // destructive
9646 __ Orv(h9, p0, z31.VnH());
9647 __ Mov(z10, z31);
9648 __ Orv(s10, p0, z10.VnS()); // destructive
9649 __ Orv(d11, p0, z31.VnD());
9650
9651 END();
9652
9653 if (CAN_RUN()) {
9654 RUN();
9655
9656 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9657 ASSERT_EQUAL_64(0x10, d0);
9658 ASSERT_EQUAL_64(0x1010, d1);
9659 ASSERT_EQUAL_64(0x33331111, d2);
9660 ASSERT_EQUAL_64(0x7777555533331111, d3);
9661 ASSERT_EQUAL_64(0xbf, d4);
9662 ASSERT_EQUAL_64(0xedcb, d5);
9663 ASSERT_EQUAL_64(0x44444444, d6);
9664 ASSERT_EQUAL_64(0x7777555533331111, d7);
9665 ASSERT_EQUAL_64(0xff, d8);
9666 ASSERT_EQUAL_64(0xffff, d9);
9667 ASSERT_EQUAL_64(0x77775555, d10);
9668 ASSERT_EQUAL_64(0x7777555533331111, d11);
9669 } else {
9670 ASSERT_EQUAL_64(0, d0);
9671 ASSERT_EQUAL_64(0x0010, d1);
9672 ASSERT_EQUAL_64(0x00110011, d2);
9673 ASSERT_EQUAL_64(0x0011001100110011, d3);
9674 ASSERT_EQUAL_64(0x62, d4);
9675 ASSERT_EQUAL_64(0x0334, d5);
9676 ASSERT_EQUAL_64(0x8899aabb, d6);
9677 ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
9678 ASSERT_EQUAL_64(0xff, d8);
9679 ASSERT_EQUAL_64(0xffff, d9);
9680 ASSERT_EQUAL_64(0xffffffff, d10);
9681 ASSERT_EQUAL_64(0xffffffffffffffff, d11);
9682 }
9683
9684 // Check the upper lanes above the top of the V register are all clear.
9685 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9686 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9687 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9688 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9689 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9690 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9691 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9692 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9693 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9694 ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
9695 ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
9696 ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
9697 ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
9698 }
9699 }
9700}
9701
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07009702
9703TEST_SVE(sve_saddv_uaddv) {
9704 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9705 START();
9706
9707 uint64_t in[] = {0x8899aabbccddeeff, 0x8182838485868788, 0x0807060504030201};
9708 InsrHelper(&masm, z31.VnD(), in);
9709
9710 // For simplicity, we re-use the same pg for various lane sizes.
9711 // For D lanes: 1, 1, 0
9712 // For S lanes: 1, 1, 1, 0, 0
9713 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9714 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9715 Initialise(&masm, p0.VnB(), pg_in);
9716
9717 // Make a copy so we can check that constructive operations preserve zn.
9718 __ Mov(z0, z31);
9719 __ Saddv(b0, p0, z0.VnB()); // destructive
9720 __ Saddv(h1, p0, z31.VnH());
9721 __ Mov(z2, z31);
9722 __ Saddv(s2, p0, z2.VnS()); // destructive
9723
9724 __ Uaddv(b4, p0, z31.VnB());
9725 __ Mov(z5, z31);
9726 __ Uaddv(h5, p0, z5.VnH()); // destructive
9727 __ Uaddv(s6, p0, z31.VnS());
9728 __ Mov(z7, z31);
9729 __ Uaddv(d7, p0, z7.VnD()); // destructive
9730
9731 END();
9732
9733 if (CAN_RUN()) {
9734 RUN();
9735
9736 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9737 // Saddv
9738 ASSERT_EQUAL_64(0xfffffffffffffda9, d0);
9739 ASSERT_EQUAL_64(0xfffffffffffe9495, d1);
9740 ASSERT_EQUAL_64(0xffffffff07090b0c, d2);
9741 // Uaddv
9742 ASSERT_EQUAL_64(0x00000000000002a9, d4);
9743 ASSERT_EQUAL_64(0x0000000000019495, d5);
9744 ASSERT_EQUAL_64(0x0000000107090b0c, d6);
9745 ASSERT_EQUAL_64(0x8182838485868788, d7);
9746 } else {
9747 // Saddv
9748 ASSERT_EQUAL_64(0xfffffffffffffd62, d0);
9749 ASSERT_EQUAL_64(0xfffffffffffe8394, d1);
9750 ASSERT_EQUAL_64(0xfffffffed3e6fa0b, d2);
9751 // Uaddv
9752 ASSERT_EQUAL_64(0x0000000000000562, d4);
9753 ASSERT_EQUAL_64(0x0000000000028394, d5);
9754 ASSERT_EQUAL_64(0x00000001d3e6fa0b, d6);
9755 ASSERT_EQUAL_64(0x0a1c2e4052647687, d7);
9756 }
9757
9758 // Check the upper lanes above the top of the V register are all clear.
9759 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9760 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9761 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9762 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9763 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9764 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9765 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9766 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9767 }
9768 }
9769}
9770
9771
9772TEST_SVE(sve_sminv_uminv) {
9773 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9774 START();
9775
9776 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
9777 InsrHelper(&masm, z31.VnD(), in);
9778
9779 // For simplicity, we re-use the same pg for various lane sizes.
9780 // For D lanes: 1, 0, 1
9781 // For S lanes: 1, 1, 0, 0, 1
9782 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
9783 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
9784 Initialise(&masm, p0.VnB(), pg_in);
9785
9786 // Make a copy so we can check that constructive operations preserve zn.
9787 __ Mov(z0, z31);
9788 __ Sminv(b0, p0, z0.VnB()); // destructive
9789 __ Sminv(h1, p0, z31.VnH());
9790 __ Mov(z2, z31);
9791 __ Sminv(s2, p0, z2.VnS()); // destructive
9792 __ Sminv(d3, p0, z31.VnD());
9793
9794 __ Uminv(b4, p0, z31.VnB());
9795 __ Mov(z5, z31);
9796 __ Uminv(h5, p0, z5.VnH()); // destructive
9797 __ Uminv(s6, p0, z31.VnS());
9798 __ Mov(z7, z31);
9799 __ Uminv(d7, p0, z7.VnD()); // destructive
9800
9801 END();
9802
9803 if (CAN_RUN()) {
9804 RUN();
9805
9806 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9807 // Sminv
9808 ASSERT_EQUAL_64(0xaa, d0);
9809 ASSERT_EQUAL_64(0xaabb, d1);
9810 ASSERT_EQUAL_64(0xaabbfc00, d2);
9811 ASSERT_EQUAL_64(0x00112233aabbfc00, d3); // The smaller lane is inactive.
9812 // Uminv
9813 ASSERT_EQUAL_64(0, d4);
9814 ASSERT_EQUAL_64(0x2233, d5);
9815 ASSERT_EQUAL_64(0x112233, d6);
9816 ASSERT_EQUAL_64(0x00112233aabbfc00, d7); // The smaller lane is inactive.
9817 } else {
9818 // Sminv
9819 ASSERT_EQUAL_64(0xaa, d0);
9820 ASSERT_EQUAL_64(0xaaaa, d1);
9821 ASSERT_EQUAL_64(0xaaaaaaaa, d2);
9822 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d3);
9823 // Uminv
9824 ASSERT_EQUAL_64(0, d4);
9825 ASSERT_EQUAL_64(0x2233, d5);
9826 ASSERT_EQUAL_64(0x112233, d6);
9827 ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
9828 }
9829
9830 // Check the upper lanes above the top of the V register are all clear.
9831 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9832 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9833 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9834 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9835 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9836 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9837 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9838 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9839 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9840 }
9841 }
9842}
9843
9844TEST_SVE(sve_smaxv_umaxv) {
9845 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9846 START();
9847
9848 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
9849 InsrHelper(&masm, z31.VnD(), in);
9850
9851 // For simplicity, we re-use the same pg for various lane sizes.
9852 // For D lanes: 1, 0, 1
9853 // For S lanes: 1, 1, 0, 0, 1
9854 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
9855 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
9856 Initialise(&masm, p0.VnB(), pg_in);
9857
9858 // Make a copy so we can check that constructive operations preserve zn.
9859 __ Mov(z0, z31);
9860 __ Smaxv(b0, p0, z0.VnB()); // destructive
9861 __ Smaxv(h1, p0, z31.VnH());
9862 __ Mov(z2, z31);
9863 __ Smaxv(s2, p0, z2.VnS()); // destructive
9864 __ Smaxv(d3, p0, z31.VnD());
9865
9866 __ Umaxv(b4, p0, z31.VnB());
9867 __ Mov(z5, z31);
9868 __ Umaxv(h5, p0, z5.VnH()); // destructive
9869 __ Umaxv(s6, p0, z31.VnS());
9870 __ Mov(z7, z31);
9871 __ Umaxv(d7, p0, z7.VnD()); // destructive
9872
9873 END();
9874
9875 if (CAN_RUN()) {
9876 RUN();
9877
9878 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9879 // Smaxv
9880 ASSERT_EQUAL_64(0x33, d0);
9881 ASSERT_EQUAL_64(0x44aa, d1);
9882 ASSERT_EQUAL_64(0x112233, d2);
9883 ASSERT_EQUAL_64(0x112233aabbfc00, d3);
9884 // Umaxv
9885 ASSERT_EQUAL_64(0xfe, d4);
9886 ASSERT_EQUAL_64(0xfc00, d5);
9887 ASSERT_EQUAL_64(0xaabbfc00, d6);
9888 ASSERT_EQUAL_64(0x112233aabbfc00, d7);
9889 } else {
9890 // Smaxv
9891 ASSERT_EQUAL_64(0x33, d0);
9892 ASSERT_EQUAL_64(0x44aa, d1);
9893 ASSERT_EQUAL_64(0x112233, d2);
9894 ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
9895 // Umaxv
9896 ASSERT_EQUAL_64(0xfe, d4);
9897 ASSERT_EQUAL_64(0xfc00, d5);
9898 ASSERT_EQUAL_64(0xaabbfc00, d6);
9899 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d7);
9900 }
9901
9902 // Check the upper lanes above the top of the V register are all clear.
9903 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9904 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9905 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9906 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9907 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9908 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9909 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9910 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9911 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9912 }
9913 }
9914}
9915
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009916typedef void (MacroAssembler::*SdotUdotFn)(const ZRegister& zd,
9917 const ZRegister& za,
9918 const ZRegister& zn,
9919 const ZRegister& zm);
9920
9921template <typename Td, typename Ts, typename Te>
9922static void SdotUdotHelper(Test* config,
9923 SdotUdotFn macro,
9924 unsigned lane_size_in_bits,
9925 const Td& zd_inputs,
9926 const Td& za_inputs,
9927 const Ts& zn_inputs,
9928 const Ts& zm_inputs,
9929 const Te& zd_expected,
9930 const Te& zdnm_expected) {
9931 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9932 START();
9933
9934 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
9935 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
9936 ZRegister zn = z2.WithLaneSize(lane_size_in_bits / 4);
9937 ZRegister zm = z3.WithLaneSize(lane_size_in_bits / 4);
9938
9939 InsrHelper(&masm, zd, zd_inputs);
9940 InsrHelper(&masm, za, za_inputs);
9941 InsrHelper(&masm, zn, zn_inputs);
9942 InsrHelper(&masm, zm, zm_inputs);
9943
9944 // The Dot macro handles arbitrarily-aliased registers in the argument list.
9945 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
9946 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
9947 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
9948 ZRegister dnm_result = z13.WithLaneSize(lane_size_in_bits);
9949 ZRegister d_result = z14.WithLaneSize(lane_size_in_bits);
9950
9951 __ Mov(da_result, za);
9952 // zda = zda + (zn . zm)
9953 (masm.*macro)(da_result, da_result, zn, zm);
9954
9955 __ Mov(dn_result, zn);
9956 // zdn = za + (zdn . zm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009957 (masm.*macro)(dn_result, za, dn_result.WithSameLaneSizeAs(zn), zm);
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009958
9959 __ Mov(dm_result, zm);
9960 // zdm = za + (zn . zdm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009961 (masm.*macro)(dm_result, za, zn, dm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009962
9963 __ Mov(d_result, zd);
9964 // zd = za + (zn . zm)
9965 (masm.*macro)(d_result, za, zn, zm);
9966
9967 __ Mov(dnm_result, zn);
9968 // zdnm = za + (zdmn . zdnm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009969 (masm.*macro)(dnm_result,
9970 za,
9971 dnm_result.WithSameLaneSizeAs(zn),
9972 dnm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009973
9974 END();
9975
9976 if (CAN_RUN()) {
9977 RUN();
9978
9979 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
9980 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits / 4));
9981 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits / 4));
9982
9983 ASSERT_EQUAL_SVE(zd_expected, da_result);
9984 ASSERT_EQUAL_SVE(zd_expected, dn_result);
9985 ASSERT_EQUAL_SVE(zd_expected, dm_result);
9986 ASSERT_EQUAL_SVE(zd_expected, d_result);
9987
9988 ASSERT_EQUAL_SVE(zdnm_expected, dnm_result);
9989 }
9990}
9991
9992TEST_SVE(sve_sdot) {
9993 int zd_inputs[] = {0x33, 0xee, 0xff};
9994 int za_inputs[] = {INT32_MAX, -3, 2};
9995 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
9996 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
9997
9998 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
9999 int32_t zd_expected_s[] = {-2147418113, -183, 133}; // 0x8000ffff
10000 int64_t zd_expected_d[] = {2147549183, -183, 133}; // 0x8000ffff
10001
10002 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
10003 int32_t zdnm_expected_s[] = {-2147418113, 980, 572};
10004 int64_t zdnm_expected_d[] = {2147549183, 980, 572};
10005
10006 SdotUdotHelper(config,
10007 &MacroAssembler::Sdot,
10008 kSRegSize,
10009 zd_inputs,
10010 za_inputs,
10011 zn_inputs,
10012 zm_inputs,
10013 zd_expected_s,
10014 zdnm_expected_s);
10015 SdotUdotHelper(config,
10016 &MacroAssembler::Sdot,
10017 kDRegSize,
10018 zd_inputs,
10019 za_inputs,
10020 zn_inputs,
10021 zm_inputs,
10022 zd_expected_d,
10023 zdnm_expected_d);
10024}
10025
10026TEST_SVE(sve_udot) {
10027 int zd_inputs[] = {0x33, 0xee, 0xff};
10028 int za_inputs[] = {INT32_MAX, -3, 2};
10029 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
10030 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
10031
10032 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
10033 uint32_t zd_expected_s[] = {0x8000ffff, 0x00001749, 0x0000f085};
10034 uint64_t zd_expected_d[] = {0x000000047c00ffff,
10035 0x000000000017ff49,
10036 0x00000000fff00085};
10037
10038 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
10039 uint32_t zdnm_expected_s[] = {0x8000ffff, 0x000101d4, 0x0001d03c};
10040 uint64_t zdnm_expected_d[] = {0x000000047c00ffff,
10041 0x00000000fffe03d4,
10042 0x00000001ffce023c};
10043
10044 SdotUdotHelper(config,
10045 &MacroAssembler::Udot,
10046 kSRegSize,
10047 zd_inputs,
10048 za_inputs,
10049 zn_inputs,
10050 zm_inputs,
10051 zd_expected_s,
10052 zdnm_expected_s);
10053 SdotUdotHelper(config,
10054 &MacroAssembler::Udot,
10055 kDRegSize,
10056 zd_inputs,
10057 za_inputs,
10058 zn_inputs,
10059 zm_inputs,
10060 zd_expected_d,
10061 zdnm_expected_d);
10062}
10063
TatWai Chong7a0d3672019-10-23 17:35:18 -070010064template <typename T, size_t N>
10065static void FPToRawbitsWithSize(const T (&inputs)[N],
10066 uint64_t* outputs,
10067 unsigned size_in_bits) {
TatWai Chongfe536042019-10-23 16:34:11 -070010068 for (size_t i = 0; i < N; i++) {
TatWai Chong7a0d3672019-10-23 17:35:18 -070010069 outputs[i] = vixl::FPToRawbitsWithSize(size_in_bits, inputs[i]);
TatWai Chongfe536042019-10-23 16:34:11 -070010070 }
10071}
10072
TatWai Chong7a0d3672019-10-23 17:35:18 -070010073template <typename Ti, typename Te, size_t N>
10074static void FPBinArithHelper(Test* config,
10075 ArithFn macro,
10076 int lane_size_in_bits,
10077 const Ti (&zn_inputs)[N],
10078 const Ti (&zm_inputs)[N],
10079 const Te (&zd_expected)[N]) {
TatWai Chongfe536042019-10-23 16:34:11 -070010080 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong7a0d3672019-10-23 17:35:18 -070010081
TatWai Chongfe536042019-10-23 16:34:11 -070010082 START();
10083
10084 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
10085 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
10086 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
10087
10088 uint64_t zn_rawbits[N];
10089 uint64_t zm_rawbits[N];
10090
TatWai Chong7a0d3672019-10-23 17:35:18 -070010091 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
10092 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
TatWai Chongfe536042019-10-23 16:34:11 -070010093
10094 InsrHelper(&masm, zn, zn_rawbits);
10095 InsrHelper(&masm, zm, zm_rawbits);
10096
10097 (masm.*macro)(zd, zn, zm);
10098
10099 END();
10100
10101 if (CAN_RUN()) {
10102 RUN();
10103
10104 ASSERT_EQUAL_SVE(zd_expected, zd);
10105 }
10106}
10107
10108TEST_SVE(sve_fp_arithmetic_unpredicated_fadd) {
10109 double zn_inputs[] = {24.0,
10110 5.5,
10111 0.0,
10112 3.875,
10113 2.125,
10114 kFP64PositiveInfinity,
10115 kFP64NegativeInfinity};
10116
10117 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
10118
TatWai Chong7a0d3672019-10-23 17:35:18 -070010119 ArithFn fn = &MacroAssembler::Fadd;
TatWai Chongfe536042019-10-23 16:34:11 -070010120
10121 uint16_t expected_h[] = {Float16ToRawbits(Float16(1048.0)),
10122 Float16ToRawbits(Float16(2053.5)),
10123 Float16ToRawbits(Float16(0.1)),
10124 Float16ToRawbits(Float16(-0.875)),
10125 Float16ToRawbits(Float16(14.465)),
10126 Float16ToRawbits(kFP16PositiveInfinity),
10127 Float16ToRawbits(kFP16NegativeInfinity)};
10128
TatWai Chong7a0d3672019-10-23 17:35:18 -070010129 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -070010130
10131 uint32_t expected_s[] = {FloatToRawbits(1048.0f),
10132 FloatToRawbits(2053.5f),
10133 FloatToRawbits(0.1f),
10134 FloatToRawbits(-0.875f),
10135 FloatToRawbits(14.465f),
10136 FloatToRawbits(kFP32PositiveInfinity),
10137 FloatToRawbits(kFP32NegativeInfinity)};
10138
TatWai Chong7a0d3672019-10-23 17:35:18 -070010139 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -070010140
10141 uint64_t expected_d[] = {DoubleToRawbits(1048.0),
10142 DoubleToRawbits(2053.5),
10143 DoubleToRawbits(0.1),
10144 DoubleToRawbits(-0.875),
10145 DoubleToRawbits(14.465),
10146 DoubleToRawbits(kFP64PositiveInfinity),
10147 DoubleToRawbits(kFP64NegativeInfinity)};
10148
TatWai Chong7a0d3672019-10-23 17:35:18 -070010149 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -070010150}
10151
10152TEST_SVE(sve_fp_arithmetic_unpredicated_fsub) {
10153 double zn_inputs[] = {24.0,
10154 5.5,
10155 0.0,
10156 3.875,
10157 2.125,
10158 kFP64PositiveInfinity,
10159 kFP64NegativeInfinity};
10160
10161 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
10162
TatWai Chong7a0d3672019-10-23 17:35:18 -070010163 ArithFn fn = &MacroAssembler::Fsub;
TatWai Chongfe536042019-10-23 16:34:11 -070010164
10165 uint16_t expected_h[] = {Float16ToRawbits(Float16(-1000.0)),
10166 Float16ToRawbits(Float16(-2042.5)),
10167 Float16ToRawbits(Float16(-0.1)),
10168 Float16ToRawbits(Float16(8.625)),
10169 Float16ToRawbits(Float16(-10.215)),
10170 Float16ToRawbits(kFP16PositiveInfinity),
10171 Float16ToRawbits(kFP16NegativeInfinity)};
10172
TatWai Chong7a0d3672019-10-23 17:35:18 -070010173 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -070010174
10175 uint32_t expected_s[] = {FloatToRawbits(-1000.0),
10176 FloatToRawbits(-2042.5),
10177 FloatToRawbits(-0.1),
10178 FloatToRawbits(8.625),
10179 FloatToRawbits(-10.215),
10180 FloatToRawbits(kFP32PositiveInfinity),
10181 FloatToRawbits(kFP32NegativeInfinity)};
10182
TatWai Chong7a0d3672019-10-23 17:35:18 -070010183 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -070010184
10185 uint64_t expected_d[] = {DoubleToRawbits(-1000.0),
10186 DoubleToRawbits(-2042.5),
10187 DoubleToRawbits(-0.1),
10188 DoubleToRawbits(8.625),
10189 DoubleToRawbits(-10.215),
10190 DoubleToRawbits(kFP64PositiveInfinity),
10191 DoubleToRawbits(kFP64NegativeInfinity)};
10192
TatWai Chong7a0d3672019-10-23 17:35:18 -070010193 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -070010194}
10195
10196TEST_SVE(sve_fp_arithmetic_unpredicated_fmul) {
10197 double zn_inputs[] = {24.0,
10198 5.5,
10199 0.0,
10200 3.875,
10201 2.125,
10202 kFP64PositiveInfinity,
10203 kFP64NegativeInfinity};
10204
10205 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
10206
TatWai Chong7a0d3672019-10-23 17:35:18 -070010207 ArithFn fn = &MacroAssembler::Fmul;
TatWai Chongfe536042019-10-23 16:34:11 -070010208
10209 uint16_t expected_h[] = {Float16ToRawbits(Float16(24576.0)),
10210 Float16ToRawbits(Float16(11264.0)),
10211 Float16ToRawbits(Float16(0.0)),
10212 Float16ToRawbits(Float16(-18.4)),
10213 Float16ToRawbits(Float16(26.23)),
10214 Float16ToRawbits(kFP16PositiveInfinity),
10215 Float16ToRawbits(kFP16PositiveInfinity)};
10216
TatWai Chong7a0d3672019-10-23 17:35:18 -070010217 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -070010218
10219 uint32_t expected_s[] = {FloatToRawbits(24576.0),
10220 FloatToRawbits(11264.0),
10221 FloatToRawbits(0.0),
10222 FloatToRawbits(-18.40625),
10223 FloatToRawbits(26.2225),
10224 FloatToRawbits(kFP32PositiveInfinity),
10225 FloatToRawbits(kFP32PositiveInfinity)};
10226
TatWai Chong7a0d3672019-10-23 17:35:18 -070010227 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -070010228
10229 uint64_t expected_d[] = {DoubleToRawbits(24576.0),
10230 DoubleToRawbits(11264.0),
10231 DoubleToRawbits(0.0),
10232 DoubleToRawbits(-18.40625),
10233 DoubleToRawbits(26.2225),
10234 DoubleToRawbits(kFP64PositiveInfinity),
10235 DoubleToRawbits(kFP64PositiveInfinity)};
10236
TatWai Chong7a0d3672019-10-23 17:35:18 -070010237 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -070010238}
10239
TatWai Chong7a0d3672019-10-23 17:35:18 -070010240typedef void (MacroAssembler::*FPArithPredicatedFn)(
10241 const ZRegister& zd,
10242 const PRegisterM& pg,
10243 const ZRegister& zn,
10244 const ZRegister& zm,
10245 FPMacroNaNPropagationOption nan_option);
10246
Martyn Capewell37f28182020-01-14 10:15:10 +000010247typedef void (MacroAssembler::*FPArithPredicatedNoNaNOptFn)(
10248 const ZRegister& zd,
10249 const PRegisterM& pg,
10250 const ZRegister& zn,
10251 const ZRegister& zm);
10252
TatWai Chong7a0d3672019-10-23 17:35:18 -070010253template <typename Ti, typename Te, size_t N>
10254static void FPBinArithHelper(
10255 Test* config,
10256 FPArithPredicatedFn macro,
Martyn Capewell37f28182020-01-14 10:15:10 +000010257 FPArithPredicatedNoNaNOptFn macro_nonan,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010258 unsigned lane_size_in_bits,
10259 const Ti (&zd_inputs)[N],
10260 const int (&pg_inputs)[N],
10261 const Ti (&zn_inputs)[N],
10262 const Ti (&zm_inputs)[N],
10263 const Te (&zd_expected)[N],
10264 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
Martyn Capewell37f28182020-01-14 10:15:10 +000010265 VIXL_ASSERT((macro == NULL) ^ (macro_nonan == NULL));
TatWai Chongd316c5e2019-10-16 12:22:10 -070010266 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10267 START();
10268
TatWai Chong7a0d3672019-10-23 17:35:18 -070010269 // Avoid choosing default scratch registers.
10270 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
10271 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
10272 ZRegister zm = z28.WithLaneSize(lane_size_in_bits);
TatWai Chongd316c5e2019-10-16 12:22:10 -070010273
TatWai Chong7a0d3672019-10-23 17:35:18 -070010274 uint64_t zn_inputs_rawbits[N];
10275 uint64_t zm_inputs_rawbits[N];
10276 uint64_t zd_inputs_rawbits[N];
TatWai Chongd316c5e2019-10-16 12:22:10 -070010277
TatWai Chong7a0d3672019-10-23 17:35:18 -070010278 FPToRawbitsWithSize(zn_inputs, zn_inputs_rawbits, lane_size_in_bits);
10279 FPToRawbitsWithSize(zm_inputs, zm_inputs_rawbits, lane_size_in_bits);
10280 FPToRawbitsWithSize(zd_inputs, zd_inputs_rawbits, lane_size_in_bits);
10281
10282 InsrHelper(&masm, zn, zn_inputs_rawbits);
10283 InsrHelper(&masm, zm, zm_inputs_rawbits);
10284 InsrHelper(&masm, zd, zd_inputs_rawbits);
TatWai Chongd316c5e2019-10-16 12:22:10 -070010285
10286 PRegisterWithLaneSize pg = p0.WithLaneSize(lane_size_in_bits);
10287 Initialise(&masm, pg, pg_inputs);
10288
10289 // `instr` zdn, pg, zdn, zm
10290 ZRegister dn_result = z0.WithLaneSize(lane_size_in_bits);
10291 __ Mov(dn_result, zn);
Martyn Capewell37f28182020-01-14 10:15:10 +000010292 if (macro_nonan == NULL) {
10293 (masm.*macro)(dn_result, pg.Merging(), dn_result, zm, nan_option);
10294 } else {
10295 (masm.*macro_nonan)(dn_result, pg.Merging(), dn_result, zm);
10296 }
TatWai Chongd316c5e2019-10-16 12:22:10 -070010297
10298 // Based on whether zd and zm registers are aliased, the macro of instructions
10299 // (`Instr`) swaps the order of operands if it has the commutative property,
10300 // otherwise, transfer to the reversed `Instr`, such as fdivr.
10301 // `instr` zdm, pg, zn, zdm
10302 ZRegister dm_result = z1.WithLaneSize(lane_size_in_bits);
10303 __ Mov(dm_result, zm);
Martyn Capewell37f28182020-01-14 10:15:10 +000010304 if (macro_nonan == NULL) {
10305 (masm.*macro)(dm_result, pg.Merging(), zn, dm_result, nan_option);
10306 } else {
10307 (masm.*macro_nonan)(dm_result, pg.Merging(), zn, dm_result);
10308 }
TatWai Chongd316c5e2019-10-16 12:22:10 -070010309
10310 // The macro of instructions (`Instr`) automatically selects between `instr`
10311 // and movprfx + `instr` based on whether zd and zn registers are aliased.
10312 // A generated movprfx instruction is predicated that using the same
10313 // governing predicate register. In order to keep the result constant,
10314 // initialize the destination register first.
10315 // `instr` zd, pg, zn, zm
10316 ZRegister d_result = z2.WithLaneSize(lane_size_in_bits);
10317 __ Mov(d_result, zd);
Martyn Capewell37f28182020-01-14 10:15:10 +000010318 if (macro_nonan == NULL) {
10319 (masm.*macro)(d_result, pg.Merging(), zn, zm, nan_option);
10320 } else {
10321 (masm.*macro_nonan)(d_result, pg.Merging(), zn, zm);
10322 }
TatWai Chongd316c5e2019-10-16 12:22:10 -070010323
10324 END();
10325
10326 if (CAN_RUN()) {
10327 RUN();
10328
10329 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
10330 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
10331 if (!core.HasSVELane(dn_result, lane)) break;
10332 if ((pg_inputs[i] & 1) != 0) {
10333 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dn_result, lane);
10334 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -070010335 ASSERT_EQUAL_SVE_LANE(zn_inputs_rawbits[i], dn_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -070010336 }
10337 }
10338
10339 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
10340 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
10341 if (!core.HasSVELane(dm_result, lane)) break;
10342 if ((pg_inputs[i] & 1) != 0) {
10343 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dm_result, lane);
10344 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -070010345 ASSERT_EQUAL_SVE_LANE(zm_inputs_rawbits[i], dm_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -070010346 }
10347 }
10348
10349 ASSERT_EQUAL_SVE(zd_expected, d_result);
10350 }
10351}
10352
10353TEST_SVE(sve_binary_arithmetic_predicated_fdiv) {
TatWai Chong7a0d3672019-10-23 17:35:18 -070010354 // The inputs are shared with different precision tests.
TatWai Chongd316c5e2019-10-16 12:22:10 -070010355 double zd_in[] = {0.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
10356
10357 double zn_in[] = {24.0,
10358 24.0,
10359 -2.0,
10360 -2.0,
10361 5.5,
10362 5.5,
10363 kFP64PositiveInfinity,
10364 kFP64PositiveInfinity,
10365 kFP64NegativeInfinity,
10366 kFP64NegativeInfinity};
10367
10368 double zm_in[] = {-2.0, -2.0, 24.0, 24.0, 0.5, 0.5, 0.65, 0.65, 24.0, 24.0};
10369
TatWai Chongd316c5e2019-10-16 12:22:10 -070010370 int pg_in[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
10371
TatWai Chong7a0d3672019-10-23 17:35:18 -070010372 uint16_t exp_h[] = {Float16ToRawbits(Float16(0.1)),
TatWai Chongd316c5e2019-10-16 12:22:10 -070010373 Float16ToRawbits(Float16(-12.0)),
10374 Float16ToRawbits(Float16(2.2)),
10375 Float16ToRawbits(Float16(-0.0833)),
10376 Float16ToRawbits(Float16(4.4)),
10377 Float16ToRawbits(Float16(11.0)),
10378 Float16ToRawbits(Float16(6.6)),
10379 Float16ToRawbits(kFP16PositiveInfinity),
10380 Float16ToRawbits(Float16(8.8)),
10381 Float16ToRawbits(kFP16NegativeInfinity)};
10382
TatWai Chong7a0d3672019-10-23 17:35:18 -070010383 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +000010384 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010385 &MacroAssembler::Fdiv,
10386 kHRegSize,
10387 zd_in,
10388 pg_in,
10389 zn_in,
10390 zm_in,
10391 exp_h);
TatWai Chongd316c5e2019-10-16 12:22:10 -070010392
10393 uint32_t exp_s[] = {FloatToRawbits(0.1),
10394 FloatToRawbits(-12.0),
10395 FloatToRawbits(2.2),
10396 0xbdaaaaab,
10397 FloatToRawbits(4.4),
10398 FloatToRawbits(11.0),
10399 FloatToRawbits(6.6),
10400 FloatToRawbits(kFP32PositiveInfinity),
10401 FloatToRawbits(8.8),
10402 FloatToRawbits(kFP32NegativeInfinity)};
10403
TatWai Chong7a0d3672019-10-23 17:35:18 -070010404 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +000010405 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010406 &MacroAssembler::Fdiv,
10407 kSRegSize,
10408 zd_in,
10409 pg_in,
10410 zn_in,
10411 zm_in,
10412 exp_s);
TatWai Chongd316c5e2019-10-16 12:22:10 -070010413
10414 uint64_t exp_d[] = {DoubleToRawbits(0.1),
10415 DoubleToRawbits(-12.0),
10416 DoubleToRawbits(2.2),
10417 0xbfb5555555555555,
10418 DoubleToRawbits(4.4),
10419 DoubleToRawbits(11.0),
10420 DoubleToRawbits(6.6),
10421 DoubleToRawbits(kFP64PositiveInfinity),
10422 DoubleToRawbits(8.8),
10423 DoubleToRawbits(kFP64NegativeInfinity)};
10424
TatWai Chong7a0d3672019-10-23 17:35:18 -070010425 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +000010426 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010427 &MacroAssembler::Fdiv,
10428 kDRegSize,
10429 zd_in,
10430 pg_in,
10431 zn_in,
10432 zm_in,
10433 exp_d);
TatWai Chongd316c5e2019-10-16 12:22:10 -070010434}
10435
Martyn Capewell9cc3f142019-10-29 14:06:35 +000010436TEST_SVE(sve_select) {
10437 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10438 START();
10439
10440 uint64_t in0[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
10441 uint64_t in1[] = {0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa};
10442
10443 // For simplicity, we re-use the same pg for various lane sizes.
10444 // For D lanes: 1, 1, 0
10445 // For S lanes: 1, 1, 1, 0, 0
10446 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
10447 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
10448 Initialise(&masm, p0.VnB(), pg_in);
10449 PRegisterM pg = p0.Merging();
10450
10451 InsrHelper(&masm, z30.VnD(), in0);
10452 InsrHelper(&masm, z31.VnD(), in1);
10453
10454 __ Sel(z0.VnB(), pg, z30.VnB(), z31.VnB());
10455 __ Sel(z1.VnH(), pg, z30.VnH(), z31.VnH());
10456 __ Sel(z2.VnS(), pg, z30.VnS(), z31.VnS());
10457 __ Sel(z3.VnD(), pg, z30.VnD(), z31.VnD());
10458
10459 END();
10460
10461 if (CAN_RUN()) {
10462 RUN();
10463
10464 uint64_t expected_z0[] = {0xaaaaaaaa05aa07f8,
10465 0xfeaaaaf0aac3870f,
10466 0xaaaa56aa9abcdeaa};
10467 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
10468
10469 uint64_t expected_z1[] = {0xaaaaaaaaaaaa07f8,
10470 0xaaaaf8f0e1c3870f,
10471 0xaaaaaaaa9abcaaaa};
10472 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
10473
10474 uint64_t expected_z2[] = {0xaaaaaaaa05f607f8,
10475 0xfefcf8f0e1c3870f,
10476 0xaaaaaaaaaaaaaaaa};
10477 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
10478
10479 uint64_t expected_z3[] = {0x01f203f405f607f8,
10480 0xfefcf8f0e1c3870f,
10481 0xaaaaaaaaaaaaaaaa};
10482 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
10483 }
10484}
TatWai Chongd316c5e2019-10-16 12:22:10 -070010485
TatWai Chong7a0d3672019-10-23 17:35:18 -070010486TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_h) {
10487 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
10488 double zn_inputs[] = {-2.1,
10489 8.5,
10490 225.5,
10491 0.0,
10492 8.8,
10493 -4.75,
10494 kFP64PositiveInfinity,
10495 kFP64NegativeInfinity};
10496 double zm_inputs[] = {-2.0,
10497 -13.0,
10498 24.0,
10499 0.01,
10500 0.5,
10501 300.75,
10502 kFP64NegativeInfinity,
10503 kFP64PositiveInfinity};
10504 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
10505
10506 uint16_t zd_expected_max[] = {Float16ToRawbits(Float16(-2.0)),
10507 Float16ToRawbits(Float16(8.5)),
10508 Float16ToRawbits(Float16(3.3)),
10509 Float16ToRawbits(Float16(0.01)),
10510 Float16ToRawbits(Float16(5.5)),
10511 Float16ToRawbits(Float16(300.75)),
10512 Float16ToRawbits(kFP16PositiveInfinity),
10513 Float16ToRawbits(kFP16PositiveInfinity)};
10514 FPBinArithHelper(config,
10515 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000010516 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010517 kHRegSize,
10518 zd_inputs,
10519 pg_inputs,
10520 zn_inputs,
10521 zm_inputs,
10522 zd_expected_max);
10523
10524 uint16_t zd_expected_min[] = {Float16ToRawbits(Float16(-2.1)),
10525 Float16ToRawbits(Float16(-13.0)),
10526 Float16ToRawbits(Float16(3.3)),
10527 Float16ToRawbits(Float16(0.0)),
10528 Float16ToRawbits(Float16(5.5)),
10529 Float16ToRawbits(Float16(-4.75)),
10530 Float16ToRawbits(kFP16NegativeInfinity),
10531 Float16ToRawbits(kFP16NegativeInfinity)};
10532 FPBinArithHelper(config,
10533 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000010534 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010535 kHRegSize,
10536 zd_inputs,
10537 pg_inputs,
10538 zn_inputs,
10539 zm_inputs,
10540 zd_expected_min);
10541}
10542
10543TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_s) {
10544 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
10545 double zn_inputs[] = {-2.1,
10546 8.5,
10547 225.5,
10548 0.0,
10549 8.8,
10550 -4.75,
10551 kFP64PositiveInfinity,
10552 kFP64NegativeInfinity};
10553 double zm_inputs[] = {-2.0,
10554 -13.0,
10555 24.0,
10556 0.01,
10557 0.5,
10558 300.75,
10559 kFP64NegativeInfinity,
10560 kFP64PositiveInfinity};
10561 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
10562
10563 uint32_t zd_expected_max[] = {FloatToRawbits(-2.0),
10564 FloatToRawbits(8.5),
10565 FloatToRawbits(3.3),
10566 FloatToRawbits(0.01),
10567 FloatToRawbits(5.5),
10568 FloatToRawbits(300.75),
10569 FloatToRawbits(kFP32PositiveInfinity),
10570 FloatToRawbits(kFP32PositiveInfinity)};
10571 FPBinArithHelper(config,
10572 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000010573 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010574 kSRegSize,
10575 zd_inputs,
10576 pg_inputs,
10577 zn_inputs,
10578 zm_inputs,
10579 zd_expected_max);
10580
10581 uint32_t zd_expected_min[] = {FloatToRawbits(-2.1),
10582 FloatToRawbits(-13.0),
10583 FloatToRawbits(3.3),
10584 FloatToRawbits(0.0),
10585 FloatToRawbits(5.5),
10586 FloatToRawbits(-4.75),
10587 FloatToRawbits(kFP32NegativeInfinity),
10588 FloatToRawbits(kFP32NegativeInfinity)};
10589 FPBinArithHelper(config,
10590 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000010591 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010592 kSRegSize,
10593 zd_inputs,
10594 pg_inputs,
10595 zn_inputs,
10596 zm_inputs,
10597 zd_expected_min);
10598}
10599
10600TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_d) {
10601 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
10602 double zn_inputs[] = {-2.1,
10603 8.5,
10604 225.5,
10605 0.0,
10606 8.8,
10607 -4.75,
10608 kFP64PositiveInfinity,
10609 kFP64NegativeInfinity};
10610 double zm_inputs[] = {-2.0,
10611 -13.0,
10612 24.0,
10613 0.01,
10614 0.5,
10615 300.75,
10616 kFP64NegativeInfinity,
10617 kFP64PositiveInfinity};
10618 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
10619
10620 uint64_t zd_expected_max[] = {DoubleToRawbits(-2.0),
10621 DoubleToRawbits(8.5),
10622 DoubleToRawbits(3.3),
10623 DoubleToRawbits(0.01),
10624 DoubleToRawbits(5.5),
10625 DoubleToRawbits(300.75),
10626 DoubleToRawbits(kFP64PositiveInfinity),
10627 DoubleToRawbits(kFP64PositiveInfinity)};
10628 FPBinArithHelper(config,
10629 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000010630 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010631 kDRegSize,
10632 zd_inputs,
10633 pg_inputs,
10634 zn_inputs,
10635 zm_inputs,
10636 zd_expected_max);
10637
10638 uint64_t zd_expected_min[] = {DoubleToRawbits(-2.1),
10639 DoubleToRawbits(-13.0),
10640 DoubleToRawbits(3.3),
10641 DoubleToRawbits(0.0),
10642 DoubleToRawbits(5.5),
10643 DoubleToRawbits(-4.75),
10644 DoubleToRawbits(kFP64NegativeInfinity),
10645 DoubleToRawbits(kFP64NegativeInfinity)};
10646 FPBinArithHelper(config,
10647 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000010648 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010649 kDRegSize,
10650 zd_inputs,
10651 pg_inputs,
10652 zn_inputs,
10653 zm_inputs,
10654 zd_expected_min);
10655}
TatWai Chong29a0c432019-11-06 22:20:44 -080010656
10657template <typename T, size_t N>
10658static void BitwiseShiftImmHelper(Test* config,
10659 int lane_size_in_bits,
10660 const T (&zn_inputs)[N],
10661 int shift) {
10662 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10663 START();
10664
10665 ZRegister zd_asr = z25.WithLaneSize(lane_size_in_bits);
10666 ZRegister zd_lsr = z26.WithLaneSize(lane_size_in_bits);
10667 ZRegister zd_lsl = z27.WithLaneSize(lane_size_in_bits);
10668 ZRegister zn = z28.WithLaneSize(lane_size_in_bits);
10669
10670 InsrHelper(&masm, zn, zn_inputs);
10671
10672 __ Asr(zd_asr, zn, shift);
10673 __ Lsr(zd_lsr, zn, shift);
Martyn Capewell147b0ba2020-02-19 11:16:02 +000010674 __ Lsl(zd_lsl, zn, shift - 1); // Lsl supports 0 - lane_size-1.
TatWai Chong29a0c432019-11-06 22:20:44 -080010675
10676 END();
10677
10678 if (CAN_RUN()) {
10679 RUN();
10680
10681 const uint64_t mask = GetUintMask(lane_size_in_bits);
10682 for (int i = 0; i < static_cast<int>(N); i++) {
10683 int lane = N - i - 1;
10684 if (!core.HasSVELane(zd_asr, lane)) break;
10685 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
10686 uint64_t result;
10687 if (shift >= lane_size_in_bits) {
10688 result = is_negative ? mask : 0;
10689 } else {
10690 result = zn_inputs[i] >> shift;
10691 if (is_negative) {
10692 result |= mask << (lane_size_in_bits - shift);
10693 result &= mask;
10694 }
10695 }
10696 ASSERT_EQUAL_SVE_LANE(result, zd_asr, lane);
10697 }
10698
10699 for (int i = 0; i < static_cast<int>(N); i++) {
10700 int lane = N - i - 1;
10701 if (!core.HasSVELane(zd_lsr, lane)) break;
10702 uint64_t result =
10703 (shift >= lane_size_in_bits) ? 0 : zn_inputs[i] >> shift;
10704 ASSERT_EQUAL_SVE_LANE(result, zd_lsr, lane);
10705 }
10706
10707 for (int i = 0; i < static_cast<int>(N); i++) {
10708 int lane = N - i - 1;
10709 if (!core.HasSVELane(zd_lsl, lane)) break;
Martyn Capewell147b0ba2020-02-19 11:16:02 +000010710 uint64_t result = (shift > lane_size_in_bits) ? 0 : zn_inputs[i]
10711 << (shift - 1);
TatWai Chong29a0c432019-11-06 22:20:44 -080010712 ASSERT_EQUAL_SVE_LANE(result & mask, zd_lsl, lane);
10713 }
10714 }
10715}
10716
10717TEST_SVE(sve_bitwise_shift_imm_unpredicated) {
10718 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10719 int shift_b[] = {1, 3, 5, 8};
10720 for (size_t i = 0; i < ArrayLength(shift_b); i++) {
10721 BitwiseShiftImmHelper(config, kBRegSize, inputs_b, shift_b[i]);
10722 }
10723
10724 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233};
10725 int shift_h[] = {1, 8, 11, 16};
10726 for (size_t i = 0; i < ArrayLength(shift_h); i++) {
10727 BitwiseShiftImmHelper(config, kHRegSize, inputs_h, shift_h[i]);
10728 }
10729
10730 uint64_t inputs_s[] = {0xfedcba98, 0xfffa55aa, 0x00112233};
10731 int shift_s[] = {1, 9, 17, 32};
10732 for (size_t i = 0; i < ArrayLength(shift_s); i++) {
10733 BitwiseShiftImmHelper(config, kSRegSize, inputs_s, shift_s[i]);
10734 }
10735
10736 uint64_t inputs_d[] = {0xfedcba98fedcba98,
10737 0xfffa5555aaaaaaaa,
10738 0x0011223344aafe80};
10739 int shift_d[] = {1, 23, 45, 64};
10740 for (size_t i = 0; i < ArrayLength(shift_d); i++) {
10741 BitwiseShiftImmHelper(config, kDRegSize, inputs_d, shift_d[i]);
10742 }
10743}
10744
10745template <typename T, typename R, size_t N>
10746static void BitwiseShiftWideElementsHelper(Test* config,
10747 Shift shift_type,
10748 int lane_size_in_bits,
10749 const T (&zn_inputs)[N],
10750 const R& zm_inputs,
10751 const T (&zd_expected)[N]) {
10752 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10753 START();
10754
10755 ArithFn macro;
10756 // Since logical shift left and right by the current lane size width is equal
10757 // to 0, so initialize the array to 0 for convenience.
10758 uint64_t zd_expected_max_shift_amount[N] = {0};
10759 switch (shift_type) {
10760 case ASR: {
10761 macro = &MacroAssembler::Asr;
10762 uint64_t mask = GetUintMask(lane_size_in_bits);
10763 for (size_t i = 0; i < ArrayLength(zn_inputs); i++) {
10764 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
10765 zd_expected_max_shift_amount[i] = is_negative ? mask : 0;
10766 }
10767 break;
10768 }
10769 case LSR:
10770 macro = &MacroAssembler::Lsr;
10771 break;
10772 case LSL:
10773 macro = &MacroAssembler::Lsl;
10774 break;
10775 default:
10776 VIXL_UNIMPLEMENTED();
10777 macro = NULL;
10778 break;
10779 }
10780
10781 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
10782 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
10783 ZRegister zm = z28.WithLaneSize(kDRegSize);
10784
10785 InsrHelper(&masm, zn, zn_inputs);
10786 InsrHelper(&masm, zm, zm_inputs);
10787
10788 (masm.*macro)(zd, zn, zm);
10789
10790 ZRegister zm_max_shift_amount = z25.WithLaneSize(kDRegSize);
10791 ZRegister zd_max_shift_amount = z24.WithLaneSize(lane_size_in_bits);
10792
10793 __ Dup(zm_max_shift_amount, lane_size_in_bits);
10794 (masm.*macro)(zd_max_shift_amount, zn, zm_max_shift_amount);
10795
10796 ZRegister zm_out_of_range = z23.WithLaneSize(kDRegSize);
10797 ZRegister zd_out_of_range = z22.WithLaneSize(lane_size_in_bits);
10798
10799 __ Dup(zm_out_of_range, GetUintMask(lane_size_in_bits));
10800 (masm.*macro)(zd_out_of_range, zn, zm_out_of_range);
10801
10802 END();
10803
10804 if (CAN_RUN()) {
10805 RUN();
10806
10807 ASSERT_EQUAL_SVE(zd_expected, zd);
10808 ASSERT_EQUAL_SVE(zd_expected_max_shift_amount, zd_max_shift_amount);
10809 ASSERT_EQUAL_SVE(zd_max_shift_amount, zd_out_of_range);
10810 }
10811}
10812
10813TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_asr) {
10814 // clang-format off
10815 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10816 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10817 int shift_b[] = {1, 3};
10818 uint64_t expected_b[] = {0xff, 0xee, 0xdd, 0xcc, 0xff, 0x2a, 0xd5, 0xc0,
10819 0xff, 0xfb, 0xf7, 0xf3, 0xff, 0x0a, 0xf5, 0xf0};
10820 BitwiseShiftWideElementsHelper(config,
10821 ASR,
10822 kBRegSize,
10823 inputs_b,
10824 shift_b,
10825 expected_b);
10826
10827 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10828 0xfedc, 0xfa55, 0x0011, 0x2233,
10829 0xfedc, 0xfa55, 0x0011, 0x2233};
10830 int shift_h[] = {1, 8, 11};
10831 uint64_t expected_h[] = {0xff6e, 0xfd2a, 0x0008, 0x1119,
10832 0xfffe, 0xfffa, 0x0000, 0x0022,
10833 0xffff, 0xffff, 0x0000, 0x0004};
10834 BitwiseShiftWideElementsHelper(config,
10835 ASR,
10836 kHRegSize,
10837 inputs_h,
10838 shift_h,
10839 expected_h);
10840
10841 uint64_t inputs_s[] =
10842 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10843 int shift_s[] = {1, 9, 23};
10844 uint64_t expected_s[] =
10845 {0xff6e5d4c, 0xfffd2ad5, 0x00000891, 0x000091a2, 0xffffff55, 0xffffff11};
10846 BitwiseShiftWideElementsHelper(config,
10847 ASR,
10848 kSRegSize,
10849 inputs_s,
10850 shift_s,
10851 expected_s);
10852 // clang-format on
10853}
10854
10855TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsr) {
10856 // clang-format off
10857 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10858 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10859 int shift_b[] = {1, 3};
10860 uint64_t expected_b[] = {0x7f, 0x6e, 0x5d, 0x4c, 0x7f, 0x2a, 0x55, 0x40,
10861 0x1f, 0x1b, 0x17, 0x13, 0x1f, 0x0a, 0x15, 0x10};
10862
10863 BitwiseShiftWideElementsHelper(config,
10864 LSR,
10865 kBRegSize,
10866 inputs_b,
10867 shift_b,
10868 expected_b);
10869
10870 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10871 0xfedc, 0xfa55, 0x0011, 0x2233,
10872 0xfedc, 0xfa55, 0x0011, 0x2233};
10873 int shift_h[] = {1, 8, 11};
10874 uint64_t expected_h[] = {0x7f6e, 0x7d2a, 0x0008, 0x1119,
10875 0x00fe, 0x00fa, 0x0000, 0x0022,
10876 0x001f, 0x001f, 0x0000, 0x0004};
10877 BitwiseShiftWideElementsHelper(config,
10878 LSR,
10879 kHRegSize,
10880 inputs_h,
10881 shift_h,
10882 expected_h);
10883
10884 uint64_t inputs_s[] =
10885 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10886 int shift_s[] = {1, 9, 23};
10887 uint64_t expected_s[] =
10888 {0x7f6e5d4c, 0x7ffd2ad5, 0x00000891, 0x000091a2, 0x00000155, 0x00000111};
10889 BitwiseShiftWideElementsHelper(config,
10890 LSR,
10891 kSRegSize,
10892 inputs_s,
10893 shift_s,
10894 expected_s);
10895 // clang-format on
10896}
10897
10898TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsl) {
10899 // clang-format off
10900 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10901 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10902 int shift_b[] = {1, 5};
10903
10904 uint64_t expected_b[] = {0xfc, 0xb8, 0x74, 0x30, 0xfe, 0xaa, 0x54, 0x00,
10905 0xc0, 0x80, 0x40, 0x00, 0xe0, 0xa0, 0x40, 0x00};
10906
10907 BitwiseShiftWideElementsHelper(config,
10908 LSL,
10909 kBRegSize,
10910 inputs_b,
10911 shift_b,
10912 expected_b);
10913 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10914 0xfedc, 0xfa55, 0x0011, 0x2233,
10915 0xfedc, 0xfa55, 0x0011, 0x2233};
10916 int shift_h[] = {1, 2, 14};
10917
10918 uint64_t expected_h[] = {0xfdb8, 0xf4aa, 0x0022, 0x4466,
10919 0xfb70, 0xe954, 0x0044, 0x88cc,
10920 0x0000, 0x4000, 0x4000, 0xc000};
10921 BitwiseShiftWideElementsHelper(config,
10922 LSL,
10923 kHRegSize,
10924 inputs_h,
10925 shift_h,
10926 expected_h);
10927 uint64_t inputs_s[] =
10928 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10929 int shift_s[] = {1, 19, 26};
10930 uint64_t expected_s[] =
10931 {0xfdb97530, 0xfff4ab54, 0x11980000, 0x2b380000, 0xa8000000, 0x20000000};
10932 BitwiseShiftWideElementsHelper(config,
10933 LSL,
10934 kSRegSize,
10935 inputs_s,
10936 shift_s,
10937 expected_s);
Martyn Capewell3bf2d162020-02-17 15:04:36 +000010938
10939 // Test large shifts outside the range of the "unsigned" type.
10940 uint64_t inputs_b2[] = {1, 2, 4, 8, 3, 5, 7, 9,
10941 1, 2, 4, 8, 3, 5, 7, 9};
10942 uint64_t shift_b2[] = {1, 0x1000000001};
10943 uint64_t expected_b2[] = {2, 4, 8, 16, 6, 10, 14, 18,
10944 0, 0, 0, 0, 0, 0, 0, 0};
10945 BitwiseShiftWideElementsHelper(config, LSL, kBRegSize, inputs_b2, shift_b2,
10946 expected_b2);
10947
TatWai Chong29a0c432019-11-06 22:20:44 -080010948 // clang-format on
10949}
10950
Martyn Capewell76c094a2020-02-13 17:26:49 +000010951TEST_SVE(sve_shift_by_vector) {
10952 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10953
10954 START();
10955 __ Ptrue(p0.VnB());
10956 __ Pfalse(p1.VnB());
10957 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
10958 __ Zip1(p3.VnH(), p0.VnH(), p1.VnH());
10959 __ Zip1(p4.VnS(), p0.VnS(), p1.VnS());
10960 __ Zip1(p5.VnD(), p0.VnD(), p1.VnD());
10961
10962 __ Dup(z31.VnD(), 0x8000000080008080);
10963 __ Dup(z0.VnB(), -1);
10964
10965 __ Index(z1.VnB(), 0, 1);
10966 __ Dup(z2.VnB(), 0x55);
10967 __ Lsr(z2.VnB(), p2.Merging(), z0.VnB(), z1.VnB());
10968 __ Lsl(z3.VnB(), p0.Merging(), z0.VnB(), z1.VnB());
10969 __ Asr(z4.VnB(), p0.Merging(), z31.VnB(), z1.VnB());
10970
10971 __ Index(z1.VnH(), 0, 1);
10972 __ Dup(z6.VnB(), 0x55);
10973 __ Lsr(z5.VnH(), p0.Merging(), z0.VnH(), z1.VnH());
10974 __ Lsl(z6.VnH(), p3.Merging(), z0.VnH(), z1.VnH());
10975 __ Asr(z7.VnH(), p0.Merging(), z31.VnH(), z1.VnH());
10976
10977 __ Index(z1.VnS(), 0, 1);
10978 __ Dup(z10.VnB(), 0x55);
10979 __ Lsr(z8.VnS(), p0.Merging(), z0.VnS(), z1.VnS());
10980 __ Lsl(z9.VnS(), p0.Merging(), z0.VnS(), z1.VnS());
10981 __ Asr(z10.VnS(), p4.Merging(), z31.VnS(), z1.VnS());
10982
10983 __ Index(z1.VnD(), 0, 1);
10984 __ Lsr(z0.VnD(), p5.Merging(), z0.VnD(), z1.VnD());
10985 __ Lsl(z12.VnD(), p0.Merging(), z0.VnD(), z1.VnD());
10986 __ Asr(z13.VnD(), p0.Merging(), z31.VnD(), z1.VnD());
10987
10988 __ Dup(z11.VnD(), 0x100000001);
10989 __ Lsl(z14.VnD(), p0.Merging(), z1.VnD(), z11.VnD());
10990
10991 __ Index(z0.VnH(), 7, -1);
10992 __ Lsr(z0.VnH(), p0.Merging(), z31.VnH(), z0.VnH());
10993 END();
10994
10995 if (CAN_RUN()) {
10996 RUN();
10997
10998 uint64_t expected_z0[] = {0x8000000020001010, 0x0800000002000101};
10999 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
11000 uint64_t expected_z2[] = {0x5500550055005500, 0x5503550f553f55ff};
11001 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11002 uint64_t expected_z3[] = {0x0000000000000000, 0x80c0e0f0f8fcfeff};
11003 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11004 uint64_t expected_z4[] = {0xff000000ff00ffff, 0xff000000f000c080};
11005 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11006 uint64_t expected_z5[] = {0x01ff03ff07ff0fff, 0x1fff3fff7fffffff};
11007 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11008 uint64_t expected_z6[] = {0x5555ffc05555fff0, 0x5555fffc5555ffff};
11009 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11010 uint64_t expected_z7[] = {0xff000000fc00f808, 0xf0000000c0008080};
11011 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11012 uint64_t expected_z8[] = {0x1fffffff3fffffff, 0x7fffffffffffffff};
11013 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11014 uint64_t expected_z9[] = {0xfffffff8fffffffc, 0xfffffffeffffffff};
11015 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11016 uint64_t expected_z10[] = {0x55555555e0002020, 0x5555555580008080};
11017 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11018 uint64_t expected_z12[] = {0xfffffffffffffffe, 0xffffffffffffffff};
11019 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
11020 uint64_t expected_z13[] = {0xc000000040004040, 0x8000000080008080};
11021 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
11022 uint64_t expected_z14[] = {0, 0};
11023 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
11024 }
11025}
11026
11027TEST_SVE(sve_shift_by_wide_vector) {
11028 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11029
11030 START();
11031 __ Ptrue(p0.VnB());
11032 __ Pfalse(p1.VnB());
11033 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
11034 __ Zip1(p3.VnH(), p0.VnH(), p1.VnH());
11035 __ Zip1(p4.VnS(), p0.VnS(), p1.VnS());
11036
11037 __ Dup(z31.VnD(), 0x8000000080008080);
11038 __ Dup(z0.VnB(), -1);
11039 __ Index(z1.VnD(), 1, 5);
11040
11041 __ Dup(z2.VnB(), 0x55);
11042 __ Lsr(z2.VnB(), p2.Merging(), z2.VnB(), z1.VnD());
11043 __ Lsl(z3.VnB(), p0.Merging(), z0.VnB(), z1.VnD());
11044 __ Asr(z4.VnB(), p0.Merging(), z31.VnB(), z1.VnD());
11045
11046 __ Dup(z6.VnB(), 0x55);
11047 __ Lsr(z5.VnH(), p0.Merging(), z0.VnH(), z1.VnD());
11048 __ Lsl(z6.VnH(), p3.Merging(), z6.VnH(), z1.VnD());
11049 __ Asr(z7.VnH(), p0.Merging(), z31.VnH(), z1.VnD());
11050
11051 __ Dup(z10.VnB(), 0x55);
11052 __ Lsr(z8.VnS(), p0.Merging(), z0.VnS(), z1.VnD());
11053 __ Lsl(z9.VnS(), p0.Merging(), z0.VnS(), z1.VnD());
11054 __ Asr(z10.VnS(), p4.Merging(), z31.VnS(), z1.VnD());
11055 END();
11056
11057 if (CAN_RUN()) {
11058 RUN();
11059
11060 uint64_t expected_z2[] = {0x5501550155015501, 0x552a552a552a552a};
11061 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11062 uint64_t expected_z3[] = {0xc0c0c0c0c0c0c0c0, 0xfefefefefefefefe};
11063 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11064 uint64_t expected_z4[] = {0xfe000000fe00fefe, 0xc0000000c000c0c0};
11065 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11066 uint64_t expected_z5[] = {0x03ff03ff03ff03ff, 0x7fff7fff7fff7fff};
11067 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11068 uint64_t expected_z6[] = {0x5555554055555540, 0x5555aaaa5555aaaa};
11069 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11070 uint64_t expected_z7[] = {0xfe000000fe00fe02, 0xc0000000c000c040};
11071 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11072 uint64_t expected_z8[] = {0x03ffffff03ffffff, 0x7fffffff7fffffff};
11073 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11074 uint64_t expected_z9[] = {0xffffffc0ffffffc0, 0xfffffffefffffffe};
11075 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11076 uint64_t expected_z10[] = {0x55555555fe000202, 0x55555555c0004040};
11077 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11078 }
11079}
11080
Martyn Capewell83e86612020-02-19 15:46:15 +000011081TEST_SVE(sve_pred_shift_imm) {
11082 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11083
11084 START();
11085 __ Ptrue(p0.VnB());
11086 __ Pfalse(p1.VnB());
11087 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
11088 __ Zip1(p3.VnH(), p0.VnH(), p1.VnH());
11089 __ Zip1(p4.VnS(), p0.VnS(), p1.VnS());
11090 __ Zip1(p5.VnD(), p0.VnD(), p1.VnD());
11091
11092 __ Dup(z31.VnD(), 0x8000000080008080);
11093 __ Lsr(z0.VnB(), p0.Merging(), z31.VnB(), 1);
11094 __ Mov(z1, z0);
11095 __ Lsl(z1.VnB(), p2.Merging(), z1.VnB(), 1);
11096 __ Asr(z2.VnB(), p0.Merging(), z1.VnB(), 2);
11097
11098 __ Lsr(z3.VnH(), p0.Merging(), z31.VnH(), 2);
11099 __ Mov(z4, z3);
11100 __ Lsl(z4.VnH(), p3.Merging(), z4.VnH(), 2);
11101 __ Asr(z5.VnH(), p0.Merging(), z4.VnH(), 3);
11102
11103 __ Lsr(z6.VnS(), p0.Merging(), z31.VnS(), 3);
11104 __ Mov(z7, z6);
11105 __ Lsl(z7.VnS(), p4.Merging(), z7.VnS(), 3);
11106 __ Asr(z8.VnS(), p0.Merging(), z7.VnS(), 4);
11107
11108 __ Lsr(z9.VnD(), p0.Merging(), z31.VnD(), 4);
11109 __ Mov(z10, z9);
11110 __ Lsl(z10.VnD(), p5.Merging(), z10.VnD(), 4);
11111 __ Asr(z11.VnD(), p0.Merging(), z10.VnD(), 5);
11112 END();
11113
11114 if (CAN_RUN()) {
11115 RUN();
11116 uint64_t expected_z0[] = {0x4000000040004040, 0x4000000040004040};
11117 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
11118 uint64_t expected_z1[] = {0x4000000040004080, 0x4000000040004080};
11119 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
11120 uint64_t expected_z2[] = {0x10000000100010e0, 0x10000000100010e0};
11121 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11122 uint64_t expected_z3[] = {0x2000000020002020, 0x2000000020002020};
11123 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11124 uint64_t expected_z4[] = {0x2000000020008080, 0x2000000020008080};
11125 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11126 uint64_t expected_z5[] = {0x040000000400f010, 0x040000000400f010};
11127 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11128 uint64_t expected_z6[] = {0x1000000010001010, 0x1000000010001010};
11129 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11130 uint64_t expected_z7[] = {0x1000000080008080, 0x1000000080008080};
11131 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11132 uint64_t expected_z8[] = {0x01000000f8000808, 0x01000000f8000808};
11133 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11134 uint64_t expected_z9[] = {0x0800000008000808, 0x0800000008000808};
11135 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11136 uint64_t expected_z10[] = {0x0800000008000808, 0x8000000080008080};
11137 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11138 uint64_t expected_z11[] = {0x0040000000400040, 0xfc00000004000404};
11139 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
11140 }
11141}
11142
11143TEST_SVE(sve_asrd) {
11144 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11145
11146 START();
11147 __ Ptrue(p0.VnB());
11148 __ Pfalse(p1.VnB());
11149 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
11150 __ Zip1(p3.VnH(), p0.VnH(), p1.VnH());
11151 __ Zip1(p4.VnS(), p0.VnS(), p1.VnS());
11152 __ Zip1(p5.VnD(), p0.VnD(), p1.VnD());
11153
11154 __ Index(z31.VnB(), 0x7f - 3, 1);
11155 __ Asrd(z0.VnB(), p0.Merging(), z31.VnB(), 1);
11156 __ Mov(z1, z31);
11157 __ Asrd(z1.VnB(), p2.Merging(), z1.VnB(), 2);
11158 __ Asrd(z2.VnB(), p0.Merging(), z31.VnB(), 7);
11159 __ Asrd(z3.VnB(), p0.Merging(), z31.VnB(), 8);
11160
11161 __ Index(z31.VnH(), 0x7fff - 3, 1);
11162 __ Asrd(z4.VnH(), p0.Merging(), z31.VnH(), 1);
11163 __ Mov(z5, z31);
11164 __ Asrd(z5.VnH(), p3.Merging(), z5.VnH(), 2);
11165 __ Asrd(z6.VnH(), p0.Merging(), z31.VnH(), 15);
11166 __ Asrd(z7.VnH(), p0.Merging(), z31.VnH(), 16);
11167
11168 __ Index(z31.VnS(), 0x7fffffff - 1, 1);
11169 __ Asrd(z8.VnS(), p0.Merging(), z31.VnS(), 1);
11170 __ Mov(z9, z31);
11171 __ Asrd(z9.VnS(), p4.Merging(), z9.VnS(), 2);
11172 __ Asrd(z10.VnS(), p0.Merging(), z31.VnS(), 31);
11173 __ Asrd(z11.VnS(), p0.Merging(), z31.VnS(), 32);
11174
11175 __ Index(z31.VnD(), 0x7fffffffffffffff, 1);
11176 __ Asrd(z12.VnD(), p0.Merging(), z31.VnD(), 1);
11177 __ Mov(z13, z31);
11178 __ Asrd(z13.VnD(), p5.Merging(), z13.VnD(), 2);
11179 __ Asrd(z14.VnD(), p0.Merging(), z31.VnD(), 63);
11180 __ Asrd(z31.VnD(), p0.Merging(), z31.VnD(), 64);
11181 END();
11182
11183 if (CAN_RUN()) {
11184 RUN();
11185 uint64_t expected_z0[] = {0xc6c5c5c4c4c3c3c2, 0xc2c1c1c03f3f3e3e};
11186 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
11187 uint64_t expected_z1[] = {0x8be389e287e285e1, 0x83e181e07f1f7d1f};
11188 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
11189 uint64_t expected_z2[] = {0x0000000000000000, 0x000000ff00000000};
11190 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11191 uint64_t expected_z3[] = {0x0000000000000000, 0x0000000000000000};
11192 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11193 uint64_t expected_z4[] = {0xc002c001c001c000, 0x3fff3fff3ffe3ffe};
11194 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11195 uint64_t expected_z5[] = {0x8003e0018001e000, 0x7fff1fff7ffd1fff};
11196 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11197 uint64_t expected_z6[] = {0x000000000000ffff, 0x0000000000000000};
11198 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11199 uint64_t expected_z7[] = {0x0000000000000000, 0x0000000000000000};
11200 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11201 uint64_t expected_z8[] = {0xc0000001c0000000, 0x3fffffff3fffffff};
11202 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11203 uint64_t expected_z9[] = {0x80000001e0000000, 0x7fffffff1fffffff};
11204 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11205 uint64_t expected_z10[] = {0x00000000ffffffff, 0x0000000000000000};
11206 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11207 uint64_t expected_z11[] = {0x0000000000000000, 0x0000000000000000};
11208 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
11209 uint64_t expected_z12[] = {0xc000000000000000, 0x3fffffffffffffff};
11210 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
11211 uint64_t expected_z13[] = {0x8000000000000000, 0x1fffffffffffffff};
11212 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
11213 uint64_t expected_z14[] = {0xffffffffffffffff, 0x0000000000000000};
11214 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
11215 uint64_t expected_z31[] = {0x0000000000000000, 0x0000000000000000};
11216 ASSERT_EQUAL_SVE(expected_z31, z31.VnD());
11217 }
11218}
11219
TatWai Chong4023d7a2019-11-18 14:16:28 -080011220TEST_SVE(sve_setffr) {
11221 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11222 START();
11223
11224 __ Ptrue(p15.VnB());
11225 __ Setffr();
11226 __ Rdffr(p14.VnB());
11227
11228 END();
11229
11230 if (CAN_RUN()) {
11231 RUN();
11232
11233 ASSERT_EQUAL_SVE(p14.VnB(), p15.VnB());
11234 }
11235}
11236
11237static void WrffrHelper(Test* config, unsigned active_lanes) {
11238 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11239 START();
11240
11241 int inputs[kPRegMaxSize] = {0};
11242 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
11243 for (unsigned i = 0; i < active_lanes; i++) {
11244 // The rightmost (highest-indexed) array element maps to the lowest-numbered
11245 // lane.
11246 inputs[kPRegMaxSize - i - 1] = 1;
11247 }
11248
11249 Initialise(&masm, p1.VnB(), inputs);
11250 __ Wrffr(p1.VnB());
11251 __ Rdffr(p2.VnB());
11252
11253 END();
11254
11255 if (CAN_RUN()) {
11256 RUN();
11257
11258 ASSERT_EQUAL_SVE(p1.VnB(), p2.VnB());
11259 }
11260}
11261
11262TEST_SVE(sve_wrffr) {
11263 int active_lanes_inputs[] = {0, 1, 7, 10, 32, 48, kPRegMaxSize};
11264 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
11265 WrffrHelper(config, active_lanes_inputs[i]);
11266 }
11267}
11268
TatWai Chonga3e8b172019-11-22 21:48:56 -080011269template <size_t N>
11270static void RdffrHelper(Test* config,
11271 size_t active_lanes,
11272 const int (&pg_inputs)[N]) {
11273 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11274 START();
11275
11276 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
11277
11278 // The rightmost (highest-indexed) array element maps to the lowest-numbered
11279 // lane.
11280 int pd[kPRegMaxSize] = {0};
11281 for (unsigned i = 0; i < active_lanes; i++) {
11282 pd[kPRegMaxSize - i - 1] = 1;
11283 }
11284
11285 int pg[kPRegMaxSize] = {0};
11286 for (unsigned i = 0; i < N; i++) {
11287 pg[kPRegMaxSize - i - 1] = pg_inputs[i];
11288 }
11289
11290 int pd_expected[kPRegMaxSize] = {0};
11291 for (unsigned i = 0; i < std::min(active_lanes, N); i++) {
11292 int lane = kPRegMaxSize - i - 1;
11293 pd_expected[lane] = pd[lane] & pg[lane];
11294 }
11295
11296 Initialise(&masm, p0.VnB(), pg);
11297 Initialise(&masm, p1.VnB(), pd);
11298
11299 // The unpredicated form of rdffr has been tested in `WrffrHelper`.
11300 __ Wrffr(p1.VnB());
11301 __ Rdffr(p14.VnB(), p0.Zeroing());
11302 __ Rdffrs(p13.VnB(), p0.Zeroing());
11303 __ Mrs(x8, NZCV);
11304
11305 END();
11306
11307 if (CAN_RUN()) {
11308 RUN();
11309
11310 ASSERT_EQUAL_SVE(pd_expected, p14.VnB());
11311 ASSERT_EQUAL_SVE(pd_expected, p13.VnB());
11312 StatusFlags nzcv_expected =
11313 GetPredTestFlags(pd_expected, pg, core.GetSVELaneCount(kBRegSize));
11314 ASSERT_EQUAL_64(nzcv_expected, x8);
11315 }
11316}
11317
11318TEST_SVE(sve_rdffr_rdffrs) {
11319 // clang-format off
11320 int active_lanes_inputs[] = {0, 1, 15, 26, 39, 47, kPRegMaxSize};
11321 int pg_inputs_0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11322 int pg_inputs_1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
11323 int pg_inputs_2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11324 int pg_inputs_3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
11325 int pg_inputs_4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11326 // clang-format on
11327
11328 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
11329 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_0);
11330 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_1);
11331 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_2);
11332 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_3);
11333 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_4);
11334 }
11335}
11336
TatWai Chong38303d92019-12-02 15:49:29 -080011337typedef void (MacroAssembler::*BrkpFn)(const PRegisterWithLaneSize& pd,
11338 const PRegisterZ& pg,
11339 const PRegisterWithLaneSize& pn,
11340 const PRegisterWithLaneSize& pm);
11341
11342template <typename Tg, typename Tn, typename Td>
11343static void BrkpaBrkpbHelper(Test* config,
11344 BrkpFn macro,
11345 BrkpFn macro_set_flags,
11346 const Tg& pg_inputs,
11347 const Tn& pn_inputs,
11348 const Tn& pm_inputs,
11349 const Td& pd_expected) {
11350 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11351 START();
11352
11353 PRegister pg = p15;
11354 PRegister pn = p14;
11355 PRegister pm = p13;
11356 Initialise(&masm, pg.VnB(), pg_inputs);
11357 Initialise(&masm, pn.VnB(), pn_inputs);
11358 Initialise(&masm, pm.VnB(), pm_inputs);
11359
11360 // Initialise NZCV to an impossible value, to check that we actually write it.
11361 __ Mov(x10, NZCVFlag);
11362 __ Msr(NZCV, x10);
11363
11364 (masm.*macro_set_flags)(p0.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
11365 __ Mrs(x0, NZCV);
11366
11367 (masm.*macro)(p1.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
11368
11369 END();
11370
11371 if (CAN_RUN()) {
11372 RUN();
11373
11374 ASSERT_EQUAL_SVE(pd_expected, p0.VnB());
11375
11376 // Check that the flags were properly set.
11377 StatusFlags nzcv_expected =
11378 GetPredTestFlags(pd_expected,
11379 pg_inputs,
11380 core.GetSVELaneCount(kBRegSize));
11381 ASSERT_EQUAL_64(nzcv_expected, x0);
11382 ASSERT_EQUAL_SVE(p0.VnB(), p1.VnB());
11383 }
11384}
11385
11386template <typename Tg, typename Tn, typename Td>
11387static void BrkpaHelper(Test* config,
11388 const Tg& pg_inputs,
11389 const Tn& pn_inputs,
11390 const Tn& pm_inputs,
11391 const Td& pd_expected) {
11392 BrkpaBrkpbHelper(config,
11393 &MacroAssembler::Brkpa,
11394 &MacroAssembler::Brkpas,
11395 pg_inputs,
11396 pn_inputs,
11397 pm_inputs,
11398 pd_expected);
11399}
11400
11401template <typename Tg, typename Tn, typename Td>
11402static void BrkpbHelper(Test* config,
11403 const Tg& pg_inputs,
11404 const Tn& pn_inputs,
11405 const Tn& pm_inputs,
11406 const Td& pd_expected) {
11407 BrkpaBrkpbHelper(config,
11408 &MacroAssembler::Brkpb,
11409 &MacroAssembler::Brkpbs,
11410 pg_inputs,
11411 pn_inputs,
11412 pm_inputs,
11413 pd_expected);
11414}
11415
11416TEST_SVE(sve_brkpb) {
11417 // clang-format off
11418 // The last active element of `pn` are `true` in all vector length configurations.
11419 // | boundary of 128-bits VL.
11420 // v
11421 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11422 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11423 int pg_3[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
11424
11425 // | highest-numbered lane lowest-numbered lane |
11426 // v v
11427 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
11428 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
11429 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1};
11430
11431 int pm_1[] = {1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
11432 int pm_2[] = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11433 int pm_3[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11434
11435 // | first active
11436 // v
11437 int exp_1_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
11438 // | first active
11439 // v
11440 int exp_1_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11441 // | first active
11442 // v
11443 int exp_1_3_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
11444
11445 BrkpbHelper(config, pg_1, pn_1, pm_1, exp_1_1_1);
11446 BrkpbHelper(config, pg_1, pn_2, pm_2, exp_1_2_2);
11447 BrkpbHelper(config, pg_1, pn_3, pm_3, exp_1_3_3);
11448
11449 // | first active
11450 // v
11451 int exp_2_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11452 // | first active
11453 // v
11454 int exp_2_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11455 // | first active
11456 // v
11457 int exp_2_3_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
11458 BrkpbHelper(config, pg_2, pn_1, pm_2, exp_2_1_2);
11459 BrkpbHelper(config, pg_2, pn_2, pm_3, exp_2_2_3);
11460 BrkpbHelper(config, pg_2, pn_3, pm_1, exp_2_3_1);
11461
11462 // | first active
11463 // v
11464 int exp_3_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
11465 // | first active
11466 // v
11467 int exp_3_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
11468 // | first active
11469 // v
11470 int exp_3_3_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
11471 BrkpbHelper(config, pg_3, pn_1, pm_3, exp_3_1_3);
11472 BrkpbHelper(config, pg_3, pn_2, pm_1, exp_3_2_1);
11473 BrkpbHelper(config, pg_3, pn_3, pm_2, exp_3_3_2);
11474
11475 // The last active element of `pn` are `false` in all vector length configurations.
11476 // | last active lane when VL > 128 bits.
11477 // v
11478 // | last active lane when VL == 128 bits.
11479 // v
11480 int pg_4[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
11481 int exp_4_x_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11482 BrkpbHelper(config, pg_4, pn_1, pm_1, exp_4_x_x);
11483 BrkpbHelper(config, pg_4, pn_2, pm_2, exp_4_x_x);
11484 BrkpbHelper(config, pg_4, pn_3, pm_3, exp_4_x_x);
11485 // clang-format on
11486}
11487
11488TEST_SVE(sve_brkpa) {
11489 // clang-format off
11490 // The last active element of `pn` are `true` in all vector length configurations.
11491 // | boundary of 128-bits VL.
11492 // v
11493 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11494 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11495 int pg_3[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
11496
11497 // | highest-numbered lane lowest-numbered lane |
11498 // v v
11499 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
11500 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
11501 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1};
11502
11503 int pm_1[] = {1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
11504 int pm_2[] = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11505 int pm_3[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11506
11507 // | first active
11508 // v
11509 int exp_1_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
11510 // | first active
11511 // v
11512 int exp_1_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11513 // | first active
11514 // v
11515 int exp_1_3_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
11516
11517 BrkpaHelper(config, pg_1, pn_1, pm_1, exp_1_1_1);
11518 BrkpaHelper(config, pg_1, pn_2, pm_2, exp_1_2_2);
11519 BrkpaHelper(config, pg_1, pn_3, pm_3, exp_1_3_3);
11520
11521 // | first active
11522 // v
11523 int exp_2_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11524 // | first active
11525 // v
11526 int exp_2_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11527 // | first active
11528 // v
11529 int exp_2_3_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1};
11530 BrkpaHelper(config, pg_2, pn_1, pm_2, exp_2_1_2);
11531 BrkpaHelper(config, pg_2, pn_2, pm_3, exp_2_2_3);
11532 BrkpaHelper(config, pg_2, pn_3, pm_1, exp_2_3_1);
11533
11534 // | first active
11535 // v
11536 int exp_3_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
11537 // | first active
11538 // v
11539 int exp_3_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
11540 // | first active
11541 // v
11542 int exp_3_3_2[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
11543 BrkpaHelper(config, pg_3, pn_1, pm_3, exp_3_1_3);
11544 BrkpaHelper(config, pg_3, pn_2, pm_1, exp_3_2_1);
11545 BrkpaHelper(config, pg_3, pn_3, pm_2, exp_3_3_2);
11546
11547 // The last active element of `pn` are `false` in all vector length configurations.
11548 // | last active lane when VL > 128 bits.
11549 // v
11550 // | last active lane when VL == 128 bits.
11551 // v
11552 int pg_4[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
11553 int exp_4_x_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11554 BrkpaHelper(config, pg_4, pn_1, pm_1, exp_4_x_x);
11555 BrkpaHelper(config, pg_4, pn_2, pm_2, exp_4_x_x);
11556 BrkpaHelper(config, pg_4, pn_3, pm_3, exp_4_x_x);
11557 // clang-format on
11558}
11559
Martyn Capewell77b6d982019-12-02 18:34:59 +000011560TEST_SVE(sve_rbit) {
11561 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11562 START();
11563
11564 uint64_t inputs[] = {0xaaaaaaaa55555555, 0xaaaa5555aa55aa55};
11565 InsrHelper(&masm, z0.VnD(), inputs);
11566
11567 __ Ptrue(p1.VnB());
11568 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
11569 Initialise(&masm, p2.VnB(), pred);
11570
11571 __ Rbit(z0.VnB(), p1.Merging(), z0.VnB());
11572 __ Rbit(z0.VnB(), p1.Merging(), z0.VnB());
11573
11574 __ Rbit(z1.VnB(), p1.Merging(), z0.VnB());
11575 __ Rbit(z2.VnH(), p1.Merging(), z0.VnH());
11576 __ Rbit(z3.VnS(), p1.Merging(), z0.VnS());
11577 __ Rbit(z4.VnD(), p1.Merging(), z0.VnD());
11578
11579 __ Dup(z5.VnB(), 0x42);
11580 __ Rbit(z5.VnB(), p2.Merging(), z0.VnB());
11581 __ Dup(z6.VnB(), 0x42);
11582 __ Rbit(z6.VnS(), p2.Merging(), z0.VnS());
11583
11584 END();
11585
11586 if (CAN_RUN()) {
11587 RUN();
11588
11589 ASSERT_EQUAL_SVE(inputs, z0.VnD());
11590
11591 uint64_t expected_z1[] = {0x55555555aaaaaaaa, 0x5555aaaa55aa55aa};
11592 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
11593 uint64_t expected_z2[] = {0x55555555aaaaaaaa, 0x5555aaaaaa55aa55};
11594 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11595 uint64_t expected_z3[] = {0x55555555aaaaaaaa, 0xaaaa5555aa55aa55};
11596 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11597 uint64_t expected_z4[] = {0xaaaaaaaa55555555, 0xaa55aa55aaaa5555};
11598 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11599 uint64_t expected_z5[] = {0x4255425542aa42aa, 0x4255424242aa42aa};
11600 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11601 uint64_t expected_z6[] = {0x55555555aaaaaaaa, 0x42424242aa55aa55};
11602 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11603 }
11604}
11605
11606TEST_SVE(sve_rev_bhw) {
11607 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11608 START();
11609
11610 uint64_t inputs[] = {0xaaaaaaaa55555555, 0xaaaa5555aa55aa55};
11611 InsrHelper(&masm, z0.VnD(), inputs);
11612
11613 __ Ptrue(p1.VnB());
11614 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
11615 Initialise(&masm, p2.VnB(), pred);
11616
11617 __ Revb(z1.VnH(), p1.Merging(), z0.VnH());
11618 __ Revb(z2.VnS(), p1.Merging(), z0.VnS());
11619 __ Revb(z3.VnD(), p1.Merging(), z0.VnD());
11620 __ Revh(z4.VnS(), p1.Merging(), z0.VnS());
11621 __ Revh(z5.VnD(), p1.Merging(), z0.VnD());
11622 __ Revw(z6.VnD(), p1.Merging(), z0.VnD());
11623
11624 __ Dup(z7.VnB(), 0x42);
11625 __ Revb(z7.VnH(), p2.Merging(), z0.VnH());
11626 __ Dup(z8.VnB(), 0x42);
11627 __ Revh(z8.VnS(), p2.Merging(), z0.VnS());
11628
11629 END();
11630
11631 if (CAN_RUN()) {
11632 RUN();
11633
11634 uint64_t expected_z1[] = {0xaaaaaaaa55555555, 0xaaaa555555aa55aa};
11635 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
11636 uint64_t expected_z2[] = {0xaaaaaaaa55555555, 0x5555aaaa55aa55aa};
11637 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11638 uint64_t expected_z3[] = {0x55555555aaaaaaaa, 0x55aa55aa5555aaaa};
11639 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11640 uint64_t expected_z4[] = {0xaaaaaaaa55555555, 0x5555aaaaaa55aa55};
11641 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11642 uint64_t expected_z5[] = {0x55555555aaaaaaaa, 0xaa55aa555555aaaa};
11643 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11644 uint64_t expected_z6[] = {0x55555555aaaaaaaa, 0xaa55aa55aaaa5555};
11645 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11646 uint64_t expected_z7[] = {0xaaaaaaaa55555555, 0xaaaa424255aa55aa};
11647 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11648 uint64_t expected_z8[] = {0xaaaaaaaa55555555, 0x42424242aa55aa55};
11649 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11650 }
11651}
11652
Martyn Capewell43782632019-12-12 13:22:10 +000011653TEST_SVE(sve_ftssel) {
11654 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11655 START();
11656
11657 uint64_t in[] = {0x1111777766665555, 0xaaaabbbbccccdddd};
11658 uint64_t q[] = {0x0001000300000002, 0x0001000200000003};
11659 InsrHelper(&masm, z0.VnD(), in);
11660 InsrHelper(&masm, z1.VnD(), q);
11661
11662 __ Ftssel(z2.VnH(), z0.VnH(), z1.VnH());
11663 __ Ftssel(z3.VnS(), z0.VnS(), z1.VnS());
11664 __ Ftssel(z4.VnD(), z0.VnD(), z1.VnD());
11665
11666 END();
11667
11668 if (CAN_RUN()) {
11669 RUN();
11670
11671 uint64_t expected_z2[] = {0x3c00bc006666d555, 0x3c003bbbccccbc00};
11672 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11673 uint64_t expected_z3[] = {0xbf800000e6665555, 0x2aaabbbbbf800000};
11674 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11675 uint64_t expected_z4[] = {0x9111777766665555, 0xbff0000000000000};
11676 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11677 }
11678}
11679
11680TEST_SVE(sve_fexpa) {
11681 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11682 START();
11683
11684 uint64_t in0[] = {0x3ff0000000000000, 0x3ff0000000011001};
11685 uint64_t in1[] = {0x3ff000000002200f, 0xbff000000003301f};
11686 uint64_t in2[] = {0xbff000000004403f, 0x3ff0000000055040};
11687 uint64_t in3[] = {0x3f800000bf800001, 0x3f80000f3f80001f};
11688 uint64_t in4[] = {0x3f80002f3f82203f, 0xbf8000403f833041};
11689 uint64_t in5[] = {0x3c003c01bc00bc07, 0x3c08bc0f3c1fbc20};
11690 InsrHelper(&masm, z0.VnD(), in0);
11691 InsrHelper(&masm, z1.VnD(), in1);
11692 InsrHelper(&masm, z2.VnD(), in2);
11693 InsrHelper(&masm, z3.VnD(), in3);
11694 InsrHelper(&masm, z4.VnD(), in4);
11695 InsrHelper(&masm, z5.VnD(), in5);
11696
11697 __ Fexpa(z6.VnD(), z0.VnD());
11698 __ Fexpa(z7.VnD(), z1.VnD());
11699 __ Fexpa(z8.VnD(), z2.VnD());
11700 __ Fexpa(z9.VnS(), z3.VnS());
11701 __ Fexpa(z10.VnS(), z4.VnS());
11702 __ Fexpa(z11.VnH(), z5.VnH());
11703
11704 END();
11705
11706 if (CAN_RUN()) {
11707 RUN();
11708 uint64_t expected_z6[] = {0x0000000000000000, 0x44002c9a3e778061};
11709 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11710 uint64_t expected_z7[] = {0x0802d285a6e4030b, 0x4c06623882552225};
11711 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11712 uint64_t expected_z8[] = {0x100fa7c1819e90d8, 0x5410000000000000};
11713 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11714 uint64_t expected_z9[] = {0x00000000000164d2, 0x0016942d003311c4};
11715 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11716 uint64_t expected_z10[] = {0x0054f35b407d3e0c, 0x00800000608164d2};
11717 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11718 uint64_t expected_z11[] = {0x00000016000000a8, 0x00c2018903d40400};
11719 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
11720 }
11721}
11722
Martyn Capewell7fd6fd52019-12-06 14:50:15 +000011723TEST_SVE(sve_rev_p) {
11724 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11725 START();
11726
11727 Initialise(&masm,
11728 p0.VnB(),
11729 0xabcdabcdabcdabcd,
11730 0xabcdabcdabcdabcd,
11731 0xabcdabcdabcdabcd,
11732 0xabcdabcdabcdabcd);
11733
11734 __ Rev(p1.VnB(), p0.VnB());
11735 __ Rev(p2.VnH(), p0.VnH());
11736 __ Rev(p3.VnS(), p0.VnS());
11737 __ Rev(p4.VnD(), p0.VnD());
11738
11739 END();
11740
11741 if (CAN_RUN()) {
11742 RUN();
11743
11744 int p1_expected[] = {1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1};
11745 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
11746 int p2_expected[] = {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0};
11747 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11748 int p3_expected[] = {1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0};
11749 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11750 int p4_expected[] = {1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1};
11751 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11752 }
11753}
11754
11755TEST_SVE(sve_trn_p_bh) {
11756 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11757 START();
11758
11759 Initialise(&masm, p0.VnB(), 0xa5a55a5a);
11760 __ Pfalse(p1.VnB());
11761
11762 __ Trn1(p2.VnB(), p0.VnB(), p0.VnB());
11763 __ Trn2(p3.VnB(), p0.VnB(), p0.VnB());
11764 __ Trn1(p4.VnB(), p1.VnB(), p0.VnB());
11765 __ Trn2(p5.VnB(), p1.VnB(), p0.VnB());
11766 __ Trn1(p6.VnB(), p0.VnB(), p1.VnB());
11767 __ Trn2(p7.VnB(), p0.VnB(), p1.VnB());
11768
11769 __ Trn1(p8.VnH(), p0.VnH(), p0.VnH());
11770 __ Trn2(p9.VnH(), p0.VnH(), p0.VnH());
11771 __ Trn1(p10.VnH(), p1.VnH(), p0.VnH());
11772 __ Trn2(p11.VnH(), p1.VnH(), p0.VnH());
11773 __ Trn1(p12.VnH(), p0.VnH(), p1.VnH());
11774 __ Trn2(p13.VnH(), p0.VnH(), p1.VnH());
11775
11776 END();
11777
11778 if (CAN_RUN()) {
11779 RUN();
11780 int p2_expected[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
11781 int p3_expected[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
11782 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11783 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11784
11785 int p4_expected[] = {1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11786 int p5_expected[] = {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0};
11787 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11788 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11789
11790 int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0};
11791 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1};
11792 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11793 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11794
11795 int p8_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11796 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11797 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11798 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11799
11800 int p10_expected[] = {0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0};
11801 int p11_expected[] = {0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0};
11802 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11803 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11804
11805 int p12_expected[] = {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0};
11806 int p13_expected[] = {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0};
11807 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11808 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11809 }
11810}
11811
11812TEST_SVE(sve_trn_p_sd) {
11813 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11814 START();
11815
11816 Initialise(&masm, p0.VnB(), 0x55a55aaa);
11817 __ Pfalse(p1.VnB());
11818
11819 __ Trn1(p2.VnS(), p0.VnS(), p0.VnS());
11820 __ Trn2(p3.VnS(), p0.VnS(), p0.VnS());
11821 __ Trn1(p4.VnS(), p1.VnS(), p0.VnS());
11822 __ Trn2(p5.VnS(), p1.VnS(), p0.VnS());
11823 __ Trn1(p6.VnS(), p0.VnS(), p1.VnS());
11824 __ Trn2(p7.VnS(), p0.VnS(), p1.VnS());
11825
11826 __ Trn1(p8.VnD(), p0.VnD(), p0.VnD());
11827 __ Trn2(p9.VnD(), p0.VnD(), p0.VnD());
11828 __ Trn1(p10.VnD(), p1.VnD(), p0.VnD());
11829 __ Trn2(p11.VnD(), p1.VnD(), p0.VnD());
11830 __ Trn1(p12.VnD(), p0.VnD(), p1.VnD());
11831 __ Trn2(p13.VnD(), p0.VnD(), p1.VnD());
11832
11833 END();
11834
11835 if (CAN_RUN()) {
11836 RUN();
11837 int p2_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11838 int p3_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11839 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11840 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11841
11842 int p4_expected[] = {1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11843 int p5_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11844 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11845 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11846
11847 int p6_expected[] = {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0};
11848 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11849 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11850 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11851
11852 int p8_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11853 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11854 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11855 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11856
11857 int p10_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11858 int p11_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11859 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11860 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11861
11862 int p12_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11863 int p13_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11864 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11865 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11866 }
11867}
11868
11869TEST_SVE(sve_zip_p_bh) {
11870 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11871 START();
11872
11873 Initialise(&masm,
11874 p0.VnB(),
11875 0x5a5a5a5a5a5a5a5a,
11876 0x5a5a5a5a5a5a5a5a,
11877 0x5a5a5a5a5a5a5a5a,
11878 0x5a5a5a5a5a5a5a5a);
11879 __ Pfalse(p1.VnB());
11880
11881 __ Zip1(p2.VnB(), p0.VnB(), p0.VnB());
11882 __ Zip2(p3.VnB(), p0.VnB(), p0.VnB());
11883 __ Zip1(p4.VnB(), p1.VnB(), p0.VnB());
11884 __ Zip2(p5.VnB(), p1.VnB(), p0.VnB());
11885 __ Zip1(p6.VnB(), p0.VnB(), p1.VnB());
11886 __ Zip2(p7.VnB(), p0.VnB(), p1.VnB());
11887
11888 __ Zip1(p8.VnH(), p0.VnH(), p0.VnH());
11889 __ Zip2(p9.VnH(), p0.VnH(), p0.VnH());
11890 __ Zip1(p10.VnH(), p1.VnH(), p0.VnH());
11891 __ Zip2(p11.VnH(), p1.VnH(), p0.VnH());
11892 __ Zip1(p12.VnH(), p0.VnH(), p1.VnH());
11893 __ Zip2(p13.VnH(), p0.VnH(), p1.VnH());
11894
11895 END();
11896
11897 if (CAN_RUN()) {
11898 RUN();
11899 int p2_expected[] = {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0};
11900 int p3_expected[] = {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0};
11901 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11902 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11903
11904 int p4_expected[] = {0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11905 int p5_expected[] = {0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11906 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11907 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11908
11909 int p6_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0};
11910 int p7_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0};
11911 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11912 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11913
11914 int p8_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11915 int p9_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11916 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11917 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11918
11919 int p10_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11920 int p11_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11921 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11922 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11923
11924 int p12_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0};
11925 int p13_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0};
11926 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11927 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11928 }
11929}
11930
11931TEST_SVE(sve_zip_p_sd) {
11932 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11933 START();
11934
11935 Initialise(&masm,
11936 p0.VnB(),
11937 0x5a5a5a5a5a5a5a5a,
11938 0x5a5a5a5a5a5a5a5a,
11939 0x5a5a5a5a5a5a5a5a,
11940 0x5a5a5a5a5a5a5a5a);
11941 __ Pfalse(p1.VnB());
11942
11943 __ Zip1(p2.VnS(), p0.VnS(), p0.VnS());
11944 __ Zip2(p3.VnS(), p0.VnS(), p0.VnS());
11945 __ Zip1(p4.VnS(), p1.VnS(), p0.VnS());
11946 __ Zip2(p5.VnS(), p1.VnS(), p0.VnS());
11947 __ Zip1(p6.VnS(), p0.VnS(), p1.VnS());
11948 __ Zip2(p7.VnS(), p0.VnS(), p1.VnS());
11949
11950 __ Zip1(p8.VnD(), p0.VnD(), p0.VnD());
11951 __ Zip2(p9.VnD(), p0.VnD(), p0.VnD());
11952 __ Zip1(p10.VnD(), p1.VnD(), p0.VnD());
11953 __ Zip2(p11.VnD(), p1.VnD(), p0.VnD());
11954 __ Zip1(p12.VnD(), p0.VnD(), p1.VnD());
11955 __ Zip2(p13.VnD(), p0.VnD(), p1.VnD());
11956
11957 END();
11958
11959 if (CAN_RUN()) {
11960 RUN();
11961 int p2_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11962 int p3_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11963 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11964 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11965
11966 int p4_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11967 int p5_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11968 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11969 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11970
11971 int p6_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11972 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11973 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11974 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11975
11976 int p8_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11977 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11978 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11979 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11980
11981 int p10_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11982 int p11_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11983 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11984 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11985
11986 int p12_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11987 int p13_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11988 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11989 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11990 }
11991}
11992
11993TEST_SVE(sve_uzp_p) {
11994 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11995 START();
11996
11997 Initialise(&masm,
11998 p0.VnB(),
11999 0xf0f0ff00ffff0000,
12000 0x4242424242424242,
12001 0x5a5a5a5a5a5a5a5a,
12002 0x0123456789abcdef);
12003 __ Rev(p1.VnB(), p0.VnB());
12004
12005 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
12006 __ Zip2(p3.VnB(), p0.VnB(), p1.VnB());
12007 __ Uzp1(p4.VnB(), p2.VnB(), p3.VnB());
12008 __ Uzp2(p5.VnB(), p2.VnB(), p3.VnB());
12009
12010 __ Zip1(p2.VnH(), p0.VnH(), p1.VnH());
12011 __ Zip2(p3.VnH(), p0.VnH(), p1.VnH());
12012 __ Uzp1(p6.VnH(), p2.VnH(), p3.VnH());
12013 __ Uzp2(p7.VnH(), p2.VnH(), p3.VnH());
12014
12015 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
12016 __ Zip2(p3.VnS(), p0.VnS(), p1.VnS());
12017 __ Uzp1(p8.VnS(), p2.VnS(), p3.VnS());
12018 __ Uzp2(p9.VnS(), p2.VnS(), p3.VnS());
12019
12020 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
12021 __ Zip2(p3.VnD(), p0.VnD(), p1.VnD());
12022 __ Uzp1(p10.VnD(), p2.VnD(), p3.VnD());
12023 __ Uzp2(p11.VnD(), p2.VnD(), p3.VnD());
12024
12025 END();
12026
12027 if (CAN_RUN()) {
12028 RUN();
12029
12030 ASSERT_EQUAL_SVE(p0, p4);
12031 ASSERT_EQUAL_SVE(p1, p5);
12032 ASSERT_EQUAL_SVE(p0, p6);
12033 ASSERT_EQUAL_SVE(p1, p7);
12034 ASSERT_EQUAL_SVE(p0, p8);
12035 ASSERT_EQUAL_SVE(p1, p9);
12036 ASSERT_EQUAL_SVE(p0, p10);
12037 ASSERT_EQUAL_SVE(p1, p11);
12038 }
12039}
12040
12041TEST_SVE(sve_punpk) {
12042 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12043 START();
12044
12045 Initialise(&masm,
12046 p0.VnB(),
12047 0xf0a0f0a0f0a0f0a0,
12048 0xf0a0f0a0f0a0f0a0,
12049 0xa0f0a0f0a0f0a0f0,
12050 0xa0f0a0f0a0f0a0f0);
12051 __ Punpklo(p1.VnH(), p0.VnB());
12052 __ Punpkhi(p2.VnH(), p0.VnB());
12053
12054 END();
12055
12056 if (CAN_RUN()) {
12057 RUN();
12058
12059 int p1_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
12060 int p2_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12061 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
12062 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
12063 }
12064}
12065
TatWai Chong5d872292020-01-02 15:39:51 -080012066typedef void (MacroAssembler::*BrkFn)(const PRegisterWithLaneSize& pd,
12067 const PRegister& pg,
12068 const PRegisterWithLaneSize& pn);
12069
12070typedef void (MacroAssembler::*BrksFn)(const PRegisterWithLaneSize& pd,
12071 const PRegisterZ& pg,
12072 const PRegisterWithLaneSize& pn);
12073
12074template <typename T, size_t N>
12075static void BrkaBrkbHelper(Test* config,
12076 BrkFn macro,
12077 BrksFn macro_set_flags,
12078 const T (&pd_inputs)[N],
12079 const T (&pg_inputs)[N],
12080 const T (&pn_inputs)[N],
12081 const T (&pd_z_expected)[N]) {
12082 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12083 START();
12084
12085 PRegister pg = p10;
12086 PRegister pn = p9;
12087 PRegister pd_z = p0;
12088 PRegister pd_z_s = p1;
12089 PRegister pd_m = p2;
12090 Initialise(&masm, pg.VnB(), pg_inputs);
12091 Initialise(&masm, pn.VnB(), pn_inputs);
12092 Initialise(&masm, pd_m.VnB(), pd_inputs);
12093
12094 // Initialise NZCV to an impossible value, to check that we actually write it.
12095 __ Mov(x10, NZCVFlag);
12096 __ Msr(NZCV, x10);
12097
12098 (masm.*macro)(pd_z.VnB(), pg.Zeroing(), pn.VnB());
12099 (masm.*macro_set_flags)(pd_z_s.VnB(), pg.Zeroing(), pn.VnB());
12100 __ Mrs(x0, NZCV);
12101
12102 (masm.*macro)(pd_m.VnB(), pg.Merging(), pn.VnB());
12103
12104 END();
12105
12106 if (CAN_RUN()) {
12107 RUN();
12108
12109 ASSERT_EQUAL_SVE(pd_z_expected, pd_z.VnB());
12110
12111 // Check that the flags were properly set.
12112 StatusFlags nzcv_expected =
12113 GetPredTestFlags(pd_z_expected,
12114 pg_inputs,
12115 core.GetSVELaneCount(kBRegSize));
12116 ASSERT_EQUAL_64(nzcv_expected, x0);
12117 ASSERT_EQUAL_SVE(pd_z.VnB(), pd_z_s.VnB());
12118
12119 T pd_m_expected[N];
12120 // Set expected `pd` result on merging predication.
12121 for (size_t i = 0; i < N; i++) {
12122 pd_m_expected[i] = pg_inputs[i] ? pd_z_expected[i] : pd_inputs[i];
12123 }
12124 ASSERT_EQUAL_SVE(pd_m_expected, pd_m.VnB());
12125 }
12126}
12127
12128template <typename T>
12129static void BrkaHelper(Test* config,
12130 const T& pd_inputs,
12131 const T& pg_inputs,
12132 const T& pn_inputs,
12133 const T& pd_expected) {
12134 BrkaBrkbHelper(config,
12135 &MacroAssembler::Brka,
12136 &MacroAssembler::Brkas,
12137 pd_inputs,
12138 pg_inputs,
12139 pn_inputs,
12140 pd_expected);
12141}
12142
12143TEST_SVE(sve_brka) {
12144 // clang-format off
12145 // | boundary of 128-bits VL.
12146 // v
12147 int pd[] = {1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12148
12149 // | highest-numbered lane lowest-numbered lane |
12150 // v v
12151 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12152 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12153
12154 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
12155 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12156 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1};
12157
12158 // | first break
12159 // v
12160 int exp_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
12161 // | first break
12162 // v
12163 int exp_1_2[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12164 // | first break
12165 // v
12166 int exp_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12167
12168 BrkaHelper(config, pd, pg_1, pn_1, exp_1_1);
12169 BrkaHelper(config, pd, pg_1, pn_2, exp_1_2);
12170 BrkaHelper(config, pd, pg_1, pn_3, exp_1_3);
12171
12172 // | first break
12173 // v
12174 int exp_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1};
12175 // | first break
12176 // v
12177 int exp_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12178 // | first break
12179 // v
12180 int exp_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
12181 BrkaHelper(config, pd, pg_2, pn_1, exp_2_1);
12182 BrkaHelper(config, pd, pg_2, pn_2, exp_2_2);
12183 BrkaHelper(config, pd, pg_2, pn_3, exp_2_3);
12184
12185 // The all-inactive zeroing predicate sets destination predicate all-false.
12186 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12187 int exp_3_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12188 BrkaHelper(config, pd, pg_3, pn_1, exp_3_x);
12189 BrkaHelper(config, pd, pg_3, pn_2, exp_3_x);
12190 BrkaHelper(config, pd, pg_3, pn_3, exp_3_x);
12191 // clang-format on
12192}
12193
12194template <typename T>
12195static void BrkbHelper(Test* config,
12196 const T& pd_inputs,
12197 const T& pg_inputs,
12198 const T& pn_inputs,
12199 const T& pd_expected) {
12200 BrkaBrkbHelper(config,
12201 &MacroAssembler::Brkb,
12202 &MacroAssembler::Brkbs,
12203 pd_inputs,
12204 pg_inputs,
12205 pn_inputs,
12206 pd_expected);
12207}
12208
12209TEST_SVE(sve_brkb) {
12210 // clang-format off
12211 // | boundary of 128-bits VL.
12212 // v
12213 int pd[] = {1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12214
12215 // | highest-numbered lane lowest-numbered lane |
12216 // v v
12217 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12218 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12219
12220 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
12221 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12222 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1};
12223
12224 // | first break
12225 // v
12226 int exp_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
12227 // | first break
12228 // v
12229 int exp_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12230 // | first break
12231 // v
12232 int exp_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0};
12233
12234 BrkbHelper(config, pd, pg_1, pn_1, exp_1_1);
12235 BrkbHelper(config, pd, pg_1, pn_2, exp_1_2);
12236 BrkbHelper(config, pd, pg_1, pn_3, exp_1_3);
12237
12238 // | first break
12239 // v
12240 int exp_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
12241 // | first break
12242 // v
12243 int exp_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
12244 // | first break
12245 // v
12246 int exp_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12247 BrkbHelper(config, pd, pg_2, pn_1, exp_2_1);
12248 BrkbHelper(config, pd, pg_2, pn_2, exp_2_2);
12249 BrkbHelper(config, pd, pg_2, pn_3, exp_2_3);
12250
12251 // The all-inactive zeroing predicate sets destination predicate all-false.
12252 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12253 int exp_3_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
12254 BrkbHelper(config, pd, pg_3, pn_1, exp_3_x);
12255 BrkbHelper(config, pd, pg_3, pn_2, exp_3_x);
12256 BrkbHelper(config, pd, pg_3, pn_3, exp_3_x);
12257 // clang-format on
12258}
12259
12260typedef void (MacroAssembler::*BrknFn)(const PRegisterWithLaneSize& pd,
12261 const PRegisterZ& pg,
12262 const PRegisterWithLaneSize& pn,
12263 const PRegisterWithLaneSize& pm);
12264
12265typedef void (MacroAssembler::*BrknsFn)(const PRegisterWithLaneSize& pd,
12266 const PRegisterZ& pg,
12267 const PRegisterWithLaneSize& pn,
12268 const PRegisterWithLaneSize& pm);
12269
12270enum BrknDstPredicateState { kAllFalse, kUnchanged };
12271
12272template <typename T, size_t N>
12273static void BrknHelper(Test* config,
12274 BrknFn macro,
12275 BrknsFn macro_set_flags,
12276 const T (&pd_inputs)[N],
12277 const T (&pg_inputs)[N],
12278 const T (&pn_inputs)[N],
12279 const T (&pm_inputs)[N],
12280 BrknDstPredicateState expected_pd_state) {
12281 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12282 START();
12283
12284 PRegister pg = p10;
12285 PRegister pn = p9;
12286 PRegister pm = p8;
12287 PRegister pdm = p0;
12288 PRegister pd = p1;
12289 PRegister pd_s = p2;
12290 Initialise(&masm, pg.VnB(), pg_inputs);
12291 Initialise(&masm, pn.VnB(), pn_inputs);
12292 Initialise(&masm, pm.VnB(), pm_inputs);
12293 Initialise(&masm, pdm.VnB(), pm_inputs);
12294 Initialise(&masm, pd.VnB(), pd_inputs);
12295 Initialise(&masm, pd_s.VnB(), pd_inputs);
12296
12297 // Initialise NZCV to an impossible value, to check that we actually write it.
12298 __ Mov(x10, NZCVFlag);
12299 __ Msr(NZCV, x10);
12300
12301 (masm.*macro)(pdm.VnB(), pg.Zeroing(), pn.VnB(), pdm.VnB());
12302 // !pd.Aliases(pm).
12303 (masm.*macro)(pd.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
12304 (masm.*macro_set_flags)(pd_s.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
12305 __ Mrs(x0, NZCV);
12306
12307 END();
12308
12309 if (CAN_RUN()) {
12310 RUN();
12311
12312 T all_false[N] = {0};
12313 if (expected_pd_state == kAllFalse) {
12314 ASSERT_EQUAL_SVE(all_false, pd.VnB());
12315 } else {
12316 ASSERT_EQUAL_SVE(pm_inputs, pd.VnB());
12317 }
12318 ASSERT_EQUAL_SVE(pm_inputs, pm.VnB());
12319
12320 // Check that the flags were properly set.
12321 StatusFlags nzcv_expected =
12322 GetPredTestFlags((expected_pd_state == kAllFalse) ? all_false
12323 : pm_inputs,
12324 pg_inputs,
12325 core.GetSVELaneCount(kBRegSize));
12326 ASSERT_EQUAL_64(nzcv_expected, x0);
12327 ASSERT_EQUAL_SVE(pd.VnB(), pdm.VnB());
12328 ASSERT_EQUAL_SVE(pd.VnB(), pd_s.VnB());
12329 }
12330}
12331
12332TEST_SVE(sve_brkn) {
12333 // clang-format off
12334 int pd[] = {1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
12335 int pm[] = {0, 1, 1, 1, 1, 0, 0, 1, 0, 1};
12336
12337 int pg_1[] = {1, 1, 0, 0, 1, 0, 1, 1, 0, 0};
12338 int pg_2[] = {0, 0, 0, 1, 1, 1, 0, 0, 1, 1};
12339 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // all-false
12340
12341 int pn_1[] = {1, 0, 0, 0, 0, 1, 1, 0, 0, 0};
12342 int pn_2[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
12343 int pn_3[] = {0, 0, 0, 0, 1, 1, 0, 0, 1, 1};
12344
12345 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_1, pm, kUnchanged);
12346 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_2, pm, kAllFalse);
12347 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_3, pm, kAllFalse);
12348
12349 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_1, pm, kAllFalse);
12350 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_2, pm, kUnchanged);
12351 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_3, pm, kAllFalse);
12352
12353 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_1, pm, kAllFalse);
12354 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_2, pm, kAllFalse);
12355 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_3, pm, kAllFalse);
12356 // clang-format on
12357}
12358
Martyn Capewell15f89012020-01-09 11:18:30 +000012359TEST_SVE(sve_trn) {
12360 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12361 START();
12362
12363 uint64_t in0[] = {0xffeeddccbbaa9988, 0x7766554433221100};
12364 uint64_t in1[] = {0xaa55aa55aa55aa55, 0x55aa55aa55aa55aa};
12365 InsrHelper(&masm, z0.VnD(), in0);
12366 InsrHelper(&masm, z1.VnD(), in1);
12367
12368 __ Trn1(z2.VnB(), z0.VnB(), z1.VnB());
12369 __ Trn2(z3.VnB(), z0.VnB(), z1.VnB());
12370 __ Trn1(z4.VnH(), z0.VnH(), z1.VnH());
12371 __ Trn2(z5.VnH(), z0.VnH(), z1.VnH());
12372 __ Trn1(z6.VnS(), z0.VnS(), z1.VnS());
12373 __ Trn2(z7.VnS(), z0.VnS(), z1.VnS());
12374 __ Trn1(z8.VnD(), z0.VnD(), z1.VnD());
12375 __ Trn2(z9.VnD(), z0.VnD(), z1.VnD());
12376
12377 END();
12378
12379 if (CAN_RUN()) {
12380 RUN();
12381 uint64_t expected_z2[] = {0x55ee55cc55aa5588, 0xaa66aa44aa22aa00};
12382 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12383 uint64_t expected_z3[] = {0xaaffaaddaabbaa99, 0x5577555555335511};
12384 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12385 uint64_t expected_z4[] = {0xaa55ddccaa559988, 0x55aa554455aa1100};
12386 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12387 uint64_t expected_z5[] = {0xaa55ffeeaa55bbaa, 0x55aa776655aa3322};
12388 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12389 uint64_t expected_z6[] = {0xaa55aa55bbaa9988, 0x55aa55aa33221100};
12390 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12391 uint64_t expected_z7[] = {0xaa55aa55ffeeddcc, 0x55aa55aa77665544};
12392 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12393 uint64_t expected_z8[] = {0x55aa55aa55aa55aa, 0x7766554433221100};
12394 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12395 uint64_t expected_z9[] = {0xaa55aa55aa55aa55, 0xffeeddccbbaa9988};
12396 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12397 }
12398}
12399
12400TEST_SVE(sve_zip_uzp) {
12401 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12402 START();
12403
12404 __ Dup(z0.VnD(), 0xffeeddccbbaa9988);
12405 __ Insr(z0.VnD(), 0x7766554433221100);
12406 __ Dup(z1.VnD(), 0xaa55aa55aa55aa55);
12407 __ Insr(z1.VnD(), 0x55aa55aa55aa55aa);
12408
12409 __ Zip1(z2.VnB(), z0.VnB(), z1.VnB());
12410 __ Zip2(z3.VnB(), z0.VnB(), z1.VnB());
12411 __ Zip1(z4.VnH(), z0.VnH(), z1.VnH());
12412 __ Zip2(z5.VnH(), z0.VnH(), z1.VnH());
12413 __ Zip1(z6.VnS(), z0.VnS(), z1.VnS());
12414 __ Zip2(z7.VnS(), z0.VnS(), z1.VnS());
12415 __ Zip1(z8.VnD(), z0.VnD(), z1.VnD());
12416 __ Zip2(z9.VnD(), z0.VnD(), z1.VnD());
12417
12418 __ Uzp1(z10.VnB(), z2.VnB(), z3.VnB());
12419 __ Uzp2(z11.VnB(), z2.VnB(), z3.VnB());
12420 __ Uzp1(z12.VnH(), z4.VnH(), z5.VnH());
12421 __ Uzp2(z13.VnH(), z4.VnH(), z5.VnH());
12422 __ Uzp1(z14.VnS(), z6.VnS(), z7.VnS());
12423 __ Uzp2(z15.VnS(), z6.VnS(), z7.VnS());
12424 __ Uzp1(z16.VnD(), z8.VnD(), z9.VnD());
12425 __ Uzp2(z17.VnD(), z8.VnD(), z9.VnD());
12426
12427 END();
12428
12429 if (CAN_RUN()) {
12430 RUN();
12431 uint64_t expected_z2[] = {0x5577aa665555aa44, 0x5533aa225511aa00};
12432 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12433 uint64_t expected_z3[] = {0xaaff55eeaadd55cc, 0xaabb55aaaa995588};
12434 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12435 uint64_t expected_z4[] = {0x55aa776655aa5544, 0x55aa332255aa1100};
12436 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12437 uint64_t expected_z5[] = {0xaa55ffeeaa55ddcc, 0xaa55bbaaaa559988};
12438 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12439 uint64_t expected_z6[] = {0x55aa55aa77665544, 0x55aa55aa33221100};
12440 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12441 uint64_t expected_z7[] = {0xaa55aa55ffeeddcc, 0xaa55aa55bbaa9988};
12442 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12443 uint64_t expected_z8[] = {0x55aa55aa55aa55aa, 0x7766554433221100};
12444 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12445 uint64_t expected_z9[] = {0xaa55aa55aa55aa55, 0xffeeddccbbaa9988};
12446 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12447
12448 // Check uzp is the opposite of zip.
12449 ASSERT_EQUAL_SVE(z0.VnD(), z10.VnD());
12450 ASSERT_EQUAL_SVE(z1.VnD(), z11.VnD());
12451 ASSERT_EQUAL_SVE(z0.VnD(), z12.VnD());
12452 ASSERT_EQUAL_SVE(z1.VnD(), z13.VnD());
12453 ASSERT_EQUAL_SVE(z0.VnD(), z14.VnD());
12454 ASSERT_EQUAL_SVE(z1.VnD(), z15.VnD());
12455 ASSERT_EQUAL_SVE(z0.VnD(), z16.VnD());
12456 ASSERT_EQUAL_SVE(z1.VnD(), z17.VnD());
12457 }
12458}
Martyn Capewell50e9f552020-01-07 17:45:03 +000012459
Martyn Capewell0b1afa82020-03-04 11:31:42 +000012460TEST_SVE(sve_fcadd) {
12461 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12462 START();
12463
12464 __ Dup(z30.VnS(), 0);
12465
12466 __ Ptrue(p0.VnB());
12467 __ Pfalse(p1.VnB());
12468 __ Zip1(p2.VnH(), p0.VnH(), p1.VnH()); // Real elements.
12469 __ Zip1(p3.VnH(), p1.VnH(), p0.VnH()); // Imaginary elements.
12470
12471 __ Fdup(z0.VnH(), 10.0); // 10i + 10
12472 __ Fdup(z1.VnH(), 5.0); // 5i + 5
12473 __ Index(z7.VnH(), 1, 1);
12474 __ Scvtf(z7.VnH(), p0.Merging(), z7.VnH()); // Ai + B
12475
12476 __ Sel(z2.VnH(), p3, z1.VnH(), z30.VnH()); // 5i + 0
12477 __ Sel(z3.VnH(), p2, z1.VnH(), z30.VnH()); // 0i + 5
12478 __ Sel(z7.VnH(), p3, z7.VnH(), z0.VnH()); // Ai + 10
12479 __ Ext(z8.VnB(), z7.VnB(), z7.VnB(), 2);
12480 __ Sel(z8.VnH(), p2, z8.VnH(), z30.VnH()); // 0i + A
12481
12482 // (10i + 10) + rotate(5i + 0, 90)
12483 // = (10i + 10) + (0i - 5)
12484 // = 10i + 5
12485 __ Fcadd(z4.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 90);
12486
12487 // (10i + 5) + rotate(0i + 5, 270)
12488 // = (10i + 5) + (-5i + 0)
12489 // = 5i + 5
12490 __ Fcadd(z4.VnH(), p0.Merging(), z4.VnH(), z3.VnH(), 270);
12491
12492 // The same calculation, but selecting real/imaginary using predication.
12493 __ Mov(z5, z0);
12494 __ Fcadd(z5.VnH(), p2.Merging(), z5.VnH(), z1.VnH(), 90);
12495 __ Fcadd(z5.VnH(), p3.Merging(), z5.VnH(), z1.VnH(), 270);
12496
12497 // Reference calculation: (10i + 10) - (5i + 5)
12498 __ Fsub(z6.VnH(), z0.VnH(), z1.VnH());
12499
12500 // Calculation using varying imaginary values.
12501 // (Ai + 10) + rotate(5i + 0, 90)
12502 // = (Ai + 10) + (0i - 5)
12503 // = Ai + 5
12504 __ Fcadd(z7.VnH(), p0.Merging(), z7.VnH(), z2.VnH(), 90);
12505
12506 // (Ai + 5) + rotate(0i + A, 270)
12507 // = (Ai + 5) + (-Ai + 0)
12508 // = 5
12509 __ Fcadd(z7.VnH(), p0.Merging(), z7.VnH(), z8.VnH(), 270);
12510
12511 // Repeated, but for wider elements.
12512 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
12513 __ Zip1(p3.VnS(), p1.VnS(), p0.VnS());
12514 __ Fdup(z0.VnS(), 42.0);
12515 __ Fdup(z1.VnS(), 21.0);
12516 __ Index(z11.VnS(), 1, 1);
12517 __ Scvtf(z11.VnS(), p0.Merging(), z11.VnS());
12518 __ Sel(z2.VnS(), p3, z1.VnS(), z30.VnS());
12519 __ Sel(z29.VnS(), p2, z1.VnS(), z30.VnS());
12520 __ Sel(z11.VnS(), p3, z11.VnS(), z0.VnS());
12521 __ Ext(z12.VnB(), z11.VnB(), z11.VnB(), 4);
12522 __ Sel(z12.VnS(), p2, z12.VnS(), z30.VnS());
12523 __ Fcadd(z8.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 90);
12524 __ Fcadd(z8.VnS(), p0.Merging(), z8.VnS(), z29.VnS(), 270);
12525 __ Mov(z9, z0);
12526 __ Fcadd(z9.VnS(), p2.Merging(), z9.VnS(), z1.VnS(), 90);
12527 __ Fcadd(z9.VnS(), p3.Merging(), z9.VnS(), z1.VnS(), 270);
12528 __ Fsub(z10.VnS(), z0.VnS(), z1.VnS());
12529 __ Fcadd(z11.VnS(), p0.Merging(), z11.VnS(), z2.VnS(), 90);
12530 __ Fcadd(z11.VnS(), p0.Merging(), z11.VnS(), z12.VnS(), 270);
12531
12532 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
12533 __ Zip1(p3.VnD(), p1.VnD(), p0.VnD());
12534 __ Fdup(z0.VnD(), -42.0);
12535 __ Fdup(z1.VnD(), -21.0);
12536 __ Index(z15.VnD(), 1, 1);
12537 __ Scvtf(z15.VnD(), p0.Merging(), z15.VnD());
12538 __ Sel(z2.VnD(), p3, z1.VnD(), z30.VnD());
12539 __ Sel(z28.VnD(), p2, z1.VnD(), z30.VnD());
12540 __ Sel(z15.VnD(), p3, z15.VnD(), z0.VnD());
12541 __ Ext(z16.VnB(), z15.VnB(), z15.VnB(), 8);
12542 __ Sel(z16.VnD(), p2, z16.VnD(), z30.VnD());
12543 __ Fcadd(z12.VnD(), p0.Merging(), z0.VnD(), z2.VnD(), 90);
12544 __ Fcadd(z12.VnD(), p0.Merging(), z12.VnD(), z28.VnD(), 270);
12545 __ Mov(z13, z0);
12546 __ Fcadd(z13.VnD(), p2.Merging(), z13.VnD(), z1.VnD(), 90);
12547 __ Fcadd(z13.VnD(), p3.Merging(), z13.VnD(), z1.VnD(), 270);
12548 __ Fsub(z14.VnD(), z0.VnD(), z1.VnD());
12549 __ Fcadd(z15.VnD(), p0.Merging(), z15.VnD(), z2.VnD(), 90);
12550 __ Fcadd(z15.VnD(), p0.Merging(), z15.VnD(), z16.VnD(), 270);
12551 END();
12552
12553 if (CAN_RUN()) {
12554 RUN();
12555 ASSERT_EQUAL_SVE(z6.VnH(), z4.VnH());
12556 ASSERT_EQUAL_SVE(z6.VnH(), z5.VnH());
12557 ASSERT_EQUAL_SVE(z3.VnH(), z7.VnH());
12558 ASSERT_EQUAL_SVE(z10.VnS(), z8.VnS());
12559 ASSERT_EQUAL_SVE(z10.VnS(), z9.VnS());
12560 ASSERT_EQUAL_SVE(z29.VnS(), z11.VnS());
12561 ASSERT_EQUAL_SVE(z14.VnD(), z12.VnD());
12562 ASSERT_EQUAL_SVE(z14.VnD(), z13.VnD());
12563 ASSERT_EQUAL_SVE(z28.VnS(), z15.VnS());
12564 }
12565}
12566
Martyn Capewelle4886e52020-03-30 09:28:52 +010012567TEST_SVE(sve_fcmla_index) {
12568 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12569 START();
12570
12571 __ Ptrue(p0.VnB());
12572
12573 __ Fdup(z0.VnH(), 10.0);
12574 __ Fdup(z2.VnH(), 2.0);
12575 __ Zip1(z0.VnH(), z0.VnH(), z2.VnH());
12576
12577 // Duplicate complex numbers across z2 segments. First segment has 1i+0,
12578 // second has 3i+2, etc.
12579 __ Index(z1.VnH(), 0, 1);
12580 __ Scvtf(z1.VnH(), p0.Merging(), z1.VnH());
12581 __ Zip1(z2.VnS(), z1.VnS(), z1.VnS());
12582 __ Zip1(z2.VnS(), z2.VnS(), z2.VnS());
12583
12584 // Derive a vector from z2 where only the third element in each segment
12585 // contains a complex number, with other elements zero.
12586 __ Index(z3.VnS(), 0, 1);
12587 __ And(z3.VnS(), z3.VnS(), 3);
12588 __ Cmpeq(p2.VnS(), p0.Zeroing(), z3.VnS(), 2);
12589 __ Dup(z3.VnB(), 0);
12590 __ Sel(z3.VnS(), p2, z2.VnS(), z3.VnS());
12591
12592 // Use indexed complex multiply on this vector, indexing the third element.
12593 __ Dup(z4.VnH(), 0);
12594 __ Fcmla(z4.VnH(), z0.VnH(), z3.VnH(), 2, 0);
12595 __ Fcmla(z4.VnH(), z0.VnH(), z3.VnH(), 2, 90);
12596
12597 // Rotate the indexed complex number and repeat, negated, and with a different
12598 // index.
12599 __ Ext(z3.VnH(), z3.VnH(), z3.VnH(), 4);
12600 __ Dup(z5.VnH(), 0);
12601 __ Fcmla(z5.VnH(), z0.VnH(), z3.VnH(), 1, 180);
12602 __ Fcmla(z5.VnH(), z0.VnH(), z3.VnH(), 1, 270);
12603 __ Fneg(z5.VnH(), p0.Merging(), z5.VnH());
12604
12605 // Create a reference result from a vector complex multiply.
12606 __ Dup(z6.VnH(), 0);
12607 __ Fcmla(z6.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 0);
12608 __ Fcmla(z6.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 90);
12609
12610 // Repeated, but for wider elements.
12611 __ Fdup(z0.VnS(), 42.0);
12612 __ Fdup(z2.VnS(), 24.0);
12613 __ Zip1(z0.VnS(), z0.VnS(), z2.VnS());
12614 __ Index(z1.VnS(), -42, 13);
12615 __ Scvtf(z1.VnS(), p0.Merging(), z1.VnS());
12616 __ Zip1(z2.VnD(), z1.VnD(), z1.VnD());
12617 __ Zip1(z2.VnD(), z2.VnD(), z2.VnD());
12618 __ Index(z3.VnD(), 0, 1);
12619 __ And(z3.VnD(), z3.VnD(), 1);
12620 __ Cmpeq(p2.VnD(), p0.Zeroing(), z3.VnD(), 1);
12621 __ Dup(z3.VnB(), 0);
12622 __ Sel(z3.VnD(), p2, z2.VnD(), z3.VnD());
12623 __ Dup(z7.VnS(), 0);
12624 __ Fcmla(z7.VnS(), z0.VnS(), z3.VnS(), 1, 0);
12625 __ Fcmla(z7.VnS(), z0.VnS(), z3.VnS(), 1, 90);
12626 __ Ext(z3.VnB(), z3.VnB(), z3.VnB(), 8);
12627 __ Dup(z8.VnS(), 0);
12628 __ Fcmla(z8.VnS(), z0.VnS(), z3.VnS(), 0, 180);
12629 __ Fcmla(z8.VnS(), z0.VnS(), z3.VnS(), 0, 270);
12630 __ Fneg(z8.VnS(), p0.Merging(), z8.VnS());
12631 __ Dup(z9.VnS(), 0);
12632 __ Fcmla(z9.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 0);
12633 __ Fcmla(z9.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 90);
12634 END();
12635
12636 if (CAN_RUN()) {
12637 RUN();
12638 ASSERT_EQUAL_SVE(z6.VnH(), z4.VnH());
12639 ASSERT_EQUAL_SVE(z6.VnH(), z5.VnH());
12640 ASSERT_EQUAL_SVE(z9.VnS(), z7.VnS());
12641 ASSERT_EQUAL_SVE(z9.VnS(), z8.VnS());
12642 }
12643}
12644
Martyn Capewell75f1c432020-03-30 09:23:27 +010012645TEST_SVE(sve_fcmla) {
12646 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12647 START();
12648
12649 __ Ptrue(p0.VnB());
12650 __ Pfalse(p1.VnB());
12651 __ Zip1(p2.VnH(), p0.VnH(), p1.VnH()); // Real elements.
12652 __ Zip1(p3.VnH(), p1.VnH(), p0.VnH()); // Imaginary elements.
12653
12654 __ Fdup(z0.VnH(), 10.0);
12655 __ Fdup(z2.VnH(), 2.0);
12656
12657 // Create pairs of complex numbers, Ai + A. A is chosen to be non-zero, as
12658 // the later fneg will result in a failed comparison otherwise.
12659 __ Index(z1.VnH(), -4, 3);
12660 __ Zip1(z1.VnH(), z1.VnH(), z1.VnH());
12661 __ Zip1(z1.VnH(), z1.VnH(), z1.VnH());
12662 __ Scvtf(z1.VnH(), p0.Merging(), z1.VnH());
12663
12664 __ Sel(z3.VnH(), p2, z0.VnH(), z1.VnH()); // Ai + 10
12665 __ Sel(z4.VnH(), p2, z1.VnH(), z2.VnH()); // 2i + A
12666
12667 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS()); // Even complex numbers.
12668 __ Zip1(p3.VnS(), p1.VnS(), p0.VnS()); // Odd complex numbers.
12669
12670 // Calculate (Ai + 10) * (2i + A) = (20 + A^2)i + 8A, using predication to
12671 // select only the complex numbers in odd-numbered element pairs. This leaves
12672 // results in elements 2/3, 6/7, etc. with zero in elements 0/1, 4/5, etc.
12673 // ... 7 6 5 4 3 2 1 0 <-- element
12674 // ... | 20+A^2 | 8A | 0 | 0 | 20+A^2 | 8A | 0 | 0 | <-- value
12675 __ Dup(z5.VnH(), 0);
12676 __ Fcmla(z5.VnH(), p3.Merging(), z4.VnH(), z3.VnH(), 0);
12677 __ Fcmla(z5.VnH(), p3.Merging(), z4.VnH(), z3.VnH(), 90);
12678
12679 // Move the odd results to the even result positions.
12680 // ... 7 6 5 4 3 2 1 0 <-- element
12681 // ... | 0 | 0 | 20+A^2 | 8A | 0 | 0 | 20+A^2 | 8A | <-- value
12682 __ Ext(z5.VnB(), z5.VnB(), z5.VnB(), 4);
12683
12684 // Calculate -(Ai + 10) * (2i + A) = -(20 + A^2)i - 8A for the even complex
12685 // numbers.
12686 // ... 7 6 5 4 3 2 1 0 <-- element
12687 // ... | 0 | 0 | -20-A^2 | -8A | 0 | 0 | -20-A^2 | -8A | <-- value
12688 __ Dup(z6.VnH(), 0);
12689 __ Fcmla(z6.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), 180);
12690 __ Fcmla(z6.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), 270);
12691
12692 // Negate the even results. The results in z6 should now match the results
12693 // computed earlier in z5.
12694 // ... 7 6 5 4 3 2 1 0 <-- element
12695 // ... | 0 | 0 | 20+A^2 | 8A | 0 | 0 | 20+A^2 | 8A | <-- value
12696 __ Fneg(z6.VnH(), p2.Merging(), z6.VnH());
12697
12698
12699 // Similarly, but for wider elements.
12700 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
12701 __ Zip1(p3.VnS(), p1.VnS(), p0.VnS());
12702 __ Index(z1.VnS(), -4, 3);
12703 __ Zip1(z1.VnS(), z1.VnS(), z1.VnS());
12704 __ Zip1(z1.VnS(), z1.VnS(), z1.VnS());
12705 __ Scvtf(z1.VnS(), p0.Merging(), z1.VnS());
12706 __ Fdup(z0.VnS(), 20.0);
12707 __ Fdup(z2.VnS(), 21.0);
12708 __ Sel(z3.VnS(), p2, z0.VnS(), z1.VnS());
12709 __ Sel(z4.VnS(), p2, z1.VnS(), z2.VnS());
12710 __ Punpklo(p2.VnH(), p2.VnB());
12711 __ Punpklo(p3.VnH(), p3.VnB());
12712 __ Dup(z7.VnS(), 0);
12713 __ Fcmla(z7.VnS(), p3.Merging(), z4.VnS(), z3.VnS(), 0);
12714 __ Fcmla(z7.VnS(), p3.Merging(), z4.VnS(), z3.VnS(), 90);
12715 __ Ext(z7.VnB(), z7.VnB(), z7.VnB(), 8);
12716 __ Dup(z8.VnS(), 0);
12717 __ Fcmla(z8.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 180);
12718 __ Fcmla(z8.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 270);
12719 __ Fneg(z8.VnS(), p2.Merging(), z8.VnS());
12720
12721 // Double precision computed for even lanes only.
12722 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
12723 __ Index(z1.VnD(), -4, 3);
12724 __ Zip1(z1.VnD(), z1.VnD(), z1.VnD());
12725 __ Zip1(z1.VnD(), z1.VnD(), z1.VnD());
12726 __ Scvtf(z1.VnD(), p0.Merging(), z1.VnD());
12727 __ Fdup(z0.VnD(), 20.0);
12728 __ Fdup(z2.VnD(), 21.0);
12729 __ Sel(z3.VnD(), p2, z0.VnD(), z1.VnD());
12730 __ Sel(z4.VnD(), p2, z1.VnD(), z2.VnD());
12731 __ Punpklo(p2.VnH(), p2.VnB());
12732 __ Dup(z9.VnD(), 0);
12733 __ Fcmla(z9.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 0);
12734 __ Fcmla(z9.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 90);
12735 __ Dup(z10.VnD(), 0);
12736 __ Fcmla(z10.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 180);
12737 __ Fcmla(z10.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 270);
12738 __ Fneg(z10.VnD(), p2.Merging(), z10.VnD());
12739 END();
12740
12741 if (CAN_RUN()) {
12742 RUN();
12743 ASSERT_EQUAL_SVE(z5.VnH(), z6.VnH());
12744 ASSERT_EQUAL_SVE(z7.VnS(), z8.VnS());
12745 ASSERT_EQUAL_SVE(z9.VnD(), z10.VnD());
12746 }
12747}
12748
Martyn Capewell50e9f552020-01-07 17:45:03 +000012749TEST_SVE(sve_fpmul_index) {
12750 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12751 START();
12752
12753 uint64_t in0[] = {0x3ff000003f803c00, 0xbff00000bf80bc00};
12754 uint64_t in1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76};
12755
12756 InsrHelper(&masm, z0.VnD(), in0);
12757 InsrHelper(&masm, z1.VnD(), in1);
12758
12759 __ Fmul(z2.VnH(), z1.VnH(), z0.VnH(), 0);
12760 __ Fmul(z3.VnH(), z1.VnH(), z0.VnH(), 1);
12761 __ Fmul(z4.VnH(), z1.VnH(), z0.VnH(), 4);
12762 __ Fmul(z5.VnH(), z1.VnH(), z0.VnH(), 7);
12763
12764 __ Fmul(z6.VnS(), z1.VnS(), z0.VnS(), 0);
12765 __ Fmul(z7.VnS(), z1.VnS(), z0.VnS(), 1);
12766 __ Fmul(z8.VnS(), z1.VnS(), z0.VnS(), 2);
12767 __ Fmul(z9.VnS(), z1.VnS(), z0.VnS(), 3);
12768
12769 __ Fmul(z10.VnD(), z1.VnD(), z0.VnD(), 0);
12770 __ Fmul(z11.VnD(), z1.VnD(), z0.VnD(), 1);
12771
12772 // Compute the results using other instructions.
12773 __ Dup(z12.VnH(), z0.VnH(), 0);
12774 __ Fmul(z12.VnH(), z1.VnH(), z12.VnH());
12775 __ Dup(z13.VnH(), z0.VnH(), 1);
12776 __ Fmul(z13.VnH(), z1.VnH(), z13.VnH());
12777 __ Dup(z14.VnH(), z0.VnH(), 4);
12778 __ Fmul(z14.VnH(), z1.VnH(), z14.VnH());
12779 __ Dup(z15.VnH(), z0.VnH(), 7);
12780 __ Fmul(z15.VnH(), z1.VnH(), z15.VnH());
12781
12782 __ Dup(z16.VnS(), z0.VnS(), 0);
12783 __ Fmul(z16.VnS(), z1.VnS(), z16.VnS());
12784 __ Dup(z17.VnS(), z0.VnS(), 1);
12785 __ Fmul(z17.VnS(), z1.VnS(), z17.VnS());
12786 __ Dup(z18.VnS(), z0.VnS(), 2);
12787 __ Fmul(z18.VnS(), z1.VnS(), z18.VnS());
12788 __ Dup(z19.VnS(), z0.VnS(), 3);
12789 __ Fmul(z19.VnS(), z1.VnS(), z19.VnS());
12790
12791 __ Dup(z20.VnD(), z0.VnD(), 0);
12792 __ Fmul(z20.VnD(), z1.VnD(), z20.VnD());
12793 __ Dup(z21.VnD(), z0.VnD(), 1);
12794 __ Fmul(z21.VnD(), z1.VnD(), z21.VnD());
12795
12796 END();
12797
12798 if (CAN_RUN()) {
12799 RUN();
12800 ASSERT_EQUAL_SVE(z12.VnH(), z2.VnH());
12801 ASSERT_EQUAL_SVE(z13.VnH(), z3.VnH());
12802 ASSERT_EQUAL_SVE(z14.VnH(), z4.VnH());
12803 ASSERT_EQUAL_SVE(z15.VnH(), z5.VnH());
12804 ASSERT_EQUAL_SVE(z16.VnS(), z6.VnS());
12805 ASSERT_EQUAL_SVE(z17.VnS(), z7.VnS());
12806 ASSERT_EQUAL_SVE(z18.VnS(), z8.VnS());
12807 ASSERT_EQUAL_SVE(z19.VnS(), z9.VnS());
12808 ASSERT_EQUAL_SVE(z20.VnD(), z10.VnD());
12809 ASSERT_EQUAL_SVE(z21.VnD(), z11.VnD());
12810 }
12811}
12812
Martyn Capewell5fb2ad62020-01-10 14:08:27 +000012813TEST_SVE(sve_ftmad) {
12814 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12815 START();
12816
12817 uint64_t in_h0[] = {0x7c027e01fc02fe01,
12818 0x3c003c00bc00bc00,
12819 0x3c003c00bc00bc00};
12820 uint64_t in_h1[] = {0xfe01fc027e017e01,
12821 0x3c00bc003c00bc00,
12822 0x3c00bc003c00bc00};
12823 uint64_t in_s0[] = {0x7f800002ffc00001,
12824 0x3f8000003f800000,
12825 0xbf800000bf800000};
12826 uint64_t in_s1[] = {0xffc00001ffc00001,
12827 0x3f800000bf800000,
12828 0x3f800000bf800000};
12829 uint64_t in_d0[] = {0x7ff8000000000001,
12830 0x3ff0000000000000,
12831 0xbff0000000000000};
12832 uint64_t in_d1[] = {0xfff0000000000002,
12833 0xbff0000000000000,
12834 0x3ff0000000000000};
12835 InsrHelper(&masm, z0.VnD(), in_h0);
12836 InsrHelper(&masm, z1.VnD(), in_h1);
12837 InsrHelper(&masm, z2.VnD(), in_s0);
12838 InsrHelper(&masm, z3.VnD(), in_s1);
12839 InsrHelper(&masm, z4.VnD(), in_d0);
12840 InsrHelper(&masm, z5.VnD(), in_d1);
12841
12842 __ Mov(z6, z0);
12843 __ Ftmad(z6.VnH(), z6.VnH(), z1.VnH(), 0);
12844 __ Mov(z7, z0);
12845 __ Ftmad(z7.VnH(), z7.VnH(), z1.VnH(), 1);
12846 __ Mov(z8, z0);
12847 __ Ftmad(z8.VnH(), z8.VnH(), z1.VnH(), 2);
12848
12849 __ Mov(z9, z2);
12850 __ Ftmad(z9.VnS(), z9.VnS(), z3.VnS(), 0);
12851 __ Mov(z10, z2);
12852 __ Ftmad(z10.VnS(), z10.VnS(), z3.VnS(), 3);
12853 __ Mov(z11, z2);
12854 __ Ftmad(z11.VnS(), z11.VnS(), z3.VnS(), 4);
12855
12856 __ Mov(z12, z4);
12857 __ Ftmad(z12.VnD(), z12.VnD(), z5.VnD(), 0);
12858 __ Mov(z13, z4);
12859 __ Ftmad(z13.VnD(), z13.VnD(), z5.VnD(), 5);
12860 __ Mov(z14, z4);
12861 __ Ftmad(z14.VnD(), z14.VnD(), z5.VnD(), 7);
12862
12863 END();
12864
12865 if (CAN_RUN()) {
12866 RUN();
12867 uint64_t expected_z6[] = {0x7e027e02fe02fe01,
12868 0x4000400000000000,
12869 0x4000400000000000};
12870 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12871 uint64_t expected_z7[] = {0x7e027e02fe02fe01,
12872 0x3aab3800bcabbe00,
12873 0x3aab3800bcabbe00};
12874 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12875 uint64_t expected_z8[] = {0x7e027e02fe02fe01,
12876 0x3c083c2abbefbbac,
12877 0x3c083c2abbefbbac};
12878 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12879 uint64_t expected_z9[] = {0x7fc00002ffc00001,
12880 0x4000000040000000,
12881 0x0000000000000000};
12882 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12883 uint64_t expected_z10[] = {0x7fc00002ffc00001,
12884 0x3f7ff2ff3f7fa4fc,
12885 0xbf800680bf802d82};
12886 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
12887 uint64_t expected_z11[] = {0x7fc00002ffc00001,
12888 0x3f8000173f8000cd,
12889 0xbf7fffd2bf7ffe66};
12890 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
12891 uint64_t expected_z12[] = {0x7ff8000000000002,
12892 0x4000000000000000,
12893 0x0000000000000000};
12894 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
12895 uint64_t expected_z13[] = {0x7ff8000000000002,
12896 0x3fefffff6c0d846c,
12897 0xbff0000006b978ae};
12898 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
12899 uint64_t expected_z14[] = {0x7ff8000000000002,
12900 0x3feffffffffe708a,
12901 0xbff0000000000000};
12902 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
12903 }
12904}
12905
Martyn Capewell37f28182020-01-14 10:15:10 +000012906static void BasicFPArithHelper(MacroAssembler* masm,
12907 int lane_size_in_bits,
12908 const uint64_t (&inputs)[2],
12909 const uint64_t (&inputs_fmulx)[2],
12910 const uint64_t (&inputs_nans)[2]) {
12911 int ls = lane_size_in_bits;
12912
12913 for (int i = 0; i < 16; i++) {
12914 InsrHelper(masm, z0.VnD(), inputs);
12915 }
12916 ZRegister rvrs = z1.WithLaneSize(ls);
12917 masm->Rev(rvrs, z0.WithLaneSize(ls));
12918
12919 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
12920 Initialise(masm, p2.VnB(), pred);
12921 PRegisterM p2m = p2.Merging();
12922
12923 masm->Mov(z2, z0);
12924 masm->Fadd(z2.WithLaneSize(ls),
12925 p2m,
12926 z2.WithLaneSize(ls),
12927 rvrs,
12928 FastNaNPropagation);
12929 masm->Mov(z3, z0);
12930 masm->Fsub(z3.WithLaneSize(ls), p2m, z3.WithLaneSize(ls), rvrs);
12931 masm->Mov(z4, z0);
12932 masm->Fsub(z4.WithLaneSize(ls), p2m, rvrs, z4.WithLaneSize(ls));
12933 masm->Mov(z5, z0);
12934 masm->Fabd(z5.WithLaneSize(ls),
12935 p2m,
12936 z5.WithLaneSize(ls),
12937 rvrs,
12938 FastNaNPropagation);
12939 masm->Mov(z6, z0);
12940 masm->Fmul(z6.WithLaneSize(ls),
12941 p2m,
12942 z6.WithLaneSize(ls),
12943 rvrs,
12944 FastNaNPropagation);
12945
12946 for (int i = 0; i < 16; i++) {
12947 InsrHelper(masm, z7.VnD(), inputs_fmulx);
12948 }
12949 masm->Rev(z8.WithLaneSize(ls), z7.WithLaneSize(ls));
12950 masm->Fmulx(z7.WithLaneSize(ls),
12951 p2m,
12952 z7.WithLaneSize(ls),
12953 z8.WithLaneSize(ls),
12954 FastNaNPropagation);
12955
12956 InsrHelper(masm, z8.VnD(), inputs_nans);
12957 masm->Mov(z9, z8);
12958 masm->Fminnm(z9.WithLaneSize(ls),
12959 p2m,
12960 z9.WithLaneSize(ls),
12961 rvrs,
12962 FastNaNPropagation);
12963 masm->Mov(z10, z8);
12964 masm->Fmaxnm(z10.WithLaneSize(ls),
12965 p2m,
12966 z10.WithLaneSize(ls),
12967 rvrs,
12968 FastNaNPropagation);
12969}
12970
12971TEST_SVE(sve_fp_arith_pred_h) {
12972 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12973 START();
12974
12975 uint64_t inputs[] = {0x4800470046004500, 0x4400420040003c00};
12976 uint64_t inputs_fmulx[] = {0x7c00fc007c00fc00, 0x0000800000008000};
12977 uint64_t inputs_nans[] = {0x7fffffff7fffffff, 0x7bfffbff7fbbfbff};
12978
12979 BasicFPArithHelper(&masm, kHRegSize, inputs, inputs_fmulx, inputs_nans);
12980
12981 END();
12982
12983 if (CAN_RUN()) {
12984 RUN();
12985 uint64_t expected_z2[] = {0x4880488048804880, 0x4880420048804880};
12986 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12987 uint64_t expected_z3[] = {0x4700450042003c00, 0xbc004200c500c700};
12988 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12989 uint64_t expected_z4[] = {0xc700c500c200bc00, 0x3c00420045004700};
12990 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12991 uint64_t expected_z5[] = {0x4700450042003c00, 0x3c00420045004700};
12992 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12993 uint64_t expected_z6[] = {0x48004b004c804d00, 0x4d0042004b004800};
12994 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12995 uint64_t expected_z7[] = {0xc000c000c000c000, 0xc0008000c000c000};
12996 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12997 uint64_t expected_z9[] = {0x3c00400042004400, 0x4500fbff4700fbff};
12998 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12999 uint64_t expected_z10[] = {0x3c00400042004400, 0x7bfffbff47004800};
13000 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
13001 }
13002}
13003
13004TEST_SVE(sve_fp_arith_pred_s) {
13005 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13006 START();
13007
13008 uint64_t inputs[] = {0x4080000040400000, 0x400000003f800000};
13009 uint64_t inputs_fmulx[] = {0x7f800000ff800000, 0x0000000080000000};
13010 uint64_t inputs_nans[] = {0x7fffffffffffffff, 0x41000000c1000000};
13011
13012 BasicFPArithHelper(&masm, kSRegSize, inputs, inputs_fmulx, inputs_nans);
13013
13014 END();
13015
13016 if (CAN_RUN()) {
13017 RUN();
13018 uint64_t expected_z2[] = {0x40a0000040a00000, 0x4000000040a00000};
13019 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
13020 uint64_t expected_z3[] = {0x404000003f800000, 0x40000000c0400000};
13021 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
13022 uint64_t expected_z4[] = {0xc0400000bf800000, 0x4000000040400000};
13023 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
13024 uint64_t expected_z5[] = {0x404000003f800000, 0x4000000040400000};
13025 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
13026 uint64_t expected_z6[] = {0x4080000040c00000, 0x4000000040800000};
13027 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
13028 uint64_t expected_z7[] = {0xc0000000c0000000, 0x00000000c0000000};
13029 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
13030 uint64_t expected_z9[] = {0x3f80000040000000, 0x41000000c1000000};
13031 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
13032 uint64_t expected_z10[] = {0x3f80000040000000, 0x4100000040800000};
13033 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
13034 }
13035}
13036
13037TEST_SVE(sve_fp_arith_pred_d) {
13038 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13039 START();
13040
13041 uint64_t inputs[] = {0x4000000000000000, 0x3ff0000000000000};
13042 uint64_t inputs_fmulx[] = {0x7ff0000000000000, 0x8000000000000000};
13043 uint64_t inputs_nans[] = {0x7fffffffffffffff, 0x4100000000000000};
13044
13045 BasicFPArithHelper(&masm, kDRegSize, inputs, inputs_fmulx, inputs_nans);
13046
13047 END();
13048
13049 if (CAN_RUN()) {
13050 RUN();
13051 uint64_t expected_z2[] = {0x4008000000000000, 0x4008000000000000};
13052 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
13053 uint64_t expected_z3[] = {0x3ff0000000000000, 0xbff0000000000000};
13054 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
13055 uint64_t expected_z4[] = {0xbff0000000000000, 0x3ff0000000000000};
13056 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
13057 uint64_t expected_z5[] = {0x3ff0000000000000, 0x3ff0000000000000};
13058 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
13059 uint64_t expected_z6[] = {0x4000000000000000, 0x4000000000000000};
13060 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
13061 uint64_t expected_z7[] = {0xc000000000000000, 0xc000000000000000};
13062 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
13063 uint64_t expected_z9[] = {0x3ff0000000000000, 0x4000000000000000};
13064 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
13065 uint64_t expected_z10[] = {0x3ff0000000000000, 0x4100000000000000};
13066 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
13067 }
13068}
13069
Martyn Capewella2fadc22020-01-16 16:09:55 +000013070TEST_SVE(sve_fp_arith_pred_imm) {
13071 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13072 START();
13073
13074 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
13075 Initialise(&masm, p0.VnB(), pred);
13076 PRegisterM p0m = p0.Merging();
13077 __ Ptrue(p1.VnB());
13078
13079 __ Fdup(z0.VnD(), 0.0);
13080
13081 __ Mov(z1, z0);
13082 __ Fdiv(z1.VnH(), p1.Merging(), z1.VnH(), z1.VnH());
13083 __ Mov(z2, z0);
13084 __ Fadd(z2.VnH(), p0m, z2.VnH(), 0.5);
13085 __ Mov(z3, z2);
13086 __ Fsub(z3.VnH(), p0m, z3.VnH(), 1.0);
13087 __ Mov(z4, z3);
13088 __ Fsub(z4.VnH(), p0m, 1.0, z4.VnH());
13089 __ Mov(z5, z4);
13090 __ Fmul(z5.VnH(), p0m, z5.VnH(), 2.0);
13091 __ Mov(z6, z1);
13092 __ Fminnm(z6.VnH(), p0m, z6.VnH(), 0.0);
13093 __ Mov(z7, z1);
13094 __ Fmaxnm(z7.VnH(), p0m, z7.VnH(), 1.0);
13095 __ Mov(z8, z5);
13096 __ Fmin(z8.VnH(), p0m, z8.VnH(), 1.0);
13097 __ Mov(z9, z5);
13098 __ Fmax(z9.VnH(), p0m, z9.VnH(), 0.0);
13099
13100 __ Mov(z11, z0);
13101 __ Fdiv(z11.VnS(), p1.Merging(), z11.VnS(), z11.VnS());
13102 __ Mov(z12, z0);
13103 __ Fadd(z12.VnS(), p0m, z12.VnS(), 0.5);
13104 __ Mov(z13, z12);
13105 __ Fsub(z13.VnS(), p0m, z13.VnS(), 1.0);
13106 __ Mov(z14, z13);
13107 __ Fsub(z14.VnS(), p0m, 1.0, z14.VnS());
13108 __ Mov(z15, z14);
13109 __ Fmul(z15.VnS(), p0m, z15.VnS(), 2.0);
13110 __ Mov(z16, z11);
13111 __ Fminnm(z16.VnS(), p0m, z16.VnS(), 0.0);
13112 __ Mov(z17, z11);
13113 __ Fmaxnm(z17.VnS(), p0m, z17.VnS(), 1.0);
13114 __ Mov(z18, z15);
13115 __ Fmin(z18.VnS(), p0m, z18.VnS(), 1.0);
13116 __ Mov(z19, z15);
13117 __ Fmax(z19.VnS(), p0m, z19.VnS(), 0.0);
13118
13119 __ Mov(z21, z0);
13120 __ Fdiv(z21.VnD(), p1.Merging(), z21.VnD(), z21.VnD());
13121 __ Mov(z22, z0);
13122 __ Fadd(z22.VnD(), p0m, z22.VnD(), 0.5);
13123 __ Mov(z23, z22);
13124 __ Fsub(z23.VnD(), p0m, z23.VnD(), 1.0);
13125 __ Mov(z24, z23);
13126 __ Fsub(z24.VnD(), p0m, 1.0, z24.VnD());
13127 __ Mov(z25, z24);
13128 __ Fmul(z25.VnD(), p0m, z25.VnD(), 2.0);
13129 __ Mov(z26, z21);
13130 __ Fminnm(z26.VnD(), p0m, z26.VnD(), 0.0);
13131 __ Mov(z27, z21);
13132 __ Fmaxnm(z27.VnD(), p0m, z27.VnD(), 1.0);
13133 __ Mov(z28, z25);
13134 __ Fmin(z28.VnD(), p0m, z28.VnD(), 1.0);
13135 __ Mov(z29, z25);
13136 __ Fmax(z29.VnD(), p0m, z29.VnD(), 0.0);
13137
13138 __ Index(z0.VnH(), -3, 1);
13139 __ Scvtf(z0.VnH(), p1.Merging(), z0.VnH());
13140 __ Fmax(z0.VnH(), p1.Merging(), z0.VnH(), 0.0);
13141 __ Index(z1.VnS(), -4, 2);
13142 __ Scvtf(z1.VnS(), p1.Merging(), z1.VnS());
13143 __ Fadd(z1.VnS(), p1.Merging(), z1.VnS(), 1.0);
13144
13145 END();
13146
13147 if (CAN_RUN()) {
13148 RUN();
13149 uint64_t expected_z2[] = {0x3800380038003800, 0x3800000038003800};
13150 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
13151 uint64_t expected_z3[] = {0xb800b800b800b800, 0xb8000000b800b800};
13152 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
13153 uint64_t expected_z4[] = {0x3e003e003e003e00, 0x3e0000003e003e00};
13154 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
13155 uint64_t expected_z5[] = {0x4200420042004200, 0x4200000042004200};
13156 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
13157 uint64_t expected_z6[] = {0x0000000000000000, 0x00007e0000000000};
13158 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
13159 uint64_t expected_z7[] = {0x3c003c003c003c00, 0x3c007e003c003c00};
13160 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
13161 uint64_t expected_z8[] = {0x3c003c003c003c00, 0x3c0000003c003c00};
13162 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
13163 uint64_t expected_z9[] = {0x4200420042004200, 0x4200000042004200};
13164 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
13165
13166 uint64_t expected_z12[] = {0x3f0000003f000000, 0x000000003f000000};
13167 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
13168 uint64_t expected_z13[] = {0xbf000000bf000000, 0x00000000bf000000};
13169 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
13170 uint64_t expected_z14[] = {0x3fc000003fc00000, 0x000000003fc00000};
13171 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
13172 uint64_t expected_z15[] = {0x4040000040400000, 0x0000000040400000};
13173 ASSERT_EQUAL_SVE(expected_z15, z15.VnD());
13174 uint64_t expected_z16[] = {0x0000000000000000, 0x7fc0000000000000};
13175 ASSERT_EQUAL_SVE(expected_z16, z16.VnD());
13176 uint64_t expected_z17[] = {0x3f8000003f800000, 0x7fc000003f800000};
13177 ASSERT_EQUAL_SVE(expected_z17, z17.VnD());
13178 uint64_t expected_z18[] = {0x3f8000003f800000, 0x000000003f800000};
13179 ASSERT_EQUAL_SVE(expected_z18, z18.VnD());
13180 uint64_t expected_z19[] = {0x4040000040400000, 0x0000000040400000};
13181 ASSERT_EQUAL_SVE(expected_z19, z19.VnD());
13182
13183 uint64_t expected_z22[] = {0x3fe0000000000000, 0x3fe0000000000000};
13184 ASSERT_EQUAL_SVE(expected_z22, z22.VnD());
13185 uint64_t expected_z23[] = {0xbfe0000000000000, 0xbfe0000000000000};
13186 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
13187 uint64_t expected_z24[] = {0x3ff8000000000000, 0x3ff8000000000000};
13188 ASSERT_EQUAL_SVE(expected_z24, z24.VnD());
13189 uint64_t expected_z25[] = {0x4008000000000000, 0x4008000000000000};
13190 ASSERT_EQUAL_SVE(expected_z25, z25.VnD());
13191 uint64_t expected_z26[] = {0x0000000000000000, 0x0000000000000000};
13192 ASSERT_EQUAL_SVE(expected_z26, z26.VnD());
13193 uint64_t expected_z27[] = {0x3ff0000000000000, 0x3ff0000000000000};
13194 ASSERT_EQUAL_SVE(expected_z27, z27.VnD());
13195 uint64_t expected_z28[] = {0x3ff0000000000000, 0x3ff0000000000000};
13196 ASSERT_EQUAL_SVE(expected_z28, z28.VnD());
13197 uint64_t expected_z29[] = {0x4008000000000000, 0x4008000000000000};
13198 ASSERT_EQUAL_SVE(expected_z29, z29.VnD());
13199 uint64_t expected_z0[] = {0x4400420040003c00, 0x0000000000000000};
13200 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
13201 uint64_t expected_z1[] = {0x404000003f800000, 0xbf800000c0400000};
13202 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
13203 }
13204}
13205
Martyn Capewell37f28182020-01-14 10:15:10 +000013206TEST_SVE(sve_fscale) {
13207 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13208 START();
13209
13210 uint64_t inputs_h[] = {0x4800470046004500, 0x4400420040003c00};
13211 InsrHelper(&masm, z0.VnD(), inputs_h);
13212 uint64_t inputs_s[] = {0x4080000040400000, 0x400000003f800000};
13213 InsrHelper(&masm, z1.VnD(), inputs_s);
13214 uint64_t inputs_d[] = {0x40f0000000000000, 0x4000000000000000};
13215 InsrHelper(&masm, z2.VnD(), inputs_d);
13216
13217 uint64_t scales[] = {0x00080002fff8fffe, 0x00100001fff0ffff};
13218 InsrHelper(&masm, z3.VnD(), scales);
13219
13220 __ Ptrue(p0.VnB());
13221 int pred[] = {0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1};
13222 Initialise(&masm, p1.VnB(), pred);
13223
13224 __ Mov(z4, z0);
13225 __ Fscale(z4.VnH(), p0.Merging(), z4.VnH(), z3.VnH());
13226 __ Mov(z5, z0);
13227 __ Fscale(z5.VnH(), p1.Merging(), z5.VnH(), z3.VnH());
13228
13229 __ Sunpklo(z3.VnS(), z3.VnH());
13230 __ Mov(z6, z1);
13231 __ Fscale(z6.VnS(), p0.Merging(), z6.VnS(), z3.VnS());
13232 __ Mov(z7, z1);
13233 __ Fscale(z7.VnS(), p1.Merging(), z7.VnS(), z3.VnS());
13234
13235 __ Sunpklo(z3.VnD(), z3.VnS());
13236 __ Mov(z8, z2);
13237 __ Fscale(z8.VnD(), p0.Merging(), z8.VnD(), z3.VnD());
13238 __ Mov(z9, z2);
13239 __ Fscale(z9.VnD(), p1.Merging(), z9.VnD(), z3.VnD());
13240
13241 // Test full double precision range scaling.
13242 __ Dup(z10.VnD(), 2045);
13243 __ Dup(z11.VnD(), 0x0010000000000000); // 2^-1022
13244 __ Fscale(z11.VnD(), p0.Merging(), z11.VnD(), z10.VnD());
13245
13246 END();
13247
13248 if (CAN_RUN()) {
13249 RUN();
13250
13251 uint64_t expected_z4[] = {0x68004f0026003d00, 0x7c00460002003800};
13252 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
13253 uint64_t expected_z5[] = {0x68004f0026004500, 0x7c00420002003800};
13254 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
13255
13256 uint64_t expected_z6[] = {0x4880000040c00000, 0x380000003f000000};
13257 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
13258 uint64_t expected_z7[] = {0x4880000040400000, 0x400000003f000000};
13259 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
13260
13261 uint64_t expected_z8[] = {0x3ff0000000000000, 0x3ff0000000000000};
13262 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
13263 uint64_t expected_z9[] = {0x40f0000000000000, 0x3ff0000000000000};
13264 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
13265
13266 uint64_t expected_z11[] = {0x7fe0000000000000, 0x7fe0000000000000};
13267 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
13268 }
13269}
13270
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013271typedef void (MacroAssembler::*FcvtFrintMFn)(const ZRegister& zd,
13272 const PRegisterM& pg,
13273 const ZRegister& zn);
13274
13275typedef void (MacroAssembler::*FcvtFrintZFn)(const ZRegister& zd,
13276 const PRegisterZ& pg,
13277 const ZRegister& zn);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013278
13279template <typename F, size_t N>
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013280static void TestFcvtFrintHelper(Test* config,
13281 FcvtFrintMFn macro_m,
13282 FcvtFrintZFn macro_z,
13283 int dst_type_size_in_bits,
13284 int src_type_size_in_bits,
13285 const F (&zn_inputs)[N],
13286 const int (&pg_inputs)[N],
13287 const uint64_t (&zd_expected_all_active)[N]) {
13288 VIXL_ASSERT(macro_m != NULL);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013289 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13290 START();
13291
13292 // If the input and result types have a different size, the instruction
13293 // options on elements of the largest specified type is determined by the
13294 // larger type.
13295 int lane_size_in_bits =
13296 std::max(dst_type_size_in_bits, src_type_size_in_bits);
13297
13298 ZRegister zd_all_active = z25;
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013299 ZRegister zd_merging = z26;
TatWai Chongdb7437c2020-01-09 17:44:10 -080013300 ZRegister zn = z27;
13301
13302 uint64_t zn_rawbits[N];
13303 FPToRawbitsWithSize(zn_inputs, zn_rawbits, src_type_size_in_bits);
13304 InsrHelper(&masm, zn.WithLaneSize(lane_size_in_bits), zn_rawbits);
13305
13306 PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
13307 __ Ptrue(pg_all_active);
13308
13309 // Test floating-point conversions with all lanes actived.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013310 (masm.*macro_m)(zd_all_active.WithLaneSize(dst_type_size_in_bits),
13311 pg_all_active.Merging(),
13312 zn.WithLaneSize(src_type_size_in_bits));
TatWai Chongdb7437c2020-01-09 17:44:10 -080013313
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013314 PRegisterWithLaneSize pg_merging = p1.WithLaneSize(lane_size_in_bits);
13315 Initialise(&masm, pg_merging, pg_inputs);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013316
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013317 __ Dup(zd_merging.VnD(), 0x0bad0bad0bad0bad);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013318
13319 // Use the same `zn` inputs to test floating-point conversions but partial
13320 // lanes are set inactive.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013321 (masm.*macro_m)(zd_merging.WithLaneSize(dst_type_size_in_bits),
13322 pg_merging.Merging(),
13323 zn.WithLaneSize(src_type_size_in_bits));
13324
13325 ZRegister zd_zeroing = z24;
13326 PRegisterWithLaneSize pg_zeroing = p1.WithLaneSize(lane_size_in_bits);
13327 Initialise(&masm, pg_zeroing, pg_inputs);
13328
13329 if (macro_z != NULL) {
13330 __ Dup(zd_zeroing.VnD(), 0x0bad0bad0bad0bad);
13331 (masm.*macro_z)(zd_zeroing.WithLaneSize(dst_type_size_in_bits),
13332 pg_zeroing.Zeroing(),
13333 zn.WithLaneSize(src_type_size_in_bits));
13334 }
TatWai Chongdb7437c2020-01-09 17:44:10 -080013335
13336 END();
13337
13338 if (CAN_RUN()) {
13339 RUN();
13340
13341 ASSERT_EQUAL_SVE(zd_expected_all_active,
13342 zd_all_active.WithLaneSize(lane_size_in_bits));
13343
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013344 uint64_t zd_expected_merging[N];
TatWai Chongdb7437c2020-01-09 17:44:10 -080013345 for (unsigned i = 0; i < N; i++) {
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013346 zd_expected_merging[i] =
TatWai Chongdb7437c2020-01-09 17:44:10 -080013347 pg_inputs[i] ? zd_expected_all_active[i]
13348 : 0x0bad0bad0bad0bad & GetUintMask(lane_size_in_bits);
13349 }
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013350 ASSERT_EQUAL_SVE(zd_expected_merging,
13351 zd_merging.WithLaneSize(lane_size_in_bits));
13352
13353 if (macro_z != NULL) {
13354 uint64_t zd_expected_zeroing[N] = {0};
13355 for (unsigned i = 0; i < N; i++) {
13356 if (pg_inputs[i]) {
13357 zd_expected_zeroing[i] = zd_expected_all_active[i];
13358 }
13359 }
13360 ASSERT_EQUAL_SVE(zd_expected_zeroing,
13361 zd_zeroing.WithLaneSize(lane_size_in_bits));
13362 }
TatWai Chongdb7437c2020-01-09 17:44:10 -080013363 }
13364}
13365
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013366template <typename F, size_t N>
13367static void TestFcvtzHelper(Test* config,
13368 FcvtFrintMFn macro_m,
13369 int dst_type_size_in_bits,
13370 int src_type_size_in_bits,
13371 const F (&zn_inputs)[N],
13372 const int (&pg_inputs)[N],
13373 const uint64_t (&zd_expected_all_active)[N]) {
13374 TestFcvtFrintHelper(config,
13375 macro_m,
13376 // Fcvt variants have no zeroing predication form.
13377 NULL,
13378 dst_type_size_in_bits,
13379 src_type_size_in_bits,
13380 zn_inputs,
13381 pg_inputs,
13382 zd_expected_all_active);
13383}
13384
TatWai Chongdb7437c2020-01-09 17:44:10 -080013385TEST_SVE(fcvtzs_fcvtzu_float16) {
TatWai Chongdb7437c2020-01-09 17:44:10 -080013386 const double h_max_float16 = kHMaxInt; // Largest float16 == INT16_MAX.
13387 const double h_min_float16 = -h_max_float16; // Smallest float16 > INT16_MIN.
13388 const double largest_float16 = 0xffe0; // 65504
13389 const double smallest_float16 = -largest_float16;
13390 const double h_max_int_sub_one = kHMaxInt - 1;
13391 const double h_min_int_add_one = kHMinInt + 1;
13392
13393 double zn_inputs[] = {1.0,
13394 1.1,
13395 1.5,
13396 -1.5,
13397 h_max_float16,
13398 h_min_float16,
13399 largest_float16,
13400 smallest_float16,
13401 kFP64PositiveInfinity,
13402 kFP64NegativeInfinity,
13403 h_max_int_sub_one,
13404 h_min_int_add_one};
13405
13406 int pg_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
13407
13408 uint64_t expected_fcvtzs_fp162h[] = {1,
13409 1,
13410 1,
13411 0xffff,
13412 0x7fff,
13413 0x8000,
13414 0x7fff,
13415 0x8000,
13416 0x7fff,
13417 0x8000,
13418 0x7fff,
13419 0x8000};
13420
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013421 uint64_t expected_fcvtzu_fp162h[] =
13422 {1, 1, 1, 0, 0x8000, 0, 0xffe0, 0, 0xffff, 0, 0x8000, 0};
TatWai Chongdb7437c2020-01-09 17:44:10 -080013423
13424 // Float16 to 16-bit integers.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013425 TestFcvtzHelper(config,
13426 &MacroAssembler::Fcvtzs,
13427 kHRegSize,
13428 kHRegSize,
13429 zn_inputs,
13430 pg_inputs,
13431 expected_fcvtzs_fp162h);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013432
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013433 TestFcvtzHelper(config,
13434 &MacroAssembler::Fcvtzu,
13435 kHRegSize,
13436 kHRegSize,
13437 zn_inputs,
13438 pg_inputs,
13439 expected_fcvtzu_fp162h);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013440
13441 uint64_t expected_fcvtzs_fp162w[] = {1,
13442 1,
13443 1,
13444 0xffffffff,
13445 0x8000,
13446 0xffff8000,
13447 0xffe0,
13448 0xffff0020,
13449 0x7fffffff,
13450 0x80000000,
13451 0x8000,
13452 0xffff8000};
13453
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013454 uint64_t expected_fcvtzu_fp162w[] =
13455 {1, 1, 1, 0, 0x8000, 0, 0xffe0, 0, 0xffffffff, 0, 0x8000, 0};
TatWai Chongdb7437c2020-01-09 17:44:10 -080013456
13457 // Float16 to 32-bit integers.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013458 TestFcvtzHelper(config,
13459 &MacroAssembler::Fcvtzs,
13460 kSRegSize,
13461 kHRegSize,
13462 zn_inputs,
13463 pg_inputs,
13464 expected_fcvtzs_fp162w);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013465
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013466 TestFcvtzHelper(config,
13467 &MacroAssembler::Fcvtzu,
13468 kSRegSize,
13469 kHRegSize,
13470 zn_inputs,
13471 pg_inputs,
13472 expected_fcvtzu_fp162w);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013473
13474 uint64_t expected_fcvtzs_fp162x[] = {1,
13475 1,
13476 1,
13477 0xffffffffffffffff,
13478 0x8000,
13479 0xffffffffffff8000,
13480 0xffe0,
13481 0xffffffffffff0020,
13482 0x7fffffffffffffff,
13483 0x8000000000000000,
13484 0x8000,
13485 0xffffffffffff8000};
13486
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013487 uint64_t expected_fcvtzu_fp162x[] =
13488 {1, 1, 1, 0, 0x8000, 0, 0xffe0, 0, 0xffffffffffffffff, 0, 0x8000, 0};
TatWai Chongdb7437c2020-01-09 17:44:10 -080013489
13490 // Float16 to 64-bit integers.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013491 TestFcvtzHelper(config,
13492 &MacroAssembler::Fcvtzs,
13493 kDRegSize,
13494 kHRegSize,
13495 zn_inputs,
13496 pg_inputs,
13497 expected_fcvtzs_fp162x);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013498
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013499 TestFcvtzHelper(config,
13500 &MacroAssembler::Fcvtzu,
13501 kDRegSize,
13502 kHRegSize,
13503 zn_inputs,
13504 pg_inputs,
13505 expected_fcvtzu_fp162x);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013506}
13507
13508TEST_SVE(fcvtzs_fcvtzu_float) {
13509 const double w_max_float = 0x7fffff80; // Largest float < INT32_MAX.
13510 const double w_min_float = -w_max_float; // Smallest float > INT32_MIN.
13511 const double x_max_float = 0x7fffff8000000000; // Largest float < INT64_MAX.
13512 const double x_min_float = -x_max_float; // Smallest float > INT64_MIN.
13513 const double w_max_int_sub_one = kWMaxInt - 1;
13514 const double w_min_int_add_one = kWMinInt + 1;
13515 const double x_max_int_sub_one = kXMaxInt - 1;
13516 const double x_min_int_add_one = kXMinInt + 1;
13517
TatWai Chongdb7437c2020-01-09 17:44:10 -080013518 double zn_inputs[] = {1.0,
13519 1.1,
13520 1.5,
13521 -1.5,
13522 w_max_float,
13523 w_min_float,
13524 x_max_float,
13525 x_min_float,
13526 kFP64PositiveInfinity,
13527 kFP64NegativeInfinity,
13528 w_max_int_sub_one,
13529 w_min_int_add_one,
13530 x_max_int_sub_one,
13531 x_min_int_add_one};
13532
13533 int pg_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0};
13534
13535 uint64_t expected_fcvtzs_s2w[] = {1,
13536 1,
13537 1,
13538 0xffffffff,
13539 0x7fffff80,
13540 0x80000080,
13541 0x7fffffff,
13542 0x80000000,
13543 0x7fffffff,
13544 0x80000000,
13545 0x7fffffff,
13546 0x80000000,
13547 0x7fffffff,
13548 0x80000000};
13549
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013550 uint64_t expected_fcvtzu_s2w[] = {1,
13551 1,
13552 1,
13553 0,
13554 0x7fffff80,
13555 0,
13556 0xffffffff,
13557 0,
13558 0xffffffff,
13559 0,
13560 0x80000000,
13561 0,
13562 0xffffffff,
13563 0};
TatWai Chongdb7437c2020-01-09 17:44:10 -080013564
13565 // Float to 32-bit integers.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013566 TestFcvtzHelper(config,
13567 &MacroAssembler::Fcvtzs,
13568 kSRegSize,
13569 kSRegSize,
13570 zn_inputs,
13571 pg_inputs,
13572 expected_fcvtzs_s2w);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013573
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013574 TestFcvtzHelper(config,
13575 &MacroAssembler::Fcvtzu,
13576 kSRegSize,
13577 kSRegSize,
13578 zn_inputs,
13579 pg_inputs,
13580 expected_fcvtzu_s2w);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013581
13582 uint64_t expected_fcvtzs_s2x[] = {1,
13583 1,
13584 1,
13585 0xffffffffffffffff,
13586 0x7fffff80,
13587 0xffffffff80000080,
13588 0x7fffff8000000000,
13589 0x8000008000000000,
13590 0x7fffffffffffffff,
13591 0x8000000000000000,
13592 0x80000000,
13593 0xffffffff80000000,
13594 0x7fffffffffffffff,
13595 0x8000000000000000};
13596
13597 uint64_t expected_fcvtzu_s2x[] = {1,
13598 1,
13599 1,
13600 0,
13601 0x7fffff80,
13602 0,
13603 0x7fffff8000000000,
13604 0,
13605 0xffffffffffffffff,
13606 0,
13607 0x0000000080000000,
13608 0,
13609 0x8000000000000000,
13610 0};
13611
13612 // Float to 64-bit integers.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013613 TestFcvtzHelper(config,
13614 &MacroAssembler::Fcvtzs,
13615 kDRegSize,
13616 kSRegSize,
13617 zn_inputs,
13618 pg_inputs,
13619 expected_fcvtzs_s2x);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013620
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013621 TestFcvtzHelper(config,
13622 &MacroAssembler::Fcvtzu,
13623 kDRegSize,
13624 kSRegSize,
13625 zn_inputs,
13626 pg_inputs,
13627 expected_fcvtzu_s2x);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013628}
13629
13630TEST_SVE(fcvtzs_fcvtzu_double) {
TatWai Chongdb7437c2020-01-09 17:44:10 -080013631 const double w_max_float = 0x7fffff80; // Largest float < INT32_MAX.
13632 const double w_min_float = -w_max_float; // Smallest float > INT32_MIN.
13633 const double x_max_float = 0x7fffff8000000000; // Largest float < INT64_MAX.
13634 const double x_min_float = -x_max_float; // Smallest float > INT64_MIN.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013635 const double w_max_double = kWMaxInt; // Largest double == INT32_MAX.
13636 const double w_min_double = -w_max_double; // Smallest double > INT32_MIN.
13637 const double x_max_double =
13638 0x7ffffffffffffc00; // Largest double < INT64_MAX.
13639 const double x_min_double = -x_max_double; // Smallest double > INT64_MIN.
TatWai Chongdb7437c2020-01-09 17:44:10 -080013640 const double w_max_int_sub_one = kWMaxInt - 1;
13641 const double w_min_int_add_one = kWMinInt + 1;
13642 const double x_max_int_sub_one = kXMaxInt - 1;
13643 const double x_min_int_add_one = kXMinInt + 1;
13644
13645 double zn_inputs[] = {1.0,
13646 1.1,
13647 1.5,
13648 -1.5,
13649 w_max_float,
13650 w_min_float,
13651 x_max_float,
13652 x_min_float,
13653 w_max_double,
13654 w_min_double,
13655 x_max_double,
13656 x_min_double,
13657 kFP64PositiveInfinity,
13658 kFP64NegativeInfinity,
13659 w_max_int_sub_one,
13660 w_min_int_add_one,
13661 x_max_int_sub_one,
13662 x_min_int_add_one};
13663
13664 int pg_inputs[] = {1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0};
13665
13666 uint64_t expected_fcvtzs_d2w[] = {1,
13667 1,
13668 1,
13669 0xffffffffffffffff,
13670 0x7fffff80,
13671 0xffffffff80000080,
13672 0x7fffffff,
13673 0xffffffff80000000,
13674 0x7fffffff,
13675 0xffffffff80000001,
13676 0x7fffffff,
13677 0xffffffff80000000,
13678 0x7fffffff,
13679 0xffffffff80000000,
13680 0x7ffffffe,
13681 0xffffffff80000001,
13682 0x7fffffff,
13683 0xffffffff80000000};
13684
13685 uint64_t expected_fcvtzu_d2w[] = {1,
13686 1,
13687 1,
13688 0,
13689 0x7fffff80,
13690 0,
13691 0xffffffff,
13692 0,
13693 0x7fffffff,
13694 0,
13695 0xffffffff,
13696 0,
13697 0xffffffff,
13698 0,
13699 0x7ffffffe,
13700 0,
13701 0xffffffff,
13702 0};
13703
13704 // Double to 32-bit integers.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013705 TestFcvtzHelper(config,
13706 &MacroAssembler::Fcvtzs,
13707 kSRegSize,
13708 kDRegSize,
13709 zn_inputs,
13710 pg_inputs,
13711 expected_fcvtzs_d2w);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013712
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013713 TestFcvtzHelper(config,
13714 &MacroAssembler::Fcvtzu,
13715 kSRegSize,
13716 kDRegSize,
13717 zn_inputs,
13718 pg_inputs,
13719 expected_fcvtzu_d2w);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013720
13721 uint64_t expected_fcvtzs_d2x[] = {1,
13722 1,
13723 1,
13724 0xffffffffffffffff,
13725 0x7fffff80,
13726 0xffffffff80000080,
13727 0x7fffff8000000000,
13728 0x8000008000000000,
13729 0x7fffffff,
13730 0xffffffff80000001,
13731 0x7ffffffffffffc00,
13732 0x8000000000000400,
13733 0x7fffffffffffffff,
13734 0x8000000000000000,
13735 0x7ffffffe,
13736 0xffffffff80000001,
13737 0x7fffffffffffffff,
13738 0x8000000000000000};
13739
13740 uint64_t expected_fcvtzu_d2x[] = {1,
13741 1,
13742 1,
13743 0,
13744 0x7fffff80,
13745 0,
13746 0x7fffff8000000000,
13747 0,
13748 0x7fffffff,
13749 0,
13750 0x7ffffffffffffc00,
13751 0,
13752 0xffffffffffffffff,
13753 0,
13754 0x000000007ffffffe,
13755 0,
13756 0x8000000000000000,
13757 0};
13758
13759 // Double to 64-bit integers.
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013760 TestFcvtzHelper(config,
13761 &MacroAssembler::Fcvtzs,
13762 kDRegSize,
13763 kDRegSize,
13764 zn_inputs,
13765 pg_inputs,
13766 expected_fcvtzs_d2x);
TatWai Chongdb7437c2020-01-09 17:44:10 -080013767
TatWai Chongf07b8ce2020-02-17 00:05:54 -080013768 TestFcvtzHelper(config,
13769 &MacroAssembler::Fcvtzu,
13770 kDRegSize,
13771 kDRegSize,
13772 zn_inputs,
13773 pg_inputs,
13774 expected_fcvtzu_d2x);
13775}
13776
13777template <typename F, size_t N>
13778static void TestFrintHelper(Test* config,
13779 FcvtFrintMFn macro_m,
13780 FcvtFrintZFn macro_z,
13781 int lane_size_in_bits,
13782 const F (&zn_inputs)[N],
13783 const int (&pg_inputs)[N],
13784 const F (&zd_expected)[N]) {
13785 uint64_t zd_expected_rawbits[N];
13786 FPToRawbitsWithSize(zd_expected, zd_expected_rawbits, lane_size_in_bits);
13787 TestFcvtFrintHelper(config,
13788 macro_m,
13789 macro_z,
13790 lane_size_in_bits,
13791 lane_size_in_bits,
13792 zn_inputs,
13793 pg_inputs,
13794 zd_expected_rawbits);
13795}
13796
13797TEST_SVE(frint) {
13798 const double inf_pos = kFP64PositiveInfinity;
13799 const double inf_neg = kFP64NegativeInfinity;
13800
13801 double zn_inputs[] =
13802 {1.1, 1.5, 1.9, 2.5, -1.5, -2.5, 0.0, -0.0, -0.2, inf_pos, inf_neg};
13803 double zd_expected_a[] =
13804 {1.0, 2.0, 2.0, 3.0, -2.0, -3.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
13805 double zd_expected_i[] =
13806 {1.0, 2.0, 2.0, 2.0, -2.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
13807 double zd_expected_m[] =
13808 {1.0, 1.0, 1.0, 2.0, -2.0, -3.0, 0.0, -0.0, -1.0, inf_pos, inf_neg};
13809 double zd_expected_n[] =
13810 {1.0, 2.0, 2.0, 2.0, -2.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
13811 double zd_expected_p[] =
13812 {2.0, 2.0, 2.0, 3.0, -1.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
13813 double zd_expected_x[] =
13814 {1.0, 2.0, 2.0, 2.0, -2.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
13815 double zd_expected_z[] =
13816 {1.0, 1.0, 1.0, 2.0, -1.0, -2.0, 0.0, -0.0, -0.0, inf_pos, inf_neg};
13817
13818 int pg_inputs[] = {0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0};
13819
13820 struct TestDataSet {
13821 FcvtFrintMFn macro_m; // merging form.
13822 FcvtFrintZFn macro_z; // zeroing form.
13823 double (&expected)[11];
13824 };
13825
13826 TestDataSet test_data[] =
13827 {{&MacroAssembler::Frinta, &MacroAssembler::Frinta, zd_expected_a},
13828 {&MacroAssembler::Frinti, &MacroAssembler::Frinti, zd_expected_i},
13829 {&MacroAssembler::Frintm, &MacroAssembler::Frintm, zd_expected_m},
13830 {&MacroAssembler::Frintn, &MacroAssembler::Frintn, zd_expected_n},
13831 {&MacroAssembler::Frintp, &MacroAssembler::Frintp, zd_expected_p},
13832 {&MacroAssembler::Frintx, &MacroAssembler::Frintx, zd_expected_x},
13833 {&MacroAssembler::Frintz, &MacroAssembler::Frintz, zd_expected_z}};
13834
13835 unsigned lane_sizes[] = {kHRegSize, kSRegSize, kDRegSize};
13836
13837 for (size_t i = 0; i < sizeof(test_data) / sizeof(TestDataSet); i++) {
13838 for (size_t j = 0; j < ArrayLength(lane_sizes); j++) {
13839 TestFrintHelper(config,
13840 test_data[i].macro_m,
13841 test_data[i].macro_z,
13842 lane_sizes[j],
13843 zn_inputs,
13844 pg_inputs,
13845 test_data[i].expected);
13846 }
13847 }
TatWai Chongdb7437c2020-01-09 17:44:10 -080013848}
13849
TatWai Chong31cd6a02020-01-10 13:03:26 -080013850struct CvtfTestDataSet {
13851 uint64_t int_value;
13852 uint64_t scvtf_result;
13853 uint64_t ucvtf_result;
13854};
13855
13856template <size_t N>
13857static void TestUScvtfHelper(Test* config,
13858 int dst_type_size_in_bits,
13859 int src_type_size_in_bits,
13860 const int (&pg_inputs)[N],
13861 const CvtfTestDataSet (&data_set)[N]) {
13862 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13863 START();
13864
13865 // Unpack the data from the array of struct into individual arrays that can
13866 // simplify the testing.
13867 uint64_t zn_inputs[N];
13868 uint64_t expected_zd_scvtf_all_active[N];
13869 uint64_t expected_zd_ucvtf_all_active[N];
13870 for (size_t i = 0; i < N; i++) {
13871 zn_inputs[i] = data_set[i].int_value;
13872 expected_zd_scvtf_all_active[i] = data_set[i].scvtf_result;
13873 expected_zd_ucvtf_all_active[i] = data_set[i].ucvtf_result;
13874 }
13875
13876 // If the input and result types have a different size, the instruction
13877 // operates on elements of the largest specified type.
13878 int lane_size_in_bits =
13879 std::max(dst_type_size_in_bits, src_type_size_in_bits);
13880
13881 ZRegister zd_scvtf_all_active = z25;
13882 ZRegister zd_ucvtf_all_active = z26;
13883 ZRegister zn = z27;
13884 InsrHelper(&masm, zn.WithLaneSize(lane_size_in_bits), zn_inputs);
13885
13886 PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
13887 __ Ptrue(pg_all_active);
13888
13889 // Test integer conversions with all lanes actived.
13890 __ Scvtf(zd_scvtf_all_active.WithLaneSize(dst_type_size_in_bits),
13891 pg_all_active.Merging(),
13892 zn.WithLaneSize(src_type_size_in_bits));
13893 __ Ucvtf(zd_ucvtf_all_active.WithLaneSize(dst_type_size_in_bits),
13894 pg_all_active.Merging(),
13895 zn.WithLaneSize(src_type_size_in_bits));
13896
13897 ZRegister zd_scvtf_merged = z23;
13898 ZRegister zd_ucvtf_merged = z24;
13899
13900 PRegisterWithLaneSize pg_merged = p1.WithLaneSize(lane_size_in_bits);
13901 Initialise(&masm, pg_merged, pg_inputs);
13902
13903 uint64_t snan;
13904 switch (lane_size_in_bits) {
13905 case kHRegSize:
13906 snan = 0x7c11;
13907 break;
13908 case kSRegSize:
13909 snan = 0x7f951111;
13910 break;
13911 case kDRegSize:
13912 snan = 0x7ff5555511111111;
13913 break;
13914 }
13915 __ Dup(zd_scvtf_merged.WithLaneSize(lane_size_in_bits), snan);
13916 __ Dup(zd_ucvtf_merged.WithLaneSize(lane_size_in_bits), snan);
13917
13918 // Use the same `zn` inputs to test integer conversions but some lanes are set
13919 // inactive.
13920 __ Scvtf(zd_scvtf_merged.WithLaneSize(dst_type_size_in_bits),
13921 pg_merged.Merging(),
13922 zn.WithLaneSize(src_type_size_in_bits));
13923 __ Ucvtf(zd_ucvtf_merged.WithLaneSize(dst_type_size_in_bits),
13924 pg_merged.Merging(),
13925 zn.WithLaneSize(src_type_size_in_bits));
13926
13927 END();
13928
13929 if (CAN_RUN()) {
13930 RUN();
13931
13932 ASSERT_EQUAL_SVE(expected_zd_scvtf_all_active,
13933 zd_scvtf_all_active.WithLaneSize(lane_size_in_bits));
13934 ASSERT_EQUAL_SVE(expected_zd_ucvtf_all_active,
13935 zd_ucvtf_all_active.WithLaneSize(lane_size_in_bits));
13936
13937 uint64_t expected_zd_scvtf_merged[N];
13938 for (size_t i = 0; i < N; i++) {
13939 expected_zd_scvtf_merged[i] =
13940 pg_inputs[i] ? expected_zd_scvtf_all_active[i] : snan;
13941 }
13942 ASSERT_EQUAL_SVE(expected_zd_scvtf_merged,
13943 zd_scvtf_merged.WithLaneSize(lane_size_in_bits));
13944
13945 uint64_t expected_zd_ucvtf_merged[N];
13946 for (size_t i = 0; i < N; i++) {
13947 expected_zd_ucvtf_merged[i] =
13948 pg_inputs[i] ? expected_zd_ucvtf_all_active[i] : snan;
13949 }
13950 ASSERT_EQUAL_SVE(expected_zd_ucvtf_merged,
13951 zd_ucvtf_merged.WithLaneSize(lane_size_in_bits));
13952 }
13953}
13954
13955TEST_SVE(scvtf_ucvtf_h_s_d_to_float16) {
13956 // clang-format off
13957 CvtfTestDataSet data_set_1[] = {
13958 // Simple conversions of positive numbers which require no rounding; the
13959 // results should not depened on the rounding mode, and ucvtf and scvtf should
13960 // produce the same result.
13961 {0x0000, 0x0000, 0x0000},
13962 {0x0001, 0x3c00, 0x3c00},
13963 {0x0010, 0x4c00, 0x4c00},
13964 {0x0080, 0x5800, 0x5800},
13965 {0x0400, 0x6400, 0x6400},
13966 // Conversions which require rounding.
13967 {0x4000, 0x7400, 0x7400},
13968 {0x4001, 0x7400, 0x7400},
13969 // Round up to produce a result that's too big for the input to represent.
13970 {0x7ff0, 0x77ff, 0x77ff},
13971 {0x7ff1, 0x77ff, 0x77ff},
13972 {0x7ffe, 0x7800, 0x7800},
13973 {0x7fff, 0x7800, 0x7800}};
13974 int pg_1[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
13975 TestUScvtfHelper(config, kHRegSize, kDRegSize, pg_1, data_set_1);
13976 TestUScvtfHelper(config, kHRegSize, kSRegSize, pg_1, data_set_1);
13977 TestUScvtfHelper(config, kHRegSize, kHRegSize, pg_1, data_set_1);
13978
13979 CvtfTestDataSet data_set_2[] = {
13980 // Test mantissa extremities.
13981 {0x0401, 0x6401, 0x6401},
13982 {0x4020, 0x7402, 0x7402},
13983 // The largest int16_t that fits in a float16.
13984 {0xffef, 0xcc40, 0x7bff},
13985 // Values that would be negative if treated as an int16_t.
13986 {0xff00, 0xdc00, 0x7bf8},
13987 {0x8000, 0xf800, 0x7800},
13988 {0x8100, 0xf7f0, 0x7808},
13989 // Check for bit pattern reproduction.
13990 {0x0123, 0x5c8c, 0x5c8c},
13991 {0x0cde, 0x6a6f, 0x6a6f},
13992 // Simple conversions of negative int64_t values. These require no rounding,
13993 // and the results should not depend on the rounding mode.
13994 {0xf800, 0xe800, 0x7bc0},
13995 {0xfc00, 0xe400, 0x7be0},
13996 {0xc000, 0xf400, 0x7a00},
13997 // Check rounding of negative int16_t values.
13998 {0x8ffe, 0xf700, 0x7880},
13999 {0x8fff, 0xf700, 0x7880},
14000 {0xffee, 0xcc80, 0x7bff},
14001 {0xffef, 0xcc40, 0x7bff}};
14002 int pg_2[] = {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1};
14003 // `32-bit to float16` and `64-bit to float16` of above tests has been tested
14004 // in `ucvtf` of `16-bit to float16`.
14005 TestUScvtfHelper(config, kHRegSize, kHRegSize, pg_2, data_set_2);
14006 // clang-format on
14007}
14008
14009TEST_SVE(scvtf_ucvtf_s_to_float) {
14010 // clang-format off
14011 int dst_lane_size = kSRegSize;
14012 int src_lane_size = kSRegSize;
14013
14014 // Simple conversions of positive numbers which require no rounding; the
14015 // results should not depened on the rounding mode, and ucvtf and scvtf should
14016 // produce the same result.
14017 CvtfTestDataSet data_set_1[] = {
14018 {0x00000000, 0x00000000, 0x00000000},
14019 {0x00000001, 0x3f800000, 0x3f800000},
14020 {0x00004000, 0x46800000, 0x46800000},
14021 {0x00010000, 0x47800000, 0x47800000},
14022 {0x40000000, 0x4e800000, 0x4e800000}};
14023 int pg_1[] = {1, 0, 1, 0, 0};
14024 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
14025
14026 CvtfTestDataSet data_set_2[] = {
14027 // Test mantissa extremities.
14028 {0x00800001, 0x4b000001, 0x4b000001},
14029 {0x40400000, 0x4e808000, 0x4e808000},
14030 // The largest int32_t that fits in a double.
14031 {0x7fffff80, 0x4effffff, 0x4effffff},
14032 // Values that would be negative if treated as an int32_t.
14033 {0xffffffff, 0xbf800000, 0x4f800000},
14034 {0xffffff00, 0xc3800000, 0x4f7fffff},
14035 {0x80000000, 0xcf000000, 0x4f000000},
14036 {0x80000001, 0xcf000000, 0x4f000000},
14037 // Check for bit pattern reproduction.
14038 {0x089abcde, 0x4d09abce, 0x4d09abce},
14039 {0x12345678, 0x4d91a2b4, 0x4d91a2b4}};
14040 int pg_2[] = {1, 0, 1, 0, 1, 1, 1, 0, 0};
14041 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
14042
14043 // Simple conversions of negative int32_t values. These require no rounding,
14044 // and the results should not depend on the rounding mode.
14045 CvtfTestDataSet data_set_3[] = {
14046 {0xffffc000, 0xc6800000, 0x4f7fffc0},
14047 {0xffff0000, 0xc7800000, 0x4f7fff00},
14048 {0xc0000000, 0xce800000, 0x4f400000},
14049 // Conversions which require rounding.
14050 {0x72800000, 0x4ee50000, 0x4ee50000},
14051 {0x72800001, 0x4ee50000, 0x4ee50000},
14052 {0x73000000, 0x4ee60000, 0x4ee60000},
14053 // Check rounding of negative int32_t values.
14054 {0x80000140, 0xcefffffe, 0x4f000001},
14055 {0x80000141, 0xcefffffd, 0x4f000001},
14056 {0x80000180, 0xcefffffd, 0x4f000002},
14057 // Round up to produce a result that's too big for the input to represent.
14058 {0x7fffffc0, 0x4f000000, 0x4f000000},
14059 {0x7fffffff, 0x4f000000, 0x4f000000}};
14060 int pg_3[] = {1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0};
14061 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
14062 // clang-format on
14063}
14064
14065TEST_SVE(scvtf_ucvtf_d_to_float) {
14066 // clang-format off
14067 int dst_lane_size = kSRegSize;
14068 int src_lane_size = kDRegSize;
14069
14070 // Simple conversions of positive numbers which require no rounding; the
14071 // results should not depened on the rounding mode, and ucvtf and scvtf should
14072 // produce the same result.
14073 CvtfTestDataSet data_set_1[] = {
14074 {0x0000000000000000, 0x00000000, 0x00000000},
14075 {0x0000000000000001, 0x3f800000, 0x3f800000},
14076 {0x0000000040000000, 0x4e800000, 0x4e800000},
14077 {0x0000000100000000, 0x4f800000, 0x4f800000},
14078 {0x4000000000000000, 0x5e800000, 0x5e800000}};
14079 int pg_1[] = {1, 1, 0, 1, 0};
14080 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
14081
14082 CvtfTestDataSet data_set_2[] = {
14083 // Test mantissa extremities.
14084 {0x0010000000000001, 0x59800000, 0x59800000},
14085 {0x4008000000000000, 0x5e801000, 0x5e801000},
14086 // The largest int32_t that fits in a float.
14087 {0x000000007fffff80, 0x4effffff, 0x4effffff},
14088 // Values that would be negative if treated as an int32_t.
14089 {0x00000000ffffffff, 0x4f800000, 0x4f800000},
14090 {0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff},
14091 {0x0000000080000000, 0x4f000000, 0x4f000000},
14092 {0x0000000080000100, 0x4f000001, 0x4f000001},
14093 // The largest int64_t that fits in a float.
14094 {0x7fffff8000000000, 0x5effffff, 0x5effffff},
14095 // Check for bit pattern reproduction.
14096 {0x0123456789abcde0, 0x5b91a2b4, 0x5b91a2b4},
14097 {0x0000000000876543, 0x4b076543, 0x4b076543}};
14098 int pg_2[] = {1, 0, 0, 0, 1, 0, 0, 0, 0, 1};
14099 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
14100
14101 CvtfTestDataSet data_set_3[] = {
14102 // Simple conversions of negative int64_t values. These require no rounding,
14103 // and the results should not depend on the rounding mode.
14104 {0xffffffffc0000000, 0xce800000, 0x5f800000},
14105 {0xffffffff00000000, 0xcf800000, 0x5f800000},
14106 {0xc000000000000000, 0xde800000, 0x5f400000},
14107 // Conversions which require rounding.
14108 {0x0000800002800000, 0x57000002, 0x57000002},
14109 {0x0000800002800001, 0x57000003, 0x57000003},
14110 {0x0000800003000000, 0x57000003, 0x57000003},
14111 // Check rounding of negative int64_t values.
14112 {0x8000014000000000, 0xdefffffe, 0x5f000001},
14113 {0x8000014000000001, 0xdefffffd, 0x5f000001},
14114 {0x8000018000000000, 0xdefffffd, 0x5f000002},
14115 // Round up to produce a result that's too big for the input to represent.
14116 {0x00000000ffffff80, 0x4f800000, 0x4f800000},
14117 {0x00000000ffffffff, 0x4f800000, 0x4f800000},
14118 {0xffffff8000000000, 0xd3000000, 0x5f800000},
14119 {0xffffffffffffffff, 0xbf800000, 0x5f800000}};
14120 int pg_3[] = {0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1};
14121 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
14122 // clang-format on
14123}
14124
14125TEST_SVE(scvtf_ucvtf_d_to_double) {
14126 // clang-format off
14127 int dst_lane_size = kDRegSize;
14128 int src_lane_size = kDRegSize;
14129
14130 // Simple conversions of positive numbers which require no rounding; the
14131 // results should not depened on the rounding mode, and ucvtf and scvtf should
14132 // produce the same result.
14133 CvtfTestDataSet data_set_1[] = {
14134 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
14135 {0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000},
14136 {0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000},
14137 {0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000},
14138 {0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000}};
14139 int pg_1[] = {0, 1, 1, 0, 0};
14140 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
14141
14142 CvtfTestDataSet data_set_2[] = {
14143 // Test mantissa extremities.
14144 {0x0010000000000001, 0x4330000000000001, 0x4330000000000001},
14145 {0x4008000000000000, 0x43d0020000000000, 0x43d0020000000000},
14146 // The largest int32_t that fits in a double.
14147 {0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000},
14148 // Values that would be negative if treated as an int32_t.
14149 {0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000},
14150 {0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000},
14151 {0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000},
14152 // The largest int64_t that fits in a double.
14153 {0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff},
14154 // Check for bit pattern reproduction.
14155 {0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde},
14156 {0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000}};
14157 int pg_2[] = {1, 1, 1, 1, 1, 0, 0, 0, 0};
14158 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
14159
14160 CvtfTestDataSet data_set_3[] = {
14161 // Simple conversions of negative int64_t values. These require no rounding,
14162 // and the results should not depend on the rounding mode.
14163 {0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000},
14164 {0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000},
14165 {0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000},
14166 // Conversions which require rounding.
14167 {0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002},
14168 {0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003},
14169 {0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003},
14170 // Check rounding of negative int64_t values.
14171 {0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001},
14172 {0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001},
14173 {0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002},
14174 // Round up to produce a result that's too big for the input to represent.
14175 {0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000},
14176 {0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000},
14177 {0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000},
14178 {0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000}};
14179 int pg_3[] = {1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0};
14180 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
14181 // clang-format on
14182}
14183
14184TEST_SVE(scvtf_ucvtf_s_to_double) {
14185 // clang-format off
14186 int dst_lane_size = kDRegSize;
14187 int src_lane_size = kSRegSize;
14188
14189 // Simple conversions of positive numbers which require no rounding; the
14190 // results should not depened on the rounding mode, and ucvtf and scvtf should
14191 // produce the same result.
14192 CvtfTestDataSet data_set_1[] = {
14193 {0x00000000, 0x0000000000000000, 0x0000000000000000},
14194 {0x00000001, 0x3ff0000000000000, 0x3ff0000000000000},
14195 {0x00004000, 0x40d0000000000000, 0x40d0000000000000},
14196 {0x00010000, 0x40f0000000000000, 0x40f0000000000000},
14197 {0x40000000, 0x41d0000000000000, 0x41d0000000000000}};
14198 int pg_1[] = {1, 0, 0, 0, 1};
14199 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
14200
14201 CvtfTestDataSet data_set_2[] = {
14202 // Test mantissa extremities.
14203 {0x40000400, 0x41d0000100000000, 0x41d0000100000000},
14204 // The largest int32_t that fits in a double.
14205 {0x7fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000},
14206 // Values that would be negative if treated as an int32_t.
14207 {0xffffffff, 0xbff0000000000000, 0x41efffffffe00000},
14208 {0x80000000, 0xc1e0000000000000, 0x41e0000000000000},
14209 {0x80000001, 0xc1dfffffffc00000, 0x41e0000000200000},
14210 // Check for bit pattern reproduction.
14211 {0x089abcde, 0x41a13579bc000000, 0x41a13579bc000000},
14212 {0x12345678, 0x41b2345678000000, 0x41b2345678000000},
14213 // Simple conversions of negative int32_t values. These require no rounding,
14214 // and the results should not depend on the rounding mode.
14215 {0xffffc000, 0xc0d0000000000000, 0x41effff800000000},
14216 {0xffff0000, 0xc0f0000000000000, 0x41efffe000000000},
14217 {0xc0000000, 0xc1d0000000000000, 0x41e8000000000000}};
14218 int pg_2[] = {1, 0, 1, 0, 0, 1, 1, 0, 1, 1};
14219 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
14220
14221 // Note that IEEE 754 double-precision format has 52-bits fraction, so all
14222 // 32-bits integers are representable in double.
14223 // clang-format on
14224}
14225
Martyn Capewell4a9829f2020-01-30 17:41:01 +000014226TEST_SVE(sve_fadda) {
14227 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
14228 CPUFeatures::kFP,
14229 CPUFeatures::kFPHalf);
14230 START();
14231
14232 __ Ptrue(p0.VnB());
14233 __ Pfalse(p1.VnB());
14234 __ Zip1(p1.VnH(), p0.VnH(), p1.VnH());
14235
14236 __ Index(z0.VnS(), 3, 3);
14237 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
14238 __ Fmov(s2, 2.0);
14239 __ Fadda(s2, p0, s2, z0.VnS());
14240
14241 __ Index(z0.VnD(), -7, -7);
14242 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
14243 __ Fmov(d3, 3.0);
14244 __ Fadda(d3, p0, d3, z0.VnD());
14245
14246 __ Index(z0.VnH(), 1, 1);
14247 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
14248 __ Fmov(h4, 0);
14249 __ Fadda(h4, p1, h4, z0.VnH());
14250 END();
14251
14252 if (CAN_RUN()) {
14253 RUN();
14254 // Sum of 1 .. n is n+1 * n/2, ie. n(n+1)/2.
14255 int n = core.GetSVELaneCount(kSRegSize);
14256 ASSERT_EQUAL_FP32(2 + 3 * ((n + 1) * (n / 2)), s2);
14257
14258 n /= 2; // Half as many lanes.
14259 ASSERT_EQUAL_FP64(3 + -7 * ((n + 1) * (n / 2)), d3);
14260
14261 // Sum of first n odd numbers is n^2.
14262 n = core.GetSVELaneCount(kHRegSize) / 2; // Half are odd numbers.
14263 ASSERT_EQUAL_FP16(Float16(n * n), h4);
14264 }
14265}
14266
Martyn Capewellac07af12019-12-02 14:55:05 +000014267TEST_SVE(sve_extract) {
14268 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14269 START();
14270
14271 __ Index(z0.VnB(), 0, 1);
14272
14273 __ Mov(z1, z0);
14274 __ Mov(z2, z0);
14275 __ Mov(z3, z0);
14276 __ Mov(z4, z0);
14277 __ Mov(z5, z0);
14278 __ Mov(z6, z0);
14279
14280 __ Ext(z1, z1, z0, 0);
14281 __ Ext(z2, z2, z0, 1);
14282 __ Ext(z3, z3, z0, 15);
14283 __ Ext(z4, z4, z0, 31);
14284 __ Ext(z5, z5, z0, 47);
14285 __ Ext(z6, z6, z0, 255);
14286
14287 END();
14288
14289 if (CAN_RUN()) {
14290 RUN();
14291
14292 ASSERT_EQUAL_SVE(z1, z0);
14293
14294 int lane_count = core.GetSVELaneCount(kBRegSize);
14295 if (lane_count == 16) {
14296 uint64_t z2_expected[] = {0x000f0e0d0c0b0a09, 0x0807060504030201};
14297 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
14298 } else {
14299 uint64_t z2_expected[] = {0x100f0e0d0c0b0a09, 0x0807060504030201};
14300 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
14301 }
14302
14303 if (lane_count == 16) {
14304 uint64_t z3_expected[] = {0x0e0d0c0b0a090807, 0x060504030201000f};
14305 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
14306 } else {
14307 uint64_t z3_expected[] = {0x1e1d1c1b1a191817, 0x161514131211100f};
14308 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
14309 }
14310
14311 if (lane_count < 32) {
14312 ASSERT_EQUAL_SVE(z4, z0);
14313 } else if (lane_count == 32) {
14314 uint64_t z4_expected[] = {0x0e0d0c0b0a090807, 0x060504030201001f};
14315 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
14316 } else {
14317 uint64_t z4_expected[] = {0x2e2d2c2b2a292827, 0x262524232221201f};
14318 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
14319 }
14320
14321 if (lane_count < 48) {
14322 ASSERT_EQUAL_SVE(z5, z0);
14323 } else if (lane_count == 48) {
14324 uint64_t z5_expected[] = {0x0e0d0c0b0a090807, 0x060504030201002f};
14325 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
14326 } else {
14327 uint64_t z5_expected[] = {0x3e3d3c3b3a393837, 0x363534333231302f};
14328 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
14329 }
14330
14331 if (lane_count < 256) {
14332 ASSERT_EQUAL_SVE(z6, z0);
14333 } else {
14334 uint64_t z6_expected[] = {0x0e0d0c0b0a090807, 0x06050403020100ff};
14335 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
14336 }
14337 }
14338}
14339
Martyn Capewell894962f2020-02-05 15:46:44 +000014340TEST_SVE(sve_fp_paired_across) {
14341 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14342
14343 START();
14344
14345 __ Ptrue(p0.VnB());
14346 __ Pfalse(p1.VnB());
14347 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
14348 __ Zip1(p3.VnD(), p0.VnD(), p1.VnD());
14349 __ Zip1(p4.VnH(), p0.VnH(), p1.VnH());
14350
14351 __ Index(z0.VnS(), 3, 3);
14352 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
14353 __ Faddv(s1, p0, z0.VnS());
14354 __ Fminv(s2, p2, z0.VnS());
14355 __ Fmaxv(s3, p2, z0.VnS());
14356
14357 __ Index(z0.VnD(), -7, -7);
14358 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
14359 __ Faddv(d4, p0, z0.VnD());
14360 __ Fminv(d5, p3, z0.VnD());
14361 __ Fmaxv(d6, p3, z0.VnD());
14362
14363 __ Index(z0.VnH(), 1, 1);
14364 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
14365 __ Faddv(h7, p4, z0.VnH());
14366 __ Fminv(h8, p4, z0.VnH());
14367 __ Fmaxv(h9, p4, z0.VnH());
14368
14369 __ Dup(z10.VnH(), 0);
14370 __ Fdiv(z10.VnH(), p0.Merging(), z10.VnH(), z10.VnH());
14371 __ Insr(z10.VnH(), 0x5140);
14372 __ Insr(z10.VnH(), 0xd140);
14373 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 2);
14374 __ Fmaxnmv(h11, p0, z10.VnH());
14375 __ Fmaxnmv(h12, p4, z10.VnH());
14376 __ Fminnmv(h13, p0, z10.VnH());
14377 __ Fminnmv(h14, p4, z10.VnH());
14378
14379 __ Dup(z10.VnS(), 0);
14380 __ Fdiv(z10.VnS(), p0.Merging(), z10.VnS(), z10.VnS());
14381 __ Insr(z10.VnS(), 0x42280000);
14382 __ Insr(z10.VnS(), 0xc2280000);
14383 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 4);
14384 __ Fmaxnmv(s15, p0, z10.VnS());
14385 __ Fmaxnmv(s16, p2, z10.VnS());
14386 __ Fminnmv(s17, p0, z10.VnS());
14387 __ Fminnmv(s18, p2, z10.VnS());
14388
14389 __ Dup(z10.VnD(), 0);
14390 __ Fdiv(z10.VnD(), p0.Merging(), z10.VnD(), z10.VnD());
14391 __ Insr(z10.VnD(), 0x4045000000000000);
14392 __ Insr(z10.VnD(), 0xc045000000000000);
14393 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 8);
14394 __ Fmaxnmv(d19, p0, z10.VnD());
14395 __ Fmaxnmv(d20, p3, z10.VnD());
14396 __ Fminnmv(d21, p0, z10.VnD());
14397 __ Fminnmv(d22, p3, z10.VnD());
14398 END();
14399
14400 if (CAN_RUN()) {
14401 RUN();
14402 // Sum of 1 .. n is n+1 * n/2, ie. n(n+1)/2.
14403 int n = core.GetSVELaneCount(kSRegSize);
14404 ASSERT_EQUAL_FP32(3 * ((n + 1) * (n / 2)), s1);
14405 ASSERT_EQUAL_FP32(3, s2);
14406 ASSERT_EQUAL_FP32(3 * n - 3, s3);
14407
14408 n /= 2; // Half as many lanes.
14409 ASSERT_EQUAL_FP64(-7 * ((n + 1) * (n / 2)), d4);
14410 ASSERT_EQUAL_FP64(-7 * (n - 1), d5);
14411 ASSERT_EQUAL_FP64(-7, d6);
14412
14413 // Sum of first n odd numbers is n^2.
14414 n = core.GetSVELaneCount(kHRegSize) / 2; // Half are odd numbers.
14415 ASSERT_EQUAL_FP16(Float16(n * n), h7);
14416 ASSERT_EQUAL_FP16(Float16(1), h8);
14417
14418 n = core.GetSVELaneCount(kHRegSize);
14419 ASSERT_EQUAL_FP16(Float16(n - 1), h9);
14420
14421 ASSERT_EQUAL_FP16(Float16(42), h11);
14422 ASSERT_EQUAL_FP16(Float16(42), h12);
14423 ASSERT_EQUAL_FP16(Float16(-42), h13);
14424 ASSERT_EQUAL_FP16(Float16(42), h14);
14425 ASSERT_EQUAL_FP32(42, s15);
14426 ASSERT_EQUAL_FP32(42, s16);
14427 ASSERT_EQUAL_FP32(-42, s17);
14428 ASSERT_EQUAL_FP32(42, s18);
14429 ASSERT_EQUAL_FP64(42, d19);
14430 ASSERT_EQUAL_FP64(42, d20);
14431 ASSERT_EQUAL_FP64(-42, d21);
14432 ASSERT_EQUAL_FP64(42, d22);
14433 }
14434}
14435
Martyn Capewell13050ca2020-02-11 16:43:40 +000014436TEST_SVE(sve_frecpe_frsqrte) {
14437 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14438
14439 START();
14440
14441 __ Ptrue(p0.VnB());
14442
14443 __ Index(z0.VnH(), 0, 1);
14444 __ Fdup(z1.VnH(), Float16(1));
14445 __ Fscale(z1.VnH(), p0.Merging(), z1.VnH(), z0.VnH());
14446 __ Insr(z1.VnH(), 0);
14447 __ Frsqrte(z2.VnH(), z1.VnH());
14448 __ Frecpe(z1.VnH(), z1.VnH());
14449
14450 __ Index(z0.VnS(), 0, 1);
14451 __ Fdup(z3.VnS(), Float16(1));
14452 __ Fscale(z3.VnS(), p0.Merging(), z3.VnS(), z0.VnS());
14453 __ Insr(z3.VnS(), 0);
14454 __ Frsqrte(z4.VnS(), z3.VnS());
14455 __ Frecpe(z3.VnS(), z3.VnS());
14456
14457 __ Index(z0.VnD(), 0, 1);
14458 __ Fdup(z5.VnD(), Float16(1));
14459 __ Fscale(z5.VnD(), p0.Merging(), z5.VnD(), z0.VnD());
14460 __ Insr(z5.VnD(), 0);
14461 __ Frsqrte(z6.VnD(), z5.VnD());
14462 __ Frecpe(z5.VnD(), z5.VnD());
14463 END();
14464
14465 if (CAN_RUN()) {
14466 RUN();
14467 uint64_t z1_expected[] = {0x23fc27fc2bfc2ffc, 0x33fc37fc3bfc7c00};
14468 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
14469 uint64_t z2_expected[] = {0x2ffc31a433fc35a4, 0x37fc39a43bfc7c00};
14470 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
14471
14472 uint64_t z3_expected[] = {0x3e7f80003eff8000, 0x3f7f80007f800000};
14473 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
14474 uint64_t z4_expected[] = {0x3eff80003f348000, 0x3f7f80007f800000};
14475 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
14476
14477 uint64_t z5_expected[] = {0x3feff00000000000, 0x7ff0000000000000};
14478 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
14479 uint64_t z6_expected[] = {0x3feff00000000000, 0x7ff0000000000000};
14480 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
14481 }
14482}
14483
Martyn Capewellefd9dc72020-02-13 10:46:29 +000014484TEST_SVE(sve_frecps_frsqrts) {
14485 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14486
14487 START();
14488 __ Ptrue(p0.VnB());
14489
14490 __ Index(z0.VnH(), 0, -1);
14491 __ Fdup(z1.VnH(), Float16(1));
14492 __ Fscale(z1.VnH(), p0.Merging(), z1.VnH(), z0.VnH());
14493 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
14494 __ Insr(z1.VnH(), 0);
14495 __ Frsqrts(z2.VnH(), z1.VnH(), z0.VnH());
14496 __ Frecps(z1.VnH(), z1.VnH(), z0.VnH());
14497
14498 __ Index(z0.VnS(), 0, -1);
14499 __ Fdup(z3.VnS(), Float16(1));
14500 __ Fscale(z3.VnS(), p0.Merging(), z3.VnS(), z0.VnS());
14501 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
14502 __ Insr(z3.VnS(), 0);
14503 __ Frsqrts(z4.VnS(), z3.VnS(), z0.VnS());
14504 __ Frecps(z3.VnS(), z3.VnS(), z0.VnS());
14505
14506 __ Index(z0.VnD(), 0, -1);
14507 __ Fdup(z5.VnD(), Float16(1));
14508 __ Fscale(z5.VnD(), p0.Merging(), z5.VnD(), z0.VnD());
14509 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
14510 __ Insr(z5.VnD(), 0);
14511 __ Frsqrts(z6.VnD(), z5.VnD(), z0.VnD());
14512 __ Frecps(z5.VnD(), z5.VnD(), z0.VnD());
14513 END();
14514
14515 if (CAN_RUN()) {
14516 RUN();
14517 uint64_t z1_expected[] = {0x4038406040a04100, 0x4180420042004000};
14518 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
14519 uint64_t z2_expected[] = {0x3e383e603ea03f00, 0x3f80400040003e00};
14520 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
14521
14522 uint64_t z3_expected[] = {0x4030000040400000, 0x4040000040000000};
14523 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
14524 uint64_t z4_expected[] = {0x3ff0000040000000, 0x400000003fc00000};
14525 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
14526
14527 uint64_t z5_expected[] = {0x4008000000000000, 0x4000000000000000};
14528 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
14529 uint64_t z6_expected[] = {0x4000000000000000, 0x3ff8000000000000};
14530 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
14531 }
14532}
14533
14534TEST_SVE(sve_ftsmul) {
14535 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14536
14537 START();
14538 __ Ptrue(p0.VnB());
14539
14540 __ Index(z0.VnH(), 0, 1);
14541 __ Rev(z1.VnH(), z0.VnH());
14542 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
14543 __ Dup(z2.VnH(), 0);
14544 __ Fdiv(z2.VnH(), p0.Merging(), z2.VnH(), z2.VnH());
14545 __ Ftsmul(z3.VnH(), z0.VnH(), z1.VnH());
14546 __ Ftsmul(z4.VnH(), z2.VnH(), z1.VnH());
14547
14548 __ Index(z0.VnS(), -7, 1);
14549 __ Rev(z1.VnS(), z0.VnS());
14550 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
14551 __ Dup(z2.VnS(), 0);
14552 __ Fdiv(z2.VnS(), p0.Merging(), z2.VnS(), z2.VnS());
14553 __ Ftsmul(z5.VnS(), z0.VnS(), z1.VnS());
14554 __ Ftsmul(z6.VnS(), z2.VnS(), z1.VnS());
14555
14556 __ Index(z0.VnD(), 2, -1);
14557 __ Rev(z1.VnD(), z0.VnD());
14558 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
14559 __ Dup(z2.VnD(), 0);
14560 __ Fdiv(z2.VnD(), p0.Merging(), z2.VnD(), z2.VnD());
14561 __ Ftsmul(z7.VnD(), z0.VnD(), z1.VnD());
14562 __ Ftsmul(z8.VnD(), z2.VnD(), z1.VnD());
14563 END();
14564
14565 if (CAN_RUN()) {
14566 RUN();
14567 uint64_t z3_expected[] = {0x5220d0804e40cc00, 0x4880c4003c008000};
14568 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
14569 uint64_t z4_expected[] = {0x7e007e007e007e00, 0x7e007e007e007e00};
14570 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
14571
14572 uint64_t z5_expected[] = {0x41800000c1c80000, 0x42100000c2440000};
14573 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
14574 uint64_t z6_expected[] = {0x7fc000007fc00000, 0x7fc000007fc00000};
14575 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
14576
14577 uint64_t z7_expected[] = {0x3ff0000000000000, 0xc010000000000000};
14578 ASSERT_EQUAL_SVE(z7_expected, z7.VnD());
14579 uint64_t z8_expected[] = {0x7ff8000000000000, 0x7ff8000000000000};
14580 ASSERT_EQUAL_SVE(z8_expected, z8.VnD());
14581 }
14582}
TatWai Chongf8d29f12020-02-16 22:53:18 -080014583
14584typedef void (MacroAssembler::*FPMulAccFn)(
14585 const ZRegister& zd,
14586 const PRegisterM& pg,
14587 const ZRegister& za,
14588 const ZRegister& zn,
14589 const ZRegister& zm,
14590 FPMacroNaNPropagationOption nan_option);
14591
14592// The `pg_inputs` is used for examining the predication correctness internally.
14593// It does not imply the value of `result` argument. `result` stands for the
14594// expected result on all-true predication.
14595template <typename T, size_t N>
14596static void FPMulAccHelper(
14597 Test* config,
14598 FPMulAccFn macro,
14599 unsigned lane_size_in_bits,
14600 const int (&pg_inputs)[N],
14601 const T (&za_inputs)[N],
14602 const T (&zn_inputs)[N],
14603 const T (&zm_inputs)[N],
14604 const uint64_t (&result)[N],
14605 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
14606 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14607 START();
14608
14609 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
14610 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
14611 ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
14612 ZRegister zm = z3.WithLaneSize(lane_size_in_bits);
14613
14614 uint64_t za_rawbits[N];
14615 uint64_t zn_rawbits[N];
14616 uint64_t zm_rawbits[N];
14617
14618 FPToRawbitsWithSize(za_inputs, za_rawbits, lane_size_in_bits);
14619 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
14620 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
14621
14622 InsrHelper(&masm, za, za_rawbits);
14623 InsrHelper(&masm, zn, zn_rawbits);
14624 InsrHelper(&masm, zm, zm_rawbits);
14625
TatWai Chong2cb1b612020-03-04 23:51:21 -080014626 // Initialize `zd` with a signalling NaN.
14627 uint64_t sn = GetSignallingNan(lane_size_in_bits);
14628 __ Mov(x29, sn);
14629 __ Dup(zd, x29);
TatWai Chongf8d29f12020-02-16 22:53:18 -080014630
14631 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
14632
14633 // Fmla macro automatically selects between fmla, fmad and movprfx + fmla
14634 // Fmls `ditto` fmls, fmsb and movprfx + fmls
14635 // Fnmla `ditto` fnmla, fnmad and movprfx + fnmla
14636 // Fnmls `ditto` fnmls, fnmsb and movprfx + fnmls
14637 // based on what registers are aliased.
14638 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
14639 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
14640 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
14641 ZRegister d_result = z13.WithLaneSize(lane_size_in_bits);
14642
14643 __ Mov(da_result, za);
14644 (masm.*macro)(da_result, p0.Merging(), da_result, zn, zm, nan_option);
14645
14646 __ Mov(dn_result, zn);
14647 (masm.*macro)(dn_result, p0.Merging(), za, dn_result, zm, nan_option);
14648
14649 __ Mov(dm_result, zm);
14650 (masm.*macro)(dm_result, p0.Merging(), za, zn, dm_result, nan_option);
14651
14652 __ Mov(d_result, zd);
14653 (masm.*macro)(d_result, p0.Merging(), za, zn, zm, nan_option);
14654
14655 END();
14656
14657 if (CAN_RUN()) {
14658 RUN();
14659
14660 ASSERT_EQUAL_SVE(za_rawbits, za);
14661 ASSERT_EQUAL_SVE(zn_rawbits, zn);
14662 ASSERT_EQUAL_SVE(zm_rawbits, zm);
14663
14664 uint64_t da_expected[N];
14665 uint64_t dn_expected[N];
14666 uint64_t dm_expected[N];
14667 uint64_t d_expected[N];
14668 for (size_t i = 0; i < N; i++) {
14669 da_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : za_rawbits[i];
14670 dn_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zn_rawbits[i];
14671 dm_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zm_rawbits[i];
TatWai Chong2cb1b612020-03-04 23:51:21 -080014672 d_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : sn;
TatWai Chongf8d29f12020-02-16 22:53:18 -080014673 }
14674
14675 ASSERT_EQUAL_SVE(da_expected, da_result);
14676 ASSERT_EQUAL_SVE(dn_expected, dn_result);
14677 ASSERT_EQUAL_SVE(dm_expected, dm_result);
14678 ASSERT_EQUAL_SVE(d_expected, d_result);
14679 }
14680}
14681
14682TEST_SVE(sve_fmla_fmad) {
14683 // fmla : zd = za + zn * zm
14684 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
14685 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
14686 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
14687 int pg_inputs[] = {1, 1, 0, 1};
14688
14689 uint64_t fmla_result_h[] = {Float16ToRawbits(Float16(-84.0)),
14690 Float16ToRawbits(Float16(101.0)),
14691 Float16ToRawbits(Float16(33.0)),
14692 Float16ToRawbits(Float16(42.0))};
14693
14694 // `fmad` has been tested in the helper.
14695 FPMulAccHelper(config,
14696 &MacroAssembler::Fmla,
14697 kHRegSize,
14698 pg_inputs,
14699 za_inputs,
14700 zn_inputs,
14701 zm_inputs,
14702 fmla_result_h);
14703
14704 uint64_t fmla_result_s[] = {FloatToRawbits(-84.0f),
14705 FloatToRawbits(101.0f),
14706 FloatToRawbits(33.0f),
14707 FloatToRawbits(42.0f)};
14708
14709 FPMulAccHelper(config,
14710 &MacroAssembler::Fmla,
14711 kSRegSize,
14712 pg_inputs,
14713 za_inputs,
14714 zn_inputs,
14715 zm_inputs,
14716 fmla_result_s);
14717
14718 uint64_t fmla_result_d[] = {DoubleToRawbits(-84.0),
14719 DoubleToRawbits(101.0),
14720 DoubleToRawbits(33.0),
14721 DoubleToRawbits(42.0)};
14722
14723 FPMulAccHelper(config,
14724 &MacroAssembler::Fmla,
14725 kDRegSize,
14726 pg_inputs,
14727 za_inputs,
14728 zn_inputs,
14729 zm_inputs,
14730 fmla_result_d);
14731}
14732
14733TEST_SVE(sve_fmls_fmsb) {
14734 // fmls : zd = za - zn * zm
14735 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
14736 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
14737 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
14738 int pg_inputs[] = {1, 0, 1, 1};
14739
14740 uint64_t fmls_result_h[] = {Float16ToRawbits(Float16(6.0)),
14741 Float16ToRawbits(Float16(-99.0)),
14742 Float16ToRawbits(Float16(-39.0)),
14743 Float16ToRawbits(Float16(-38.0))};
14744
14745 // `fmsb` has been tested in the helper.
14746 FPMulAccHelper(config,
14747 &MacroAssembler::Fmls,
14748 kHRegSize,
14749 pg_inputs,
14750 za_inputs,
14751 zn_inputs,
14752 zm_inputs,
14753 fmls_result_h);
14754
14755 uint64_t fmls_result_s[] = {FloatToRawbits(6.0f),
14756 FloatToRawbits(-99.0f),
14757 FloatToRawbits(-39.0f),
14758 FloatToRawbits(-38.0f)};
14759
14760 FPMulAccHelper(config,
14761 &MacroAssembler::Fmls,
14762 kSRegSize,
14763 pg_inputs,
14764 za_inputs,
14765 zn_inputs,
14766 zm_inputs,
14767 fmls_result_s);
14768
14769 uint64_t fmls_result_d[] = {DoubleToRawbits(6.0),
14770 DoubleToRawbits(-99.0),
14771 DoubleToRawbits(-39.0),
14772 DoubleToRawbits(-38.0)};
14773
14774 FPMulAccHelper(config,
14775 &MacroAssembler::Fmls,
14776 kDRegSize,
14777 pg_inputs,
14778 za_inputs,
14779 zn_inputs,
14780 zm_inputs,
14781 fmls_result_d);
14782}
14783
14784TEST_SVE(sve_fnmla_fnmad) {
14785 // fnmla : zd = -za - zn * zm
14786 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
14787 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
14788 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
14789 int pg_inputs[] = {0, 1, 1, 1};
14790
14791 uint64_t fnmla_result_h[] = {Float16ToRawbits(Float16(84.0)),
14792 Float16ToRawbits(Float16(-101.0)),
14793 Float16ToRawbits(Float16(-33.0)),
14794 Float16ToRawbits(Float16(-42.0))};
14795
14796 // `fnmad` has been tested in the helper.
14797 FPMulAccHelper(config,
14798 &MacroAssembler::Fnmla,
14799 kHRegSize,
14800 pg_inputs,
14801 za_inputs,
14802 zn_inputs,
14803 zm_inputs,
14804 fnmla_result_h);
14805
14806 uint64_t fnmla_result_s[] = {FloatToRawbits(84.0f),
14807 FloatToRawbits(-101.0f),
14808 FloatToRawbits(-33.0f),
14809 FloatToRawbits(-42.0f)};
14810
14811 FPMulAccHelper(config,
14812 &MacroAssembler::Fnmla,
14813 kSRegSize,
14814 pg_inputs,
14815 za_inputs,
14816 zn_inputs,
14817 zm_inputs,
14818 fnmla_result_s);
14819
14820 uint64_t fnmla_result_d[] = {DoubleToRawbits(84.0),
14821 DoubleToRawbits(-101.0),
14822 DoubleToRawbits(-33.0),
14823 DoubleToRawbits(-42.0)};
14824
14825 FPMulAccHelper(config,
14826 &MacroAssembler::Fnmla,
14827 kDRegSize,
14828 pg_inputs,
14829 za_inputs,
14830 zn_inputs,
14831 zm_inputs,
14832 fnmla_result_d);
14833}
14834
14835TEST_SVE(sve_fnmls_fnmsb) {
14836 // fnmls : zd = -za + zn * zm
14837 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
14838 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
14839 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
14840 int pg_inputs[] = {1, 1, 1, 0};
14841
14842 uint64_t fnmls_result_h[] = {Float16ToRawbits(Float16(-6.0)),
14843 Float16ToRawbits(Float16(99.0)),
14844 Float16ToRawbits(Float16(39.0)),
14845 Float16ToRawbits(Float16(38.0))};
14846
14847 // `fnmsb` has been tested in the helper.
14848 FPMulAccHelper(config,
14849 &MacroAssembler::Fnmls,
14850 kHRegSize,
14851 pg_inputs,
14852 za_inputs,
14853 zn_inputs,
14854 zm_inputs,
14855 fnmls_result_h);
14856
14857 uint64_t fnmls_result_s[] = {FloatToRawbits(-6.0f),
14858 FloatToRawbits(99.0f),
14859 FloatToRawbits(39.0f),
14860 FloatToRawbits(38.0f)};
14861
14862 FPMulAccHelper(config,
14863 &MacroAssembler::Fnmls,
14864 kSRegSize,
14865 pg_inputs,
14866 za_inputs,
14867 zn_inputs,
14868 zm_inputs,
14869 fnmls_result_s);
14870
14871 uint64_t fnmls_result_d[] = {DoubleToRawbits(-6.0),
14872 DoubleToRawbits(99.0),
14873 DoubleToRawbits(39.0),
14874 DoubleToRawbits(38.0)};
14875
14876 FPMulAccHelper(config,
14877 &MacroAssembler::Fnmls,
14878 kDRegSize,
14879 pg_inputs,
14880 za_inputs,
14881 zn_inputs,
14882 zm_inputs,
14883 fnmls_result_d);
14884}
14885
TatWai Chonga2c1bb72020-02-16 23:16:47 -080014886typedef void (MacroAssembler::*FPMulAccIdxFn)(const ZRegister& zd,
14887 const ZRegister& za,
14888 const ZRegister& zn,
14889 const ZRegister& zm,
14890 int index);
14891
14892template <typename T, size_t N>
14893static void FPMulAccIdxHelper(Test* config,
14894 FPMulAccFn macro,
14895 FPMulAccIdxFn macro_idx,
14896 const T (&za_inputs)[N],
14897 const T (&zn_inputs)[N],
14898 const T (&zm_inputs)[N]) {
14899 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
14900 START();
14901
14902 InsrHelper(&masm, z0.VnD(), zm_inputs);
14903 InsrHelper(&masm, z1.VnD(), zn_inputs);
14904 InsrHelper(&masm, z2.VnD(), za_inputs);
14905
14906 __ Mov(z3, z0);
14907 (masm.*macro_idx)(z3.VnH(), z2.VnH(), z1.VnH(), z3.VnH(), 0); // zd == zm
14908 __ Mov(z4, z1);
14909 (masm.*macro_idx)(z4.VnH(), z2.VnH(), z4.VnH(), z0.VnH(), 1); // zd == zn
14910 __ Mov(z5, z2);
14911 (masm.*macro_idx)(z5.VnH(), z5.VnH(), z1.VnH(), z0.VnH(), 4); // zd == za
14912 (masm.*macro_idx)(z6.VnH(), z2.VnH(), z1.VnH(), z0.VnH(), 7);
14913
14914 __ Mov(z7, z0);
14915 (masm.*macro_idx)(z7.VnS(), z2.VnS(), z1.VnS(), z7.VnS(), 0); // zd == zm
14916 __ Mov(z8, z1);
14917 (masm.*macro_idx)(z8.VnS(), z2.VnS(), z8.VnS(), z0.VnS(), 1); // zd == zn
14918 __ Mov(z9, z2);
14919 (masm.*macro_idx)(z9.VnS(), z9.VnS(), z1.VnS(), z0.VnS(), 2); // zd == za
14920 (masm.*macro_idx)(z10.VnS(), z2.VnS(), z1.VnS(), z0.VnS(), 3);
14921
14922 __ Mov(z11, z0);
14923 (masm.*macro_idx)(z11.VnD(), z2.VnD(), z1.VnD(), z11.VnD(), 0); // zd == zm
14924 __ Mov(z12, z1);
14925 (masm.*macro_idx)(z12.VnD(), z2.VnD(), z12.VnD(), z0.VnD(), 1); // zd == zn
14926 __ Mov(z13, z2);
14927 (masm.*macro_idx)(z13.VnD(), z13.VnD(), z1.VnD(), z0.VnD(), 0); // zd == za
14928 __ Mov(z14, z0);
14929 // zd == zn == zm
14930 (masm.*macro_idx)(z14.VnD(), z2.VnD(), z14.VnD(), z14.VnD(), 1);
14931
14932 __ Ptrue(p0.VnB());
14933
14934 // Indexed form of Fmla and Fmls won't swap argument, passing strict NaN
14935 // propagation mode to ensure the following macros don't swap argument in
14936 // any cases.
14937 FPMacroNaNPropagationOption option = StrictNaNPropagation;
14938 // Compute the results using other instructions.
14939 __ Dup(z31.VnH(), z0.VnH(), 0);
14940 (masm.*macro)(z15.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
14941 __ Dup(z31.VnH(), z0.VnH(), 1);
14942 (masm.*macro)(z16.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
14943 __ Dup(z31.VnH(), z0.VnH(), 4);
14944 (masm.*macro)(z17.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
14945 __ Dup(z31.VnH(), z0.VnH(), 7);
14946 (masm.*macro)(z18.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
14947
14948 __ Dup(z31.VnS(), z0.VnS(), 0);
14949 (masm.*macro)(z19.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
14950 __ Dup(z31.VnS(), z0.VnS(), 1);
14951 (masm.*macro)(z20.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
14952 __ Dup(z31.VnS(), z0.VnS(), 2);
14953 (masm.*macro)(z21.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
14954 __ Dup(z31.VnS(), z0.VnS(), 3);
14955 (masm.*macro)(z22.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
14956
14957 __ Dup(z31.VnD(), z0.VnD(), 0);
14958 (masm.*macro)(z23.VnD(), p0.Merging(), z2.VnD(), z1.VnD(), z31.VnD(), option);
14959 __ Dup(z31.VnD(), z0.VnD(), 1);
14960 (masm.*macro)(z24.VnD(), p0.Merging(), z2.VnD(), z1.VnD(), z31.VnD(), option);
14961 __ Dup(z31.VnD(), z0.VnD(), 1);
14962 (masm.*macro)(z25.VnD(), p0.Merging(), z2.VnD(), z0.VnD(), z31.VnD(), option);
14963
14964 END();
14965
14966 if (CAN_RUN()) {
14967 RUN();
14968
14969 ASSERT_EQUAL_SVE(z15.VnH(), z3.VnH());
14970 ASSERT_EQUAL_SVE(z16.VnH(), z4.VnH());
14971 ASSERT_EQUAL_SVE(z17.VnH(), z5.VnH());
14972 ASSERT_EQUAL_SVE(z18.VnH(), z6.VnH());
14973
14974 ASSERT_EQUAL_SVE(z19.VnS(), z7.VnS());
14975 ASSERT_EQUAL_SVE(z20.VnS(), z8.VnS());
14976 ASSERT_EQUAL_SVE(z21.VnS(), z9.VnS());
14977 ASSERT_EQUAL_SVE(z22.VnS(), z10.VnS());
14978
14979 ASSERT_EQUAL_SVE(z23.VnD(), z11.VnD());
14980 ASSERT_EQUAL_SVE(z24.VnD(), z12.VnD());
14981 ASSERT_EQUAL_SVE(z11.VnD(), z13.VnD());
14982 ASSERT_EQUAL_SVE(z25.VnD(), z14.VnD());
14983 }
14984}
14985
14986TEST_SVE(sve_fmla_fmls_index) {
14987 uint64_t zm_inputs_1[] = {0x3ff000003f803c00, 0xbff00000bf80bc00};
14988 uint64_t zn_inputs_1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76};
14989 uint64_t za_inputs_1[] = {0x3c004000bc00c000, 0x64006800e400e800};
14990
14991 // Using the vector form of Fmla and Fmls to verify the indexed form.
14992 FPMulAccIdxHelper(config,
14993 &MacroAssembler::Fmla, // vector form
14994 &MacroAssembler::Fmla, // indexed form
14995 za_inputs_1,
14996 zn_inputs_1,
14997 zm_inputs_1);
14998
14999 FPMulAccIdxHelper(config,
15000 &MacroAssembler::Fmls, // vector form
15001 &MacroAssembler::Fmls, // indexed form
15002 za_inputs_1,
15003 zn_inputs_1,
15004 zm_inputs_1);
15005
15006 uint64_t zm_inputs_2[] = {0x7ff5555511111111, // NaN
15007 0xfff0000000000000}; // Infinity
15008 uint64_t zn_inputs_2[] = {0x7f9511117fc00000, // NaN
15009 0x7f800000ff800000}; // Infinity
15010 uint64_t za_inputs_2[] = {0x7c11000000007e00, // NaN
15011 0x000000007c00fc00}; // Infinity
15012 FPMulAccIdxHelper(config,
15013 &MacroAssembler::Fmla, // vector form
15014 &MacroAssembler::Fmla, // indexed form
15015 za_inputs_2,
15016 zn_inputs_2,
15017 zm_inputs_2);
15018
15019 FPMulAccIdxHelper(config,
15020 &MacroAssembler::Fmls, // vector form
15021 &MacroAssembler::Fmls, // indexed form
15022 za_inputs_2,
15023 zn_inputs_2,
15024 zm_inputs_2);
15025}
15026
TatWai Chongf8d29f12020-02-16 22:53:18 -080015027// Execute a number of instructions which all use ProcessNaNs, and check that
15028// they all propagate NaNs correctly.
15029template <typename Ti, typename Td, size_t N>
15030static void ProcessNaNsHelper(Test* config,
15031 int lane_size_in_bits,
15032 const Ti (&zn_inputs)[N],
15033 const Ti (&zm_inputs)[N],
15034 const Td (&zd_expected)[N],
15035 FPMacroNaNPropagationOption nan_option) {
15036 ArithFn arith_unpredicated_macro[] = {&MacroAssembler::Fadd,
15037 &MacroAssembler::Fsub,
15038 &MacroAssembler::Fmul};
15039
15040 for (size_t i = 0; i < ArrayLength(arith_unpredicated_macro); i++) {
15041 FPBinArithHelper(config,
15042 arith_unpredicated_macro[i],
15043 lane_size_in_bits,
15044 zn_inputs,
15045 zm_inputs,
15046 zd_expected);
15047 }
15048
15049 FPArithPredicatedFn arith_predicated_macro[] = {&MacroAssembler::Fmax,
15050 &MacroAssembler::Fmin};
15051 int pg_inputs[N];
15052 // With an all-true predicate, this helper aims to compare with special
15053 // numbers.
15054 for (size_t i = 0; i < N; i++) {
15055 pg_inputs[i] = 1;
15056 }
15057
15058 // fdivr propagates the quotient (Zm) preferentially, so we don't actually
15059 // need any special handling for StrictNaNPropagation.
15060 FPBinArithHelper(config,
15061 NULL,
15062 &MacroAssembler::Fdiv,
15063 lane_size_in_bits,
15064 // With an all-true predicate, the value in zd is
15065 // irrelevant to the operations.
15066 zn_inputs,
15067 pg_inputs,
15068 zn_inputs,
15069 zm_inputs,
15070 zd_expected);
15071
15072 for (size_t i = 0; i < ArrayLength(arith_predicated_macro); i++) {
15073 FPBinArithHelper(config,
15074 arith_predicated_macro[i],
15075 NULL,
15076 lane_size_in_bits,
15077 // With an all-true predicate, the value in zd is
15078 // irrelevant to the operations.
15079 zn_inputs,
15080 pg_inputs,
15081 zn_inputs,
15082 zm_inputs,
15083 zd_expected,
15084 nan_option);
15085 }
15086}
15087
15088template <typename Ti, typename Td, size_t N>
15089static void ProcessNaNsHelper3(Test* config,
15090 int lane_size_in_bits,
15091 const Ti (&za_inputs)[N],
15092 const Ti (&zn_inputs)[N],
15093 const Ti (&zm_inputs)[N],
15094 const Td (&zd_expected_fmla)[N],
15095 const Td (&zd_expected_fmls)[N],
15096 const Td (&zd_expected_fnmla)[N],
15097 const Td (&zd_expected_fnmls)[N],
15098 FPMacroNaNPropagationOption nan_option) {
15099 int pg_inputs[N];
15100 // With an all-true predicate, this helper aims to compare with special
15101 // numbers.
15102 for (size_t i = 0; i < N; i++) {
15103 pg_inputs[i] = 1;
15104 }
15105
15106 FPMulAccHelper(config,
15107 &MacroAssembler::Fmla,
15108 lane_size_in_bits,
15109 pg_inputs,
15110 za_inputs,
15111 zn_inputs,
15112 zm_inputs,
15113 zd_expected_fmla,
15114 nan_option);
15115
15116 FPMulAccHelper(config,
15117 &MacroAssembler::Fmls,
15118 lane_size_in_bits,
15119 pg_inputs,
15120 za_inputs,
15121 zn_inputs,
15122 zm_inputs,
15123 zd_expected_fmls,
15124 nan_option);
15125
15126 FPMulAccHelper(config,
15127 &MacroAssembler::Fnmla,
15128 lane_size_in_bits,
15129 pg_inputs,
15130 za_inputs,
15131 zn_inputs,
15132 zm_inputs,
15133 zd_expected_fnmla,
15134 nan_option);
15135
15136 FPMulAccHelper(config,
15137 &MacroAssembler::Fnmls,
15138 lane_size_in_bits,
15139 pg_inputs,
15140 za_inputs,
15141 zn_inputs,
15142 zm_inputs,
15143 zd_expected_fnmls,
15144 nan_option);
15145}
15146
15147TEST_SVE(sve_process_nans_double) {
15148 // Use non-standard NaNs to check that the payload bits are preserved.
15149 double sa = RawbitsToDouble(0x7ff5555511111111);
15150 double sn = RawbitsToDouble(0x7ff5555522222222);
15151 double sm = RawbitsToDouble(0x7ff5555533333333);
15152 double qa = RawbitsToDouble(0x7ffaaaaa11111111);
15153 double qn = RawbitsToDouble(0x7ffaaaaa22222222);
15154 double qm = RawbitsToDouble(0x7ffaaaaa33333333);
15155 VIXL_ASSERT(IsSignallingNaN(sa));
15156 VIXL_ASSERT(IsSignallingNaN(sn));
15157 VIXL_ASSERT(IsSignallingNaN(sm));
15158 VIXL_ASSERT(IsQuietNaN(qa));
15159 VIXL_ASSERT(IsQuietNaN(qn));
15160 VIXL_ASSERT(IsQuietNaN(qm));
15161
15162 // The input NaNs after passing through ProcessNaN.
15163 uint64_t sa_proc = 0x7ffd555511111111;
15164 uint64_t sn_proc = 0x7ffd555522222222;
15165 uint64_t sm_proc = 0x7ffd555533333333;
15166 uint64_t qa_proc = DoubleToRawbits(qa);
15167 uint64_t qn_proc = DoubleToRawbits(qn);
15168 uint64_t qm_proc = DoubleToRawbits(qm);
15169 uint64_t sa_proc_n = sa_proc ^ kDSignMask;
15170 uint64_t sn_proc_n = sn_proc ^ kDSignMask;
15171 uint64_t qa_proc_n = qa_proc ^ kDSignMask;
15172 uint64_t qn_proc_n = qn_proc ^ kDSignMask;
15173
15174 // Quiet NaNs are propagated.
15175 double zn_inputs_1[] = {qn, 0.0, 0.0, qm, qn, qm};
15176 double zm_inputs_1[] = {0.0, qn, qm, 0.0, qm, qn};
15177 uint64_t zd_expected_1[] =
15178 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
15179
15180 ProcessNaNsHelper(config,
15181 kDRegSize,
15182 zn_inputs_1,
15183 zm_inputs_1,
15184 zd_expected_1,
15185 StrictNaNPropagation);
15186
15187 // Signalling NaNs are propagated.
15188 double zn_inputs_2[] = {sn, 0.0, 0.0, sm, sn, sm};
15189 double zm_inputs_2[] = {0.0, sn, sm, 0.0, sm, sn};
15190 uint64_t zd_expected_2[] =
15191 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
15192 ProcessNaNsHelper(config,
15193 kDRegSize,
15194 zn_inputs_2,
15195 zm_inputs_2,
15196 zd_expected_2,
15197 StrictNaNPropagation);
15198
15199 // Signalling NaNs take precedence over quiet NaNs.
15200 double zn_inputs_3[] = {sn, qn, sn, sn, qn};
15201 double zm_inputs_3[] = {qm, sm, sm, qn, sn};
15202 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
15203 ProcessNaNsHelper(config,
15204 kDRegSize,
15205 zn_inputs_3,
15206 zm_inputs_3,
15207 zd_expected_3,
15208 StrictNaNPropagation);
15209
15210 double za_inputs_4[] = {qa, qa, 0.0, 0.0, qa, qa};
15211 double zn_inputs_4[] = {qn, 0.0, 0.0, qn, qn, qn};
15212 double zm_inputs_4[] = {0.0, qm, qm, qm, qm, 0.0};
15213
15214 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
15215 // If `n` is propagated, its sign is inverted by fmls and fnmla.
15216 // If `m` is propagated, its sign is never inverted.
15217 uint64_t zd_expected_fmla_4[] =
15218 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
15219 uint64_t zd_expected_fmls_4[] =
15220 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
15221 uint64_t zd_expected_fnmla_4[] =
15222 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
15223 uint64_t zd_expected_fnmls_4[] =
15224 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
15225
15226 ProcessNaNsHelper3(config,
15227 kDRegSize,
15228 za_inputs_4,
15229 zn_inputs_4,
15230 zm_inputs_4,
15231 zd_expected_fmla_4,
15232 zd_expected_fmls_4,
15233 zd_expected_fnmla_4,
15234 zd_expected_fnmls_4,
15235 StrictNaNPropagation);
15236
15237 // Signalling NaNs take precedence over quiet NaNs.
15238 double za_inputs_5[] = {qa, qa, sa, sa, sa};
15239 double zn_inputs_5[] = {qn, sn, sn, sn, qn};
15240 double zm_inputs_5[] = {sm, qm, sm, qa, sm};
15241 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
15242 uint64_t zd_expected_fmls_5[] = {sm_proc,
15243 sn_proc_n,
15244 sa_proc,
15245 sa_proc,
15246 sa_proc};
15247 uint64_t zd_expected_fnmla_5[] = {sm_proc,
15248 sn_proc_n,
15249 sa_proc_n,
15250 sa_proc_n,
15251 sa_proc_n};
15252 uint64_t zd_expected_fnmls_5[] = {sm_proc,
15253 sn_proc,
15254 sa_proc_n,
15255 sa_proc_n,
15256 sa_proc_n};
15257
15258 ProcessNaNsHelper3(config,
15259 kDRegSize,
15260 za_inputs_5,
15261 zn_inputs_5,
15262 zm_inputs_5,
15263 zd_expected_fmla_5,
15264 zd_expected_fmls_5,
15265 zd_expected_fnmla_5,
15266 zd_expected_fnmls_5,
15267 StrictNaNPropagation);
15268
15269 const double inf = kFP64PositiveInfinity;
15270 const double inf_n = kFP64NegativeInfinity;
15271 uint64_t inf_proc = DoubleToRawbits(inf);
15272 uint64_t inf_proc_n = DoubleToRawbits(inf_n);
15273 uint64_t d_inf_proc = DoubleToRawbits(kFP64DefaultNaN);
15274
15275 double za_inputs_6[] = {qa, qa, 0.0f, -0.0f, qa, sa};
15276 double zn_inputs_6[] = {inf, -0.0f, -0.0f, inf, inf_n, inf};
15277 double zm_inputs_6[] = {0.0f, inf_n, inf, inf, inf, 0.0f};
15278
15279 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
15280 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
15281 // quiet_nan.
15282 uint64_t zd_expected_fmla_6[] =
15283 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
15284 uint64_t zd_expected_fmls_6[] =
15285 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
15286 uint64_t zd_expected_fnmla_6[] =
15287 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
15288 uint64_t zd_expected_fnmls_6[] =
15289 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
15290
15291 ProcessNaNsHelper3(config,
15292 kDRegSize,
15293 za_inputs_6,
15294 zn_inputs_6,
15295 zm_inputs_6,
15296 zd_expected_fmla_6,
15297 zd_expected_fmls_6,
15298 zd_expected_fnmla_6,
15299 zd_expected_fnmls_6,
15300 StrictNaNPropagation);
15301}
15302
15303TEST_SVE(sve_process_nans_float) {
15304 // Use non-standard NaNs to check that the payload bits are preserved.
15305 float sa = RawbitsToFloat(0x7f951111);
15306 float sn = RawbitsToFloat(0x7f952222);
15307 float sm = RawbitsToFloat(0x7f953333);
15308 float qa = RawbitsToFloat(0x7fea1111);
15309 float qn = RawbitsToFloat(0x7fea2222);
15310 float qm = RawbitsToFloat(0x7fea3333);
15311 VIXL_ASSERT(IsSignallingNaN(sa));
15312 VIXL_ASSERT(IsSignallingNaN(sn));
15313 VIXL_ASSERT(IsSignallingNaN(sm));
15314 VIXL_ASSERT(IsQuietNaN(qa));
15315 VIXL_ASSERT(IsQuietNaN(qn));
15316 VIXL_ASSERT(IsQuietNaN(qm));
15317
15318 // The input NaNs after passing through ProcessNaN.
15319 uint32_t sa_proc = 0x7fd51111;
15320 uint32_t sn_proc = 0x7fd52222;
15321 uint32_t sm_proc = 0x7fd53333;
15322 uint32_t qa_proc = FloatToRawbits(qa);
15323 uint32_t qn_proc = FloatToRawbits(qn);
15324 uint32_t qm_proc = FloatToRawbits(qm);
15325 uint32_t sa_proc_n = sa_proc ^ kSSignMask;
15326 uint32_t sn_proc_n = sn_proc ^ kSSignMask;
15327 uint32_t qa_proc_n = qa_proc ^ kSSignMask;
15328 uint32_t qn_proc_n = qn_proc ^ kSSignMask;
15329
15330 // Quiet NaNs are propagated.
15331 float zn_inputs_1[] = {qn, 0.0f, 0.0f, qm, qn, qm};
15332 float zm_inputs_1[] = {0.0f, qn, qm, 0.0f, qm, qn};
15333 uint64_t zd_expected_1[] =
15334 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
15335
15336 ProcessNaNsHelper(config,
15337 kSRegSize,
15338 zn_inputs_1,
15339 zm_inputs_1,
15340 zd_expected_1,
15341 StrictNaNPropagation);
15342
15343 // Signalling NaNs are propagated.
15344 float zn_inputs_2[] = {sn, 0.0f, 0.0f, sm, sn, sm};
15345 float zm_inputs_2[] = {0.0f, sn, sm, 0.0f, sm, sn};
15346 uint64_t zd_expected_2[] =
15347 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
15348 ProcessNaNsHelper(config,
15349 kSRegSize,
15350 zn_inputs_2,
15351 zm_inputs_2,
15352 zd_expected_2,
15353 StrictNaNPropagation);
15354
15355 // Signalling NaNs take precedence over quiet NaNs.
15356 float zn_inputs_3[] = {sn, qn, sn, sn, qn};
15357 float zm_inputs_3[] = {qm, sm, sm, qn, sn};
15358 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
15359 ProcessNaNsHelper(config,
15360 kSRegSize,
15361 zn_inputs_3,
15362 zm_inputs_3,
15363 zd_expected_3,
15364 StrictNaNPropagation);
15365
15366 float za_inputs_4[] = {qa, qa, 0.0f, 0.0f, qa, qa};
15367 float zn_inputs_4[] = {qn, 0.0f, 0.0f, qn, qn, qn};
15368 float zm_inputs_4[] = {0.0f, qm, qm, qm, qm, 0.0f};
15369
15370 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
15371 // If `n` is propagated, its sign is inverted by fmls and fnmla.
15372 // If `m` is propagated, its sign is never inverted.
15373 uint64_t zd_expected_fmla_4[] =
15374 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
15375 uint64_t zd_expected_fmls_4[] =
15376 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
15377 uint64_t zd_expected_fnmla_4[] =
15378 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
15379 uint64_t zd_expected_fnmls_4[] =
15380 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
15381
15382 ProcessNaNsHelper3(config,
15383 kSRegSize,
15384 za_inputs_4,
15385 zn_inputs_4,
15386 zm_inputs_4,
15387 zd_expected_fmla_4,
15388 zd_expected_fmls_4,
15389 zd_expected_fnmla_4,
15390 zd_expected_fnmls_4,
15391 StrictNaNPropagation);
15392
15393 // Signalling NaNs take precedence over quiet NaNs.
15394 float za_inputs_5[] = {qa, qa, sa, sa, sa};
15395 float zn_inputs_5[] = {qn, sn, sn, sn, qn};
15396 float zm_inputs_5[] = {sm, qm, sm, qa, sm};
15397 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
15398 uint64_t zd_expected_fmls_5[] = {sm_proc,
15399 sn_proc_n,
15400 sa_proc,
15401 sa_proc,
15402 sa_proc};
15403 uint64_t zd_expected_fnmla_5[] = {sm_proc,
15404 sn_proc_n,
15405 sa_proc_n,
15406 sa_proc_n,
15407 sa_proc_n};
15408 uint64_t zd_expected_fnmls_5[] = {sm_proc,
15409 sn_proc,
15410 sa_proc_n,
15411 sa_proc_n,
15412 sa_proc_n};
15413
15414 ProcessNaNsHelper3(config,
15415 kSRegSize,
15416 za_inputs_5,
15417 zn_inputs_5,
15418 zm_inputs_5,
15419 zd_expected_fmla_5,
15420 zd_expected_fmls_5,
15421 zd_expected_fnmla_5,
15422 zd_expected_fnmls_5,
15423 StrictNaNPropagation);
15424
15425 const float inf = kFP32PositiveInfinity;
15426 const float inf_n = kFP32NegativeInfinity;
15427 uint32_t inf_proc = FloatToRawbits(inf);
15428 uint32_t inf_proc_n = FloatToRawbits(inf_n);
15429 uint32_t d_inf_proc = FloatToRawbits(kFP32DefaultNaN);
15430
15431 float za_inputs_6[] = {qa, qa, 0.0f, 0.0f, qa, sa};
15432 float zn_inputs_6[] = {inf, 0.0f, 0.0f, inf, inf_n, inf};
15433 float zm_inputs_6[] = {0.0f, inf_n, inf, inf, inf, 0.0f};
15434
15435 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
15436 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
15437 // quiet_nan.
15438 uint64_t zd_expected_fmla_6[] =
15439 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
15440 uint64_t zd_expected_fmls_6[] =
15441 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
15442 uint64_t zd_expected_fnmla_6[] =
15443 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
15444 uint64_t zd_expected_fnmls_6[] =
15445 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
15446
15447 ProcessNaNsHelper3(config,
15448 kSRegSize,
15449 za_inputs_6,
15450 zn_inputs_6,
15451 zm_inputs_6,
15452 zd_expected_fmla_6,
15453 zd_expected_fmls_6,
15454 zd_expected_fnmla_6,
15455 zd_expected_fnmls_6,
15456 StrictNaNPropagation);
15457}
15458
15459TEST_SVE(sve_process_nans_half) {
15460 // Use non-standard NaNs to check that the payload bits are preserved.
15461 Float16 sa(RawbitsToFloat16(0x7c11));
15462 Float16 sn(RawbitsToFloat16(0x7c22));
15463 Float16 sm(RawbitsToFloat16(0x7c33));
15464 Float16 qa(RawbitsToFloat16(0x7e44));
15465 Float16 qn(RawbitsToFloat16(0x7e55));
15466 Float16 qm(RawbitsToFloat16(0x7e66));
15467 VIXL_ASSERT(IsSignallingNaN(sa));
15468 VIXL_ASSERT(IsSignallingNaN(sn));
15469 VIXL_ASSERT(IsSignallingNaN(sm));
15470 VIXL_ASSERT(IsQuietNaN(qa));
15471 VIXL_ASSERT(IsQuietNaN(qn));
15472 VIXL_ASSERT(IsQuietNaN(qm));
15473
15474 // The input NaNs after passing through ProcessNaN.
15475 uint16_t sa_proc = 0x7e11;
15476 uint16_t sn_proc = 0x7e22;
15477 uint16_t sm_proc = 0x7e33;
15478 uint16_t qa_proc = Float16ToRawbits(qa);
15479 uint16_t qn_proc = Float16ToRawbits(qn);
15480 uint16_t qm_proc = Float16ToRawbits(qm);
15481 uint16_t sa_proc_n = sa_proc ^ kHSignMask;
15482 uint16_t sn_proc_n = sn_proc ^ kHSignMask;
15483 uint16_t qa_proc_n = qa_proc ^ kHSignMask;
15484 uint16_t qn_proc_n = qn_proc ^ kHSignMask;
15485 Float16 zero(0.0);
15486
15487 // Quiet NaNs are propagated.
15488 Float16 zn_inputs_1[] = {qn, zero, zero, qm, qn, qm};
15489 Float16 zm_inputs_1[] = {zero, qn, qm, zero, qm, qn};
15490 uint64_t zd_expected_1[] =
15491 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
15492
15493 ProcessNaNsHelper(config,
15494 kHRegSize,
15495 zn_inputs_1,
15496 zm_inputs_1,
15497 zd_expected_1,
15498 StrictNaNPropagation);
15499
15500 // Signalling NaNs are propagated.
15501 Float16 zn_inputs_2[] = {sn, zero, zero, sm, sn, sm};
15502 Float16 zm_inputs_2[] = {zero, sn, sm, zero, sm, sn};
15503 uint64_t zd_expected_2[] =
15504 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
15505 ProcessNaNsHelper(config,
15506 kHRegSize,
15507 zn_inputs_2,
15508 zm_inputs_2,
15509 zd_expected_2,
15510 StrictNaNPropagation);
15511
15512 // Signalling NaNs take precedence over quiet NaNs.
15513 Float16 zn_inputs_3[] = {sn, qn, sn, sn, qn};
15514 Float16 zm_inputs_3[] = {qm, sm, sm, qn, sn};
15515 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
15516 ProcessNaNsHelper(config,
15517 kHRegSize,
15518 zn_inputs_3,
15519 zm_inputs_3,
15520 zd_expected_3,
15521 StrictNaNPropagation);
15522
15523 Float16 za_inputs_4[] = {qa, qa, zero, zero, qa, qa};
15524 Float16 zn_inputs_4[] = {qn, zero, zero, qn, qn, qn};
15525 Float16 zm_inputs_4[] = {zero, qm, qm, qm, qm, zero};
15526
15527 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
15528 // If `n` is propagated, its sign is inverted by fmls and fnmla.
15529 // If `m` is propagated, its sign is never inverted.
15530 uint64_t zd_expected_fmla_4[] =
15531 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
15532 uint64_t zd_expected_fmls_4[] =
15533 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
15534 uint64_t zd_expected_fnmla_4[] =
15535 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
15536 uint64_t zd_expected_fnmls_4[] =
15537 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
15538
15539 ProcessNaNsHelper3(config,
15540 kHRegSize,
15541 za_inputs_4,
15542 zn_inputs_4,
15543 zm_inputs_4,
15544 zd_expected_fmla_4,
15545 zd_expected_fmls_4,
15546 zd_expected_fnmla_4,
15547 zd_expected_fnmls_4,
15548 StrictNaNPropagation);
15549
15550 // Signalling NaNs take precedence over quiet NaNs.
15551 Float16 za_inputs_5[] = {qa, qa, sa, sa, sa};
15552 Float16 zn_inputs_5[] = {qn, sn, sn, sn, qn};
15553 Float16 zm_inputs_5[] = {sm, qm, sm, qa, sm};
15554 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
15555 uint64_t zd_expected_fmls_5[] = {sm_proc,
15556 sn_proc_n,
15557 sa_proc,
15558 sa_proc,
15559 sa_proc};
15560 uint64_t zd_expected_fnmla_5[] = {sm_proc,
15561 sn_proc_n,
15562 sa_proc_n,
15563 sa_proc_n,
15564 sa_proc_n};
15565 uint64_t zd_expected_fnmls_5[] = {sm_proc,
15566 sn_proc,
15567 sa_proc_n,
15568 sa_proc_n,
15569 sa_proc_n};
15570
15571 ProcessNaNsHelper3(config,
15572 kHRegSize,
15573 za_inputs_5,
15574 zn_inputs_5,
15575 zm_inputs_5,
15576 zd_expected_fmla_5,
15577 zd_expected_fmls_5,
15578 zd_expected_fnmla_5,
15579 zd_expected_fnmls_5,
15580 StrictNaNPropagation);
15581
15582 const Float16 inf = kFP16PositiveInfinity;
15583 const Float16 inf_n = kFP16NegativeInfinity;
15584 uint64_t inf_proc = Float16ToRawbits(inf);
15585 uint64_t inf_proc_n = Float16ToRawbits(inf_n);
15586 uint64_t d_inf_proc = Float16ToRawbits(kFP16DefaultNaN);
15587
15588 Float16 za_inputs_6[] = {qa, qa, zero, zero, qa, sa};
15589 Float16 zn_inputs_6[] = {inf, zero, zero, inf, inf_n, inf};
15590 Float16 zm_inputs_6[] = {zero, inf_n, inf, inf, inf, zero};
15591
15592 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
15593 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
15594 // quiet_nan.
15595 uint64_t zd_expected_fmla_6[] =
15596 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
15597 uint64_t zd_expected_fmls_6[] =
15598 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
15599 uint64_t zd_expected_fnmla_6[] =
15600 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
15601 uint64_t zd_expected_fnmls_6[] =
15602 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
15603
15604 ProcessNaNsHelper3(config,
15605 kHRegSize,
15606 za_inputs_6,
15607 zn_inputs_6,
15608 zm_inputs_6,
15609 zd_expected_fmla_6,
15610 zd_expected_fmls_6,
15611 zd_expected_fnmla_6,
15612 zd_expected_fnmls_6,
15613 StrictNaNPropagation);
15614}
15615
TatWai Chong47c26842020-02-10 01:51:32 -080015616typedef void (MacroAssembler::*FCmpFn)(const PRegisterWithLaneSize& pd,
15617 const PRegisterZ& pg,
15618 const ZRegister& zn,
15619 const ZRegister& zm);
15620
TatWai Chonge3775132020-02-16 22:13:17 -080015621typedef void (MacroAssembler::*FCmpZeroFn)(const PRegisterWithLaneSize& pd,
15622 const PRegisterZ& pg,
15623 const ZRegister& zn);
15624
TatWai Chong47c26842020-02-10 01:51:32 -080015625typedef void (MacroAssembler::*CmpFn)(const PRegisterWithLaneSize& pd,
15626 const PRegisterZ& pg,
15627 const ZRegister& zn,
15628 const ZRegister& zm);
15629
15630static FCmpFn GetFpAbsCompareFn(Condition cond) {
15631 switch (cond) {
15632 case ge:
15633 return &MacroAssembler::Facge;
15634 case gt:
15635 return &MacroAssembler::Facgt;
15636 case le:
15637 return &MacroAssembler::Facle;
15638 case lt:
15639 return &MacroAssembler::Faclt;
15640 default:
15641 VIXL_UNIMPLEMENTED();
15642 return NULL;
15643 }
15644}
15645
15646static FCmpFn GetFpCompareFn(Condition cond) {
15647 switch (cond) {
15648 case ge:
15649 return &MacroAssembler::Fcmge;
15650 case gt:
15651 return &MacroAssembler::Fcmgt;
15652 case le:
15653 return &MacroAssembler::Fcmle;
15654 case lt:
15655 return &MacroAssembler::Fcmlt;
15656 case eq:
15657 return &MacroAssembler::Fcmeq;
15658 case ne:
15659 return &MacroAssembler::Fcmne;
15660 case uo:
15661 return &MacroAssembler::Fcmuo;
15662 default:
15663 VIXL_UNIMPLEMENTED();
15664 return NULL;
15665 }
15666}
15667
TatWai Chonge3775132020-02-16 22:13:17 -080015668static FCmpZeroFn GetFpCompareZeroFn(Condition cond) {
15669 switch (cond) {
15670 case ge:
15671 return &MacroAssembler::Fcmge;
15672 case gt:
15673 return &MacroAssembler::Fcmgt;
15674 case le:
15675 return &MacroAssembler::Fcmle;
15676 case lt:
15677 return &MacroAssembler::Fcmlt;
15678 case eq:
15679 return &MacroAssembler::Fcmeq;
15680 case ne:
15681 return &MacroAssembler::Fcmne;
15682 default:
15683 VIXL_UNIMPLEMENTED();
15684 return NULL;
15685 }
15686}
15687
TatWai Chong47c26842020-02-10 01:51:32 -080015688static CmpFn GetIntCompareFn(Condition cond) {
15689 switch (cond) {
15690 case ge:
15691 return &MacroAssembler::Cmpge;
15692 case gt:
15693 return &MacroAssembler::Cmpgt;
15694 case le:
15695 return &MacroAssembler::Cmple;
15696 case lt:
15697 return &MacroAssembler::Cmplt;
15698 case eq:
15699 return &MacroAssembler::Cmpeq;
15700 case ne:
15701 return &MacroAssembler::Cmpne;
15702 default:
15703 VIXL_UNIMPLEMENTED();
15704 return NULL;
15705 }
15706}
15707
15708template <size_t N>
15709static void TestFpCompareHelper(Test* config,
15710 int lane_size_in_bits,
15711 Condition cond,
15712 const double (&zn_inputs)[N],
15713 const double (&zm_inputs)[N],
15714 const int (&pd_expected)[N],
15715 bool is_absolute = false) {
15716 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15717 START();
15718
15719 ZRegister zt_int_1 = z1.WithLaneSize(lane_size_in_bits);
15720 ZRegister zt_int_2 = z2.WithLaneSize(lane_size_in_bits);
15721 ZRegister zt_int_3 = z3.WithLaneSize(lane_size_in_bits);
15722 ZRegister zt_fp_1 = z11.WithLaneSize(lane_size_in_bits);
15723 ZRegister zt_fp_2 = z12.WithLaneSize(lane_size_in_bits);
15724 ZRegister zt_fp_3 = z13.WithLaneSize(lane_size_in_bits);
15725 ZRegister fp_one = z31.WithLaneSize(lane_size_in_bits);
15726
15727 PRegisterWithLaneSize pd_result_int_1 = p15.WithLaneSize(lane_size_in_bits);
15728 PRegisterWithLaneSize pd_result_fp_1 = p14.WithLaneSize(lane_size_in_bits);
15729 PRegisterWithLaneSize pd_result_int_2 = p13.WithLaneSize(lane_size_in_bits);
15730 PRegisterWithLaneSize pd_result_fp_2 = p12.WithLaneSize(lane_size_in_bits);
15731
15732 FCmpFn fcmp = is_absolute ? GetFpAbsCompareFn(cond) : GetFpCompareFn(cond);
15733 __ Ptrue(p1.VnB());
15734
15735 if (cond != uo) {
15736 int pg_inputs[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1};
15737 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
15738
15739 __ Fdup(fp_one, 0.1f);
15740
15741 __ Index(zt_int_1, 3, 3);
15742 __ Scvtf(zt_fp_1, p0.Merging(), zt_int_1);
15743 __ Fadd(zt_fp_1, zt_fp_1, fp_one);
15744
15745 __ Index(zt_int_2, 3, -10);
15746 __ Scvtf(zt_fp_2, p0.Merging(), zt_int_2);
15747 __ Fadd(zt_fp_2, zt_fp_2, fp_one);
15748
15749 __ Index(zt_int_3, 3, 2);
15750 __ Scvtf(zt_fp_3, p0.Merging(), zt_int_3);
15751 __ Fadd(zt_fp_3, zt_fp_3, fp_one);
15752
15753
15754 // There is no absolute comparison in integer type, use `abs` with `cmp<cc>`
15755 // to synthesize the expected result for `fac<cc>`.
15756 if (is_absolute == true) {
15757 __ Abs(zt_int_2, p1.Merging(), zt_int_2);
15758 }
15759
15760 CmpFn cmp = GetIntCompareFn(cond);
15761 (masm.*cmp)(pd_result_int_1, p0.Zeroing(), zt_int_1, zt_int_2);
15762 (masm.*fcmp)(pd_result_fp_1, p0.Zeroing(), zt_fp_1, zt_fp_2);
15763
15764 (masm.*cmp)(pd_result_int_2, p0.Zeroing(), zt_int_1, zt_int_3);
15765 (masm.*fcmp)(pd_result_fp_2, p0.Zeroing(), zt_fp_1, zt_fp_3);
15766 }
15767
15768 uint64_t zn_inputs_rawbits[N];
15769 uint64_t zm_inputs_rawbits[N];
15770 FPToRawbitsWithSize(zn_inputs, zn_inputs_rawbits, lane_size_in_bits);
15771 FPToRawbitsWithSize(zm_inputs, zm_inputs_rawbits, lane_size_in_bits);
15772
15773 ZRegister zn_fp = z14.WithLaneSize(lane_size_in_bits);
15774 ZRegister zm_fp = z15.WithLaneSize(lane_size_in_bits);
15775 InsrHelper(&masm, zn_fp, zn_inputs_rawbits);
15776 InsrHelper(&masm, zm_fp, zm_inputs_rawbits);
15777
15778 PRegisterWithLaneSize pd_result_fp_3 = p11.WithLaneSize(lane_size_in_bits);
15779 (masm.*fcmp)(pd_result_fp_3, p1.Zeroing(), zn_fp, zm_fp);
15780
15781 END();
15782
15783 if (CAN_RUN()) {
15784 RUN();
15785
15786 if (cond != uo) {
15787 ASSERT_EQUAL_SVE(pd_result_int_1, pd_result_fp_1);
15788 ASSERT_EQUAL_SVE(pd_result_int_2, pd_result_fp_2);
15789 }
15790 ASSERT_EQUAL_SVE(pd_expected, pd_result_fp_3);
15791 }
15792}
15793
15794TEST_SVE(sve_fp_compare_vectors) {
15795 double inf_p = kFP64PositiveInfinity;
15796 double inf_n = kFP64NegativeInfinity;
15797 double nan = kFP64DefaultNaN;
15798
15799 // Normal floating point comparison has been tested in the helper.
15800 double zn[] = {0.0, inf_n, 1.0, inf_p, inf_p, nan, 0.0, nan};
15801 double zm[] = {-0.0, inf_n, inf_n, -2.0, inf_n, nan, nan, inf_p};
15802
15803 int pd_fcm_gt[] = {0, 0, 1, 1, 1, 0, 0, 0};
15804 int pd_fcm_lt[] = {0, 0, 0, 0, 0, 0, 0, 0};
15805 int pd_fcm_ge[] = {1, 1, 1, 1, 1, 0, 0, 0};
15806 int pd_fcm_le[] = {1, 1, 0, 0, 0, 0, 0, 0};
15807 int pd_fcm_eq[] = {1, 1, 0, 0, 0, 0, 0, 0};
15808 int pd_fcm_ne[] = {0, 0, 1, 1, 1, 0, 0, 0};
15809 int pd_fcm_uo[] = {0, 0, 0, 0, 0, 1, 1, 1};
15810 int pd_fac_gt[] = {0, 0, 0, 1, 0, 0, 0, 0};
15811 int pd_fac_lt[] = {0, 0, 1, 0, 0, 0, 0, 0};
15812 int pd_fac_ge[] = {1, 1, 0, 1, 1, 0, 0, 0};
15813 int pd_fac_le[] = {1, 1, 1, 0, 1, 0, 0, 0};
15814
15815 int lane_sizes[] = {kHRegSize, kSRegSize, kDRegSize};
15816
15817 for (size_t i = 0; i < ArrayLength(lane_sizes); i++) {
15818 int lane_size = lane_sizes[i];
15819 // Test floating-point compare vectors.
15820 TestFpCompareHelper(config, lane_size, gt, zn, zm, pd_fcm_gt);
15821 TestFpCompareHelper(config, lane_size, lt, zn, zm, pd_fcm_lt);
15822 TestFpCompareHelper(config, lane_size, ge, zn, zm, pd_fcm_ge);
15823 TestFpCompareHelper(config, lane_size, le, zn, zm, pd_fcm_le);
15824 TestFpCompareHelper(config, lane_size, eq, zn, zm, pd_fcm_eq);
15825 TestFpCompareHelper(config, lane_size, ne, zn, zm, pd_fcm_ne);
15826 TestFpCompareHelper(config, lane_size, uo, zn, zm, pd_fcm_uo);
15827
15828 // Test floating-point absolute compare vectors.
15829 TestFpCompareHelper(config, lane_size, gt, zn, zm, pd_fac_gt, true);
15830 TestFpCompareHelper(config, lane_size, lt, zn, zm, pd_fac_lt, true);
15831 TestFpCompareHelper(config, lane_size, ge, zn, zm, pd_fac_ge, true);
15832 TestFpCompareHelper(config, lane_size, le, zn, zm, pd_fac_le, true);
15833 }
15834}
15835
TatWai Chonge3775132020-02-16 22:13:17 -080015836template <size_t N, typename T>
15837static void TestFpCompareZeroHelper(Test* config,
15838 int lane_size_in_bits,
15839 Condition cond,
15840 const T (&zn_inputs)[N],
15841 const int (&pd_expected)[N]) {
15842 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15843 START();
15844
15845 ZRegister zn = z28.WithLaneSize(lane_size_in_bits);
15846 PRegisterWithLaneSize pd = p14.WithLaneSize(lane_size_in_bits);
15847
15848 uint64_t zn_rawbits[N];
15849 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
15850 InsrHelper(&masm, zn, zn_rawbits);
15851
15852 __ Ptrue(p0.VnB());
15853 (masm.*GetFpCompareZeroFn(cond))(pd, p0.Zeroing(), zn);
15854
15855 END();
15856
15857 if (CAN_RUN()) {
15858 RUN();
15859
15860 ASSERT_EQUAL_SVE(pd_expected, pd);
15861 }
15862}
15863
15864TEST_SVE(sve_fp_compare_vector_zero) {
15865 Float16 fp16_inf_p = kFP16PositiveInfinity;
15866 Float16 fp16_inf_n = kFP16NegativeInfinity;
15867 Float16 fp16_dn = kFP16DefaultNaN;
15868 Float16 fp16_sn = RawbitsToFloat16(0x7c22);
15869 Float16 fp16_qn = RawbitsToFloat16(0x7e55);
15870
15871 float fp32_inf_p = kFP32PositiveInfinity;
15872 float fp32_inf_n = kFP32NegativeInfinity;
15873 float fp32_dn = kFP32DefaultNaN;
15874 float fp32_sn = RawbitsToFloat(0x7f952222);
15875 float fp32_qn = RawbitsToFloat(0x7fea2222);
15876
15877 double fp64_inf_p = kFP64PositiveInfinity;
15878 double fp64_inf_n = kFP64NegativeInfinity;
15879 double fp64_dn = kFP64DefaultNaN;
15880 double fp64_sn = RawbitsToDouble(0x7ff5555511111111);
15881 double fp64_qn = RawbitsToDouble(0x7ffaaaaa11111111);
15882
15883 // Normal floating point comparison has been tested in the non-zero form.
15884 Float16 zn_inputs_h[] = {Float16(0.0),
15885 Float16(-0.0),
15886 fp16_inf_p,
15887 fp16_inf_n,
15888 fp16_dn,
15889 fp16_sn,
15890 fp16_qn};
15891 float zn_inputs_s[] =
15892 {0.0, -0.0, fp32_inf_p, fp32_inf_n, fp32_dn, fp32_sn, fp32_qn};
15893 double zn_inputs_d[] =
15894 {0.0, -0.0, fp64_inf_p, fp64_inf_n, fp64_dn, fp64_sn, fp64_qn};
15895
15896 int pd_expected_gt[] = {0, 0, 1, 0, 0, 0, 0};
15897 int pd_expected_lt[] = {0, 0, 0, 1, 0, 0, 0};
15898 int pd_expected_ge[] = {1, 1, 1, 0, 0, 0, 0};
15899 int pd_expected_le[] = {1, 1, 0, 1, 0, 0, 0};
15900 int pd_expected_eq[] = {1, 1, 0, 0, 0, 0, 0};
15901 int pd_expected_ne[] = {0, 0, 1, 1, 0, 0, 0};
15902
15903 TestFpCompareZeroHelper(config, kDRegSize, gt, zn_inputs_d, pd_expected_gt);
15904 TestFpCompareZeroHelper(config, kDRegSize, lt, zn_inputs_d, pd_expected_lt);
15905 TestFpCompareZeroHelper(config, kDRegSize, ge, zn_inputs_d, pd_expected_ge);
15906 TestFpCompareZeroHelper(config, kDRegSize, le, zn_inputs_d, pd_expected_le);
15907 TestFpCompareZeroHelper(config, kDRegSize, eq, zn_inputs_d, pd_expected_eq);
15908 TestFpCompareZeroHelper(config, kDRegSize, ne, zn_inputs_d, pd_expected_ne);
15909
15910 TestFpCompareZeroHelper(config, kSRegSize, gt, zn_inputs_s, pd_expected_gt);
15911 TestFpCompareZeroHelper(config, kSRegSize, lt, zn_inputs_s, pd_expected_lt);
15912 TestFpCompareZeroHelper(config, kSRegSize, ge, zn_inputs_s, pd_expected_ge);
15913 TestFpCompareZeroHelper(config, kSRegSize, le, zn_inputs_s, pd_expected_le);
15914 TestFpCompareZeroHelper(config, kSRegSize, eq, zn_inputs_s, pd_expected_eq);
15915 TestFpCompareZeroHelper(config, kSRegSize, ne, zn_inputs_s, pd_expected_ne);
15916
15917 TestFpCompareZeroHelper(config, kHRegSize, gt, zn_inputs_h, pd_expected_gt);
15918 TestFpCompareZeroHelper(config, kHRegSize, lt, zn_inputs_h, pd_expected_lt);
15919 TestFpCompareZeroHelper(config, kHRegSize, ge, zn_inputs_h, pd_expected_ge);
15920 TestFpCompareZeroHelper(config, kHRegSize, le, zn_inputs_h, pd_expected_le);
15921 TestFpCompareZeroHelper(config, kHRegSize, eq, zn_inputs_h, pd_expected_eq);
15922 TestFpCompareZeroHelper(config, kHRegSize, ne, zn_inputs_h, pd_expected_ne);
15923}
15924
TatWai Chong2cb1b612020-03-04 23:51:21 -080015925typedef void (MacroAssembler::*FPUnaryMFn)(const ZRegister& zd,
15926 const PRegisterM& pg,
15927 const ZRegister& zn);
15928
15929typedef void (MacroAssembler::*FPUnaryZFn)(const ZRegister& zd,
15930 const PRegisterZ& pg,
15931 const ZRegister& zn);
15932
15933template <size_t N, size_t M>
15934static void TestFPUnaryPredicatedHelper(Test* config,
15935 int src_size_in_bits,
15936 int dst_size_in_bits,
15937 uint64_t (&zn_inputs)[N],
15938 const uint64_t (&pg_inputs)[M],
15939 const uint64_t (&zd_expected)[N],
15940 FPUnaryMFn macro_m,
15941 FPUnaryZFn macro_z) {
15942 // Provide the full predicate input.
15943 VIXL_ASSERT(M == (kPRegMaxSize / kDRegSize));
15944 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
15945 START();
15946
15947 int ds = dst_size_in_bits;
15948 int ss = src_size_in_bits;
15949 int ls = std::max(ss, ds);
15950
15951 // When destination type is larger than source type, fill the high parts with
15952 // noise values, which should be ignored.
15953 if (ds > ss) {
15954 VIXL_ASSERT(ss < 64);
15955 uint64_t zn_inputs_mod[N];
15956 uint64_t sn = GetSignallingNan(ss);
15957 for (unsigned i = 0; i < N; i++) {
15958 zn_inputs_mod[i] = zn_inputs[i] | ((sn + i) << ss);
15959 }
15960 InsrHelper(&masm, z29.WithLaneSize(ls), zn_inputs_mod);
15961 } else {
15962 InsrHelper(&masm, z29.WithLaneSize(ls), zn_inputs);
15963 }
15964
15965 // Make a copy so we can check that constructive operations preserve zn.
15966 __ Mov(z28, z29);
15967
15968 // Run the operation on all lanes.
15969 __ Ptrue(p0.WithLaneSize(ls));
15970 (masm.*macro_m)(z27.WithLaneSize(ds), p0.Merging(), z28.WithLaneSize(ss));
15971
15972 Initialise(&masm,
15973 p1.VnB(),
15974 pg_inputs[3],
15975 pg_inputs[2],
15976 pg_inputs[1],
15977 pg_inputs[0]);
15978
15979 // Clear the irrelevant lanes.
15980 __ Index(z31.WithLaneSize(ls), 0, 1);
15981 __ Cmplt(p1.WithLaneSize(ls), p1.Zeroing(), z31.WithLaneSize(ls), N);
15982
15983 // Check merging predication.
15984 __ Index(z11.WithLaneSize(ls), 42, 1);
15985 // Preserve the base value so we can derive the expected result.
15986 __ Mov(z21, z11);
15987 __ Mov(z9, z11);
15988 (masm.*macro_m)(z11.WithLaneSize(ds), p1.Merging(), z28.WithLaneSize(ss));
15989
15990 // Generate expected values using explicit merging operations.
15991 InsrHelper(&masm, z25.WithLaneSize(ls), zd_expected);
15992 __ Mov(z21.WithLaneSize(ls), p1.Merging(), z25.WithLaneSize(ls));
15993
15994 // Check zeroing predication.
15995 __ Index(z12.WithLaneSize(ds), 42, -1);
15996 (masm.*macro_z)(z12.WithLaneSize(ds), p1.Zeroing(), z28.WithLaneSize(ss));
15997
15998 // Generate expected values using explicit zeroing operations.
15999 InsrHelper(&masm, z30.WithLaneSize(ls), zd_expected);
16000 // Emulate zeroing predication.
16001 __ Dup(z22.WithLaneSize(ls), 0);
16002 __ Mov(z22.WithLaneSize(ls), p1.Merging(), z30.WithLaneSize(ls));
16003
16004 // Check an in-place update.
16005 __ Mov(z9.WithLaneSize(ls), p1.Merging(), z28.WithLaneSize(ls));
16006 (masm.*macro_m)(z9.WithLaneSize(ds), p1.Merging(), z9.WithLaneSize(ss));
16007
16008 END();
16009
16010 if (CAN_RUN()) {
16011 RUN();
16012
16013 // Check all lanes.
16014 ASSERT_EQUAL_SVE(zd_expected, z27.WithLaneSize(ls));
16015
16016 // Check that constructive operations preserve their inputs.
16017 ASSERT_EQUAL_SVE(z28, z29);
16018
16019 // Check merging predication.
16020 ASSERT_EQUAL_SVE(z21.WithLaneSize(ls), z21.WithLaneSize(ls));
16021
16022 // Check zeroing predication.
16023 ASSERT_EQUAL_SVE(z22.WithLaneSize(ls), z12.WithLaneSize(ls));
16024
16025 // Check in-place operation where zd == zn.
16026 ASSERT_EQUAL_SVE(z21.WithLaneSize(ls), z9.WithLaneSize(ls));
16027 }
16028}
16029
16030template <size_t N, typename T>
16031static void TestFPUnaryPredicatedHelper(Test* config,
16032 int src_size_in_bits,
16033 int dst_size_in_bits,
16034 T (&zn_inputs)[N],
16035 const T (&zd_expected)[N],
16036 FPUnaryMFn macro_m,
16037 FPUnaryZFn macro_z) {
16038 uint64_t pg_inputs[] = {0xa55aa55aa55aa55a,
16039 0xa55aa55aa55aa55a,
16040 0xa55aa55aa55aa55a,
16041 0xa55aa55aa55aa55a};
16042
16043 TestFPUnaryPredicatedHelper(config,
16044 src_size_in_bits,
16045 dst_size_in_bits,
16046 zn_inputs,
16047 pg_inputs,
16048 zd_expected,
16049 macro_m,
16050 macro_z);
16051
16052 // The complementary of above precicate to get full input coverage.
16053 uint64_t pg_c_inputs[] = {0x5aa55aa55aa55aa5,
16054 0x5aa55aa55aa55aa5,
16055 0x5aa55aa55aa55aa5,
16056 0x5aa55aa55aa55aa5};
16057
16058 TestFPUnaryPredicatedHelper(config,
16059 src_size_in_bits,
16060 dst_size_in_bits,
16061 zn_inputs,
16062 pg_c_inputs,
16063 zd_expected,
16064 macro_m,
16065 macro_z);
16066}
16067
16068template <size_t N, typename T>
16069static void TestFcvtHelper(Test* config,
16070 int src_size_in_bits,
16071 int dst_size_in_bits,
16072 T (&zn_inputs)[N],
16073 const T (&zd_expected)[N]) {
16074 TestFPUnaryPredicatedHelper(config,
16075 src_size_in_bits,
16076 dst_size_in_bits,
16077 zn_inputs,
16078 zd_expected,
16079 &MacroAssembler::Fcvt, // Merging form.
16080 &MacroAssembler::Fcvt); // Zerging form.
16081}
16082
16083TEST_SVE(sve_fcvt) {
16084 uint64_t h_vals[] = {0x7c00,
16085 0xfc00,
16086 0,
16087 0x8000,
16088 0x7bff, // Max half precision.
16089 0x0400, // Min positive normal.
16090 0x03ff, // Max subnormal.
16091 0x0001}; // Min positive subnormal.
16092
16093 uint64_t s_vals[] = {0x7f800000,
16094 0xff800000,
16095 0,
16096 0x80000000,
16097 0x477fe000,
16098 0x38800000,
16099 0x387fc000,
16100 0x33800000};
16101
16102 uint64_t d_vals[] = {0x7ff0000000000000,
16103 0xfff0000000000000,
16104 0,
16105 0x8000000000000000,
16106 0x40effc0000000000,
16107 0x3f10000000000000,
16108 0x3f0ff80000000000,
16109 0x3e70000000000000};
16110
16111 TestFcvtHelper(config, kHRegSize, kSRegSize, h_vals, s_vals);
16112 TestFcvtHelper(config, kSRegSize, kHRegSize, s_vals, h_vals);
16113 TestFcvtHelper(config, kSRegSize, kDRegSize, s_vals, d_vals);
16114 TestFcvtHelper(config, kDRegSize, kSRegSize, d_vals, s_vals);
16115 TestFcvtHelper(config, kHRegSize, kDRegSize, h_vals, d_vals);
16116 TestFcvtHelper(config, kDRegSize, kHRegSize, d_vals, h_vals);
16117}
16118
16119TEST_SVE(sve_fcvt_nan) {
16120 uint64_t h_inputs[] = {0x7e55, // Quiet NaN.
16121 0x7c22}; // Signalling NaN.
16122
16123 uint64_t h2s_expected[] = {0x7fcaa000, 0x7fc44000};
16124
16125 uint64_t h2d_expected[] = {0x7ff9540000000000, 0x7ff8880000000000};
16126
16127 uint64_t s_inputs[] = {0x7fc12345, // Quiet NaN.
16128 0x7f812345}; // Signalling NaN.
16129
16130 uint64_t s2h_expected[] = {0x7e09, 0x7e09};
16131
16132 uint64_t s2d_expected[] = {0x7ff82468a0000000, 0x7ff82468a0000000};
16133
16134 uint64_t d_inputs[] = {0x7ffaaaaa22222222, // Quiet NaN.
16135 0x7ff5555511111111}; // Signalling NaN.
16136
16137 uint64_t d2h_expected[] = {0x7eaa, 0x7f55};
16138
16139 uint64_t d2s_expected[] = {0x7fd55551, 0x7feaaaa8};
16140
16141 TestFcvtHelper(config, kHRegSize, kSRegSize, h_inputs, h2s_expected);
16142 TestFcvtHelper(config, kSRegSize, kHRegSize, s_inputs, s2h_expected);
16143 TestFcvtHelper(config, kHRegSize, kDRegSize, h_inputs, h2d_expected);
16144 TestFcvtHelper(config, kDRegSize, kHRegSize, d_inputs, d2h_expected);
16145 TestFcvtHelper(config, kSRegSize, kDRegSize, s_inputs, s2d_expected);
16146 TestFcvtHelper(config, kDRegSize, kSRegSize, d_inputs, d2s_expected);
16147}
16148
TatWai Chongf60f6dc2020-02-21 10:48:11 -080016149template <size_t N, typename T>
16150static void TestFrecpxHelper(Test* config,
16151 int lane_size_in_bits,
16152 T (&zn_inputs)[N],
16153 const T (&zd_expected)[N]) {
16154 TestFPUnaryPredicatedHelper(config,
16155 lane_size_in_bits,
16156 lane_size_in_bits,
16157 zn_inputs,
16158 zd_expected,
16159 &MacroAssembler::Frecpx, // Merging form.
16160 &MacroAssembler::Frecpx); // Zerging form.
16161}
16162
16163TEST_SVE(sve_frecpx_h) {
16164 uint64_t zn_inputs[] = {Float16ToRawbits(kFP16PositiveInfinity),
16165 Float16ToRawbits(kFP16NegativeInfinity),
16166 Float16ToRawbits(Float16(0.0)),
16167 Float16ToRawbits(Float16(-0.0)),
16168 0x0001, // Smallest positive subnormal number.
16169 0x03ff, // Largest subnormal number.
16170 0x0400, // Smallest positive normal number.
16171 0x7bff, // Largest normal number.
16172 0x3bff, // Largest number less than one.
16173 0x3c01, // Smallest number larger than one.
16174 0x7c22, // Signalling NaN.
16175 0x7e55}; // Quiet NaN.
16176
16177 uint64_t zd_expected[] = {0,
16178 0x8000,
16179 0x7800,
16180 0xf800,
16181 // Exponent of subnormal numbers are zero.
16182 0x7800,
16183 0x7800,
16184 0x7800,
16185 0x0400,
16186 0x4400,
16187 0x4000,
16188 0x7e22, // To quiet NaN.
16189 0x7e55};
16190
16191 TestFrecpxHelper(config, kHRegSize, zn_inputs, zd_expected);
16192}
16193
16194TEST_SVE(sve_frecpx_s) {
16195 uint64_t zn_inputs[] = {FloatToRawbits(kFP32PositiveInfinity),
16196 FloatToRawbits(kFP32NegativeInfinity),
16197 FloatToRawbits(65504), // Max half precision.
16198 FloatToRawbits(6.10352e-5), // Min positive normal.
16199 FloatToRawbits(6.09756e-5), // Max subnormal.
16200 FloatToRawbits(
16201 5.96046e-8), // Min positive subnormal.
16202 FloatToRawbits(5e-9), // Not representable -> zero.
16203 FloatToRawbits(-0.0),
16204 FloatToRawbits(0.0),
16205 0x7f952222, // Signalling NaN.
16206 0x7fea2222}; // Quiet NaN;
16207
16208 uint64_t zd_expected[] = {0, // 0.0
16209 0x80000000, // -0.0
16210 0x38800000, // 6.10352e-05
16211 0x47000000, // 32768
16212 0x47800000, // 65536
16213 0x4c800000, // 6.71089e+07
16214 0x4e000000, // 5.36871e+08
16215 0xff000000, // -1.70141e+38
16216 0x7f000000, // 1.70141e+38
16217 0x7fd52222,
16218 0x7fea2222};
16219
16220 TestFrecpxHelper(config, kSRegSize, zn_inputs, zd_expected);
16221}
16222
16223TEST_SVE(sve_frecpx_d) {
16224 uint64_t zn_inputs[] = {DoubleToRawbits(kFP64PositiveInfinity),
16225 DoubleToRawbits(kFP64NegativeInfinity),
16226 DoubleToRawbits(65504), // Max half precision.
16227 DoubleToRawbits(6.10352e-5), // Min positive normal.
16228 DoubleToRawbits(6.09756e-5), // Max subnormal.
16229 DoubleToRawbits(
16230 5.96046e-8), // Min positive subnormal.
16231 DoubleToRawbits(5e-9), // Not representable -> zero.
16232 DoubleToRawbits(-0.0),
16233 DoubleToRawbits(0.0),
16234 0x7ff5555511111111, // Signalling NaN.
16235 0x7ffaaaaa11111111}; // Quiet NaN;
16236
16237 uint64_t zd_expected[] = {0, // 0.0
16238 0x8000000000000000, // -0.0
16239 0x3f10000000000000, // 6.10352e-05
16240 0x40e0000000000000, // 32768
16241 0x40f0000000000000, // 65536
16242 0x4190000000000000, // 6.71089e+07
16243 0x41c0000000000000, // 5.36871e+08
16244 0xffe0000000000000, // -1.70141e+38
16245 0x7fe0000000000000, // 1.70141e+38
16246 0x7ffd555511111111,
16247 0x7ffaaaaa11111111};
16248
16249 TestFrecpxHelper(config, kDRegSize, zn_inputs, zd_expected);
16250}
TatWai Chong2cb1b612020-03-04 23:51:21 -080016251
TatWai Chongb4a25f62020-02-27 00:53:57 -080016252template <size_t N, typename T>
16253static void TestFsqrtHelper(Test* config,
16254 int lane_size_in_bits,
16255 T (&zn_inputs)[N],
16256 const T (&zd_expected)[N]) {
16257 TestFPUnaryPredicatedHelper(config,
16258 lane_size_in_bits,
16259 lane_size_in_bits,
16260 zn_inputs,
16261 zd_expected,
16262 &MacroAssembler::Fsqrt, // Merging form.
16263 &MacroAssembler::Fsqrt); // Zerging form.
16264}
16265
16266TEST_SVE(sve_fsqrt_h) {
16267 uint64_t zn_inputs[] =
16268 {Float16ToRawbits(Float16(0.0)),
16269 Float16ToRawbits(Float16(-0.0)),
16270 Float16ToRawbits(Float16(1.0)),
16271 Float16ToRawbits(Float16(65025.0)),
16272 Float16ToRawbits(kFP16PositiveInfinity),
16273 Float16ToRawbits(kFP16NegativeInfinity),
16274 Float16ToRawbits(Float16(6.10352e-5)), // Min normal positive.
16275 Float16ToRawbits(Float16(65504.0)), // Max normal positive float.
16276 Float16ToRawbits(Float16(6.09756e-5)), // Max subnormal.
16277 Float16ToRawbits(Float16(5.96046e-8)), // Min subnormal positive.
16278 0x7c22, // Signaling NaN
16279 0x7e55}; // Quiet NaN
16280
16281 uint64_t zd_expected[] = {Float16ToRawbits(Float16(0.0)),
16282 Float16ToRawbits(Float16(-0.0)),
16283 Float16ToRawbits(Float16(1.0)),
16284 Float16ToRawbits(Float16(255.0)),
16285 Float16ToRawbits(kFP16PositiveInfinity),
16286 Float16ToRawbits(kFP16DefaultNaN),
16287 0x2000,
16288 0x5bff,
16289 0x1fff,
16290 0x0c00,
16291 0x7e22, // To quiet NaN.
16292 0x7e55};
16293
16294 TestFsqrtHelper(config, kHRegSize, zn_inputs, zd_expected);
16295}
16296
16297TEST_SVE(sve_fsqrt_s) {
16298 uint64_t zn_inputs[] = {FloatToRawbits(0.0f),
16299 FloatToRawbits(-0.0f),
16300 FloatToRawbits(1.0f),
16301 FloatToRawbits(65536.0f),
16302 FloatToRawbits(kFP32PositiveInfinity),
16303 FloatToRawbits(kFP32NegativeInfinity),
16304 0x00800000, // Min normal positive, ~1.17e−38
16305 0x7f7fffff, // Max normal positive, ~3.40e+38
16306 0x00000001, // Min subnormal positive, ~1.40e−45
16307 0x007fffff, // Max subnormal, ~1.17e−38
16308 0x7f951111, // Signaling NaN
16309 0x7fea1111}; // Quiet NaN
16310
16311 uint64_t zd_expected[] = {FloatToRawbits(0.0f),
16312 FloatToRawbits(-0.0f),
16313 FloatToRawbits(1.0f),
16314 FloatToRawbits(256.0f),
16315 FloatToRawbits(kFP32PositiveInfinity),
16316 FloatToRawbits(kFP32DefaultNaN),
16317 0x20000000, // ~1.08e-19
16318 0x5f7fffff, // ~1.84e+19
16319 0x1a3504f3, // ~3.74e-23
16320 0x1fffffff, // ~1.08e-19
16321 0x7fd51111, // To quiet NaN.
16322 0x7fea1111};
16323
16324 TestFsqrtHelper(config, kSRegSize, zn_inputs, zd_expected);
16325}
16326
16327TEST_SVE(sve_fsqrt_d) {
16328 uint64_t zn_inputs[] =
16329 {DoubleToRawbits(0.0),
16330 DoubleToRawbits(-0.0),
16331 DoubleToRawbits(1.0),
16332 DoubleToRawbits(65536.0),
16333 DoubleToRawbits(kFP64PositiveInfinity),
16334 DoubleToRawbits(kFP64NegativeInfinity),
16335 0x0010000000000000, // Min normal positive, ~2.22e-308
16336 0x7fefffffffffffff, // Max normal positive, ~1.79e+308
16337 0x0000000000000001, // Min subnormal positive, 5e-324
16338 0x000fffffffffffff, // Max subnormal, ~2.22e-308
16339 0x7ff5555511111111,
16340 0x7ffaaaaa11111111};
16341
16342 uint64_t zd_expected[] = {DoubleToRawbits(0.0),
16343 DoubleToRawbits(-0.0),
16344 DoubleToRawbits(1.0),
16345 DoubleToRawbits(256.0),
16346 DoubleToRawbits(kFP64PositiveInfinity),
16347 DoubleToRawbits(kFP64DefaultNaN),
16348 0x2000000000000000, // ~1.49e-154
16349 0x5fefffffffffffff, // ~1.34e+154
16350 0x1e60000000000000, // ~2.22e-162
16351 0x1fffffffffffffff, // ~1.49e-154
16352 0x7ffd555511111111, // To quiet NaN.
16353 0x7ffaaaaa11111111};
16354
16355 TestFsqrtHelper(config, kDRegSize, zn_inputs, zd_expected);
16356}
16357
Jacob Bramleyd77a8e42019-02-12 16:52:24 +000016358} // namespace aarch64
16359} // namespace vixl