// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>
#include <unistd.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
  // We never free this memory, but we need it to live for as long as the
  // static linked list of tests, and this is the easiest way to do it.
  Test* test = new Test(name, fn);
  test->set_sve_vl_in_bits(vl);
  return test;
}

// The TEST_SVE macro works just like the usual TEST macro, but the resulting
// function receives a `Test* config` argument, to allow it to query the
// vector length.
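//
// For illustration only (the name below is made up, not a real test in this
// file), a typical test body looks something like:
//
//   TEST_SVE(sve_example) {
//     SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
//     // On the Simulator, config->sve_vl_in_bits() holds the configured
//     // vector length.
//     ...
//   }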
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// On the Simulator, run SVE tests with several vector lengths, including the
// extreme values and an intermediate value that isn't a power of two.

#define TEST_SVE(name)                                                   \
  void Test##name(Test* config);                                         \
  Test* test_##name##_list[] =                                           \
      {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name),     \
       MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name),     \
       MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)};  \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...)  \
  SETUP_WITH_FEATURES(__VA_ARGS__);   \
  simulator.SetVectorLengthInBits(config->sve_vl_in_bits())

#else
// Otherwise, just use whatever the hardware provides.
static const int kSVEVectorLengthInBits =
    CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
        ? CPU::ReadSVEVectorLengthInBits()
        : 0;

#define TEST_SVE(name)                                                      \
  void Test##name(Test* config);                                            \
  Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits,          \
                                           "AARCH64_ASM_" #name "_vlauto",  \
                                           &Test##name);                    \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...)  \
  SETUP_WITH_FEATURES(__VA_ARGS__);   \
  USE(config)

#endif

// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
// is optimised for call-site clarity, not generated code quality, so it
// doesn't exist in the MacroAssembler itself.
//
// Usage:
//
//    int values[] = { 42, 43, 44 };
//    InsrHelper(&masm, z0.VnS(), values);  // Sets z0.S = { ..., 42, 43, 44 }
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane.
template <typename T, size_t N>
void InsrHelper(MacroAssembler* masm,
                const ZRegister& zdn,
                const T (&values)[N]) {
  for (size_t i = 0; i < N; i++) {
    masm->Insr(zdn, values[i]);
  }
}

// Conveniently initialise P registers with scalar bit patterns. The
// destination lane size is ignored. This is optimised for call-site clarity,
// not generated code quality.
//
// Usage:
//
//    Initialise(&masm, p0, 0x1234);  // Sets p0 = 0b'0001'0010'0011'0100
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value3,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  // Generate a literal pool, as in the array form.
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  masm->Adr(temp, &data);
  masm->Ldr(pd, SVEMemOperand(temp));
  masm->B(&done);
  {
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    masm->dc64(value0);
    masm->dc64(value1);
    masm->dc64(value2);
    masm->dc64(value3);
  }
  masm->Bind(&done);
}

void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, value2, value1, value0);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, 0, value1, value0);
}
void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
  Initialise(masm, pd, 0, 0, 0, value0);
}

// Conveniently initialise P registers by lane. This is optimised for call-site
// clarity, not generated code quality.
//
// Usage:
//
//    int values[] = { 0x0, 0x1, 0x2 };
//    Initialise(&masm, p0.VnS(), values);  // Sets p0 = 0b'0000'0001'0010
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane. Unspecified lanes are set to 0 (inactive).
//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lower bit, and writes zero to the upper
// bits, but other (encodable) values can be specified if required by the test.
template <typename T, size_t N>
void Initialise(MacroAssembler* masm,
                const PRegisterWithLaneSize& pd,
                const T (&values)[N]) {
  // Turn the array into 64-bit chunks.
  uint64_t chunks[4] = {0, 0, 0, 0};
  VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);

  int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT((64 % p_bits_per_lane) == 0);
  VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);

  uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);

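  // Pack each (masked) value into its predicate lane, with the lowest-numbered
  // lane in the lowest-order bits. For example (not taken from a test below):
  // with pd.VnS() there are four predicate bits per lane, so values {0xc, 0x7}
  // would pack as chunks[0] = 0xc7.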
  VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
  size_t bit = 0;
  for (int n = static_cast<int>(N - 1); n >= 0; n--) {
    VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
    uint64_t value = values[n] & p_lane_mask;
    chunks[bit / 64] |= value << (bit % 64);
    bit += p_bits_per_lane;
  }

  Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  __ Mov(x0, 0x0123456789abcdef);

  // Test basic `Insr` behaviour.
  __ Insr(z0.VnB(), 1);
  __ Insr(z0.VnB(), 2);
  __ Insr(z0.VnB(), x0);
  __ Insr(z0.VnB(), -42);
  __ Insr(z0.VnB(), 0);

  // Test array inputs.
  int z1_inputs[] = {3, 4, 5, -42, 0};
  InsrHelper(&masm, z1.VnH(), z1_inputs);

  // Test that sign-extension works as intended for various lane sizes.
  __ Dup(z2.VnD(), 0);                    // Clear the register first.
  __ Insr(z2.VnB(), -42);                 // 0xd6
  __ Insr(z2.VnB(), 0xfe);                // 0xfe
  __ Insr(z2.VnH(), -42);                 // 0xffd6
  __ Insr(z2.VnH(), 0xfedc);              // 0xfedc
  __ Insr(z2.VnS(), -42);                 // 0xffffffd6
  __ Insr(z2.VnS(), 0xfedcba98);          // 0xfedcba98
  // Use another register for VnD(), so we can support 128-bit Z registers.
  __ Insr(z3.VnD(), -42);                 // 0xffffffffffffffd6
  __ Insr(z3.VnD(), 0xfedcba9876543210);  // 0xfedcba9876543210

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that array checks work properly on a register initialised
    // lane-by-lane.
    int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
    ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());

    // Test that lane-by-lane checks work properly on a register initialised
    // by array.
    for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
      // The rightmost (highest-indexed) array element maps to the
      // lowest-numbered lane.
      int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
    }

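    // Each `Insr` above shifts the existing contents up by one lane and writes
    // the new value into lane 0, so the z2 sequence packs (from the low end)
    // 0xfedcba98, 0xffffffd6, 0xfedc, 0xffd6, 0xfe, 0xd6, giving the two
    // D-sized lanes checked below.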
    uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
    ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
    uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
    ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
  }
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_p) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Simple cases: move boolean (0 or 1) values.

  int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
  Initialise(&masm, p1.VnH(), p1_inputs);

  int p2_inputs[] = {1, 1, 0, 1};
  Initialise(&masm, p2.VnS(), p2_inputs);

  int p3_inputs[] = {0, 1};
  Initialise(&masm, p3.VnD(), p3_inputs);

  // Advanced cases: move numeric value into architecturally-ignored bits.

  // B-sized lanes get one bit in a P register, so there are no ignored bits.

  // H-sized lanes get two bits in a P register.
  int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
  Initialise(&masm, p4.VnH(), p4_inputs);

  // S-sized lanes get four bits in a P register.
  int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
  Initialise(&masm, p5.VnS(), p5_inputs);

  // D-sized lanes get eight bits in a P register.
  int p6_inputs[] = {0x81, 0xcc, 0x55};
  Initialise(&masm, p6.VnD(), p6_inputs);

  // The largest possible P register has 32 bytes.
  int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
                     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                     0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
  Initialise(&masm, p7.VnD(), p7_inputs);

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that lane-by-lane checks work properly. The rightmost
    // (highest-indexed) array element maps to the lowest-numbered lane.
    for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
    }

    // Test that array checks work properly on predicates initialised with a
    // possibly-different lane size.
    // 0b...11'10'01'00'01'10'11
    int p4_expected[] = {0x39, 0x1b};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnD());

    ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());

    // 0b...10000001'11001100'01010101
    int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnH());

    // 0b...10011100'10011101'10011110'10011111
    int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
                         1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
  }
}

// Test that writes to V registers clear the high bits of the corresponding Z
// register.
TEST_SVE(sve_v_write_clear) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                          CPUFeatures::kFP,
                          CPUFeatures::kSVE);
  START();

  // The Simulator has two mechanisms for writing V registers:
  //  - Write*Register, calling through to SimRegisterBase::Write.
  //  - LogicVRegister::ClearForWrite followed by one or more lane updates.
  // Try to cover both variants.

  // Prepare some known inputs.
  uint8_t data[kQRegSizeInBytes];
  for (size_t i = 0; i < kQRegSizeInBytes; i++) {
    data[i] = 42 + i;
  }
  __ Mov(x10, reinterpret_cast<uintptr_t>(data));
  __ Fmov(d30, 42.0);

  // Use Index to label the lane indices, so failures are easy to detect and
  // diagnose.
  __ Index(z0.VnB(), 0, 1);
  __ Index(z1.VnB(), 0, 1);
  __ Index(z2.VnB(), 0, 1);
  __ Index(z3.VnB(), 0, 1);
  __ Index(z4.VnB(), 0, 1);

  __ Index(z10.VnB(), 0, -1);
  __ Index(z11.VnB(), 0, -1);
  __ Index(z12.VnB(), 0, -1);
  __ Index(z13.VnB(), 0, -1);
  __ Index(z14.VnB(), 0, -1);

  // Instructions using Write*Register (and SimRegisterBase::Write).
  __ Ldr(b0, MemOperand(x10));
  __ Fcvt(h1, d30);
  __ Fmov(s2, 1.5f);
  __ Fmov(d3, d30);
  __ Ldr(q4, MemOperand(x10));

  // Instructions using LogicVRegister::ClearForWrite.
  // These also (incidentally) test that across-lane instructions correctly
  // ignore the high-order Z register lanes.
  __ Sminv(b10, v10.V16B());
  __ Addv(h11, v11.V4H());
  __ Saddlv(s12, v12.V8H());
  __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
  __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Check the Q part first.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1);  // 42.0 (f16)
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2);  // 1.5 (f32)
    ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3);  // 42.0 (f64)
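    // q4 holds data[15:0], i.e. the ascending bytes 0x2a (42) up to 0x39
    // prepared above.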
    ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10);  // -15
    // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
    // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13);  // [-8] x 8
    // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
    ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);

    // Check that the upper lanes are all clear.
    for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
      ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
    }
  }
}

static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
  int za_inputs[] = {-39, 1, -3, 2};
  int zn_inputs[] = {-5, -20, 9, 8};
  int zm_inputs[] = {9, -5, 4, 5};

  ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
  ZRegister za = z1.WithLaneSize(lane_size_in_bits);
  ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
  ZRegister zm = z3.WithLaneSize(lane_size_in_bits);

  // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
  InsrHelper(&masm, zd, zd_inputs);
  InsrHelper(&masm, za, za_inputs);
  InsrHelper(&masm, zn, zn_inputs);
  InsrHelper(&masm, zm, zm_inputs);

  int p0_inputs[] = {1, 1, 0, 1};
  int p1_inputs[] = {1, 0, 1, 1};
  int p2_inputs[] = {0, 1, 1, 1};
  int p3_inputs[] = {1, 1, 1, 0};

  Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
  Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
  Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
  Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);

  // The Mla macro automatically selects between mla, mad and movprfx + mla
  // based on what registers are aliased.
  ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
  ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);

  __ Mov(mla_da_result, za);
  __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);

  __ Mov(mla_dn_result, zn);
  __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);

  __ Mov(mla_dm_result, zm);
  __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);

  __ Mov(mla_d_result, zd);
  __ Mla(mla_d_result, p3.Merging(), za, zn, zm);

  // The Mls macro automatically selects between mls, msb and movprfx + mls
  // based on what registers are aliased.
  ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
  ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);

  __ Mov(mls_da_result, za);
  __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);

  __ Mov(mls_dn_result, zn);
  __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);

  __ Mov(mls_dm_result, zm);
  __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);

  __ Mov(mls_d_result, zd);
  __ Mls(mls_d_result, p3.Merging(), za, zn, zm);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));

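    // Per active lane, mla is za + (zn * zm) and mls is za - (zn * zm). For
    // example, lane 0 (the rightmost input) is 2 + (8 * 5) = 42 for mla and
    // 2 - (8 * 5) = -38 for mls.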
    int mla[] = {-84, 101, 33, 42};
    int mls[] = {6, -99, -39, -38};

    int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);

    int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);

    int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);

    int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);

    int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);

    int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);

    int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);

    int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
  }
}

TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }

TEST_SVE(sve_bitwise_unpredicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  InsrHelper(&masm, z8.VnD(), z8_inputs);
  uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
  InsrHelper(&masm, z15.VnD(), z15_inputs);

  __ And(z1.VnD(), z8.VnD(), z15.VnD());
  __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
  __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
  __ Orr(z4.VnD(), z8.VnD(), z15.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
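    // The expected values are simply the corresponding bitwise operations
    // applied lane-by-lane to z8 and z15 (with Bic computing z8 & ~z15).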
    uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
    uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
    uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
    uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
  }
}

TEST_SVE(sve_predicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // 0b...01011010'10110111
  int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1};  // Pm
  // 0b...11011001'01010010
  int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0};  // Pn
  // 0b...01010101'10110010
  int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};  // pg

  Initialise(&masm, p10.VnB(), p10_inputs);
  Initialise(&masm, p11.VnB(), p11_inputs);
  Initialise(&masm, p12.VnB(), p12_inputs);

  __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x0, NZCV);
  __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x1, NZCV);
  __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0b...01010000'00010010
    int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...00000001'00000000
    int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...00000001'10100000
    int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000101'10100000
    int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000100'00000000
    int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...01010101'00010010
    int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...01010001'10110010
    int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
    // 0b...01011011'00010111
    int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};

    ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
    ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
    ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
    ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
    ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());

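    // For the flag-setting forms (Ands, Bics), the SVE 'first' (N) flag is set
    // when the first active lane of the result is true, and the 'not last' (C)
    // flag is set when the last active lane is false; 'active' is defined by
    // the governing predicate (p12 here).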
    ASSERT_EQUAL_32(SVEFirstFlag, w0);
    ASSERT_EQUAL_32(SVENotLastFlag, w1);
  }
}

TEST_SVE(sve_int_compare_vectors) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
  int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
  int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z10.VnB(), z10_inputs);
  InsrHelper(&masm, z11.VnB(), z11_inputs);
  Initialise(&masm, p0.VnB(), p0_inputs);

  __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
  __ Mrs(x6, NZCV);

  uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
  uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
  int p1_inputs[] = {1, 1};
  InsrHelper(&masm, z12.VnD(), z12_inputs);
  InsrHelper(&masm, z13.VnD(), z13_inputs);
  Initialise(&masm, p1.VnD(), p1_inputs);

  __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
  __ Mrs(x7, NZCV);

  int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
  int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};

  int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z14.VnH(), z14_inputs);
  InsrHelper(&masm, z15.VnH(), z15_inputs);
  Initialise(&masm, p2.VnH(), p2_inputs);

  __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x8, NZCV);

  __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x9, NZCV);

  int z16_inputs[] = {0, -1, 0, 0};
  int z17_inputs[] = {0, 0, 2147483647, -2147483648};
  int p3_inputs[] = {1, 1, 1, 1};
  InsrHelper(&masm, z16.VnS(), z16_inputs);
  InsrHelper(&masm, z17.VnS(), z17_inputs);
  Initialise(&masm, p3.VnS(), p3_inputs);

  __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x10, NZCV);

  __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x11, NZCV);

  // Architectural aliases testing.
  __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB());  // HS
  __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD());  // HI
  __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH());  // GE
  __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS());  // GT

  END();

  if (CAN_RUN()) {
    RUN();

    int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
    for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
      int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
    }

    int p7_expected[] = {1, 0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnD());

    int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnH());

    int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnH());

    int p10_expected[] = {0, 0, 0, 1};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0, 1, 1, 1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    // Reuse the expected results to verify the architectural aliases.
    ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
    ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
    ASSERT_EQUAL_SVE(p10_expected, p15.VnS());

    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(NoFlag, w7);
    ASSERT_EQUAL_32(NoFlag, w8);
    ASSERT_EQUAL_32(NoFlag, w9);
    ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
  }
}

TEST_SVE(sve_int_compare_vectors_wide_elements) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

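  // In these wide-element compare forms, the second source operand always has
  // D-sized lanes; each narrower lane of the first operand is compared with
  // the 64-bit lane of z19 that occupies the same 64-bit segment of the
  // vector.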
  int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
  int src2_inputs_1[] = {0, -1};
  int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_1);
  InsrHelper(&masm, z19.VnD(), src2_inputs_1);
  Initialise(&masm, p0.VnB(), mask_inputs_1);

  __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x2, NZCV);
  __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x3, NZCV);

  int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
  int src2_inputs_2[] = {0, -32767};
  int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z13.VnH(), src1_inputs_2);
  InsrHelper(&masm, z19.VnD(), src2_inputs_2);
  Initialise(&masm, p0.VnH(), mask_inputs_2);

  __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x4, NZCV);
  __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x5, NZCV);

  int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
  int src2_inputs_3[] = {0, -2147483648};
  int mask_inputs_3[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_3);
  InsrHelper(&masm, z19.VnD(), src2_inputs_3);
  Initialise(&masm, p0.VnS(), mask_inputs_3);

  __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x6, NZCV);
  __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x7, NZCV);

  int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
  int src2_inputs_4[] = {0x00, 0x7f};
  int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_4);
  InsrHelper(&masm, z19.VnD(), src2_inputs_4);
  Initialise(&masm, p0.VnB(), mask_inputs_4);

  __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x8, NZCV);
  __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x9, NZCV);

  int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
  int src2_inputs_5[] = {0x8000, 0xffff};
  int mask_inputs_5[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_5);
  InsrHelper(&masm, z19.VnD(), src2_inputs_5);
  Initialise(&masm, p0.VnS(), mask_inputs_5);

  __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x10, NZCV);
  __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x11, NZCV);

  END();

  if (CAN_RUN()) {
    RUN();
    int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());

    int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());

    int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnH());

    int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p5_expected, p5.VnH());

    int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnS());

    int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnS());

    int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnB());

    int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnB());

    int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    ASSERT_EQUAL_32(NoFlag, w2);
    ASSERT_EQUAL_32(NoFlag, w3);
    ASSERT_EQUAL_32(NoFlag, w4);
    ASSERT_EQUAL_32(SVENotLastFlag, w5);
    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(SVENotLastFlag, w7);
    ASSERT_EQUAL_32(SVEFirstFlag, w8);
    ASSERT_EQUAL_32(SVEFirstFlag, w9);
    ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
    ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
  }
}

TEST_SVE(sve_bitwise_imm) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // clang-format off
  uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
  uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
                           0x0123, 0x4567, 0x89ab, 0xcdef};
  uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
                          0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
  // clang-format on

  InsrHelper(&masm, z1.VnD(), z21_inputs);
  InsrHelper(&masm, z2.VnS(), z22_inputs);
  InsrHelper(&masm, z3.VnH(), z23_inputs);
  InsrHelper(&masm, z4.VnB(), z24_inputs);

  __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
  __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
  __ And(z3.VnH(), z3.VnH(), 0x0ff0);
  __ And(z4.VnB(), z4.VnB(), 0x3f);

  InsrHelper(&masm, z5.VnD(), z21_inputs);
  InsrHelper(&masm, z6.VnS(), z22_inputs);
  InsrHelper(&masm, z7.VnH(), z23_inputs);
  InsrHelper(&masm, z8.VnB(), z24_inputs);

  __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
  __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
  __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
  __ Eor(z8.VnB(), z8.VnB(), 0x3f);

  InsrHelper(&masm, z9.VnD(), z21_inputs);
  InsrHelper(&masm, z10.VnS(), z22_inputs);
  InsrHelper(&masm, z11.VnH(), z23_inputs);
  InsrHelper(&masm, z12.VnB(), z24_inputs);

  __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
  __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
  __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
  __ Orr(z12.VnB(), z12.VnB(), 0x3f);

  {
    // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
    // so here we test `dupm` directly.
    ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
    __ dupm(z13.VnD(), 0x7ffffff800000000);
    __ dupm(z14.VnS(), 0x7ffc7ffc);
    __ dupm(z15.VnH(), 0x3ffc);
    __ dupm(z16.VnB(), 0xc3);
  }

  END();

  if (CAN_RUN()) {
    RUN();

    // clang-format off
    uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
    uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
    uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
                              0x0120, 0x0560, 0x09a0, 0x0de0};
    uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
                             0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnB());

    uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
    uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
    uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
                              0x0ed3, 0x4a97, 0x865b, 0xc21f};
    uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
                             0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};

    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
    ASSERT_EQUAL_SVE(z8_expected, z8.VnB());

    uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
    uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
    uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
                               0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
    uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
                              0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};

    ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
    ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
    ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
    ASSERT_EQUAL_SVE(z12_expected, z12.VnB());

    uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
    uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
    uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
                               0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc};
    ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
    ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
    ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
    // clang-format on
  }
}

TEST_SVE(sve_dup_imm) {
  // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
  // unencodable immediates.

  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Encodable with `dup` (shift 0).
  __ Dup(z0.VnD(), -1);
  __ Dup(z1.VnS(), 0x7f);
  __ Dup(z2.VnH(), -0x80);
  __ Dup(z3.VnB(), 42);

  // Encodable with `dup` (shift 8).
  __ Dup(z4.VnD(), -42 * 256);
  __ Dup(z5.VnS(), -0x8000);
  __ Dup(z6.VnH(), 0x7f00);
  // B-sized lanes cannot take a shift of 8.

  // Encodable with `dupm` (but not `dup`).
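  // (`dupm` takes a bitmask immediate, using the same encoding as the
  // logical-immediate instructions, so only repeating bit patterns such as
  // these are encodable.)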
  __ Dup(z10.VnD(), 0x3fc);
  __ Dup(z11.VnS(), -516097);  // 0xfff81fff, as a signed int.
  __ Dup(z12.VnH(), 0x0001);
  // All values that fit B-sized lanes are encodable with `dup`.

  // Cases that require immediate synthesis.
  __ Dup(z20.VnD(), 0x1234);
  __ Dup(z21.VnD(), -4242);
  __ Dup(z22.VnD(), 0xfedcba9876543210);
  __ Dup(z23.VnS(), 0x01020304);
  __ Dup(z24.VnS(), -0x01020304);
  __ Dup(z25.VnH(), 0x3c38);
  // All values that fit B-sized lanes are directly encodable.

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
    ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
    ASSERT_EQUAL_SVE(0xff80, z2.VnH());
    ASSERT_EQUAL_SVE(0x2a, z3.VnB());

    ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
    ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
    ASSERT_EQUAL_SVE(0x7f00, z6.VnH());

    ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
    ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
    ASSERT_EQUAL_SVE(0x0001, z12.VnH());

    ASSERT_EQUAL_SVE(0x1234, z20.VnD());
    ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
    ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
    ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
    ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
    ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
  }
}

TEST_SVE(sve_inc_dec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

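  // Counts of active (true) lanes in p0 when it is interpreted at each lane
  // size: B-sized lanes use every predicate bit, H-sized lanes every second
  // bit, S-sized lanes every fourth and D-sized lanes every eighth.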
  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, 0x123456780000002a);
  __ Decp(x0, p0.VnB());

  __ Mov(x1, 0x123456780000002a);
  __ Incp(x1, p0.VnH());

  // Check that saturation does not occur.
  __ Mov(x10, 1);
  __ Decp(x10, p0.VnS());

  __ Mov(x11, UINT64_MAX);
  __ Incp(x11, p0.VnD());

  __ Mov(x12, INT64_MAX);
  __ Incp(x12, p0.VnB());

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Decp(x20, p15.VnB());

  __ Mov(x21, 0x4000000000000000);
  __ Incp(x21, p15.VnH());

  END();
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
    ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);

    ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
    ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
    ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);

    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
  }
}

TEST_SVE(sve_sqinc_sqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Sqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Sqincp(x1, p0.VnH());

  // 32-bit operations sign-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Sqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Sqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 1);
  __ Sqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high - 1);
  __ Sqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 0x8000000000000001);  // INT64_MIN + 1
  __ Sqdecp(x10, p0.VnB());

  __ Mov(x11, dummy_high + 0x80000001);  // INT32_MIN + 1
  __ Sqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 1);
  __ Sqdecp(x12, p0.VnS());

  __ Mov(x13, dummy_high + 1);
  __ Sqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Sqincp(x14, p0.VnB());

  __ Mov(x15, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Sqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0xffffffffffffffff);
  __ Sqincp(x18, p0.VnS());

  __ Mov(x19, dummy_high + 0xffffffff);
  __ Sqincp(x19, p0.VnD(), w19);

  __ Mov(x20, dummy_high + 0xffffffff);
  __ Sqdecp(x20, p0.VnB(), w20);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x21, 0);
  __ Sqdecp(x21, p15.VnB());

  __ Mov(x22, 0);
  __ Sqincp(x22, p15.VnH());

  __ Mov(x23, dummy_high);
  __ Sqdecp(x23, p15.VnS(), w23);

  __ Mov(x24, dummy_high);
  __ Sqincp(x24, p15.VnD(), w24);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations sign-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
    ASSERT_EQUAL_64(p0_d_count - 1, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(INT64_MIN, x10);
    ASSERT_EQUAL_64(INT32_MIN, x11);
    ASSERT_EQUAL_64(1 - p0_s_count, x12);
    ASSERT_EQUAL_64(1 - p0_d_count, x13);
    ASSERT_EQUAL_64(INT64_MAX, x14);
    ASSERT_EQUAL_64(INT32_MAX, x15);
    ASSERT_EQUAL_64(p0_s_count - 1, x18);
    ASSERT_EQUAL_64(p0_d_count - 1, x19);
    ASSERT_EQUAL_64(-1 - p0_b_count, x20);

    // Check all-true predicates.
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
  }
}

TEST_SVE(sve_uqinc_uqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Uqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Uqincp(x1, p0.VnH());

  // 32-bit operations zero-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Uqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Uqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 0x80000001);
  __ Uqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high + 0x7fffffff);
  __ Uqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 1);
  __ Uqdecp(x10, p0.VnB(), x10);

  __ Mov(x11, dummy_high + 1);
  __ Uqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 0x8000000000000000);  // INT64_MAX + 1
  __ Uqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 0x80000000);  // INT32_MAX + 1
  __ Uqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0xfffffffffffffffe);  // UINT64_MAX - 1
  __ Uqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0xfffffffe);  // UINT32_MAX - 1
  __ Uqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Uqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Uqincp(x19, p0.VnD(), w19);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Uqdecp(x20, p15.VnB(), x20);

  __ Mov(x21, 0x4000000000000000);
  __ Uqincp(x21, p15.VnH(), x21);

  __ Mov(x22, dummy_high + 0x40000000);
  __ Uqdecp(x22, p15.VnS(), w22);

  __ Mov(x23, dummy_high + 0x40000000);
  __ Uqincp(x23, p15.VnD(), w23);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations zero-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
    ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(0, x10);
    ASSERT_EQUAL_64(0, x11);
    ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
    ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
    ASSERT_EQUAL_64(UINT64_MAX, x14);
    ASSERT_EQUAL_64(UINT32_MAX, x15);
    ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
    ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);

    // Check all-true predicates.
    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
    ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
    ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
  }
}

TEST_SVE(sve_inc_dec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  // Check that saturation does not occur.

  int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
  InsrHelper(&masm, z0.VnD(), z0_inputs);

  int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
  InsrHelper(&masm, z1.VnD(), z1_inputs);

  int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
  InsrHelper(&masm, z2.VnS(), z2_inputs);

  int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
  InsrHelper(&masm, z3.VnH(), z3_inputs);

  // The MacroAssembler implements non-destructive operations using movprfx.
  __ Decp(z10.VnD(), p0, z0.VnD());
  __ Decp(z11.VnD(), p0, z1.VnD());
  __ Decp(z12.VnS(), p0, z2.VnS());
  __ Decp(z13.VnH(), p0, z3.VnH());

  __ Incp(z14.VnD(), p0, z0.VnD());
  __ Incp(z15.VnD(), p0, z1.VnD());
  __ Incp(z16.VnS(), p0, z2.VnS());
  __ Incp(z17.VnH(), p0, z3.VnH());

  // Also test destructive forms.
  __ Mov(z4, z0);
  __ Mov(z5, z1);
  __ Mov(z6, z2);
  __ Mov(z7, z3);

  __ Decp(z0.VnD(), p0);
  __ Decp(z1.VnD(), p0);
  __ Decp(z2.VnS(), p0);
  __ Decp(z3.VnH(), p0);

  __ Incp(z4.VnD(), p0);
  __ Incp(z5.VnD(), p0);
  __ Incp(z6.VnS(), p0);
  __ Incp(z7.VnH(), p0);

  END();
  if (CAN_RUN()) {
    RUN();

    // z0_inputs[...] - number of active D lanes (2)
    int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());

    // z1_inputs[...] - number of active D lanes (2)
    int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());

    // z2_inputs[...] - number of active S lanes (3)
    int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());

    // z3_inputs[...] - number of active H lanes (5)
    int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());

    // z0_inputs[...] + number of active D lanes (2)
    uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());

    // z1_inputs[...] + number of active D lanes (2)
    uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());

    // z2_inputs[...] + number of active S lanes (3)
    uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());

    // z3_inputs[...] + number of active H lanes (5)
    uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());

    // Check that the non-destructive macros produced the same results.
    ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
  }
}

TEST_SVE(sve_inc_dec_ptrue_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Dup(z0.VnD(), 0);
  __ Decp(z0.VnD(), p15);

  __ Dup(z1.VnS(), 0);
  __ Decp(z1.VnS(), p15);

  __ Dup(z2.VnH(), 0);
  __ Decp(z2.VnH(), p15);

  __ Dup(z3.VnD(), 0);
  __ Incp(z3.VnD(), p15);

  __ Dup(z4.VnS(), 0);
  __ Incp(z4.VnS(), p15);

  __ Dup(z5.VnH(), 0);
  __ Incp(z5.VnH(), p15);

  END();
  if (CAN_RUN()) {
    RUN();

    int d_lane_count = core.GetSVELaneCount(kDRegSize);
    int s_lane_count = core.GetSVELaneCount(kSRegSize);
    int h_lane_count = core.GetSVELaneCount(kHRegSize);

    for (int i = 0; i < d_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
    }

    for (int i = 0; i < s_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
      ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
    }

    for (int i = 0; i < h_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
      ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
    }
  }
}

Jacob Bramleye8289202019-07-31 11:25:23 +01001438TEST_SVE(sve_sqinc_sqdec_p_vector) {
1439 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001440 START();
1441
1442 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1443 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1444 Initialise(&masm, p0.VnB(), p0_inputs);
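  // Added note: the right-most value above is lane 0, and a wider lane is
  // active only if the predicate bit of its lowest-numbered byte is set.
  // Reading every 2nd, 4th and 8th value from the right gives
  // {1, 1, 0, 1, 1, 0, 1, 0}, {1, 0, 1, 1} and {1, 1}: 5 H, 3 S and 2 D
  // active lanes, as stated above.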
1445
1446 // Check that saturation behaves correctly.
1447
1448 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1449 InsrHelper(&masm, z0.VnD(), z0_inputs);
1450
1451 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1452 InsrHelper(&masm, z1.VnD(), z1_inputs);
1453
1454 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1455 InsrHelper(&masm, z2.VnS(), z2_inputs);
1456
1457 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1458 InsrHelper(&masm, z3.VnH(), z3_inputs);
1459
1460 // The MacroAssembler implements non-destructive operations using movprfx.
1461 __ Sqdecp(z10.VnD(), p0, z0.VnD());
1462 __ Sqdecp(z11.VnD(), p0, z1.VnD());
1463 __ Sqdecp(z12.VnS(), p0, z2.VnS());
1464 __ Sqdecp(z13.VnH(), p0, z3.VnH());
1465
1466 __ Sqincp(z14.VnD(), p0, z0.VnD());
1467 __ Sqincp(z15.VnD(), p0, z1.VnD());
1468 __ Sqincp(z16.VnS(), p0, z2.VnS());
1469 __ Sqincp(z17.VnH(), p0, z3.VnH());
1470
1471 // Also test destructive forms.
1472 __ Mov(z4, z0);
1473 __ Mov(z5, z1);
1474 __ Mov(z6, z2);
1475 __ Mov(z7, z3);
1476
1477 __ Sqdecp(z0.VnD(), p0);
1478 __ Sqdecp(z1.VnD(), p0);
1479 __ Sqdecp(z2.VnS(), p0);
1480 __ Sqdecp(z3.VnH(), p0);
1481
1482 __ Sqincp(z4.VnD(), p0);
1483 __ Sqincp(z5.VnD(), p0);
1484 __ Sqincp(z6.VnS(), p0);
1485 __ Sqincp(z7.VnH(), p0);
1486
1487 END();
1488 if (CAN_RUN()) {
1489 RUN();
1490
1491 // z0_inputs[...] - number of active D lanes (2)
1492 int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
1493 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1494
1495 // z1_inputs[...] - number of active D lanes (2)
1496 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1497 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1498
1499 // z2_inputs[...] - number of active S lanes (3)
1500 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
1501 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1502
1503 // z3_inputs[...] - number of active H lanes (5)
1504 int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
1505 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1506
1507 // z0_inputs[...] + number of active D lanes (2)
1508 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1509 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1510
1511 // z1_inputs[...] + number of active D lanes (2)
1512 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
1513 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1514
1515 // z2_inputs[...] + number of active S lanes (3)
1516 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
1517 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1518
1519 // z3_inputs[...] + number of active H lanes (5)
1520 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
1521 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1522
1523 // Check that the non-destructive macros produced the same results.
1524 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1525 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1526 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1527 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1528 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1529 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1530 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1531 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1532 }
1533}
1534
1535TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
1536 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1537 START();
1538
1539 // With an all-true predicate, these instructions increment or decrement by
1540 // the vector length.
1541 __ Ptrue(p15.VnB());
1542
1543 __ Dup(z0.VnD(), 0);
1544 __ Sqdecp(z0.VnD(), p15);
1545
1546 __ Dup(z1.VnS(), 0);
1547 __ Sqdecp(z1.VnS(), p15);
1548
1549 __ Dup(z2.VnH(), 0);
1550 __ Sqdecp(z2.VnH(), p15);
1551
1552 __ Dup(z3.VnD(), 0);
1553 __ Sqincp(z3.VnD(), p15);
1554
1555 __ Dup(z4.VnS(), 0);
1556 __ Sqincp(z4.VnS(), p15);
1557
1558 __ Dup(z5.VnH(), 0);
1559 __ Sqincp(z5.VnH(), p15);
1560
1561 END();
1562 if (CAN_RUN()) {
1563 RUN();
1564
1565 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1566 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1567 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1568
1569 for (int i = 0; i < d_lane_count; i++) {
1570 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1571 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1572 }
1573
1574 for (int i = 0; i < s_lane_count; i++) {
1575 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1576 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1577 }
1578
1579 for (int i = 0; i < h_lane_count; i++) {
1580 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1581 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1582 }
1583 }
1584}
1585
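// Editorial sketch (not part of the original VIXL test suite): the Uqincp and
// Uqdecp expectations below use unsigned saturation, so decrementing clamps
// at zero and incrementing clamps at the all-ones value for the lane. A
// minimal 64-bit model, for illustration only (the helper names are invented
// here):
inline uint64_t UnsignedSaturatingAddForIllustration(uint64_t value,
                                                     uint64_t count) {
  return (value > (UINT64_MAX - count)) ? UINT64_MAX : (value + count);
}
inline uint64_t UnsignedSaturatingSubForIllustration(uint64_t value,
                                                     uint64_t count) {
  return (value < count) ? 0 : (value - count);
}
// For example, UnsignedSaturatingSubForIllustration(1, 2) returns 0 and
// UnsignedSaturatingAddForIllustration(UINT64_MAX, 2) returns UINT64_MAX,
// matching the z0 and z5 expectations in the next test.
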
1586TEST_SVE(sve_uqinc_uqdec_p_vector) {
1587 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1588 START();
1589
1590 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1591 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1592 Initialise(&masm, p0.VnB(), p0_inputs);
1593
1594 // Check that saturation behaves correctly.
1595
1596 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
1597 InsrHelper(&masm, z0.VnD(), z0_inputs);
1598
1599 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
1600 InsrHelper(&masm, z1.VnD(), z1_inputs);
1601
1602 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
1603 InsrHelper(&masm, z2.VnS(), z2_inputs);
1604
1605 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
1606 InsrHelper(&masm, z3.VnH(), z3_inputs);
1607
1608 // The MacroAssembler implements non-destructive operations using movprfx.
1609 __ Uqdecp(z10.VnD(), p0, z0.VnD());
1610 __ Uqdecp(z11.VnD(), p0, z1.VnD());
1611 __ Uqdecp(z12.VnS(), p0, z2.VnS());
1612 __ Uqdecp(z13.VnH(), p0, z3.VnH());
1613
1614 __ Uqincp(z14.VnD(), p0, z0.VnD());
1615 __ Uqincp(z15.VnD(), p0, z1.VnD());
1616 __ Uqincp(z16.VnS(), p0, z2.VnS());
1617 __ Uqincp(z17.VnH(), p0, z3.VnH());
1618
1619 // Also test destructive forms.
1620 __ Mov(z4, z0);
1621 __ Mov(z5, z1);
1622 __ Mov(z6, z2);
1623 __ Mov(z7, z3);
1624
1625 __ Uqdecp(z0.VnD(), p0);
1626 __ Uqdecp(z1.VnD(), p0);
1627 __ Uqdecp(z2.VnS(), p0);
1628 __ Uqdecp(z3.VnH(), p0);
1629
1630 __ Uqincp(z4.VnD(), p0);
1631 __ Uqincp(z5.VnD(), p0);
1632 __ Uqincp(z6.VnS(), p0);
1633 __ Uqincp(z7.VnH(), p0);
1634
1635 END();
1636 if (CAN_RUN()) {
1637 RUN();
1638
1639 // z0_inputs[...] - number of active D lanes (2)
1640 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
1641 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1642
1643 // z1_inputs[...] - number of active D lanes (2)
1644 uint64_t z1_expected[] = {0x12345678ffffff28,
1645 0,
1646 0xfffffffffffffffd,
1647 0x7ffffffffffffffd};
1648 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1649
1650 // z2_inputs[...] - number of active S lanes (3)
1651 uint32_t z2_expected[] =
1652 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
1653 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1654
1655 // z3_inputs[...] - number of active H lanes (5)
1656 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
1657 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1658
1659 // z0_inputs[...] + number of active D lanes (2)
1660 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1661 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1662
1663 // z1_inputs[...] + number of active D lanes (2)
1664 uint64_t z5_expected[] = {0x12345678ffffff2c,
1665 2,
1666 UINT64_MAX,
1667 0x8000000000000001};
1668 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1669
1670 // z2_inputs[...] + number of active S lanes (3)
1671 uint32_t z6_expected[] =
1672 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
1673 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1674
1675 // z3_inputs[...] + number of active H lanes (5)
1676 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
1677 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1678
1679 // Check that the non-destructive macros produced the same results.
1680 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1681 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1682 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1683 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1684 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1685 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1686 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1687 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1688 }
1689}
1690
1691TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
1692 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1693 START();
1694
1695 // With an all-true predicate, these instructions increment or decrement by
1696 // the vector length.
1697 __ Ptrue(p15.VnB());
1698
1699 __ Mov(x0, 0x1234567800000000);
1700 __ Mov(x1, 0x12340000);
1701 __ Mov(x2, 0x1200);
1702
1703 __ Dup(z0.VnD(), x0);
1704 __ Uqdecp(z0.VnD(), p15);
1705
1706 __ Dup(z1.VnS(), x1);
1707 __ Uqdecp(z1.VnS(), p15);
1708
1709 __ Dup(z2.VnH(), x2);
1710 __ Uqdecp(z2.VnH(), p15);
1711
1712 __ Dup(z3.VnD(), x0);
1713 __ Uqincp(z3.VnD(), p15);
1714
1715 __ Dup(z4.VnS(), x1);
1716 __ Uqincp(z4.VnS(), p15);
1717
1718 __ Dup(z5.VnH(), x2);
1719 __ Uqincp(z5.VnH(), p15);
1720
1721 END();
1722 if (CAN_RUN()) {
1723 RUN();
1724
1725 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1726 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1727 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1728
1729 for (int i = 0; i < d_lane_count; i++) {
1730 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
1731 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
1732 }
1733
1734 for (int i = 0; i < s_lane_count; i++) {
1735 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
1736 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
1737 }
1738
1739 for (int i = 0; i < h_lane_count; i++) {
1740 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
1741 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
1742 }
1743 }
1744}
1745
1746TEST_SVE(sve_index) {
1747 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1748 START();
1749
1750 // Simple cases.
1751 __ Index(z0.VnB(), 0, 1);
1752 __ Index(z1.VnH(), 1, 1);
1753 __ Index(z2.VnS(), 2, 1);
1754 __ Index(z3.VnD(), 3, 1);
1755
1756 // Synthesised immediates.
1757 __ Index(z4.VnB(), 42, -1);
1758 __ Index(z5.VnH(), -1, 42);
1759 __ Index(z6.VnS(), 42, 42);
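  // Added note: the immediate form of `index` only encodes 5-bit signed
  // values (-16 to 15), so out-of-range immediates such as 42 are expected to
  // be materialised into scratch registers by the MacroAssembler.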
1760
1761 // Register arguments.
1762 __ Mov(x0, 42);
1763 __ Mov(x1, -3);
1764 __ Index(z10.VnD(), x0, x1);
1765 __ Index(z11.VnB(), w0, w1);
1766 // The register size should correspond to the lane size, but VIXL allows any
1767 // register at least as big as the lane size.
1768 __ Index(z12.VnB(), x0, x1);
1769 __ Index(z13.VnH(), w0, x1);
1770 __ Index(z14.VnS(), x0, w1);
1771
1772 // Integer overflow.
1773 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
1774 __ Index(z21.VnH(), 7, -3);
1775 __ Index(z22.VnS(), INT32_MAX - 2, 1);
1776 __ Index(z23.VnD(), INT64_MIN + 6, -7);
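  // Added note: each lane holds (start + i * step) truncated to the lane
  // width, so z20's B lanes are 0xfd, 0xff, 0x01, 0x03, 0x05, ..., wrapping
  // through zero; the expected arrays below list the highest checked lane
  // first.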
1777
1778 END();
1779
1780 if (CAN_RUN()) {
1781 RUN();
1782
1783 int b_lane_count = core.GetSVELaneCount(kBRegSize);
1784 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1785 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1786 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1787
1788 uint64_t b_mask = GetUintMask(kBRegSize);
1789 uint64_t h_mask = GetUintMask(kHRegSize);
1790 uint64_t s_mask = GetUintMask(kSRegSize);
1791 uint64_t d_mask = GetUintMask(kDRegSize);
1792
1793 // Simple cases.
1794 for (int i = 0; i < b_lane_count; i++) {
1795 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
1796 }
1797 for (int i = 0; i < h_lane_count; i++) {
1798 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
1799 }
1800 for (int i = 0; i < s_lane_count; i++) {
1801 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
1802 }
1803 for (int i = 0; i < d_lane_count; i++) {
1804 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
1805 }
1806
1807 // Synthesised immediates.
1808 for (int i = 0; i < b_lane_count; i++) {
1809 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
1810 }
1811 for (int i = 0; i < h_lane_count; i++) {
1812 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
1813 }
1814 for (int i = 0; i < s_lane_count; i++) {
1815 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
1816 }
1817
1818 // Register arguments.
1819 for (int i = 0; i < d_lane_count; i++) {
1820 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
1821 }
1822 for (int i = 0; i < b_lane_count; i++) {
1823 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
1824 }
1825 for (int i = 0; i < b_lane_count; i++) {
1826 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
1827 }
1828 for (int i = 0; i < h_lane_count; i++) {
1829 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
1830 }
1831 for (int i = 0; i < s_lane_count; i++) {
1832 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
1833 }
1834
1835 // Integer overflow.
1836 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
1837 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
1838 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
1839 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
1840 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
1841 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
1842 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
1843 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
1844 }
1845}
1846
1847TEST(sve_int_compare_count_and_limit_scalars) {
1848 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1849 START();
1850
1851 __ Mov(w20, 0xfffffffd);
1852 __ Mov(w21, 0xffffffff);
1853
1854 __ Whilele(p0.VnB(), w20, w21);
1855 __ Mrs(x0, NZCV);
1856 __ Whilele(p1.VnH(), w20, w21);
1857 __ Mrs(x1, NZCV);
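  // Added note: with signed inputs w20 is -3 and w21 is -1, so the `whilele`
  // condition (-3 + n) <= -1 holds for n = 0, 1 and 2. Three lanes are active
  // regardless of the lane size, giving the 0b...0111 (B) and 0b...010101 (H)
  // patterns in the expected results.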
1858
1859 __ Mov(w20, 0xffffffff);
1860 __ Mov(w21, 0x00000000);
1861
1862 __ Whilelt(p2.VnS(), w20, w21);
1863 __ Mrs(x2, NZCV);
1864 __ Whilelt(p3.VnD(), w20, w21);
1865 __ Mrs(x3, NZCV);
1866
1867 __ Mov(w20, 0xfffffffd);
1868 __ Mov(w21, 0xffffffff);
1869
1870 __ Whilels(p4.VnB(), w20, w21);
1871 __ Mrs(x4, NZCV);
1872 __ Whilels(p5.VnH(), w20, w21);
1873 __ Mrs(x5, NZCV);
1874
1875 __ Mov(w20, 0xffffffff);
1876 __ Mov(w21, 0x00000000);
1877
1878 __ Whilelo(p6.VnS(), w20, w21);
1879 __ Mrs(x6, NZCV);
1880 __ Whilelo(p7.VnD(), w20, w21);
1881 __ Mrs(x7, NZCV);
1882
1883 __ Mov(x20, 0xfffffffffffffffd);
1884 __ Mov(x21, 0xffffffffffffffff);
1885
1886 __ Whilele(p8.VnB(), x20, x21);
1887 __ Mrs(x8, NZCV);
1888 __ Whilele(p9.VnH(), x20, x21);
1889 __ Mrs(x9, NZCV);
1890
1891 __ Mov(x20, 0xffffffffffffffff);
1892 __ Mov(x21, 0x0000000000000000);
1893
1894 __ Whilelt(p10.VnS(), x20, x21);
1895 __ Mrs(x10, NZCV);
1896 __ Whilelt(p11.VnD(), x20, x21);
1897 __ Mrs(x11, NZCV);
1898
1899 __ Mov(x20, 0xfffffffffffffffd);
1900 __ Mov(x21, 0xffffffffffffffff);
1901
1902 __ Whilels(p12.VnB(), x20, x21);
1903 __ Mrs(x12, NZCV);
1904 __ Whilels(p13.VnH(), x20, x21);
1905 __ Mrs(x13, NZCV);
1906
1907 __ Mov(x20, 0xffffffffffffffff);
1908 __ Mov(x21, 0x0000000000000000);
1909
1910 __ Whilelo(p14.VnS(), x20, x21);
1911 __ Mrs(x14, NZCV);
1912 __ Whilelo(p15.VnD(), x20, x21);
1913 __ Mrs(x15, NZCV);
1914
1915 END();
1916
1917 if (CAN_RUN()) {
1918 RUN();
1919
1920 // 0b...00000000'00000111
1921 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1922 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1923
1924 // 0b...00000000'00010101
1925 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1926 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
1927
1928 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
1929 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
1930
1931 int p3_expected[] = {0x00, 0x01};
1932 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
1933
1934 // 0b...11111111'11111111
1935 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1936 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1937
1938 // 0b...01010101'01010101
1939 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1940 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1941
1942 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
1943 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1944
1945 int p7_expected[] = {0x00, 0x00};
1946 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1947
1948 // 0b...00000000'00000111
1949 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1950 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1951
1952 // 0b...00000000'00010101
1953 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1954 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1955
1956 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
1957 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1958
1959 int p11_expected[] = {0x00, 0x01};
1960 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
1961
1962 // 0b...11111111'11111111
1963 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1964 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
1965
1966 // 0b...01010101'01010101
1967 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1968 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
1969
1970 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
1971 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
1972
1973 int p15_expected[] = {0x00, 0x00};
1974 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
1975
1976 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
1977 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
1978 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
1979 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
1980 ASSERT_EQUAL_32(SVEFirstFlag, w4);
1981 ASSERT_EQUAL_32(SVEFirstFlag, w5);
1982 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
1983 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
1984 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
1985 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
1986 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1987 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
1988 ASSERT_EQUAL_32(SVEFirstFlag, w12);
1989 ASSERT_EQUAL_32(SVEFirstFlag, w13);
1990 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
1991 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
1992 }
1993}
1994
1995TEST(sve_int_compare_vectors_signed_imm) {
1996 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1997 START();
1998
1999 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
2000 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2001 InsrHelper(&masm, z13.VnB(), z13_inputs);
2002 Initialise(&masm, p0.VnB(), mask_inputs1);
2003
2004 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2005 __ Mrs(x2, NZCV);
2006 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2007
2008 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2009 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2010 InsrHelper(&masm, z14.VnH(), z14_inputs);
2011 Initialise(&masm, p0.VnH(), mask_inputs2);
2012
2013 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2014 __ Mrs(x4, NZCV);
2015 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2016
2017 int z15_inputs[] = {0, 1, -1, INT_MIN};
2018 int mask_inputs3[] = {0, 1, 1, 1};
2019 InsrHelper(&masm, z15.VnS(), z15_inputs);
2020 Initialise(&masm, p0.VnS(), mask_inputs3);
2021
2022 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2023 __ Mrs(x6, NZCV);
2024 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2025
2026 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2027 __ Mrs(x8, NZCV);
2028 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2029
2030 int64_t z16_inputs[] = {0, -1};
2031 int mask_inputs4[] = {1, 1};
2032 InsrHelper(&masm, z16.VnD(), z16_inputs);
2033 Initialise(&masm, p0.VnD(), mask_inputs4);
2034
2035 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2036 __ Mrs(x10, NZCV);
2037 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2038
2039 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2040 __ Mrs(x12, NZCV);
2041 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2042
2043 END();
2044
2045 if (CAN_RUN()) {
2046 RUN();
2047
2048 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2049 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2050
2051 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2052 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2053
2054 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2055 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2056
2057 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2058 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2059
2060 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2061 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2062
2063 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2064 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2065
2066 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2067 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2068
2069 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2070 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2071
2072 int p10_expected[] = {0x00, 0x01};
2073 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2074
2075 int p11_expected[] = {0x00, 0x00};
2076 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2077
2078 int p12_expected[] = {0x01, 0x00};
2079 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2080
2081 int p13_expected[] = {0x01, 0x01};
2082 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2083
2084 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2085 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2086 ASSERT_EQUAL_32(NoFlag, w6);
2087 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2088 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2089 ASSERT_EQUAL_32(NoFlag, w12);
2090 }
2091}
2092
2093TEST(sve_int_compare_vectors_unsigned_imm) {
2094 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2095 START();
2096
2097 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2098 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2099 InsrHelper(&masm, z13.VnB(), src1_inputs);
2100 Initialise(&masm, p0.VnB(), mask_inputs1);
2101
2102 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2103 __ Mrs(x2, NZCV);
2104 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2105
2106 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2107 int mask_inputs2[] = {1, 1, 1, 1, 0};
2108 InsrHelper(&masm, z13.VnH(), src2_inputs);
2109 Initialise(&masm, p0.VnH(), mask_inputs2);
2110
2111 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2112 __ Mrs(x4, NZCV);
2113 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2114
2115 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2116 int mask_inputs3[] = {1, 1, 1, 1};
2117 InsrHelper(&masm, z13.VnS(), src3_inputs);
2118 Initialise(&masm, p0.VnS(), mask_inputs3);
2119
2120 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2121 __ Mrs(x6, NZCV);
2122 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2123
2124 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2125 int mask_inputs4[] = {1, 1};
2126 InsrHelper(&masm, z13.VnD(), src4_inputs);
2127 Initialise(&masm, p0.VnD(), mask_inputs4);
2128
2129 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2130 __ Mrs(x8, NZCV);
2131 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2132
2133 END();
2134
2135 if (CAN_RUN()) {
2136 RUN();
2137
2138 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2139 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2140
2141 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2142 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2143
2144 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2145 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2146
2147 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2148 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2149
2150 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2151 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2152
2153 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2154 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2155
2156 int p8_expected[] = {0x00, 0x01};
2157 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2158
2159 int p9_expected[] = {0x00, 0x01};
2160 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2161
2162 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2163 ASSERT_EQUAL_32(NoFlag, w4);
2164 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2165 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2166 }
2167}
2168
2169TEST(sve_int_compare_conditionally_terminate_scalars) {
2170 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2171 START();
2172
2173 __ Mov(x0, 0xfedcba9887654321);
2174 __ Mov(x1, 0x1000100010001000);
2175
2176 // Initialise Z and C. These are preserved by cterm*, and the V flag is set to
2177 // !C if the condition does not hold.
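  // Added note: conversely, the N flag (SVEFirstFlag in the checks below) is
  // set when the termination condition does hold.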
2178 __ Mov(x10, NoFlag);
2179 __ Msr(NZCV, x10);
2180
2181 __ Ctermeq(w0, w0);
2182 __ Mrs(x2, NZCV);
2183 __ Ctermeq(x0, x1);
2184 __ Mrs(x3, NZCV);
2185 __ Ctermne(x0, x0);
2186 __ Mrs(x4, NZCV);
2187 __ Ctermne(w0, w1);
2188 __ Mrs(x5, NZCV);
2189
2190 // As above, but with all flags initially set.
2191 __ Mov(x10, NZCVFlag);
2192 __ Msr(NZCV, x10);
2193
2194 __ Ctermeq(w0, w0);
2195 __ Mrs(x6, NZCV);
2196 __ Ctermeq(x0, x1);
2197 __ Mrs(x7, NZCV);
2198 __ Ctermne(x0, x0);
2199 __ Mrs(x8, NZCV);
2200 __ Ctermne(w0, w1);
2201 __ Mrs(x9, NZCV);
2202
2203 END();
2204
2205 if (CAN_RUN()) {
2206 RUN();
2207
2208 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2209 ASSERT_EQUAL_32(VFlag, w3);
2210 ASSERT_EQUAL_32(VFlag, w4);
2211 ASSERT_EQUAL_32(SVEFirstFlag, w5);
2212
2213 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w6);
2214 ASSERT_EQUAL_32(ZCFlag, w7);
2215 ASSERT_EQUAL_32(ZCFlag, w8);
2216 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w9);
2217 }
2218}
2219
2220// Work out what the architectural `PredTest` pseudocode should produce for the
2221// given result and governing predicate.
2222template <typename Tg, typename Td, int N>
2223static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2224 const Tg (&pg)[N],
2225 int vl) {
2226 int first = -1;
2227 int last = -1;
2228 bool any_active = false;
2229
2230 // Only consider potentially-active lanes.
2231 int start = (N > vl) ? (N - vl) : 0;
2232 for (int i = start; i < N; i++) {
2233 if ((pg[i] & 1) == 1) {
2234 // Look for the first and last active lanes.
2235 // Note that in these arrays the 'first' lane is the one with the highest index.
2236 if (last < 0) last = i;
2237 first = i;
2238 // Look for any active lanes that are also active in pd.
2239 if ((pd[i] & 1) == 1) any_active = true;
2240 }
2241 }
2242
2243 uint32_t flags = 0;
2244 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2245 if (!any_active) flags |= SVENoneFlag;
2246 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2247 return static_cast<StatusFlags>(flags);
2248}
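
// Added note: SVEFirstFlag, SVENoneFlag and SVENotLastFlag are the SVE views
// of the N, Z and C condition flags respectively, which is why the tests can
// compare them against NZCV values read back with Mrs.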
2249
2250typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2251 const PRegister& pg,
2252 const PRegisterWithLaneSize& pn);
2253template <typename Tg, typename Tn, typename Td>
2254static void PfirstPnextHelper(Test* config,
2255 PfirstPnextFn macro,
2256 unsigned lane_size_in_bits,
2257 const Tg& pg_inputs,
2258 const Tn& pn_inputs,
2259 const Td& pd_expected) {
2260 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2261 START();
2262
2263 PRegister pg = p15;
2264 PRegister pn = p14;
2265 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2266 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2267
2268 // Initialise NZCV to an impossible value, to check that we actually write it.
2269 __ Mov(x10, NZCVFlag);
2270
2271 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2272 // the Assembler.
2273 __ Msr(NZCV, x10);
2274 __ Mov(p0, pn);
2275 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2276 pg,
2277 p0.WithLaneSize(lane_size_in_bits));
2278 __ Mrs(x0, NZCV);
2279
2280 // The MacroAssembler supports non-destructive use.
2281 __ Msr(NZCV, x10);
2282 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2283 pg,
2284 pn.WithLaneSize(lane_size_in_bits));
2285 __ Mrs(x1, NZCV);
2286
2287 // If pd.Aliases(pg) the macro requires a scratch register.
2288 {
2289 UseScratchRegisterScope temps(&masm);
2290 temps.Include(p13);
2291 __ Msr(NZCV, x10);
2292 __ Mov(p2, p15);
2293 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2294 p2,
2295 pn.WithLaneSize(lane_size_in_bits));
2296 __ Mrs(x2, NZCV);
2297 }
2298
2299 END();
2300
2301 if (CAN_RUN()) {
2302 RUN();
2303
2304 // Check that the inputs weren't modified.
2305 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2306 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2307
2308 // Check the primary operation.
2309 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2310 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2311 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2312
2313 // Check that the flags were properly set.
2314 StatusFlags nzcv_expected =
2315 GetPredTestFlags(pd_expected,
2316 pg_inputs,
2317 core.GetSVELaneCount(kBRegSize));
2318 ASSERT_EQUAL_64(nzcv_expected, x0);
2319 ASSERT_EQUAL_64(nzcv_expected, x1);
2320 ASSERT_EQUAL_64(nzcv_expected, x2);
2321 }
2322}
2323
2324template <typename Tg, typename Tn, typename Td>
2325static void PfirstHelper(Test* config,
2326 const Tg& pg_inputs,
2327 const Tn& pn_inputs,
2328 const Td& pd_expected) {
2329 PfirstPnextHelper(config,
2330 &MacroAssembler::Pfirst,
2331 kBRegSize, // pfirst only accepts B-sized lanes.
2332 pg_inputs,
2333 pn_inputs,
2334 pd_expected);
2335}
2336
2337template <typename Tg, typename Tn, typename Td>
2338static void PnextHelper(Test* config,
2339 unsigned lane_size_in_bits,
2340 const Tg& pg_inputs,
2341 const Tn& pn_inputs,
2342 const Td& pd_expected) {
2343 PfirstPnextHelper(config,
2344 &MacroAssembler::Pnext,
2345 lane_size_in_bits,
2346 pg_inputs,
2347 pn_inputs,
2348 pd_expected);
2349}
2350
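// Editorial sketch (not part of the original VIXL test suite): a rough
// reference model of `pfirst`, matching the expectations used in the test
// below. The input arrays in these tests list the highest-numbered lane
// first, so the architectural "first" active lane of pg is the right-most
// set entry.
template <int N>
void PfirstModelForIllustration(const int (&pg)[N],
                                const int (&pn)[N],
                                int (&pd)[N]) {
  // pd starts as a copy of pn; only the flag (lowest) bit of each lane
  // matters.
  for (int i = 0; i < N; i++) pd[i] = pn[i] & 1;
  // Activate the lane corresponding to the first active lane of pg, if any.
  for (int i = N - 1; i >= 0; i--) {
    if ((pg[i] & 1) == 1) {
      pd[i] = 1;
      break;
    }
  }
}
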
2351TEST_SVE(sve_pfirst) {
2352 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2353 // large VL), but few enough to make the test easy to read.
2354 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2355 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2356 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2357 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2358 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2359 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2360
2361 // Pfirst finds the first active lane in pg, and activates the corresponding
2362 // lane in pn (if it isn't already active).
2363
2364 // The first active lane in in1 is here. |
2365 // v
2366 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2367 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2368 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2369 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2370 PfirstHelper(config, in1, in0, exp10);
2371 PfirstHelper(config, in1, in2, exp12);
2372 PfirstHelper(config, in1, in3, exp13);
2373 PfirstHelper(config, in1, in4, exp14);
2374
2375 // The first active lane in in2 is here. |
2376 // v
2377 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2378 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2379 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2380 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2381 PfirstHelper(config, in2, in0, exp20);
2382 PfirstHelper(config, in2, in1, exp21);
2383 PfirstHelper(config, in2, in3, exp23);
2384 PfirstHelper(config, in2, in4, exp24);
2385
2386 // The first active lane in in3 is here. |
2387 // v
2388 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2389 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2390 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2391 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2392 PfirstHelper(config, in3, in0, exp30);
2393 PfirstHelper(config, in3, in1, exp31);
2394 PfirstHelper(config, in3, in2, exp32);
2395 PfirstHelper(config, in3, in4, exp34);
2396
2397 // | The first active lane in in4 is here.
2398 // v
2399 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2400 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2401 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2402 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2403 PfirstHelper(config, in4, in0, exp40);
2404 PfirstHelper(config, in4, in1, exp41);
2405 PfirstHelper(config, in4, in2, exp42);
2406 PfirstHelper(config, in4, in3, exp43);
2407
2408 // If pg is all inactive, the input is passed through unchanged.
2409 PfirstHelper(config, in0, in0, in0);
2410 PfirstHelper(config, in0, in1, in1);
2411 PfirstHelper(config, in0, in2, in2);
2412 PfirstHelper(config, in0, in3, in3);
2413
2414 // If the values of pg and pn match, the value is passed through unchanged.
2415 PfirstHelper(config, in0, in0, in0);
2416 PfirstHelper(config, in1, in1, in1);
2417 PfirstHelper(config, in2, in2, in2);
2418 PfirstHelper(config, in3, in3, in3);
2419}
2420
2421TEST_SVE(sve_pfirst_alias) {
2422 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2423 START();
2424
2425 // Check that the Simulator behaves correctly when all arguments are aliased.
2426 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2427 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2428 int in_s[] = {0, 1, 1, 0};
2429 int in_d[] = {1, 1};
2430
2431 Initialise(&masm, p0.VnB(), in_b);
2432 Initialise(&masm, p1.VnH(), in_h);
2433 Initialise(&masm, p2.VnS(), in_s);
2434 Initialise(&masm, p3.VnD(), in_d);
2435
2436 // Initialise NZCV to an impossible value, to check that we actually write it.
2437 __ Mov(x10, NZCVFlag);
2438
2439 __ Msr(NZCV, x10);
2440 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2441 __ Mrs(x0, NZCV);
2442
2443 __ Msr(NZCV, x10);
2444 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
2445 __ Mrs(x1, NZCV);
2446
2447 __ Msr(NZCV, x10);
2448 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
2449 __ Mrs(x2, NZCV);
2450
2451 __ Msr(NZCV, x10);
2452 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
2453 __ Mrs(x3, NZCV);
2454
2455 END();
2456
2457 if (CAN_RUN()) {
2458 RUN();
2459
2460 // The first lane from pg is already active in pdn, so the P register should
2461 // be unchanged.
2462 ASSERT_EQUAL_SVE(in_b, p0.VnB());
2463 ASSERT_EQUAL_SVE(in_h, p1.VnH());
2464 ASSERT_EQUAL_SVE(in_s, p2.VnS());
2465 ASSERT_EQUAL_SVE(in_d, p3.VnD());
2466
2467 ASSERT_EQUAL_64(SVEFirstFlag, x0);
2468 ASSERT_EQUAL_64(SVEFirstFlag, x1);
2469 ASSERT_EQUAL_64(SVEFirstFlag, x2);
2470 ASSERT_EQUAL_64(SVEFirstFlag, x3);
2471 }
2472}
2473
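// Editorial sketch (not part of the original VIXL test suite): a rough
// reference model of `pnext`, matching the expectations used in the tests
// below. At most one lane of the result is active: the first active lane of
// pg that is strictly after the last active lane of pn. Lane 0 is the
// right-most entry of these input arrays.
template <int N>
void PnextModelForIllustration(const int (&pg)[N],
                               const int (&pn)[N],
                               int (&pd)[N]) {
  // Find the last (highest-numbered) active lane of pn, or -1 if none.
  int last_active = -1;
  for (int lane = N - 1; lane >= 0; lane--) {
    if ((pn[N - 1 - lane] & 1) == 1) {
      last_active = lane;
      break;
    }
  }
  // Clear the result, then set the next active lane of pg, if there is one.
  for (int i = 0; i < N; i++) pd[i] = 0;
  for (int lane = last_active + 1; lane < N; lane++) {
    if ((pg[N - 1 - lane] & 1) == 1) {
      pd[N - 1 - lane] = 1;
      break;
    }
  }
}
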
2474TEST_SVE(sve_pnext_b) {
2475 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2476 // (to check propagation if we have a large VL), but few enough to make the
2477 // test easy to read.
2478 // For now, we just use kPRegMinSize so that the test works anywhere.
2479 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2480 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2481 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2482 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
2483 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2484
2485 // Pnext activates the next element that is true in pg, after the last-active
2486 // element in pn. If all pn elements are false (as in in0), it starts looking
2487 // at element 0.
2488
2489 // There are no active lanes in in0, so the result is simply the first active
2490 // lane from pg.
2491 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2492 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2493 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2494 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2495 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2496
2497 // The last active lane in in1 is here. |
2498 // v
2499 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2500 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2501 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2502 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2503 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2504
2505 // | The last active lane in in2 is here.
2506 // v
2507 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2508 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2509 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2510 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2511 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2512
2513 // | The last active lane in in3 is here.
2514 // v
2515 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2516 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2517 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2518 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2519 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2520
2521 // | The last active lane in in4 is here.
2522 // v
2523 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2524 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2525 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2526 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2527 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2528
2529 PnextHelper(config, kBRegSize, in0, in0, exp00);
2530 PnextHelper(config, kBRegSize, in1, in0, exp10);
2531 PnextHelper(config, kBRegSize, in2, in0, exp20);
2532 PnextHelper(config, kBRegSize, in3, in0, exp30);
2533 PnextHelper(config, kBRegSize, in4, in0, exp40);
2534
2535 PnextHelper(config, kBRegSize, in0, in1, exp01);
2536 PnextHelper(config, kBRegSize, in1, in1, exp11);
2537 PnextHelper(config, kBRegSize, in2, in1, exp21);
2538 PnextHelper(config, kBRegSize, in3, in1, exp31);
2539 PnextHelper(config, kBRegSize, in4, in1, exp41);
2540
2541 PnextHelper(config, kBRegSize, in0, in2, exp02);
2542 PnextHelper(config, kBRegSize, in1, in2, exp12);
2543 PnextHelper(config, kBRegSize, in2, in2, exp22);
2544 PnextHelper(config, kBRegSize, in3, in2, exp32);
2545 PnextHelper(config, kBRegSize, in4, in2, exp42);
2546
2547 PnextHelper(config, kBRegSize, in0, in3, exp03);
2548 PnextHelper(config, kBRegSize, in1, in3, exp13);
2549 PnextHelper(config, kBRegSize, in2, in3, exp23);
2550 PnextHelper(config, kBRegSize, in3, in3, exp33);
2551 PnextHelper(config, kBRegSize, in4, in3, exp43);
2552
2553 PnextHelper(config, kBRegSize, in0, in4, exp04);
2554 PnextHelper(config, kBRegSize, in1, in4, exp14);
2555 PnextHelper(config, kBRegSize, in2, in4, exp24);
2556 PnextHelper(config, kBRegSize, in3, in4, exp34);
2557 PnextHelper(config, kBRegSize, in4, in4, exp44);
2558}
2559
2560TEST_SVE(sve_pnext_h) {
2561 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2562 // (to check propagation if we have a large VL), but few enough to make the
2563 // test easy to read.
2564 // For now, we just use kPRegMinSize so that the test works anywhere.
2565 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
2566 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
2567 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
2568 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
2569 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
2570
2571 // Pnext activates the next element that is true in pg, after the last-active
2572 // element in pn. If all pn elements are false (as in in0), it starts looking
2573 // at element 0.
2574 //
2575 // As for other SVE instructions, elements are only considered to be active if
2576 // the _first_ bit in each field is one. Other bits are ignored.
2577
2578 // There are no active lanes in in0, so the result is simply the first active
2579 // lane from pg.
2580 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
2581 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
2582 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
2583 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
2584 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
2585
2586 // | The last active lane in in1 is here.
2587 // v
2588 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
2589 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
2590 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
2591 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
2592 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
2593
2594 // | The last active lane in in2 is here.
2595 // v
2596 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
2597 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
2598 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
2599 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
2600 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
2601
2602 // | The last active lane in in3 is here.
2603 // v
2604 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
2605 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
2606 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
2607 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
2608 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
2609
2610 // | The last active lane in in4 is here.
2611 // v
2612 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
2613 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
2614 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
2615 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
2616 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
2617
2618 PnextHelper(config, kHRegSize, in0, in0, exp00);
2619 PnextHelper(config, kHRegSize, in1, in0, exp10);
2620 PnextHelper(config, kHRegSize, in2, in0, exp20);
2621 PnextHelper(config, kHRegSize, in3, in0, exp30);
2622 PnextHelper(config, kHRegSize, in4, in0, exp40);
2623
2624 PnextHelper(config, kHRegSize, in0, in1, exp01);
2625 PnextHelper(config, kHRegSize, in1, in1, exp11);
2626 PnextHelper(config, kHRegSize, in2, in1, exp21);
2627 PnextHelper(config, kHRegSize, in3, in1, exp31);
2628 PnextHelper(config, kHRegSize, in4, in1, exp41);
2629
2630 PnextHelper(config, kHRegSize, in0, in2, exp02);
2631 PnextHelper(config, kHRegSize, in1, in2, exp12);
2632 PnextHelper(config, kHRegSize, in2, in2, exp22);
2633 PnextHelper(config, kHRegSize, in3, in2, exp32);
2634 PnextHelper(config, kHRegSize, in4, in2, exp42);
2635
2636 PnextHelper(config, kHRegSize, in0, in3, exp03);
2637 PnextHelper(config, kHRegSize, in1, in3, exp13);
2638 PnextHelper(config, kHRegSize, in2, in3, exp23);
2639 PnextHelper(config, kHRegSize, in3, in3, exp33);
2640 PnextHelper(config, kHRegSize, in4, in3, exp43);
2641
2642 PnextHelper(config, kHRegSize, in0, in4, exp04);
2643 PnextHelper(config, kHRegSize, in1, in4, exp14);
2644 PnextHelper(config, kHRegSize, in2, in4, exp24);
2645 PnextHelper(config, kHRegSize, in3, in4, exp34);
2646 PnextHelper(config, kHRegSize, in4, in4, exp44);
2647}
2648
2649TEST_SVE(sve_pnext_s) {
2650 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2651 // (to check propagation if we have a large VL), but few enough to make the
2652 // test easy to read.
2653 // For now, we just use kPRegMinSize so that the test works anywhere.
2654 int in0[] = {0xe, 0xc, 0x8, 0x0};
2655 int in1[] = {0x0, 0x2, 0x0, 0x1};
2656 int in2[] = {0x0, 0x1, 0xf, 0x0};
2657 int in3[] = {0xf, 0x0, 0x0, 0x0};
2658
2659 // Pnext activates the next element that is true in pg, after the last-active
2660 // element in pn. If all pn elements are false (as in in0), it starts looking
2661 // at element 0.
2662 //
2663 // As for other SVE instructions, elements are only considered to be active if
2664 // the _first_ bit in each field is one. Other bits are ignored.
2665
2666 // There are no active lanes in in0, so the result is simply the first active
2667 // lane from pg.
2668 int exp00[] = {0, 0, 0, 0};
2669 int exp10[] = {0, 0, 0, 1};
2670 int exp20[] = {0, 0, 1, 0};
2671 int exp30[] = {1, 0, 0, 0};
2672
2673 // | The last active lane in in1 is here.
2674 // v
2675 int exp01[] = {0, 0, 0, 0};
2676 int exp11[] = {0, 0, 0, 0};
2677 int exp21[] = {0, 0, 1, 0};
2678 int exp31[] = {1, 0, 0, 0};
2679
2680 // | The last active lane in in2 is here.
2681 // v
2682 int exp02[] = {0, 0, 0, 0};
2683 int exp12[] = {0, 0, 0, 0};
2684 int exp22[] = {0, 0, 0, 0};
2685 int exp32[] = {1, 0, 0, 0};
2686
2687 // | The last active lane in in3 is here.
2688 // v
2689 int exp03[] = {0, 0, 0, 0};
2690 int exp13[] = {0, 0, 0, 0};
2691 int exp23[] = {0, 0, 0, 0};
2692 int exp33[] = {0, 0, 0, 0};
2693
2694 PnextHelper(config, kSRegSize, in0, in0, exp00);
2695 PnextHelper(config, kSRegSize, in1, in0, exp10);
2696 PnextHelper(config, kSRegSize, in2, in0, exp20);
2697 PnextHelper(config, kSRegSize, in3, in0, exp30);
2698
2699 PnextHelper(config, kSRegSize, in0, in1, exp01);
2700 PnextHelper(config, kSRegSize, in1, in1, exp11);
2701 PnextHelper(config, kSRegSize, in2, in1, exp21);
2702 PnextHelper(config, kSRegSize, in3, in1, exp31);
2703
2704 PnextHelper(config, kSRegSize, in0, in2, exp02);
2705 PnextHelper(config, kSRegSize, in1, in2, exp12);
2706 PnextHelper(config, kSRegSize, in2, in2, exp22);
2707 PnextHelper(config, kSRegSize, in3, in2, exp32);
2708
2709 PnextHelper(config, kSRegSize, in0, in3, exp03);
2710 PnextHelper(config, kSRegSize, in1, in3, exp13);
2711 PnextHelper(config, kSRegSize, in2, in3, exp23);
2712 PnextHelper(config, kSRegSize, in3, in3, exp33);
2713}
2714
2715TEST_SVE(sve_pnext_d) {
2716 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2717 // (to check propagation if we have a large VL), but few enough to make the
2718 // test easy to read.
2719 // For now, we just use kPRegMinSize so that the test works anywhere.
2720 int in0[] = {0xfe, 0xf0};
2721 int in1[] = {0x00, 0x55};
2722 int in2[] = {0x33, 0xff};
2723
2724 // Pnext activates the next element that is true in pg, after the last-active
2725 // element in pn. If all pn elements are false (as in in0), it starts looking
2726 // at element 0.
2727 //
2728 // As for other SVE instructions, elements are only considered to be active if
2729 // the _first_ bit in each field is one. Other bits are ignored.
2730
2731 // There are no active lanes in in0, so the result is simply the first active
2732 // lane from pg.
2733 int exp00[] = {0, 0};
2734 int exp10[] = {0, 1};
2735 int exp20[] = {0, 1};
2736
2737 // | The last active lane in in1 is here.
2738 // v
2739 int exp01[] = {0, 0};
2740 int exp11[] = {0, 0};
2741 int exp21[] = {1, 0};
2742
2743 // | The last active lane in in2 is here.
2744 // v
2745 int exp02[] = {0, 0};
2746 int exp12[] = {0, 0};
2747 int exp22[] = {0, 0};
2748
2749 PnextHelper(config, kDRegSize, in0, in0, exp00);
2750 PnextHelper(config, kDRegSize, in1, in0, exp10);
2751 PnextHelper(config, kDRegSize, in2, in0, exp20);
2752
2753 PnextHelper(config, kDRegSize, in0, in1, exp01);
2754 PnextHelper(config, kDRegSize, in1, in1, exp11);
2755 PnextHelper(config, kDRegSize, in2, in1, exp21);
2756
2757 PnextHelper(config, kDRegSize, in0, in2, exp02);
2758 PnextHelper(config, kDRegSize, in1, in2, exp12);
2759 PnextHelper(config, kDRegSize, in2, in2, exp22);
2760}
2761
2762TEST_SVE(sve_pnext_alias) {
2763 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2764 START();
2765
2766 // Check that the Simulator behaves correctly when all arguments are aliased.
2767 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2768 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2769 int in_s[] = {0, 1, 1, 0};
2770 int in_d[] = {1, 1};
2771
2772 Initialise(&masm, p0.VnB(), in_b);
2773 Initialise(&masm, p1.VnH(), in_h);
2774 Initialise(&masm, p2.VnS(), in_s);
2775 Initialise(&masm, p3.VnD(), in_d);
2776
2777 // Initialise NZCV to an impossible value, to check that we actually write it.
2778 __ Mov(x10, NZCVFlag);
2779
2780 __ Msr(NZCV, x10);
2781 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
2782 __ Mrs(x0, NZCV);
2783
2784 __ Msr(NZCV, x10);
2785 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
2786 __ Mrs(x1, NZCV);
2787
2788 __ Msr(NZCV, x10);
2789 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
2790 __ Mrs(x2, NZCV);
2791
2792 __ Msr(NZCV, x10);
2793 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
2794 __ Mrs(x3, NZCV);
2795
2796 END();
2797
2798 if (CAN_RUN()) {
2799 RUN();
2800
2801 // Since pg.Is(pdn), there can be no active lanes in pg above the last
2802 // active lane in pdn, so the result should always be zero.
2803 ASSERT_EQUAL_SVE(0, p0.VnB());
2804 ASSERT_EQUAL_SVE(0, p1.VnH());
2805 ASSERT_EQUAL_SVE(0, p2.VnS());
2806 ASSERT_EQUAL_SVE(0, p3.VnD());
2807
2808 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
2809 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
2810 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
2811 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
2812 }
2813}
2814
2815static void PtrueHelper(Test* config,
2816 unsigned lane_size_in_bits,
2817 FlagsUpdate s = LeaveFlags) {
2818 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2819 START();
2820
2821 PRegisterWithLaneSize p[kNumberOfPRegisters];
2822 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
2823 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
2824 }
2825
2826 // Initialise NZCV to an impossible value, to check that we actually write it.
2827 StatusFlags nzcv_unmodified = NZCVFlag;
2828 __ Mov(x20, nzcv_unmodified);
2829
2830 // We don't have enough registers to conveniently test every pattern, so take
2831 // samples from each group.
2832 __ Msr(NZCV, x20);
2833 __ Ptrue(p[0], SVE_POW2, s);
2834 __ Mrs(x0, NZCV);
2835
2836 __ Msr(NZCV, x20);
2837 __ Ptrue(p[1], SVE_VL1, s);
2838 __ Mrs(x1, NZCV);
2839
2840 __ Msr(NZCV, x20);
2841 __ Ptrue(p[2], SVE_VL2, s);
2842 __ Mrs(x2, NZCV);
2843
2844 __ Msr(NZCV, x20);
2845 __ Ptrue(p[3], SVE_VL5, s);
2846 __ Mrs(x3, NZCV);
2847
2848 __ Msr(NZCV, x20);
2849 __ Ptrue(p[4], SVE_VL6, s);
2850 __ Mrs(x4, NZCV);
2851
2852 __ Msr(NZCV, x20);
2853 __ Ptrue(p[5], SVE_VL8, s);
2854 __ Mrs(x5, NZCV);
2855
2856 __ Msr(NZCV, x20);
2857 __ Ptrue(p[6], SVE_VL16, s);
2858 __ Mrs(x6, NZCV);
2859
2860 __ Msr(NZCV, x20);
2861 __ Ptrue(p[7], SVE_VL64, s);
2862 __ Mrs(x7, NZCV);
2863
2864 __ Msr(NZCV, x20);
2865 __ Ptrue(p[8], SVE_VL256, s);
2866 __ Mrs(x8, NZCV);
2867
2868 {
2869 // We have to use the Assembler to test values not defined by
2870 // SVEPredicateConstraint, so call `ptrue` or `ptrues` directly.
2871 typedef void (
2872 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
2873 int pattern);
2874 AssemblePtrueFn assemble =
2875 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
2876
2877 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
2878 __ msr(NZCV, x20);
2879 (masm.*assemble)(p[9], 0xe);
2880 __ mrs(x9, NZCV);
2881
2882 __ msr(NZCV, x20);
2883 (masm.*assemble)(p[10], 0x16);
2884 __ mrs(x10, NZCV);
2885
2886 __ msr(NZCV, x20);
2887 (masm.*assemble)(p[11], 0x1a);
2888 __ mrs(x11, NZCV);
2889
2890 __ msr(NZCV, x20);
2891 (masm.*assemble)(p[12], 0x1c);
2892 __ mrs(x12, NZCV);
2893 }
2894
2895 __ Msr(NZCV, x20);
2896 __ Ptrue(p[13], SVE_MUL4, s);
2897 __ Mrs(x13, NZCV);
2898
2899 __ Msr(NZCV, x20);
2900 __ Ptrue(p[14], SVE_MUL3, s);
2901 __ Mrs(x14, NZCV);
2902
2903 __ Msr(NZCV, x20);
2904 __ Ptrue(p[15], SVE_ALL, s);
2905 __ Mrs(x15, NZCV);
2906
2907 END();
2908
2909 if (CAN_RUN()) {
2910 RUN();
2911
2912 int all = core.GetSVELaneCount(lane_size_in_bits);
2913 int pow2 = 1 << HighestSetBitPosition(all);
2914 int mul4 = all - (all % 4);
2915 int mul3 = all - (all % 3);
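    // Added example (not in the original test): with B-sized lanes at a
    // 384-bit VL, `all` is 48, so `pow2` is 32 and both `mul4` and `mul3`
    // are 48, while patterns such as SVE_VL64 and SVE_VL256 request more
    // lanes than exist and so produce an all-false predicate.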
2916
2917 // Check P register results.
2918 for (int i = 0; i < all; i++) {
2919 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
2920 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
2921 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
2922 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
2923 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
2924 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
2925 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
2926 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
2927 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
2928 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
2929 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
2930 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
2931 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
2932 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
2933 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
2934 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
2935 }
2936
2937 // Check NZCV results.
2938 if (s == LeaveFlags) {
2939 // No flags should have been updated.
2940 for (int i = 0; i <= 15; i++) {
2941 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
2942 }
2943 } else {
2944 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
2945 StatusFlags nonzero = SVEFirstFlag;
2946
2947 // POW2
2948 ASSERT_EQUAL_64(nonzero, x0);
2949 // VL*
2950 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
2951 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
2952 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
2953 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
2954 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
2955 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
2956 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
2957 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
2958 // #uimm5
2959 ASSERT_EQUAL_64(zero, x9);
2960 ASSERT_EQUAL_64(zero, x10);
2961 ASSERT_EQUAL_64(zero, x11);
2962 ASSERT_EQUAL_64(zero, x12);
2963 // MUL*
2964 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
2965 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
2966 // ALL
2967 ASSERT_EQUAL_64(nonzero, x15);
2968 }
2969 }
2970}
2971
Jacob Bramleye8289202019-07-31 11:25:23 +01002972TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
2973TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
2974TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
2975TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002976
Jacob Bramleye8289202019-07-31 11:25:23 +01002977TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
2978TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
2979TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
2980TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002981
Jacob Bramleye8289202019-07-31 11:25:23 +01002982TEST_SVE(sve_pfalse) {
2983 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002984 START();
2985
2986 // Initialise non-zero inputs.
2987 __ Ptrue(p0.VnB());
2988 __ Ptrue(p1.VnH());
2989 __ Ptrue(p2.VnS());
2990 __ Ptrue(p3.VnD());
2991
2992 // The instruction only supports B-sized lanes, but the lane size has no
2993 // logical effect, so the MacroAssembler accepts anything.
2994 __ Pfalse(p0.VnB());
2995 __ Pfalse(p1.VnH());
2996 __ Pfalse(p2.VnS());
2997 __ Pfalse(p3.VnD());
2998
2999 END();
3000
3001 if (CAN_RUN()) {
3002 RUN();
3003
3004 ASSERT_EQUAL_SVE(0, p0.VnB());
3005 ASSERT_EQUAL_SVE(0, p1.VnB());
3006 ASSERT_EQUAL_SVE(0, p2.VnB());
3007 ASSERT_EQUAL_SVE(0, p3.VnB());
3008 }
3009}
3010
Jacob Bramleye8289202019-07-31 11:25:23 +01003011TEST_SVE(sve_ptest) {
3012 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003013 START();
3014
3015 // Initialise NZCV to a known (impossible) value.
3016 StatusFlags nzcv_unmodified = NZCVFlag;
3017 __ Mov(x0, nzcv_unmodified);
3018 __ Msr(NZCV, x0);
3019
3020 // Construct some test inputs.
3021 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3022 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3023 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3024 __ Pfalse(p0.VnB());
3025 __ Ptrue(p1.VnB());
3026 Initialise(&masm, p2.VnB(), in2);
3027 Initialise(&masm, p3.VnB(), in3);
3028 Initialise(&masm, p4.VnB(), in4);
3029
3030 // All-inactive pg.
3031 __ Ptest(p0, p0.VnB());
3032 __ Mrs(x0, NZCV);
3033 __ Ptest(p0, p1.VnB());
3034 __ Mrs(x1, NZCV);
3035 __ Ptest(p0, p2.VnB());
3036 __ Mrs(x2, NZCV);
3037 __ Ptest(p0, p3.VnB());
3038 __ Mrs(x3, NZCV);
3039 __ Ptest(p0, p4.VnB());
3040 __ Mrs(x4, NZCV);
3041
3042 // All-active pg.
3043 __ Ptest(p1, p0.VnB());
3044 __ Mrs(x5, NZCV);
3045 __ Ptest(p1, p1.VnB());
3046 __ Mrs(x6, NZCV);
3047 __ Ptest(p1, p2.VnB());
3048 __ Mrs(x7, NZCV);
3049 __ Ptest(p1, p3.VnB());
3050 __ Mrs(x8, NZCV);
3051 __ Ptest(p1, p4.VnB());
3052 __ Mrs(x9, NZCV);
3053
3054 // Combinations of other inputs.
3055 __ Ptest(p2, p2.VnB());
3056 __ Mrs(x20, NZCV);
3057 __ Ptest(p2, p3.VnB());
3058 __ Mrs(x21, NZCV);
3059 __ Ptest(p2, p4.VnB());
3060 __ Mrs(x22, NZCV);
3061 __ Ptest(p3, p2.VnB());
3062 __ Mrs(x23, NZCV);
3063 __ Ptest(p3, p3.VnB());
3064 __ Mrs(x24, NZCV);
3065 __ Ptest(p3, p4.VnB());
3066 __ Mrs(x25, NZCV);
3067 __ Ptest(p4, p2.VnB());
3068 __ Mrs(x26, NZCV);
3069 __ Ptest(p4, p3.VnB());
3070 __ Mrs(x27, NZCV);
3071 __ Ptest(p4, p4.VnB());
3072 __ Mrs(x28, NZCV);
3073
3074 END();
3075
3076 if (CAN_RUN()) {
3077 RUN();
3078
3079 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
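  // For SVE flag-setting operations: N (SVEFirstFlag) means the first active
  // lane of pn is true, Z (SVENoneFlag) means no active lane is true, and
  // C (SVENotLastFlag) means the last active lane is not true.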
3080
3081 // If pg is all inactive, the value of pn is irrelevant.
3082 ASSERT_EQUAL_64(zero, x0);
3083 ASSERT_EQUAL_64(zero, x1);
3084 ASSERT_EQUAL_64(zero, x2);
3085 ASSERT_EQUAL_64(zero, x3);
3086 ASSERT_EQUAL_64(zero, x4);
3087
3088 // All-active pg.
3089 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3090 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3091 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3092 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3093 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3094 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3095
3096 // Other inputs.
3097 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3098 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3099 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3100 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3101 x23); // pg: in3, pn: in2
3102 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3103 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3104 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3105 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3106 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3107 }
3108}
3109
Jacob Bramleye8289202019-07-31 11:25:23 +01003110TEST_SVE(sve_cntp) {
3111 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01003112 START();
3113
3114 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3115 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
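  // Predicates hold one bit per byte; for H, S and D lanes only the bit for
  // the lowest byte of each element is significant, which is why the counts
  // above shrink to 5, 2 and 1.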
3116 Initialise(&masm, p0.VnB(), p0_inputs);
3117
3118 // With an all-true predicate, these instructions measure the vector length.
3119 __ Ptrue(p10.VnB());
3120 __ Ptrue(p11.VnH());
3121 __ Ptrue(p12.VnS());
3122 __ Ptrue(p13.VnD());
3123
3124 // `ptrue p10.b` provides an all-active pg.
3125 __ Cntp(x10, p10, p10.VnB());
3126 __ Cntp(x11, p10, p11.VnH());
3127 __ Cntp(x12, p10, p12.VnS());
3128 __ Cntp(x13, p10, p13.VnD());
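  // For example, with a 384-bit VL these return 48, 24, 12 and 6 respectively.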
3129
3130 // Check that the predicate mask is applied properly.
3131 __ Cntp(x14, p10, p10.VnB());
3132 __ Cntp(x15, p11, p10.VnB());
3133 __ Cntp(x16, p12, p10.VnB());
3134 __ Cntp(x17, p13, p10.VnB());
3135
3136 // Check other patterns (including some ignored bits).
3137 __ Cntp(x0, p10, p0.VnB());
3138 __ Cntp(x1, p10, p0.VnH());
3139 __ Cntp(x2, p10, p0.VnS());
3140 __ Cntp(x3, p10, p0.VnD());
3141 __ Cntp(x4, p0, p10.VnB());
3142 __ Cntp(x5, p0, p10.VnH());
3143 __ Cntp(x6, p0, p10.VnS());
3144 __ Cntp(x7, p0, p10.VnD());
3145
3146 END();
3147
3148 if (CAN_RUN()) {
3149 RUN();
3150
3151 int vl_b = core.GetSVELaneCount(kBRegSize);
3152 int vl_h = core.GetSVELaneCount(kHRegSize);
3153 int vl_s = core.GetSVELaneCount(kSRegSize);
3154 int vl_d = core.GetSVELaneCount(kDRegSize);
3155
3156 // Check all-active predicates in various combinations.
3157 ASSERT_EQUAL_64(vl_b, x10);
3158 ASSERT_EQUAL_64(vl_h, x11);
3159 ASSERT_EQUAL_64(vl_s, x12);
3160 ASSERT_EQUAL_64(vl_d, x13);
3161
3162 ASSERT_EQUAL_64(vl_b, x14);
3163 ASSERT_EQUAL_64(vl_h, x15);
3164 ASSERT_EQUAL_64(vl_s, x16);
3165 ASSERT_EQUAL_64(vl_d, x17);
3166
3167 // Check that irrelevant bits are properly ignored.
3168 ASSERT_EQUAL_64(7, x0);
3169 ASSERT_EQUAL_64(5, x1);
3170 ASSERT_EQUAL_64(2, x2);
3171 ASSERT_EQUAL_64(1, x3);
3172
3173 ASSERT_EQUAL_64(7, x4);
3174 ASSERT_EQUAL_64(5, x5);
3175 ASSERT_EQUAL_64(2, x6);
3176 ASSERT_EQUAL_64(1, x7);
3177 }
3178}
3179
Martyn Capewell74f84f62019-10-30 15:30:44 +00003180typedef void (MacroAssembler::*CntFn)(const Register& dst,
3181 int pattern,
3182 int multiplier);
3183
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003184template <typename T>
3185void GenerateCntSequence(MacroAssembler* masm,
3186 CntFn cnt,
3187 T acc_value,
3188 int multiplier) {
3189 // Initialise accumulators.
3190 masm->Mov(x0, acc_value);
3191 masm->Mov(x1, acc_value);
3192 masm->Mov(x2, acc_value);
3193 masm->Mov(x3, acc_value);
3194 masm->Mov(x4, acc_value);
3195 masm->Mov(x5, acc_value);
3196 masm->Mov(x6, acc_value);
3197 masm->Mov(x7, acc_value);
3198 masm->Mov(x8, acc_value);
3199 masm->Mov(x9, acc_value);
3200 masm->Mov(x10, acc_value);
3201 masm->Mov(x11, acc_value);
3202 masm->Mov(x12, acc_value);
3203 masm->Mov(x13, acc_value);
3204 masm->Mov(x14, acc_value);
3205 masm->Mov(x15, acc_value);
3206 masm->Mov(x18, acc_value);
3207 masm->Mov(x19, acc_value);
3208 masm->Mov(x20, acc_value);
3209 masm->Mov(x21, acc_value);
3210
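  // `Register(code, size)` selects wN or xN depending on whether T is 32 or 64
  // bits wide, so the same sequence exercises both destination sizes. Note that
  // x16 and x17 are skipped, presumably because they are reserved as scratch
  // registers by the MacroAssembler.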
3211 (masm->*cnt)(Register(0, sizeof(T) * kBitsPerByte), SVE_POW2, multiplier);
3212 (masm->*cnt)(Register(1, sizeof(T) * kBitsPerByte), SVE_VL1, multiplier);
3213 (masm->*cnt)(Register(2, sizeof(T) * kBitsPerByte), SVE_VL2, multiplier);
3214 (masm->*cnt)(Register(3, sizeof(T) * kBitsPerByte), SVE_VL3, multiplier);
3215 (masm->*cnt)(Register(4, sizeof(T) * kBitsPerByte), SVE_VL4, multiplier);
3216 (masm->*cnt)(Register(5, sizeof(T) * kBitsPerByte), SVE_VL5, multiplier);
3217 (masm->*cnt)(Register(6, sizeof(T) * kBitsPerByte), SVE_VL6, multiplier);
3218 (masm->*cnt)(Register(7, sizeof(T) * kBitsPerByte), SVE_VL7, multiplier);
3219 (masm->*cnt)(Register(8, sizeof(T) * kBitsPerByte), SVE_VL8, multiplier);
3220 (masm->*cnt)(Register(9, sizeof(T) * kBitsPerByte), SVE_VL16, multiplier);
3221 (masm->*cnt)(Register(10, sizeof(T) * kBitsPerByte), SVE_VL32, multiplier);
3222 (masm->*cnt)(Register(11, sizeof(T) * kBitsPerByte), SVE_VL64, multiplier);
3223 (masm->*cnt)(Register(12, sizeof(T) * kBitsPerByte), SVE_VL128, multiplier);
3224 (masm->*cnt)(Register(13, sizeof(T) * kBitsPerByte), SVE_VL256, multiplier);
3225 (masm->*cnt)(Register(14, sizeof(T) * kBitsPerByte), 16, multiplier);
3226 (masm->*cnt)(Register(15, sizeof(T) * kBitsPerByte), 23, multiplier);
3227 (masm->*cnt)(Register(18, sizeof(T) * kBitsPerByte), 28, multiplier);
3228 (masm->*cnt)(Register(19, sizeof(T) * kBitsPerByte), SVE_MUL4, multiplier);
3229 (masm->*cnt)(Register(20, sizeof(T) * kBitsPerByte), SVE_MUL3, multiplier);
3230 (masm->*cnt)(Register(21, sizeof(T) * kBitsPerByte), SVE_ALL, multiplier);
3231}
3232
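// Return `fixed` if the vector has at least that many lanes, and 0 otherwise.
// For example, FixedVL(8, 12) == 8 but FixedVL(16, 12) == 0, matching the
// behaviour of the fixed SVE_VL<n> patterns.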
3233int FixedVL(int fixed, int length) {
3234 VIXL_ASSERT(((fixed >= 1) && (fixed <= 8)) || (fixed == 16) ||
3235 (fixed == 32) || (fixed == 64) || (fixed == 128) ||
3236 (fixed == 256));
3237 return (length >= fixed) ? fixed : 0;
3238}
3239
Martyn Capewell74f84f62019-10-30 15:30:44 +00003240static void CntHelper(Test* config,
3241 CntFn cnt,
3242 int multiplier,
Martyn Capewell579c92d2019-10-30 17:48:52 +00003243 int lane_size_in_bits,
3244 int64_t acc_value = 0,
3245 bool is_increment = true) {
Martyn Capewell74f84f62019-10-30 15:30:44 +00003246 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3247 START();
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003248 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003249 END();
3250
3251 if (CAN_RUN()) {
3252 RUN();
3253
3254 int all = core.GetSVELaneCount(lane_size_in_bits);
3255 int pow2 = 1 << HighestSetBitPosition(all);
3256 int mul4 = all - (all % 4);
3257 int mul3 = all - (all % 3);
3258
Martyn Capewell579c92d2019-10-30 17:48:52 +00003259 multiplier = is_increment ? multiplier : -multiplier;
3260
3261 ASSERT_EQUAL_64(acc_value + (multiplier * pow2), x0);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003262 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(1, all)), x1);
3263 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(2, all)), x2);
3264 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(3, all)), x3);
3265 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(4, all)), x4);
3266 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(5, all)), x5);
3267 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(6, all)), x6);
3268 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(7, all)), x7);
3269 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(8, all)), x8);
3270 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(16, all)), x9);
3271 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(32, all)), x10);
3272 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(64, all)), x11);
3273 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(128, all)), x12);
3274 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(256, all)), x13);
Martyn Capewell579c92d2019-10-30 17:48:52 +00003275 ASSERT_EQUAL_64(acc_value, x14);
3276 ASSERT_EQUAL_64(acc_value, x15);
3277 ASSERT_EQUAL_64(acc_value, x18);
3278 ASSERT_EQUAL_64(acc_value + (multiplier * mul4), x19);
3279 ASSERT_EQUAL_64(acc_value + (multiplier * mul3), x20);
3280 ASSERT_EQUAL_64(acc_value + (multiplier * all), x21);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003281 }
3282}
3283
Martyn Capewell579c92d2019-10-30 17:48:52 +00003284static void IncHelper(Test* config,
3285 CntFn cnt,
3286 int multiplier,
3287 int lane_size_in_bits,
3288 int64_t acc_value) {
3289 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3290}
3291
3292static void DecHelper(Test* config,
3293 CntFn cnt,
3294 int multiplier,
3295 int lane_size_in_bits,
3296 int64_t acc_value) {
3297 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3298}
3299
Martyn Capewell74f84f62019-10-30 15:30:44 +00003300TEST_SVE(sve_cntb) {
3301 CntHelper(config, &MacroAssembler::Cntb, 1, kBRegSize);
3302 CntHelper(config, &MacroAssembler::Cntb, 2, kBRegSize);
3303 CntHelper(config, &MacroAssembler::Cntb, 15, kBRegSize);
3304 CntHelper(config, &MacroAssembler::Cntb, 16, kBRegSize);
3305}
3306
3307TEST_SVE(sve_cnth) {
3308 CntHelper(config, &MacroAssembler::Cnth, 1, kHRegSize);
3309 CntHelper(config, &MacroAssembler::Cnth, 2, kHRegSize);
3310 CntHelper(config, &MacroAssembler::Cnth, 15, kHRegSize);
3311 CntHelper(config, &MacroAssembler::Cnth, 16, kHRegSize);
3312}
3313
3314TEST_SVE(sve_cntw) {
3315 CntHelper(config, &MacroAssembler::Cntw, 1, kWRegSize);
3316 CntHelper(config, &MacroAssembler::Cntw, 2, kWRegSize);
3317 CntHelper(config, &MacroAssembler::Cntw, 15, kWRegSize);
3318 CntHelper(config, &MacroAssembler::Cntw, 16, kWRegSize);
3319}
3320
3321TEST_SVE(sve_cntd) {
3322 CntHelper(config, &MacroAssembler::Cntd, 1, kDRegSize);
3323 CntHelper(config, &MacroAssembler::Cntd, 2, kDRegSize);
3324 CntHelper(config, &MacroAssembler::Cntd, 15, kDRegSize);
3325 CntHelper(config, &MacroAssembler::Cntd, 16, kDRegSize);
3326}
3327
Martyn Capewell579c92d2019-10-30 17:48:52 +00003328TEST_SVE(sve_decb) {
3329 DecHelper(config, &MacroAssembler::Decb, 1, kBRegSize, 42);
3330 DecHelper(config, &MacroAssembler::Decb, 2, kBRegSize, -1);
3331 DecHelper(config, &MacroAssembler::Decb, 15, kBRegSize, INT64_MIN);
3332 DecHelper(config, &MacroAssembler::Decb, 16, kBRegSize, -42);
3333}
3334
3335TEST_SVE(sve_dech) {
3336 DecHelper(config, &MacroAssembler::Dech, 1, kHRegSize, 42);
3337 DecHelper(config, &MacroAssembler::Dech, 2, kHRegSize, -1);
3338 DecHelper(config, &MacroAssembler::Dech, 15, kHRegSize, INT64_MIN);
3339 DecHelper(config, &MacroAssembler::Dech, 16, kHRegSize, -42);
3340}
3341
3342TEST_SVE(sve_decw) {
3343 DecHelper(config, &MacroAssembler::Decw, 1, kWRegSize, 42);
3344 DecHelper(config, &MacroAssembler::Decw, 2, kWRegSize, -1);
3345 DecHelper(config, &MacroAssembler::Decw, 15, kWRegSize, INT64_MIN);
3346 DecHelper(config, &MacroAssembler::Decw, 16, kWRegSize, -42);
3347}
3348
3349TEST_SVE(sve_decd) {
3350 DecHelper(config, &MacroAssembler::Decd, 1, kDRegSize, 42);
3351 DecHelper(config, &MacroAssembler::Decd, 2, kDRegSize, -1);
3352 DecHelper(config, &MacroAssembler::Decd, 15, kDRegSize, INT64_MIN);
3353 DecHelper(config, &MacroAssembler::Decd, 16, kDRegSize, -42);
3354}
3355
3356TEST_SVE(sve_incb) {
3357 IncHelper(config, &MacroAssembler::Incb, 1, kBRegSize, 42);
3358 IncHelper(config, &MacroAssembler::Incb, 2, kBRegSize, -1);
3359 IncHelper(config, &MacroAssembler::Incb, 15, kBRegSize, INT64_MAX);
3360 IncHelper(config, &MacroAssembler::Incb, 16, kBRegSize, -42);
3361}
3362
3363TEST_SVE(sve_inch) {
3364 IncHelper(config, &MacroAssembler::Inch, 1, kHRegSize, 42);
3365 IncHelper(config, &MacroAssembler::Inch, 2, kHRegSize, -1);
3366 IncHelper(config, &MacroAssembler::Inch, 15, kHRegSize, INT64_MAX);
3367 IncHelper(config, &MacroAssembler::Inch, 16, kHRegSize, -42);
3368}
3369
3370TEST_SVE(sve_incw) {
3371 IncHelper(config, &MacroAssembler::Incw, 1, kWRegSize, 42);
3372 IncHelper(config, &MacroAssembler::Incw, 2, kWRegSize, -1);
3373 IncHelper(config, &MacroAssembler::Incw, 15, kWRegSize, INT64_MAX);
3374 IncHelper(config, &MacroAssembler::Incw, 16, kWRegSize, -42);
3375}
3376
3377TEST_SVE(sve_incd) {
3378 IncHelper(config, &MacroAssembler::Incd, 1, kDRegSize, 42);
3379 IncHelper(config, &MacroAssembler::Incd, 2, kDRegSize, -1);
3380 IncHelper(config, &MacroAssembler::Incd, 15, kDRegSize, INT64_MAX);
3381 IncHelper(config, &MacroAssembler::Incd, 16, kDRegSize, -42);
3382}
3383
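// Saturating scalar addition, used to model the expected results of the
// saturating increment and decrement instructions. Illustrative values:
// QAdd<uint8_t>(250, 10) == 255, QAdd<int8_t>(120, 10) == 127 and
// QAdd<int8_t>(-120, -10) == -128.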
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003384template <typename T>
3385static T QAdd(T x, int y) {
3386 VIXL_ASSERT(y > INT_MIN);
3387 T result;
3388 T min = std::numeric_limits<T>::min();
3389 T max = std::numeric_limits<T>::max();
3390 if ((x >= 0) && (y >= 0)) {
3391 // For positive x and y, saturate at max.
3392 result = (max - x) < static_cast<T>(y) ? max : x + y;
3393 } else if ((y < 0) && ((x < 0) || (min == 0))) {
3394 // For negative y, where either x is negative or T is unsigned, saturate at min.
3395 result = (x - min) < static_cast<T>(-y) ? min : x + y;
3396 } else {
3397 result = x + y;
3398 }
3399 return result;
3400}
3401
3402template <typename T>
3403static void QIncDecHelper(Test* config,
3404 CntFn cnt,
3405 int multiplier,
3406 int lane_size_in_bits,
3407 T acc_value,
3408 bool is_increment) {
3409 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3410 START();
3411 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
3412 END();
3413
3414 if (CAN_RUN()) {
3415 RUN();
3416
3417 int all = core.GetSVELaneCount(lane_size_in_bits);
3418 int pow2 = 1 << HighestSetBitPosition(all);
3419 int mul4 = all - (all % 4);
3420 int mul3 = all - (all % 3);
3421
3422 multiplier = is_increment ? multiplier : -multiplier;
3423
3424 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
3425 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
3426 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
3427 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
3428 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
3429 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
3430 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
3431 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
3432 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
3433 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
3434 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
3435 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
3436 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
3437 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
3438 ASSERT_EQUAL_64(acc_value, x14);
3439 ASSERT_EQUAL_64(acc_value, x15);
3440 ASSERT_EQUAL_64(acc_value, x18);
3441 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
3442 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
3443 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
3444 }
3445}
3446
3447template <typename T>
3448static void QIncHelper(Test* config,
3449 CntFn cnt,
3450 int multiplier,
3451 int lane_size_in_bits,
3452 T acc_value) {
3453 QIncDecHelper<T>(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3454}
3455
3456template <typename T>
3457static void QDecHelper(Test* config,
3458 CntFn cnt,
3459 int multiplier,
3460 int lane_size_in_bits,
3461 T acc_value) {
3462 QIncDecHelper<T>(config,
3463 cnt,
3464 multiplier,
3465 lane_size_in_bits,
3466 acc_value,
3467 false);
3468}
3469
3470TEST_SVE(sve_sqdecb) {
3471 int64_t bigneg = INT64_MIN + 42;
3472 int64_t bigpos = INT64_MAX - 42;
3473 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
3474 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 2, kBRegSize, bigneg);
3475 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
3476 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 16, kBRegSize, bigpos);
3477}
3478
3479TEST_SVE(sve_sqdech) {
3480 int64_t bigneg = INT64_MIN + 42;
3481 int64_t bigpos = INT64_MAX - 42;
3482 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
3483 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 2, kHRegSize, bigneg);
3484 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
3485 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 16, kHRegSize, bigpos);
3486}
3487
3488TEST_SVE(sve_sqdecw) {
3489 int64_t bigneg = INT64_MIN + 42;
3490 int64_t bigpos = INT64_MAX - 42;
3491 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
3492 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 2, kWRegSize, bigneg);
3493 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
3494 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 16, kWRegSize, bigpos);
3495}
3496
3497TEST_SVE(sve_sqdecd) {
3498 int64_t bigneg = INT64_MIN + 42;
3499 int64_t bigpos = INT64_MAX - 42;
3500 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
3501 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 2, kDRegSize, bigneg);
3502 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
3503 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 16, kDRegSize, bigpos);
3504}
3505
3506TEST_SVE(sve_sqincb) {
3507 int64_t bigneg = INT64_MIN + 42;
3508 int64_t bigpos = INT64_MAX - 42;
3509 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
3510 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 2, kBRegSize, bigneg);
3511 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
3512 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 16, kBRegSize, bigpos);
3513}
3514
3515TEST_SVE(sve_sqinch) {
3516 int64_t bigneg = INT64_MIN + 42;
3517 int64_t bigpos = INT64_MAX - 42;
3518 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
3519 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 2, kHRegSize, bigneg);
3520 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
3521 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 16, kHRegSize, bigpos);
3522}
3523
3524TEST_SVE(sve_sqincw) {
3525 int64_t bigneg = INT64_MIN + 42;
3526 int64_t bigpos = INT64_MAX - 42;
3527 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
3528 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 2, kWRegSize, bigneg);
3529 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
3530 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 16, kWRegSize, bigpos);
3531}
3532
3533TEST_SVE(sve_sqincd) {
3534 int64_t bigneg = INT64_MIN + 42;
3535 int64_t bigpos = INT64_MAX - 42;
3536 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
3537 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 2, kDRegSize, bigneg);
3538 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
3539 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 16, kDRegSize, bigpos);
3540}
3541
3542TEST_SVE(sve_uqdecb) {
3543 uint32_t big32 = UINT32_MAX - 42;
3544 uint64_t big64 = UINT64_MAX - 42;
3545 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
3546 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
3547 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
3548 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big32);
3549 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
3550 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
3551 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
3552 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big64);
3553}
3554
3555TEST_SVE(sve_uqdech) {
3556 uint32_t big32 = UINT32_MAX - 42;
3557 uint64_t big64 = UINT64_MAX - 42;
3558 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
3559 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
3560 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
3561 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big32);
3562 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
3563 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
3564 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
3565 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big64);
3566}
3567
3568TEST_SVE(sve_uqdecw) {
3569 uint32_t big32 = UINT32_MAX - 42;
3570 uint64_t big64 = UINT64_MAX - 42;
3571 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
3572 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
3573 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
3574 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big32);
3575 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
3576 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
3577 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
3578 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big64);
3579}
3580
3581TEST_SVE(sve_uqdecd) {
3582 uint32_t big32 = UINT32_MAX - 42;
3583 uint64_t big64 = UINT64_MAX - 42;
3584 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
3585 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
3586 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
3587 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big32);
3588 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
3589 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
3590 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
3591 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big64);
3592}
3593
3594TEST_SVE(sve_uqincb) {
3595 uint32_t big32 = UINT32_MAX - 42;
3596 uint64_t big64 = UINT64_MAX - 42;
3597 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
3598 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
3599 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
3600 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big32);
3601 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
3602 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
3603 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
3604 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big64);
3605}
3606
3607TEST_SVE(sve_uqinch) {
3608 uint32_t big32 = UINT32_MAX - 42;
3609 uint64_t big64 = UINT64_MAX - 42;
3610 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
3611 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
3612 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
3613 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big32);
3614 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
3615 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
3616 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
3617 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big64);
3618}
3619
3620TEST_SVE(sve_uqincw) {
3621 uint32_t big32 = UINT32_MAX - 42;
3622 uint64_t big64 = UINT64_MAX - 42;
3623 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
3624 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
3625 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
3626 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big32);
3627 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
3628 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
3629 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
3630 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big64);
3631}
3632
3633TEST_SVE(sve_uqincd) {
3634 uint32_t big32 = UINT32_MAX - 42;
3635 uint64_t big64 = UINT64_MAX - 42;
3636 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
3637 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
3638 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
3639 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big32);
3640 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
3641 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
3642 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
3643 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big64);
3644}
3645
3646typedef void (MacroAssembler::*QIncDecXWFn)(const Register& dst,
3647 const Register& src,
3648 int pattern,
3649 int multiplier);
3650
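// These `<Xdn>, <Wdn>` forms operate on a 32-bit accumulator: the scaled
// element count is added to or subtracted from the value in Wdn, saturated to
// the signed 32-bit range, and the result is sign-extended into Xdn.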
3651static void QIncDecXWHelper(Test* config,
3652 QIncDecXWFn cnt,
3653 int multiplier,
3654 int lane_size_in_bits,
3655 int32_t acc_value,
3656 bool is_increment) {
3657 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3658 START();
3659
3660 // Initialise accumulators.
3661 __ Mov(x0, acc_value);
3662 __ Mov(x1, acc_value);
3663 __ Mov(x2, acc_value);
3664 __ Mov(x3, acc_value);
3665 __ Mov(x4, acc_value);
3666 __ Mov(x5, acc_value);
3667 __ Mov(x6, acc_value);
3668 __ Mov(x7, acc_value);
3669 __ Mov(x8, acc_value);
3670 __ Mov(x9, acc_value);
3671 __ Mov(x10, acc_value);
3672 __ Mov(x11, acc_value);
3673 __ Mov(x12, acc_value);
3674 __ Mov(x13, acc_value);
3675 __ Mov(x14, acc_value);
3676 __ Mov(x15, acc_value);
3677 __ Mov(x18, acc_value);
3678 __ Mov(x19, acc_value);
3679 __ Mov(x20, acc_value);
3680 __ Mov(x21, acc_value);
3681
3682 (masm.*cnt)(x0, w0, SVE_POW2, multiplier);
3683 (masm.*cnt)(x1, w1, SVE_VL1, multiplier);
3684 (masm.*cnt)(x2, w2, SVE_VL2, multiplier);
3685 (masm.*cnt)(x3, w3, SVE_VL3, multiplier);
3686 (masm.*cnt)(x4, w4, SVE_VL4, multiplier);
3687 (masm.*cnt)(x5, w5, SVE_VL5, multiplier);
3688 (masm.*cnt)(x6, w6, SVE_VL6, multiplier);
3689 (masm.*cnt)(x7, w7, SVE_VL7, multiplier);
3690 (masm.*cnt)(x8, w8, SVE_VL8, multiplier);
3691 (masm.*cnt)(x9, w9, SVE_VL16, multiplier);
3692 (masm.*cnt)(x10, w10, SVE_VL32, multiplier);
3693 (masm.*cnt)(x11, w11, SVE_VL64, multiplier);
3694 (masm.*cnt)(x12, w12, SVE_VL128, multiplier);
3695 (masm.*cnt)(x13, w13, SVE_VL256, multiplier);
3696 (masm.*cnt)(x14, w14, 16, multiplier);
3697 (masm.*cnt)(x15, w15, 23, multiplier);
3698 (masm.*cnt)(x18, w18, 28, multiplier);
3699 (masm.*cnt)(x19, w19, SVE_MUL4, multiplier);
3700 (masm.*cnt)(x20, w20, SVE_MUL3, multiplier);
3701 (masm.*cnt)(x21, w21, SVE_ALL, multiplier);
3702
3703 END();
3704
3705 if (CAN_RUN()) {
3706 RUN();
3707
3708 int all = core.GetSVELaneCount(lane_size_in_bits);
3709 int pow2 = 1 << HighestSetBitPosition(all);
3710 int mul4 = all - (all % 4);
3711 int mul3 = all - (all % 3);
3712
3713 multiplier = is_increment ? multiplier : -multiplier;
3714
3715 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
3716 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
3717 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
3718 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
3719 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
3720 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
3721 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
3722 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
3723 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
3724 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
3725 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
3726 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
3727 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
3728 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
3729 ASSERT_EQUAL_64(acc_value, x14);
3730 ASSERT_EQUAL_64(acc_value, x15);
3731 ASSERT_EQUAL_64(acc_value, x18);
3732 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
3733 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
3734 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
3735 }
3736}
3737
3738static void QIncXWHelper(Test* config,
3739 QIncDecXWFn cnt,
3740 int multiplier,
3741 int lane_size_in_bits,
3742 int32_t acc_value) {
3743 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3744}
3745
3746static void QDecXWHelper(Test* config,
3747 QIncDecXWFn cnt,
3748 int multiplier,
3749 int lane_size_in_bits,
3750 int32_t acc_value) {
3751 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3752}
3753
3754TEST_SVE(sve_sqdecb_xw) {
3755 QDecXWHelper(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
3756 QDecXWHelper(config, &MacroAssembler::Sqdecb, 2, kBRegSize, INT32_MIN + 42);
3757 QDecXWHelper(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
3758 QDecXWHelper(config, &MacroAssembler::Sqdecb, 16, kBRegSize, INT32_MAX - 42);
3759}
3760
3761TEST_SVE(sve_sqdech_xw) {
3762 QDecXWHelper(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
3763 QDecXWHelper(config, &MacroAssembler::Sqdech, 2, kHRegSize, INT32_MIN + 42);
3764 QDecXWHelper(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
3765 QDecXWHelper(config, &MacroAssembler::Sqdech, 16, kHRegSize, INT32_MAX - 42);
3766}
3767
3768TEST_SVE(sve_sqdecw_xw) {
3769 QDecXWHelper(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
3770 QDecXWHelper(config, &MacroAssembler::Sqdecw, 2, kWRegSize, INT32_MIN + 42);
3771 QDecXWHelper(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
3772 QDecXWHelper(config, &MacroAssembler::Sqdecw, 16, kWRegSize, INT32_MAX - 42);
3773}
3774
3775TEST_SVE(sve_sqdecd_xw) {
3776 QDecXWHelper(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
3777 QDecXWHelper(config, &MacroAssembler::Sqdecd, 2, kDRegSize, INT32_MIN + 42);
3778 QDecXWHelper(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
3779 QDecXWHelper(config, &MacroAssembler::Sqdecd, 16, kDRegSize, INT32_MAX - 42);
3780}
3781
3782TEST_SVE(sve_sqincb_xw) {
3783 QIncXWHelper(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
3784 QIncXWHelper(config, &MacroAssembler::Sqincb, 2, kBRegSize, INT32_MIN + 42);
3785 QIncXWHelper(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
3786 QIncXWHelper(config, &MacroAssembler::Sqincb, 16, kBRegSize, INT32_MAX - 42);
3787}
3788
3789TEST_SVE(sve_sqinch_xw) {
3790 QIncXWHelper(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
3791 QIncXWHelper(config, &MacroAssembler::Sqinch, 2, kHRegSize, INT32_MIN + 42);
3792 QIncXWHelper(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
3793 QIncXWHelper(config, &MacroAssembler::Sqinch, 16, kHRegSize, INT32_MAX - 42);
3794}
3795
3796TEST_SVE(sve_sqincw_xw) {
3797 QIncXWHelper(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
3798 QIncXWHelper(config, &MacroAssembler::Sqincw, 2, kWRegSize, INT32_MIN + 42);
3799 QIncXWHelper(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
3800 QIncXWHelper(config, &MacroAssembler::Sqincw, 16, kWRegSize, INT32_MAX - 42);
3801}
3802
3803TEST_SVE(sve_sqincd_xw) {
3804 QIncXWHelper(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
3805 QIncXWHelper(config, &MacroAssembler::Sqincd, 2, kDRegSize, INT32_MIN + 42);
3806 QIncXWHelper(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
3807 QIncXWHelper(config, &MacroAssembler::Sqincd, 16, kDRegSize, INT32_MAX - 42);
3808}
3809
Martyn Capewell8188ddf2019-11-21 17:09:34 +00003810typedef void (MacroAssembler::*IncDecZFn)(const ZRegister& dst,
3811 int pattern,
3812 int multiplier);
3813typedef void (MacroAssembler::*AddSubFn)(const ZRegister& dst,
3814 const ZRegister& src1,
3815 const ZRegister& src2);
3816
3817static void IncDecZHelper(Test* config,
3818 IncDecZFn fn,
3819 CntFn cnt,
3820 AddSubFn addsub,
3821 int multiplier,
3822 int lane_size_in_bits) {
3823 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3824 START();
3825
3826 uint64_t acc_inputs[] = {0x7766554433221100,
3827 0xffffffffffffffff,
3828 0x0000000000000000,
3829 0xffffffff0000ffff,
3830 0x7fffffffffffffff,
3831 0x8000000000000000,
3832 0x7fffffff7fff7fff,
3833 0x8000000080008000};
3834
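  // Fill every Z register with the same repeating pattern. InsrHelper shifts
  // in eight doubleword values per call, so four calls cover the maximum
  // 2048-bit vector length.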
3835 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
3836 for (int j = 0; j < 4; j++) {
3837 InsrHelper(&masm, ZRegister(i, kDRegSize), acc_inputs);
3838 }
3839 }
3840 for (unsigned i = 0; i < 15; i++) {
3841 __ Mov(XRegister(i), 0);
3842 }
3843
3844 (masm.*fn)(z16.WithLaneSize(lane_size_in_bits), SVE_POW2, multiplier);
3845 (masm.*fn)(z17.WithLaneSize(lane_size_in_bits), SVE_VL1, multiplier);
3846 (masm.*fn)(z18.WithLaneSize(lane_size_in_bits), SVE_VL2, multiplier);
3847 (masm.*fn)(z19.WithLaneSize(lane_size_in_bits), SVE_VL3, multiplier);
3848 (masm.*fn)(z20.WithLaneSize(lane_size_in_bits), SVE_VL4, multiplier);
3849 (masm.*fn)(z21.WithLaneSize(lane_size_in_bits), SVE_VL7, multiplier);
3850 (masm.*fn)(z22.WithLaneSize(lane_size_in_bits), SVE_VL8, multiplier);
3851 (masm.*fn)(z23.WithLaneSize(lane_size_in_bits), SVE_VL16, multiplier);
3852 (masm.*fn)(z24.WithLaneSize(lane_size_in_bits), SVE_VL64, multiplier);
3853 (masm.*fn)(z25.WithLaneSize(lane_size_in_bits), SVE_VL256, multiplier);
3854 (masm.*fn)(z26.WithLaneSize(lane_size_in_bits), 16, multiplier);
3855 (masm.*fn)(z27.WithLaneSize(lane_size_in_bits), 28, multiplier);
3856 (masm.*fn)(z28.WithLaneSize(lane_size_in_bits), SVE_MUL3, multiplier);
3857 (masm.*fn)(z29.WithLaneSize(lane_size_in_bits), SVE_MUL4, multiplier);
3858 (masm.*fn)(z30.WithLaneSize(lane_size_in_bits), SVE_ALL, multiplier);
3859
3860 // Perform computation using alternative instructions.
3861 (masm.*cnt)(x0, SVE_POW2, multiplier);
3862 (masm.*cnt)(x1, SVE_VL1, multiplier);
3863 (masm.*cnt)(x2, SVE_VL2, multiplier);
3864 (masm.*cnt)(x3, SVE_VL3, multiplier);
3865 (masm.*cnt)(x4, SVE_VL4, multiplier);
3866 (masm.*cnt)(x5, SVE_VL7, multiplier);
3867 (masm.*cnt)(x6, SVE_VL8, multiplier);
3868 (masm.*cnt)(x7, SVE_VL16, multiplier);
3869 (masm.*cnt)(x8, SVE_VL64, multiplier);
3870 (masm.*cnt)(x9, SVE_VL256, multiplier);
3871 (masm.*cnt)(x10, 16, multiplier);
3872 (masm.*cnt)(x11, 28, multiplier);
3873 (masm.*cnt)(x12, SVE_MUL3, multiplier);
3874 (masm.*cnt)(x13, SVE_MUL4, multiplier);
3875 (masm.*cnt)(x14, SVE_ALL, multiplier);
3876
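  // Each xN now holds the element count for the corresponding pattern;
  // broadcast it and apply the matching (possibly saturating) add or subtract
  // so that z0-z14 become reference values for the z16-z30 results.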
3877 ZRegister zscratch = z15.WithLaneSize(lane_size_in_bits);
3878 for (unsigned i = 0; i < 15; i++) {
3879 ZRegister zsrcdst = ZRegister(i, lane_size_in_bits);
3880 Register x = Register(i, kXRegSize);
3881 __ Dup(zscratch, x);
3882 (masm.*addsub)(zsrcdst, zsrcdst, zscratch);
3883 }
3884
3885 END();
3886
3887 if (CAN_RUN()) {
3888 RUN();
3889
3890 ASSERT_EQUAL_SVE(z0, z16);
3891 ASSERT_EQUAL_SVE(z1, z17);
3892 ASSERT_EQUAL_SVE(z2, z18);
3893 ASSERT_EQUAL_SVE(z3, z19);
3894 ASSERT_EQUAL_SVE(z4, z20);
3895 ASSERT_EQUAL_SVE(z5, z21);
3896 ASSERT_EQUAL_SVE(z6, z22);
3897 ASSERT_EQUAL_SVE(z7, z23);
3898 ASSERT_EQUAL_SVE(z8, z24);
3899 ASSERT_EQUAL_SVE(z9, z25);
3900 ASSERT_EQUAL_SVE(z10, z26);
3901 ASSERT_EQUAL_SVE(z11, z27);
3902 ASSERT_EQUAL_SVE(z12, z28);
3903 ASSERT_EQUAL_SVE(z13, z29);
3904 ASSERT_EQUAL_SVE(z14, z30);
3905 }
3906}
3907
3908TEST_SVE(sve_inc_dec_vec) {
3909 CntFn cnth = &MacroAssembler::Cnth;
3910 CntFn cntw = &MacroAssembler::Cntw;
3911 CntFn cntd = &MacroAssembler::Cntd;
3912 AddSubFn sub = &MacroAssembler::Sub;
3913 AddSubFn add = &MacroAssembler::Add;
3914 for (int mult = 1; mult <= 16; mult += 5) {
3915 IncDecZHelper(config, &MacroAssembler::Dech, cnth, sub, mult, kHRegSize);
3916 IncDecZHelper(config, &MacroAssembler::Decw, cntw, sub, mult, kSRegSize);
3917 IncDecZHelper(config, &MacroAssembler::Decd, cntd, sub, mult, kDRegSize);
3918 IncDecZHelper(config, &MacroAssembler::Inch, cnth, add, mult, kHRegSize);
3919 IncDecZHelper(config, &MacroAssembler::Incw, cntw, add, mult, kSRegSize);
3920 IncDecZHelper(config, &MacroAssembler::Incd, cntd, add, mult, kDRegSize);
3921 }
3922}
3923
3924TEST_SVE(sve_unsigned_sat_inc_dec_vec) {
3925 CntFn cnth = &MacroAssembler::Cnth;
3926 CntFn cntw = &MacroAssembler::Cntw;
3927 CntFn cntd = &MacroAssembler::Cntd;
3928 AddSubFn sub = &MacroAssembler::Uqsub;
3929 AddSubFn add = &MacroAssembler::Uqadd;
3930 for (int mult = 1; mult <= 16; mult += 5) {
3931 IncDecZHelper(config, &MacroAssembler::Uqdech, cnth, sub, mult, kHRegSize);
3932 IncDecZHelper(config, &MacroAssembler::Uqdecw, cntw, sub, mult, kSRegSize);
3933 IncDecZHelper(config, &MacroAssembler::Uqdecd, cntd, sub, mult, kDRegSize);
3934 IncDecZHelper(config, &MacroAssembler::Uqinch, cnth, add, mult, kHRegSize);
3935 IncDecZHelper(config, &MacroAssembler::Uqincw, cntw, add, mult, kSRegSize);
3936 IncDecZHelper(config, &MacroAssembler::Uqincd, cntd, add, mult, kDRegSize);
3937 }
3938}
3939
3940TEST_SVE(sve_signed_sat_inc_dec_vec) {
3941 CntFn cnth = &MacroAssembler::Cnth;
3942 CntFn cntw = &MacroAssembler::Cntw;
3943 CntFn cntd = &MacroAssembler::Cntd;
3944 AddSubFn sub = &MacroAssembler::Sqsub;
3945 AddSubFn add = &MacroAssembler::Sqadd;
3946 for (int mult = 1; mult <= 16; mult += 5) {
3947 IncDecZHelper(config, &MacroAssembler::Sqdech, cnth, sub, mult, kHRegSize);
3948 IncDecZHelper(config, &MacroAssembler::Sqdecw, cntw, sub, mult, kSRegSize);
3949 IncDecZHelper(config, &MacroAssembler::Sqdecd, cntd, sub, mult, kDRegSize);
3950 IncDecZHelper(config, &MacroAssembler::Sqinch, cnth, add, mult, kHRegSize);
3951 IncDecZHelper(config, &MacroAssembler::Sqincw, cntw, add, mult, kSRegSize);
3952 IncDecZHelper(config, &MacroAssembler::Sqincd, cntd, add, mult, kDRegSize);
3953 }
3954}
3955
TatWai Chong7a0d3672019-10-23 17:35:18 -07003956typedef void (MacroAssembler::*ArithPredicatedFn)(const ZRegister& zd,
3957 const PRegisterM& pg,
3958 const ZRegister& zn,
3959 const ZRegister& zm);
TatWai Chong13634762019-07-16 16:20:45 -07003960
3961template <typename Td, typename Tg, typename Tn>
3962static void IntBinArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07003963 ArithPredicatedFn macro,
TatWai Chong13634762019-07-16 16:20:45 -07003964 unsigned lane_size_in_bits,
3965 const Tg& pg_inputs,
3966 const Tn& zn_inputs,
3967 const Tn& zm_inputs,
3968 const Td& zd_expected) {
3969 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3970 START();
3971
3972 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
3973 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
3974 InsrHelper(&masm, src_a, zn_inputs);
3975 InsrHelper(&masm, src_b, zm_inputs);
3976
3977 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
3978
3979 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
3980 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
3981 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
3982
3983 // `instr` zd(dst), zd(src_a), zn(src_b)
3984 __ Mov(zd_1, src_a);
3985 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
3986
3987 // `instr` zd(dst), zm(src_a), zd(src_b)
3988 // If zd and zm are aliased, the MacroAssembler (`Instr`) swaps the operand
3989 // order when the operation is commutative; otherwise it falls back to the
3990 // reversed form of the instruction, such as subr or divr.
3991 __ Mov(zd_2, src_b);
3992 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
3993
3994 // `instr` zd(dst), zm(src_a), zn(src_b)
3995 // The MacroAssembler (`Instr`) automatically selects between `instr` and
3996 // movprfx + `instr`, based on whether zd and zn registers are aliased.
3997 // A generated movprfx instruction is predicated, using the same governing
3998 // predicate register. To keep the inactive lanes predictable, initialise
3999 // the destination register first.
4000 __ Mov(zd_3, src_a);
4001 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
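  // For illustration, with three distinct registers the macro may emit a
  // predicated prefix, e.g. `Add(z2, p0.Merging(), z0, z1)` becoming
  // `movprfx z2.s, p0/m, z0.s` followed by `add z2.s, p0/m, z2.s, z1.s`.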
4002
4003 END();
4004
4005 if (CAN_RUN()) {
4006 RUN();
4007 ASSERT_EQUAL_SVE(zd_expected, zd_1);
4008
4009 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
4010 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
4011 if (!core.HasSVELane(zd_1, lane)) break;
TatWai Chongd316c5e2019-10-16 12:22:10 -07004012 if ((pg_inputs[i] & 1) != 0) {
4013 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_2, lane);
4014 } else {
4015 ASSERT_EQUAL_SVE_LANE(zm_inputs[i], zd_2, lane);
4016 }
4017 }
4018
4019 ASSERT_EQUAL_SVE(zd_expected, zd_3);
4020 }
4021}
4022
4023TEST_SVE(sve_binary_arithmetic_predicated_add) {
4024 // clang-format off
4025 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
4026
4027 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
4028
4029 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
4030
4031 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
4032
4033 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
4034 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
4035
4036 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
4037 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
4038
4039 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
4040 0x1010101010101010, 0x8181818181818181,
4041 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
4042 0x0101010101010101, 0x7f7f7f7fffffffff};
4043
4044 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
4045 0x1010101010101010, 0x0000000000000000,
4046 0x8181818181818181, 0x8080808080808080,
4047 0xffffffffffffffff, 0xffffffffffffffff};
4048
4049 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4050 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4051 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4052 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4053
4054 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
4055
4056 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
4057 0x8180, 0x8f8f, 0x0101, 0x7f7e};
4058
4059 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
4060 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
4061
4062 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
4063 0x2020202020202020, 0x8181818181818181,
4064 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
4065 0x0101010101010100, 0x7f7f7f7ffffffffe};
4066
TatWai Chong7a0d3672019-10-23 17:35:18 -07004067 ArithPredicatedFn fn = &MacroAssembler::Add;
TatWai Chong13634762019-07-16 16:20:45 -07004068 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
4069 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
4070 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
4071 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
4072
4073 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
4074
4075 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
4076 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
4077
4078 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
4079 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
4080
4081 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
4082 0x0000000000000000, 0x8181818181818181,
4083 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
4084 0x0101010101010102, 0x7f7f7f8000000000};
4085
4086 fn = &MacroAssembler::Sub;
4087 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
4088 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
4089 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
4090 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
4091 // clang-format on
4092}
4093
4094TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
4095 // clang-format off
4096 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
4097
4098 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
4099
4100 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
4101 0xff00, 0xba98, 0x5555, 0x4567};
4102
4103 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
4104 0xfe00, 0xabab, 0xcdcd, 0x5678};
4105
4106 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
4107 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
4108
4109 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
4110 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
4111
4112 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
4113 0x5555555555555555, 0x0000000001234567};
4114
4115 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
4116 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4117
4118 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4119 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4120 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4121 int pg_d[] = {1, 0, 1, 1};
4122
4123 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
4124
4125 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
4126 0xff00, 0xba98, 0x5555, 0x5678};
4127
4128 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
4129 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
4130
4131 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4132 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4133
TatWai Chong7a0d3672019-10-23 17:35:18 -07004134 ArithPredicatedFn fn = &MacroAssembler::Umax;
TatWai Chong13634762019-07-16 16:20:45 -07004135 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
4136 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
4137 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
4138 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
4139
4140 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
4141
4142 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
4143 0xfe00, 0xabab, 0x5555, 0x4567};
4144
4145 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
4146 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
4147
4148 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
4149 0x5555555555555555, 0x0000000001234567};
4150 fn = &MacroAssembler::Umin;
4151 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
4152 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
4153 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
4154 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
4155
4156 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
4157
4158 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
4159 0x0100, 0x0eed, 0x5555, 0x1111};
4160
4161 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
4162 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
4163
4164 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4165 0x7878787878787878, 0x0000000011111111};
4166
4167 fn = &MacroAssembler::Uabd;
4168 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
4169 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
4170 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
4171 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
4172 // clang-format on
4173}
4174
4175TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
4176 // clang-format off
4177 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
4178
4179 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
4180
4181 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
4182 INT16_MIN, INT16_MAX, INT16_MAX, 1};
4183
4184 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
4185 INT16_MAX, INT16_MAX - 1, -1, 0};
4186
4187 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
4188 INT32_MIN, INT32_MAX, INT32_MAX, 1};
4189
4190 int zm_s[] = {-1, 0, -1, -INT32_MAX,
4191 INT32_MAX, INT32_MAX - 1, -1, 0};
4192
4193 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4194 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4195
4196 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
4197 INT64_MAX, INT64_MAX - 1, -1, 0};
4198
4199 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4200 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4201 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4202 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4203
4204 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
4205
4206 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
4207 INT16_MAX, INT16_MAX, INT16_MAX, 1};
4208
4209 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
4210 INT32_MAX, INT32_MAX, INT32_MAX, 1};
4211
4212 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
4213 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4214
TatWai Chong7a0d3672019-10-23 17:35:18 -07004215 ArithPredicatedFn fn = &MacroAssembler::Smax;
TatWai Chong13634762019-07-16 16:20:45 -07004216 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
4217 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
4218 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
4219 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
4220
4221 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
4222
4223 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
4224 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
4225
4226 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
4227 INT32_MIN, INT32_MAX, -1, 0};
4228
4229 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4230 INT64_MIN, INT64_MAX - 1, -1, 0};
4231
4232 fn = &MacroAssembler::Smin;
4233 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
4234 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
4235 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
4236 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
4237
4238 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
4239
4240 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
4241
4242 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
4243 0xffffffff, 0x7fffffff, 0x80000000, 1};
4244
4245 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
4246 0x8000000000000000, 1, 0x8000000000000000, 1};
4247
4248 fn = &MacroAssembler::Sabd;
4249 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
4250 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
4251 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
4252 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
4253 // clang-format on
4254}
4255
4256TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
4257 // clang-format off
4258 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4259
4260 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4261
4262 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
4263 0x8000, 0xff00, 0x5555, 0xaaaa};
4264
4265 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
4266 0x5555, 0xaaaa, 0x0001, 0x1234};
4267
4268 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4269 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
4270
4271 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4272 0x12345678, 0x22223333, 0x55556666, 0x77778888};
4273
4274 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
4275 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
4276
4277 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
4278 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
4279
4280 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4281 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4282 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4283 int pg_d[] = {1, 1, 0, 1};
4284
4285 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
4286
4287 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
4288 0x8000, 0xff00, 0x5555, 0x9e88};
4289
4290 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
4291 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
4292
4293 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
4294 0xffffffffffffffff, 0x38e38e38e38e38e4};
4295
TatWai Chong7a0d3672019-10-23 17:35:18 -07004296 ArithPredicatedFn fn = &MacroAssembler::Mul;
TatWai Chong13634762019-07-16 16:20:45 -07004297 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
4298 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
4299 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
4300 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
4301
4302 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
4303
4304 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
4305 0x2aaa, 0xff00, 0x0000, 0x0c22};
4306
4307 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
4308 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
4309
4310 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
4311 0xffffffffffffffff, 0x71c71c71c71c71c6};
4312
4313 fn = &MacroAssembler::Umulh;
4314 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
4315 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
4316 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
4317 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
4318 // clang-format on
4319}
4320
4321TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
4322 // clang-format off
4323 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
4324
4325 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
4326
4327 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
4328
4329 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
4330
4331 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
4332
4333 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
4334
4335 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
4336
4337 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
4338
4339 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4340 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4341 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4342 int pg_d[] = {1, 1, 0, 1};
4343
4344 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
4345
4346 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
4347
4348 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
4349
4350 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
4351
TatWai Chong7a0d3672019-10-23 17:35:18 -07004352 ArithPredicatedFn fn = &MacroAssembler::Smulh;
TatWai Chong13634762019-07-16 16:20:45 -07004353 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
4354 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
4355 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4356 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4357 // clang-format on
4358}
4359
4360TEST_SVE(sve_binary_arithmetic_predicated_logical) {
4361 // clang-format off
4362 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4363 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4364
4365 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
4366 0x8000, 0xffff, 0x5555, 0xaaaa};
4367 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
4368 0x5555, 0xaaaa, 0x0000, 0x0800};
4369
4370 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
4371 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
4372
4373 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
4374 0x0001200880ff55aa, 0x0022446688aaccee};
4375 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
4376 0x7fcd80ff55aa0008, 0x1133557799bbddff};
4377
4378 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4379 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4380 int pg_s[] = {1, 1, 1, 0};
4381 int pg_d[] = {1, 1, 0, 1};
4382
4383 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
4384
4385 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
4386 0x0000, 0xffff, 0x0000, 0x0800};
4387
4388 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
4389
4390 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
4391 0x0001200880ff55aa, 0x0022446688aaccee};
4392
TatWai Chong7a0d3672019-10-23 17:35:18 -07004393 ArithPredicatedFn fn = &MacroAssembler::And;
TatWai Chong13634762019-07-16 16:20:45 -07004394 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
4395 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
4396 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
4397 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
4398
4399 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
4400
4401 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
4402 0x8000, 0xffff, 0x5555, 0xa2aa};
4403
4404 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
4405
4406 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
4407 0x0001200880ff55aa, 0x0000000000000000};
4408
4409 fn = &MacroAssembler::Bic;
4410 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
4411 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
4412 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
4413 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
4414
4415 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
4416
4417 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
4418 0xd555, 0xffff, 0x5555, 0xa2aa};
4419
4420 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
4421
4422 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
4423 0x0001200880ff55aa, 0x1111111111111111};
4424
4425 fn = &MacroAssembler::Eor;
4426 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
4427 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
4428 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
4429 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
4430
4431 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
4432
4433 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
4434 0xd555, 0xffff, 0x5555, 0xaaaa};
4435
4436 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
4437
4438 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
4439 0x0001200880ff55aa, 0x1133557799bbddff};
4440
4441 fn = &MacroAssembler::Orr;
4442 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
4443 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
4444 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
4445 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
4446 // clang-format on
4447}
4448
4449TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
4450 // clang-format off
4451 int zn_s[] = {0, 1, -1, 2468,
4452 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
4453 -11111111, 87654321, 0, 0};
4454
4455 int zm_s[] = {1, -1, 1, 1234,
4456 -1, INT32_MIN, 1, -1,
4457 22222222, 80000000, -1, 0};
4458
4459 int64_t zn_d[] = {0, 1, -1, 2468,
4460 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
4461 -11111111, 87654321, 0, 0};
4462
4463 int64_t zm_d[] = {1, -1, 1, 1234,
4464 -1, INT64_MIN, 1, -1,
4465 22222222, 80000000, -1, 0};
4466
4467 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
4468 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
4469
4470 int exp_s[] = {0, 1, -1, 2,
4471 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
4472 0, 1, 0, 0};
4473
4474 int64_t exp_d[] = {0, -1, -1, 2,
4475 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
4476 0, 1, 0, 0};
4477
TatWai Chong7a0d3672019-10-23 17:35:18 -07004478 ArithPredicatedFn fn = &MacroAssembler::Sdiv;
TatWai Chong13634762019-07-16 16:20:45 -07004479 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4480 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4481 // clang-format on
4482}
4483
4484TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
4485 // clang-format off
4486 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
4487 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
4488
4489 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
4490 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
4491
4492 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
4493 0xffffffffffffffff, 0x8000000000000000,
4494 0xffffffffffffffff, 0x8000000000000000,
4495 0xffffffffffffffff, 0xf0000000f0000000};
4496
4497 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
4498 0x8000000000000000, 0x0000000000000002,
4499 0x8888888888888888, 0x0000000000000001,
4500 0x0000000080000000, 0x00000000f0000000};
4501
4502 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
4503 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
4504
4505 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
4506 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
4507
4508 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
4509 0x0000000000000001, 0x4000000000000000,
4510 0x0000000000000001, 0x8000000000000000,
4511 0xffffffffffffffff, 0x0000000100000001};
4512
TatWai Chong7a0d3672019-10-23 17:35:18 -07004513 ArithPredicatedFn fn = &MacroAssembler::Udiv;
TatWai Chong13634762019-07-16 16:20:45 -07004514 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4515 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4516 // clang-format on
4517}
4518
TatWai Chong7a0d3672019-10-23 17:35:18 -07004519typedef void (MacroAssembler::*ArithFn)(const ZRegister& zd,
4520 const ZRegister& zn,
4521 const ZRegister& zm);
TatWai Chong845246b2019-08-08 00:01:58 -07004522
4523template <typename T>
4524static void IntArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07004525 ArithFn macro,
TatWai Chong845246b2019-08-08 00:01:58 -07004526 unsigned lane_size_in_bits,
4527 const T& zn_inputs,
4528 const T& zm_inputs,
4529 const T& zd_expected) {
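  // A minimal harness for unpredicated binary arithmetic: load `zn_inputs` and
  // `zm_inputs` into Z registers, apply `macro` (e.g. Add, Sqadd, Sub), and
  // check every lane of the destination against `zd_expected`.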
4530 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4531 START();
4532
4533 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
4534 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
4535 InsrHelper(&masm, zn, zn_inputs);
4536 InsrHelper(&masm, zm, zm_inputs);
4537
4538 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
4539 (masm.*macro)(zd, zn, zm);
4540
4541 END();
4542
4543 if (CAN_RUN()) {
4544 RUN();
4545 ASSERT_EQUAL_SVE(zd_expected, zd);
4546 }
4547}
4548
4549TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
4550 // clang-format off
TatWai Chong6995bfd2019-09-26 10:48:05 +01004551 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
4552 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
4553 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
4554 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
TatWai Chong845246b2019-08-08 00:01:58 -07004555 0x1000000010001010, 0xf0000000f000f0f0};
4556
TatWai Chong7a0d3672019-10-23 17:35:18 -07004557 ArithFn fn = &MacroAssembler::Add;
TatWai Chong845246b2019-08-08 00:01:58 -07004558
4559 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
4560 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
4561 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
4562 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
4563 0x2000000020002020, 0xe0000001e001e1e0};
4564
TatWai Chong6995bfd2019-09-26 10:48:05 +01004565 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
4566 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
4567 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
4568 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004569
4570 fn = &MacroAssembler::Sqadd;
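  // Unlike `Add`, `Sqadd` saturates at the signed limits; for example, in the
  // B lanes 0x81 + 0x81 (-127 + -127) produces 0x80 (-128) instead of
  // wrapping to 0x02.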
4571
4572 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
4573 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
4574 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
4575 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
4576 0x2000000020002020, 0xe0000001e001e1e0};
4577
TatWai Chong6995bfd2019-09-26 10:48:05 +01004578 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
4579 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
4580 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
4581 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004582
4583 fn = &MacroAssembler::Uqadd;
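  // `Uqadd` saturates at the unsigned maximum, so 0x81 + 0x81 produces 0xff.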
4584
4585 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
4586 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
4587 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
4588 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
4589 0x2000000020002020, 0xffffffffffffffff};
4590
TatWai Chong6995bfd2019-09-26 10:48:05 +01004591 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
4592 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
4593 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
4594 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004595 // clang-format on
4596}
4597
4598TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
4599 // clang-format off
4600
4601 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
4602 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
4603
4604 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
4605 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
4606
4607 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
4608 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
4609
4610 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
4611 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
4612 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
4613 0xf0000000f000f0f0, 0x5555555555555555};
4614
TatWai Chong7a0d3672019-10-23 17:35:18 -07004615 ArithFn fn = &MacroAssembler::Sub;
TatWai Chong845246b2019-08-08 00:01:58 -07004616
4617 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
4618 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
4619 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
4620 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
4621 0x8eeeeeed8eed8d8e, 0x5555555555555555};
4622
4623 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
4624 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
4625 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
4626 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
4627
4628 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
4629 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
4630 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
4631 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
4632 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
4633
4634 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
4635 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
4636 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
4637 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
4638
4639 fn = &MacroAssembler::Sqsub;
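  // `Sqsub` saturates on signed overflow; for example, 0x81 - 0x10
  // (-127 - 16) produces 0x80 (-128).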
4640
4641 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
4642 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
4643 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
4644 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
4645 0x7fffffffffffffff, 0x8000000000000000};
4646
4647 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
4648 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
4649 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
4650 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
4651
4652 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
4653 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
4654 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
4655 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
4656 0x8000000000000000, 0x7fffffffffffffff};
4657
4658 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
4659 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
4660 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
4661 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
4662
4663 fn = &MacroAssembler::Uqsub;
4664
4665 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
4666 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
4667 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
4668 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
4669 0x0000000000000000, 0x5555555555555555};
4670
4671 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
4672 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
4673 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
4674 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
4675
4676 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
4677 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
4678 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
4679 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
4680 0x7111111271127272, 0x0000000000000000};
4681
4682 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
4683 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
4684 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
4685 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
4686 // clang-format on
4687}
4688
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004689TEST_SVE(sve_rdvl) {
4690 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4691 START();
4692
4693 // Encodable multipliers.
4694 __ Rdvl(x0, 0);
4695 __ Rdvl(x1, 1);
4696 __ Rdvl(x2, 2);
4697 __ Rdvl(x3, 31);
4698 __ Rdvl(x4, -1);
4699 __ Rdvl(x5, -2);
4700 __ Rdvl(x6, -32);
4701
4702 // For unencodable multipliers, the MacroAssembler uses a sequence of
4703 // instructions.
4704 __ Rdvl(x10, 32);
4705 __ Rdvl(x11, -33);
4706 __ Rdvl(x12, 42);
4707 __ Rdvl(x13, -42);
4708
4709 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4710 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4711 // occurs in the macro.
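  // For example, 0x007fffffffffffff * 256 = 0x7fffffffffffff00, which still
  // fits in a signed 64-bit result.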
4712 __ Rdvl(x14, 0x007fffffffffffff);
4713 __ Rdvl(x15, -0x0080000000000000);
4714
4715 END();
4716
4717 if (CAN_RUN()) {
4718 RUN();
4719
4720 uint64_t vl = config->sve_vl_in_bytes();
4721
4722 ASSERT_EQUAL_64(vl * 0, x0);
4723 ASSERT_EQUAL_64(vl * 1, x1);
4724 ASSERT_EQUAL_64(vl * 2, x2);
4725 ASSERT_EQUAL_64(vl * 31, x3);
4726 ASSERT_EQUAL_64(vl * -1, x4);
4727 ASSERT_EQUAL_64(vl * -2, x5);
4728 ASSERT_EQUAL_64(vl * -32, x6);
4729
4730 ASSERT_EQUAL_64(vl * 32, x10);
4731 ASSERT_EQUAL_64(vl * -33, x11);
4732 ASSERT_EQUAL_64(vl * 42, x12);
4733 ASSERT_EQUAL_64(vl * -42, x13);
4734
4735 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
4736 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
4737 }
4738}
4739
4740TEST_SVE(sve_rdpl) {
4741 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4742 START();
4743
4744 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
4745 // Addpl(xd, xzr, ...).
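  // For example, `Rdpl(x1, 8)` behaves like `Addpl(x1, xzr, 8)`, producing
  // 8 * PL, where PL = VL / 8 (in bytes).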
4746
4747 // Encodable multipliers (as `addvl`).
4748 __ Rdpl(x0, 0);
4749 __ Rdpl(x1, 8);
4750 __ Rdpl(x2, 248);
4751 __ Rdpl(x3, -8);
4752 __ Rdpl(x4, -256);
4753
4754 // Encodable multipliers (as `movz` + `addpl`).
4755 __ Rdpl(x7, 31);
Jacob Bramley889984c2019-10-28 17:28:48 +00004756 __ Rdpl(x8, -31);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004757
4758 // For unencodable multipliers, the MacroAssembler uses a sequence of
4759 // instructions.
4760 __ Rdpl(x10, 42);
4761 __ Rdpl(x11, -42);
4762
4763 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4764 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4765 // occurs in the macro.
4766 __ Rdpl(x12, 0x007fffffffffffff);
4767 __ Rdpl(x13, -0x0080000000000000);
4768
4769 END();
4770
4771 if (CAN_RUN()) {
4772 RUN();
4773
4774 uint64_t vl = config->sve_vl_in_bytes();
4775 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4776 uint64_t pl = vl / kZRegBitsPerPRegBit;
4777
4778 ASSERT_EQUAL_64(pl * 0, x0);
4779 ASSERT_EQUAL_64(pl * 8, x1);
4780 ASSERT_EQUAL_64(pl * 248, x2);
4781 ASSERT_EQUAL_64(pl * -8, x3);
4782 ASSERT_EQUAL_64(pl * -256, x4);
4783
4784 ASSERT_EQUAL_64(pl * 31, x7);
Jacob Bramley889984c2019-10-28 17:28:48 +00004785 ASSERT_EQUAL_64(pl * -31, x8);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004786
4787 ASSERT_EQUAL_64(pl * 42, x10);
4788 ASSERT_EQUAL_64(pl * -42, x11);
4789
4790 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
4791 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
4792 }
4793}
4794
4795TEST_SVE(sve_addvl) {
4796 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4797 START();
4798
4799 uint64_t base = 0x1234567800000000;
4800 __ Mov(x30, base);
4801
4802 // Encodable multipliers.
4803 __ Addvl(x0, x30, 0);
4804 __ Addvl(x1, x30, 1);
4805 __ Addvl(x2, x30, 31);
4806 __ Addvl(x3, x30, -1);
4807 __ Addvl(x4, x30, -32);
4808
4809 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
4810 __ Addvl(x5, x30, 32);
4811 __ Addvl(x6, x30, -33);
4812
4813 // Test the limits of the multiplier supported by the `Rdvl` macro.
4814 __ Addvl(x7, x30, 0x007fffffffffffff);
4815 __ Addvl(x8, x30, -0x0080000000000000);
4816
4817 // Check that xzr behaves correctly.
4818 __ Addvl(x9, xzr, 8);
4819 __ Addvl(x10, xzr, 42);
4820
4821 // Check that sp behaves correctly with encodable and unencodable multipliers.
4822 __ Addvl(sp, sp, -5);
4823 __ Addvl(sp, sp, -37);
4824 __ Addvl(x11, sp, -2);
4825 __ Addvl(sp, x11, 2);
4826 __ Addvl(x12, sp, -42);
4827
4828 // Restore the value of sp.
4829 __ Addvl(sp, x11, 39);
4830 __ Addvl(sp, sp, 5);
4831
4832 // Adjust x11 and x12 to make the test sp-agnostic.
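  // At this point x11 = sp - (44 * VL) and x12 = sp - (84 * VL), so the
  // subtractions below leave 44 * VL and 84 * VL respectively.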
4833 __ Sub(x11, sp, x11);
4834 __ Sub(x12, sp, x12);
4835
4836 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4837 __ Mov(x20, x30);
4838 __ Mov(x21, x30);
4839 __ Mov(x22, x30);
4840 __ Addvl(x20, x20, 4);
4841 __ Addvl(x21, x21, 42);
4842 __ Addvl(x22, x22, -0x0080000000000000);
4843
4844 END();
4845
4846 if (CAN_RUN()) {
4847 RUN();
4848
4849 uint64_t vl = config->sve_vl_in_bytes();
4850
4851 ASSERT_EQUAL_64(base + (vl * 0), x0);
4852 ASSERT_EQUAL_64(base + (vl * 1), x1);
4853 ASSERT_EQUAL_64(base + (vl * 31), x2);
4854 ASSERT_EQUAL_64(base + (vl * -1), x3);
4855 ASSERT_EQUAL_64(base + (vl * -32), x4);
4856
4857 ASSERT_EQUAL_64(base + (vl * 32), x5);
4858 ASSERT_EQUAL_64(base + (vl * -33), x6);
4859
4860 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
4861 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
4862
4863 ASSERT_EQUAL_64(vl * 8, x9);
4864 ASSERT_EQUAL_64(vl * 42, x10);
4865
4866 ASSERT_EQUAL_64(vl * 44, x11);
4867 ASSERT_EQUAL_64(vl * 84, x12);
4868
4869 ASSERT_EQUAL_64(base + (vl * 4), x20);
4870 ASSERT_EQUAL_64(base + (vl * 42), x21);
4871 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
4872
4873 ASSERT_EQUAL_64(base, x30);
4874 }
4875}
4876
4877TEST_SVE(sve_addpl) {
4878 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4879 START();
4880
4881 uint64_t base = 0x1234567800000000;
4882 __ Mov(x30, base);
4883
4884 // Encodable multipliers.
4885 __ Addpl(x0, x30, 0);
4886 __ Addpl(x1, x30, 1);
4887 __ Addpl(x2, x30, 31);
4888 __ Addpl(x3, x30, -1);
4889 __ Addpl(x4, x30, -32);
4890
4891 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
4892 // it falls back to `Rdvl` and `Add`.
4893 __ Addpl(x5, x30, 32);
4894 __ Addpl(x6, x30, -33);
4895
4896 // Test the limits of the multiplier supported by the `Rdvl` macro.
4897 __ Addpl(x7, x30, 0x007fffffffffffff);
4898 __ Addpl(x8, x30, -0x0080000000000000);
4899
4900 // Check that xzr behaves correctly.
4901 __ Addpl(x9, xzr, 8);
4902 __ Addpl(x10, xzr, 42);
4903
4904 // Check that sp behaves correctly with encodable and unencodable multipliers.
4905 __ Addpl(sp, sp, -5);
4906 __ Addpl(sp, sp, -37);
4907 __ Addpl(x11, sp, -2);
4908 __ Addpl(sp, x11, 2);
4909 __ Addpl(x12, sp, -42);
4910
4911 // Restore the value of sp.
4912 __ Addpl(sp, x11, 39);
4913 __ Addpl(sp, sp, 5);
4914
4915 // Adjust x11 and x12 to make the test sp-agnostic.
4916 __ Sub(x11, sp, x11);
4917 __ Sub(x12, sp, x12);
4918
4919 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4920 __ Mov(x20, x30);
4921 __ Mov(x21, x30);
4922 __ Mov(x22, x30);
4923 __ Addpl(x20, x20, 4);
4924 __ Addpl(x21, x21, 42);
4925 __ Addpl(x22, x22, -0x0080000000000000);
4926
4927 END();
4928
4929 if (CAN_RUN()) {
4930 RUN();
4931
4932 uint64_t vl = config->sve_vl_in_bytes();
4933 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4934 uint64_t pl = vl / kZRegBitsPerPRegBit;
4935
4936 ASSERT_EQUAL_64(base + (pl * 0), x0);
4937 ASSERT_EQUAL_64(base + (pl * 1), x1);
4938 ASSERT_EQUAL_64(base + (pl * 31), x2);
4939 ASSERT_EQUAL_64(base + (pl * -1), x3);
4940 ASSERT_EQUAL_64(base + (pl * -32), x4);
4941
4942 ASSERT_EQUAL_64(base + (pl * 32), x5);
4943 ASSERT_EQUAL_64(base + (pl * -33), x6);
4944
4945 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
4946 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
4947
4948 ASSERT_EQUAL_64(pl * 8, x9);
4949 ASSERT_EQUAL_64(pl * 42, x10);
4950
4951 ASSERT_EQUAL_64(pl * 44, x11);
4952 ASSERT_EQUAL_64(pl * 84, x12);
4953
4954 ASSERT_EQUAL_64(base + (pl * 4), x20);
4955 ASSERT_EQUAL_64(base + (pl * 42), x21);
4956 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
4957
4958 ASSERT_EQUAL_64(base, x30);
4959 }
4960}
4961
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004962TEST_SVE(sve_calculate_sve_address) {
4963 // Shadow the `MacroAssembler` type so that the test macros work without
4964 // modification.
4965 typedef CalculateSVEAddressMacroAssembler MacroAssembler;
4966
Jacob Bramley1314c462019-08-08 10:54:16 +01004967 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004968 START(); // NOLINT(clang-diagnostic-local-type-template-args)
Jacob Bramley1314c462019-08-08 10:54:16 +01004969
4970 uint64_t base = 0x1234567800000000;
4971 __ Mov(x28, base);
4972 __ Mov(x29, 48);
4973 __ Mov(x30, -48);
4974
4975 // Simple scalar (or equivalent) cases.
4976
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004977 __ CalculateSVEAddress(x0, SVEMemOperand(x28));
4978 __ CalculateSVEAddress(x1, SVEMemOperand(x28, 0));
4979 __ CalculateSVEAddress(x2, SVEMemOperand(x28, 0, SVE_MUL_VL));
4980 __ CalculateSVEAddress(x3, SVEMemOperand(x28, 0, SVE_MUL_VL), 3);
4981 __ CalculateSVEAddress(x4, SVEMemOperand(x28, xzr));
4982 __ CalculateSVEAddress(x5, SVEMemOperand(x28, xzr, LSL, 42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004983
4984 // scalar-plus-immediate
4985
4986 // Unscaled immediates, handled with `Add`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004987 __ CalculateSVEAddress(x6, SVEMemOperand(x28, 42));
4988 __ CalculateSVEAddress(x7, SVEMemOperand(x28, -42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004989 // Scaled immediates, handled with `Addvl` or `Addpl`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004990 __ CalculateSVEAddress(x8, SVEMemOperand(x28, 31, SVE_MUL_VL), 0);
4991 __ CalculateSVEAddress(x9, SVEMemOperand(x28, -32, SVE_MUL_VL), 0);
Jacob Bramley1314c462019-08-08 10:54:16 +01004992 // Out of `addvl` or `addpl` range.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004993 __ CalculateSVEAddress(x10, SVEMemOperand(x28, 42, SVE_MUL_VL), 0);
4994 __ CalculateSVEAddress(x11, SVEMemOperand(x28, -42, SVE_MUL_VL), 0);
4995 // As above, for VL-based accesses smaller than a Z register.
4996 VIXL_STATIC_ASSERT(kZRegBitsPerPRegBitLog2 == 3);
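  // The trailing argument divides the VL scaling by 8, 4 or 2 (2^3, 2^2, 2^1),
  // so each pair below resolves to the same addresses as the x9 and x11 cases
  // above: base - (32 * VL) and base - (42 * VL).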
4997 __ CalculateSVEAddress(x12, SVEMemOperand(x28, -32 * 8, SVE_MUL_VL), 3);
4998 __ CalculateSVEAddress(x13, SVEMemOperand(x28, -42 * 8, SVE_MUL_VL), 3);
4999 __ CalculateSVEAddress(x14, SVEMemOperand(x28, -32 * 4, SVE_MUL_VL), 2);
5000 __ CalculateSVEAddress(x15, SVEMemOperand(x28, -42 * 4, SVE_MUL_VL), 2);
5001 __ CalculateSVEAddress(x18, SVEMemOperand(x28, -32 * 2, SVE_MUL_VL), 1);
5002 __ CalculateSVEAddress(x19, SVEMemOperand(x28, -42 * 2, SVE_MUL_VL), 1);
Jacob Bramley1314c462019-08-08 10:54:16 +01005003
5004 // scalar-plus-scalar
5005
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005006 __ CalculateSVEAddress(x20, SVEMemOperand(x28, x29));
5007 __ CalculateSVEAddress(x21, SVEMemOperand(x28, x30));
5008 __ CalculateSVEAddress(x22, SVEMemOperand(x28, x29, LSL, 8));
5009 __ CalculateSVEAddress(x23, SVEMemOperand(x28, x30, LSL, 8));
Jacob Bramley1314c462019-08-08 10:54:16 +01005010
5011 // In-place updates, to stress scratch register allocation.
5012
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005013 __ Mov(x24, 0xabcd000000000000);
5014 __ Mov(x25, 0xabcd101100000000);
5015 __ Mov(x26, 0xabcd202200000000);
5016 __ Mov(x27, 0xabcd303300000000);
5017 __ Mov(x28, 0xabcd404400000000);
5018 __ Mov(x29, 0xabcd505500000000);
Jacob Bramley1314c462019-08-08 10:54:16 +01005019
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005020 __ CalculateSVEAddress(x24, SVEMemOperand(x24));
5021 __ CalculateSVEAddress(x25, SVEMemOperand(x25, 0x42));
5022 __ CalculateSVEAddress(x26, SVEMemOperand(x26, 3, SVE_MUL_VL), 0);
5023 __ CalculateSVEAddress(x27, SVEMemOperand(x27, 0x42, SVE_MUL_VL), 3);
5024 __ CalculateSVEAddress(x28, SVEMemOperand(x28, x30));
5025 __ CalculateSVEAddress(x29, SVEMemOperand(x29, x30, LSL, 4));
Jacob Bramley1314c462019-08-08 10:54:16 +01005026
5027 END();
5028
5029 if (CAN_RUN()) {
5030 RUN();
5031
5032 uint64_t vl = config->sve_vl_in_bytes();
5033 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5034 uint64_t pl = vl / kZRegBitsPerPRegBit;
5035
5036 // Simple scalar (or equivalent) cases.
5037 ASSERT_EQUAL_64(base, x0);
5038 ASSERT_EQUAL_64(base, x1);
5039 ASSERT_EQUAL_64(base, x2);
5040 ASSERT_EQUAL_64(base, x3);
5041 ASSERT_EQUAL_64(base, x4);
5042 ASSERT_EQUAL_64(base, x5);
5043
5044 // scalar-plus-immediate
5045 ASSERT_EQUAL_64(base + 42, x6);
5046 ASSERT_EQUAL_64(base - 42, x7);
5047 ASSERT_EQUAL_64(base + (31 * vl), x8);
5048 ASSERT_EQUAL_64(base - (32 * vl), x9);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005049 ASSERT_EQUAL_64(base + (42 * vl), x10);
5050 ASSERT_EQUAL_64(base - (42 * vl), x11);
5051 ASSERT_EQUAL_64(base - (32 * vl), x12);
Jacob Bramley1314c462019-08-08 10:54:16 +01005052 ASSERT_EQUAL_64(base - (42 * vl), x13);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005053 ASSERT_EQUAL_64(base - (32 * vl), x14);
5054 ASSERT_EQUAL_64(base - (42 * vl), x15);
5055 ASSERT_EQUAL_64(base - (32 * vl), x18);
5056 ASSERT_EQUAL_64(base - (42 * vl), x19);
Jacob Bramley1314c462019-08-08 10:54:16 +01005057
5058 // scalar-plus-scalar
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005059 ASSERT_EQUAL_64(base + 48, x20);
5060 ASSERT_EQUAL_64(base - 48, x21);
5061 ASSERT_EQUAL_64(base + (48 << 8), x22);
5062 ASSERT_EQUAL_64(base - (48 << 8), x23);
Jacob Bramley1314c462019-08-08 10:54:16 +01005063
5064 // In-place updates.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005065 ASSERT_EQUAL_64(0xabcd000000000000, x24);
5066 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x25);
5067 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x26);
5068 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x27);
5069 ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28);
5070 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29);
Jacob Bramley1314c462019-08-08 10:54:16 +01005071 }
5072}
5073
TatWai Chong4f28df72019-08-14 17:50:30 -07005074TEST_SVE(sve_permute_vector_unpredicated) {
5075 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5076 START();
5077
Jacob Bramleye4983d42019-10-08 10:56:15 +01005078 // Initialise registers with known values first.
5079 __ Dup(z1.VnB(), 0x11);
5080 __ Dup(z2.VnB(), 0x22);
5081 __ Dup(z3.VnB(), 0x33);
5082 __ Dup(z4.VnB(), 0x44);
5083
TatWai Chong4f28df72019-08-14 17:50:30 -07005084 __ Mov(x0, 0x0123456789abcdef);
5085 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
5086 __ Insr(z1.VnS(), w0);
5087 __ Insr(z2.VnD(), x0);
5088 __ Insr(z3.VnH(), h0);
5089 __ Insr(z4.VnD(), d0);
5090
5091 uint64_t inputs[] = {0xfedcba9876543210,
5092 0x0123456789abcdef,
5093 0x8f8e8d8c8b8a8988,
5094 0x8786858483828180};
5095
 5096 // Initialise a distinguishable value throughout the register first.
5097 __ Dup(z9.VnB(), 0xff);
5098 InsrHelper(&masm, z9.VnD(), inputs);
5099
5100 __ Rev(z5.VnB(), z9.VnB());
5101 __ Rev(z6.VnH(), z9.VnH());
5102 __ Rev(z7.VnS(), z9.VnS());
5103 __ Rev(z8.VnD(), z9.VnD());
5104
5105 int index[7] = {22, 7, 7, 3, 1, 1, 63};
 5106 // Broadcast an element from within the input array.
5107 __ Dup(z10.VnB(), z9.VnB(), index[0]);
5108 __ Dup(z11.VnH(), z9.VnH(), index[1]);
5109 __ Dup(z12.VnS(), z9.VnS(), index[2]);
5110 __ Dup(z13.VnD(), z9.VnD(), index[3]);
5111 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
5112 // Test dst == src
5113 __ Mov(z15, z9);
5114 __ Dup(z15.VnS(), z15.VnS(), index[5]);
 5115 // Select an element beyond the inserted input values.
5116 __ Dup(z16.VnB(), z9.VnB(), index[6]);
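  // If the requested index lies beyond the current vector length, `dup` zeroes
  // the destination; otherwise z16 picks up the 0xff background value (see the
  // expected values below).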
5117
5118 END();
5119
5120 if (CAN_RUN()) {
5121 RUN();
5122
5123 // Insr
Jacob Bramleye4983d42019-10-08 10:56:15 +01005124 uint64_t z1_expected[] = {0x1111111111111111, 0x1111111189abcdef};
5125 uint64_t z2_expected[] = {0x2222222222222222, 0x0123456789abcdef};
5126 uint64_t z3_expected[] = {0x3333333333333333, 0x3333333333333456};
5127 uint64_t z4_expected[] = {0x4444444444444444, 0x7ffaaaaa22223456};
TatWai Chong4f28df72019-08-14 17:50:30 -07005128 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
5129 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
5130 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
5131 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
5132
5133 // Rev
5134 int lane_count = core.GetSVELaneCount(kBRegSize);
5135 for (int i = 0; i < lane_count; i++) {
5136 uint64_t expected =
5137 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
5138 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
5139 ASSERT_EQUAL_64(expected, input);
5140 }
5141
5142 lane_count = core.GetSVELaneCount(kHRegSize);
5143 for (int i = 0; i < lane_count; i++) {
5144 uint64_t expected =
5145 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
5146 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
5147 ASSERT_EQUAL_64(expected, input);
5148 }
5149
5150 lane_count = core.GetSVELaneCount(kSRegSize);
5151 for (int i = 0; i < lane_count; i++) {
5152 uint64_t expected =
5153 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
5154 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
5155 ASSERT_EQUAL_64(expected, input);
5156 }
5157
5158 lane_count = core.GetSVELaneCount(kDRegSize);
5159 for (int i = 0; i < lane_count; i++) {
5160 uint64_t expected =
5161 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
5162 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
5163 ASSERT_EQUAL_64(expected, input);
5164 }
5165
5166 // Dup
5167 unsigned vl = config->sve_vl_in_bits();
5168 lane_count = core.GetSVELaneCount(kBRegSize);
5169 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
5170 for (int i = 0; i < lane_count; i++) {
5171 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
5172 }
5173
5174 lane_count = core.GetSVELaneCount(kHRegSize);
5175 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
5176 for (int i = 0; i < lane_count; i++) {
5177 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
5178 }
5179
5180 lane_count = core.GetSVELaneCount(kSRegSize);
5181 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
5182 for (int i = 0; i < lane_count; i++) {
5183 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
5184 }
5185
5186 lane_count = core.GetSVELaneCount(kDRegSize);
5187 uint64_t expected_z13 =
5188 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
5189 for (int i = 0; i < lane_count; i++) {
5190 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
5191 }
5192
5193 lane_count = core.GetSVELaneCount(kDRegSize);
5194 uint64_t expected_z14_lo = 0;
5195 uint64_t expected_z14_hi = 0;
5196 if (vl > (index[4] * kQRegSize)) {
5197 expected_z14_lo = 0x0123456789abcdef;
5198 expected_z14_hi = 0xfedcba9876543210;
5199 }
5200 for (int i = 0; i < lane_count; i += 2) {
5201 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
5202 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
5203 }
5204
5205 lane_count = core.GetSVELaneCount(kSRegSize);
5206 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
5207 for (int i = 0; i < lane_count; i++) {
5208 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
5209 }
5210
5211 lane_count = core.GetSVELaneCount(kBRegSize);
5212 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
5213 for (int i = 0; i < lane_count; i++) {
5214 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
5215 }
5216 }
5217}
5218
Martyn Capewell2e954292020-01-14 14:56:42 +00005219TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
TatWai Chong4f28df72019-08-14 17:50:30 -07005220 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5221 START();
5222
5223 uint64_t z9_inputs[] = {0xfedcba9876543210,
5224 0x0123456789abcdef,
5225 0x8f8e8d8c8b8a8988,
5226 0x8786858483828180};
5227 InsrHelper(&masm, z9.VnD(), z9_inputs);
5228
5229 __ Sunpkhi(z10.VnH(), z9.VnB());
5230 __ Sunpkhi(z11.VnS(), z9.VnH());
5231 __ Sunpkhi(z12.VnD(), z9.VnS());
5232
5233 __ Sunpklo(z13.VnH(), z9.VnB());
5234 __ Sunpklo(z14.VnS(), z9.VnH());
5235 __ Sunpklo(z15.VnD(), z9.VnS());
5236
5237 __ Uunpkhi(z16.VnH(), z9.VnB());
5238 __ Uunpkhi(z17.VnS(), z9.VnH());
5239 __ Uunpkhi(z18.VnD(), z9.VnS());
5240
5241 __ Uunpklo(z19.VnH(), z9.VnB());
5242 __ Uunpklo(z20.VnS(), z9.VnH());
5243 __ Uunpklo(z21.VnD(), z9.VnS());
5244
Martyn Capewell2e954292020-01-14 14:56:42 +00005245 // Test unpacking with same source and destination.
5246 __ Mov(z22, z9);
5247 __ Sunpklo(z22.VnH(), z22.VnB());
5248 __ Mov(z23, z9);
5249 __ Uunpklo(z23.VnH(), z23.VnB());
5250
TatWai Chong4f28df72019-08-14 17:50:30 -07005251 END();
5252
5253 if (CAN_RUN()) {
5254 RUN();
5255
 5256 // Sunpkhi
5257 int lane_count = core.GetSVELaneCount(kHRegSize);
5258 for (int i = lane_count - 1; i >= 0; i--) {
5259 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
5260 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5261 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5262 ASSERT_EQUAL_64(expected, input);
5263 }
5264
5265 lane_count = core.GetSVELaneCount(kSRegSize);
5266 for (int i = lane_count - 1; i >= 0; i--) {
5267 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
5268 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5269 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5270 ASSERT_EQUAL_64(expected, input);
5271 }
5272
5273 lane_count = core.GetSVELaneCount(kDRegSize);
5274 for (int i = lane_count - 1; i >= 0; i--) {
5275 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
5276 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5277 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5278 ASSERT_EQUAL_64(expected, input);
5279 }
5280
 5281 // Sunpklo
5282 lane_count = core.GetSVELaneCount(kHRegSize);
5283 for (int i = lane_count - 1; i >= 0; i--) {
5284 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
5285 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5286 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5287 ASSERT_EQUAL_64(expected, input);
5288 }
5289
5290 lane_count = core.GetSVELaneCount(kSRegSize);
5291 for (int i = lane_count - 1; i >= 0; i--) {
5292 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
5293 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5294 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5295 ASSERT_EQUAL_64(expected, input);
5296 }
5297
5298 lane_count = core.GetSVELaneCount(kDRegSize);
5299 for (int i = lane_count - 1; i >= 0; i--) {
5300 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
5301 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5302 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5303 ASSERT_EQUAL_64(expected, input);
5304 }
5305
5306 // Uuunpkhi
5307 lane_count = core.GetSVELaneCount(kHRegSize);
5308 for (int i = lane_count - 1; i >= 0; i--) {
5309 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
5310 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5311 ASSERT_EQUAL_64(expected, input);
5312 }
5313
5314 lane_count = core.GetSVELaneCount(kSRegSize);
5315 for (int i = lane_count - 1; i >= 0; i--) {
5316 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
5317 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5318 ASSERT_EQUAL_64(expected, input);
5319 }
5320
5321 lane_count = core.GetSVELaneCount(kDRegSize);
5322 for (int i = lane_count - 1; i >= 0; i--) {
5323 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
5324 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5325 ASSERT_EQUAL_64(expected, input);
5326 }
5327
 5328 // Uunpklo
5329 lane_count = core.GetSVELaneCount(kHRegSize);
5330 for (int i = lane_count - 1; i >= 0; i--) {
5331 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
5332 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5333 ASSERT_EQUAL_64(expected, input);
5334 }
5335
5336 lane_count = core.GetSVELaneCount(kSRegSize);
5337 for (int i = lane_count - 1; i >= 0; i--) {
5338 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
5339 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5340 ASSERT_EQUAL_64(expected, input);
5341 }
5342
5343 lane_count = core.GetSVELaneCount(kDRegSize);
5344 for (int i = lane_count - 1; i >= 0; i--) {
5345 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
5346 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5347 ASSERT_EQUAL_64(expected, input);
5348 }
Martyn Capewell2e954292020-01-14 14:56:42 +00005349
5350 ASSERT_EQUAL_SVE(z13, z22);
5351 ASSERT_EQUAL_SVE(z19, z23);
TatWai Chong4f28df72019-08-14 17:50:30 -07005352 }
5353}
5354
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005355TEST_SVE(sve_cnot_not) {
5356 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5357 START();
5358
5359 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
5360
5361 // For simplicity, we re-use the same pg for various lane sizes.
5362 // For D lanes: 1, 1, 0
5363 // For S lanes: 1, 1, 1, 0, 0
5364 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
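  // (For lanes wider than a byte, only the predicate bit of the lane's
  // lowest-numbered byte is significant, which is how the per-size patterns
  // above are derived from pg_in.)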
5365 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5366 Initialise(&masm, p0.VnB(), pg_in);
5367 PRegisterM pg = p0.Merging();
5368
5369 // These are merging operations, so we have to initialise the result register.
5370 // We use a mixture of constructive and destructive operations.
5371
5372 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005373 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005374 __ Mov(z30, z31);
5375
5376 // For constructive operations, use a different initial result value.
5377 __ Index(z29.VnB(), 0, -1);
5378
5379 __ Mov(z0, z31);
5380 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
5381 __ Mov(z1, z29);
5382 __ Cnot(z1.VnH(), pg, z31.VnH());
5383 __ Mov(z2, z31);
5384 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
5385 __ Mov(z3, z29);
5386 __ Cnot(z3.VnD(), pg, z31.VnD());
5387
5388 __ Mov(z4, z29);
5389 __ Not(z4.VnB(), pg, z31.VnB());
5390 __ Mov(z5, z31);
5391 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
5392 __ Mov(z6, z29);
5393 __ Not(z6.VnS(), pg, z31.VnS());
5394 __ Mov(z7, z31);
5395 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
5396
5397 END();
5398
5399 if (CAN_RUN()) {
5400 RUN();
5401
5402 // Check that constructive operations preserve their inputs.
5403 ASSERT_EQUAL_SVE(z30, z31);
5404
5405 // clang-format off
5406
5407 // Cnot (B) destructive
5408 uint64_t expected_z0[] =
5409 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5410 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
5411 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5412
5413 // Cnot (H)
5414 uint64_t expected_z1[] =
5415 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5416 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
5417 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5418
5419 // Cnot (S) destructive
5420 uint64_t expected_z2[] =
5421 // pg: 0 1 1 1 0 0
5422 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
5423 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5424
5425 // Cnot (D)
5426 uint64_t expected_z3[] =
5427 // pg: 1 1 0
5428 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
5429 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5430
5431 // Not (B)
5432 uint64_t expected_z4[] =
5433 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5434 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
5435 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5436
5437 // Not (H) destructive
5438 uint64_t expected_z5[] =
5439 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5440 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
5441 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5442
5443 // Not (S)
5444 uint64_t expected_z6[] =
5445 // pg: 0 1 1 1 0 0
5446 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
5447 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
5448
5449 // Not (D) destructive
5450 uint64_t expected_z7[] =
5451 // pg: 1 1 0
5452 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
5453 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
5454
5455 // clang-format on
5456 }
5457}
5458
5459TEST_SVE(sve_fabs_fneg) {
5460 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5461 START();
5462
5463 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
5464 // NaNs, but fabs and fneg do not.
5465 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
5466 0xfff00000ff80fc01, // Signalling NaNs.
5467 0x123456789abcdef0};
5468
5469 // For simplicity, we re-use the same pg for various lane sizes.
5470 // For D lanes: 1, 1, 0
5471 // For S lanes: 1, 1, 1, 0, 0
5472 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5473 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5474 Initialise(&masm, p0.VnB(), pg_in);
5475 PRegisterM pg = p0.Merging();
5476
5477 // These are merging operations, so we have to initialise the result register.
5478 // We use a mixture of constructive and destructive operations.
5479
5480 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005481 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005482 __ Mov(z30, z31);
5483
5484 // For constructive operations, use a different initial result value.
5485 __ Index(z29.VnB(), 0, -1);
5486
5487 __ Mov(z0, z29);
5488 __ Fabs(z0.VnH(), pg, z31.VnH());
5489 __ Mov(z1, z31);
5490 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
5491 __ Mov(z2, z29);
5492 __ Fabs(z2.VnD(), pg, z31.VnD());
5493
5494 __ Mov(z3, z31);
5495 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
5496 __ Mov(z4, z29);
5497 __ Fneg(z4.VnS(), pg, z31.VnS());
5498 __ Mov(z5, z31);
5499 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
5500
5501 END();
5502
5503 if (CAN_RUN()) {
5504 RUN();
5505
5506 // Check that constructive operations preserve their inputs.
5507 ASSERT_EQUAL_SVE(z30, z31);
5508
5509 // clang-format off
5510
5511 // Fabs (H)
5512 uint64_t expected_z0[] =
5513 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5514 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
5515 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5516
5517 // Fabs (S) destructive
5518 uint64_t expected_z1[] =
5519 // pg: 0 1 1 1 0 0
5520 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
5521 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5522
5523 // Fabs (D)
5524 uint64_t expected_z2[] =
5525 // pg: 1 1 0
5526 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
5527 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5528
5529 // Fneg (H) destructive
5530 uint64_t expected_z3[] =
5531 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5532 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
5533 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5534
5535 // Fneg (S)
5536 uint64_t expected_z4[] =
5537 // pg: 0 1 1 1 0 0
5538 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
5539 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5540
5541 // Fneg (D) destructive
5542 uint64_t expected_z5[] =
5543 // pg: 1 1 0
5544 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
5545 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5546
5547 // clang-format on
5548 }
5549}
5550
5551TEST_SVE(sve_cls_clz_cnt) {
5552 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5553 START();
5554
5555 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5556
5557 // For simplicity, we re-use the same pg for various lane sizes.
5558 // For D lanes: 1, 1, 0
5559 // For S lanes: 1, 1, 1, 0, 0
5560 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5561 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5562 Initialise(&masm, p0.VnB(), pg_in);
5563 PRegisterM pg = p0.Merging();
5564
5565 // These are merging operations, so we have to initialise the result register.
5566 // We use a mixture of constructive and destructive operations.
5567
5568 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005569 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005570 __ Mov(z30, z31);
5571
5572 // For constructive operations, use a different initial result value.
5573 __ Index(z29.VnB(), 0, -1);
5574
5575 __ Mov(z0, z29);
5576 __ Cls(z0.VnB(), pg, z31.VnB());
5577 __ Mov(z1, z31);
5578 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
5579 __ Mov(z2, z29);
5580 __ Cnt(z2.VnS(), pg, z31.VnS());
5581 __ Mov(z3, z31);
5582 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
5583
5584 END();
5585
5586 if (CAN_RUN()) {
5587 RUN();
 5588 // Check that constructive operations preserve their inputs.
5589 ASSERT_EQUAL_SVE(z30, z31);
5590
5591 // clang-format off
5592
5593 // cls (B)
5594 uint8_t expected_z0[] =
5595 // pg: 0 0 0 0 1 0 1 1
5596 // pg: 1 0 0 1 0 1 1 1
5597 // pg: 0 0 1 0 1 1 1 0
5598 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
5599 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
5600 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
5601 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
5602
5603 // clz (H) destructive
5604 uint16_t expected_z1[] =
5605 // pg: 0 0 0 1
5606 // pg: 0 1 1 1
5607 // pg: 0 0 1 0
5608 {0x0000, 0x0000, 0x0000, 16,
5609 0xfefc, 0, 0, 0,
5610 0x1234, 0x5678, 0, 0xdef0};
5611 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
5612
5613 // cnt (S)
5614 uint32_t expected_z2[] =
5615 // pg: 0 1
5616 // pg: 1 1
5617 // pg: 0 0
5618 {0xe9eaebec, 0,
5619 22, 16,
5620 0xf9fafbfc, 0xfdfeff00};
5621 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
5622
5623 // cnt (D) destructive
5624 uint64_t expected_z3[] =
5625 // pg: 1 1 0
5626 { 0, 38, 0x123456789abcdef0};
5627 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5628
5629 // clang-format on
5630 }
5631}
5632
5633TEST_SVE(sve_sxt) {
5634 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5635 START();
5636
5637 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5638
5639 // For simplicity, we re-use the same pg for various lane sizes.
5640 // For D lanes: 1, 1, 0
5641 // For S lanes: 1, 1, 1, 0, 0
5642 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5643 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5644 Initialise(&masm, p0.VnB(), pg_in);
5645 PRegisterM pg = p0.Merging();
5646
5647 // These are merging operations, so we have to initialise the result register.
5648 // We use a mixture of constructive and destructive operations.
5649
5650 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005651 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005652 __ Mov(z30, z31);
5653
5654 // For constructive operations, use a different initial result value.
5655 __ Index(z29.VnB(), 0, -1);
5656
5657 __ Mov(z0, z31);
5658 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
5659 __ Mov(z1, z29);
5660 __ Sxtb(z1.VnS(), pg, z31.VnS());
5661 __ Mov(z2, z31);
5662 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
5663 __ Mov(z3, z29);
5664 __ Sxth(z3.VnS(), pg, z31.VnS());
5665 __ Mov(z4, z31);
5666 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
5667 __ Mov(z5, z29);
5668 __ Sxtw(z5.VnD(), pg, z31.VnD());
5669
5670 END();
5671
5672 if (CAN_RUN()) {
5673 RUN();
5674 // Check that constructive operations preserve their inputs.
5675 ASSERT_EQUAL_SVE(z30, z31);
5676
5677 // clang-format off
5678
5679 // Sxtb (H) destructive
5680 uint64_t expected_z0[] =
5681 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5682 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
5683 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5684
5685 // Sxtb (S)
5686 uint64_t expected_z1[] =
5687 // pg: 0 1 1 1 0 0
5688 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
5689 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5690
5691 // Sxtb (D) destructive
5692 uint64_t expected_z2[] =
5693 // pg: 1 1 0
5694 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
5695 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5696
5697 // Sxth (S)
5698 uint64_t expected_z3[] =
5699 // pg: 0 1 1 1 0 0
5700 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
5701 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5702
5703 // Sxth (D) destructive
5704 uint64_t expected_z4[] =
5705 // pg: 1 1 0
5706 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
5707 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5708
5709 // Sxtw (D)
5710 uint64_t expected_z5[] =
5711 // pg: 1 1 0
5712 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
5713 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5714
5715 // clang-format on
5716 }
5717}
5718
5719TEST_SVE(sve_uxt) {
5720 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5721 START();
5722
5723 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5724
5725 // For simplicity, we re-use the same pg for various lane sizes.
5726 // For D lanes: 1, 1, 0
5727 // For S lanes: 1, 1, 1, 0, 0
5728 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5729 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5730 Initialise(&masm, p0.VnB(), pg_in);
5731 PRegisterM pg = p0.Merging();
5732
5733 // These are merging operations, so we have to initialise the result register.
5734 // We use a mixture of constructive and destructive operations.
5735
5736 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005737 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005738 __ Mov(z30, z31);
5739
5740 // For constructive operations, use a different initial result value.
5741 __ Index(z29.VnB(), 0, -1);
5742
5743 __ Mov(z0, z29);
5744 __ Uxtb(z0.VnH(), pg, z31.VnH());
5745 __ Mov(z1, z31);
5746 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
5747 __ Mov(z2, z29);
5748 __ Uxtb(z2.VnD(), pg, z31.VnD());
5749 __ Mov(z3, z31);
5750 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
5751 __ Mov(z4, z29);
5752 __ Uxth(z4.VnD(), pg, z31.VnD());
5753 __ Mov(z5, z31);
5754 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
5755
5756 END();
5757
5758 if (CAN_RUN()) {
5759 RUN();
5760 // clang-format off
5761
5762 // Uxtb (H)
5763 uint64_t expected_z0[] =
5764 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5765 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
5766 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5767
5768 // Uxtb (S) destructive
5769 uint64_t expected_z1[] =
5770 // pg: 0 1 1 1 0 0
5771 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
5772 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5773
5774 // Uxtb (D)
5775 uint64_t expected_z2[] =
5776 // pg: 1 1 0
5777 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
5778 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5779
5780 // Uxth (S) destructive
5781 uint64_t expected_z3[] =
5782 // pg: 0 1 1 1 0 0
5783 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
5784 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5785
5786 // Uxth (D)
5787 uint64_t expected_z4[] =
5788 // pg: 1 1 0
5789 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
5790 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5791
5792 // Uxtw (D) destructive
5793 uint64_t expected_z5[] =
5794 // pg: 1 1 0
5795 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
5796 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5797
5798 // clang-format on
5799 }
5800}
5801
5802TEST_SVE(sve_abs_neg) {
5803 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5804 START();
5805
5806 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5807
5808 // For simplicity, we re-use the same pg for various lane sizes.
5809 // For D lanes: 1, 1, 0
5810 // For S lanes: 1, 1, 1, 0, 0
5811 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5812 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5813 Initialise(&masm, p0.VnB(), pg_in);
5814 PRegisterM pg = p0.Merging();
5815
5818 // These are merging operations, so we have to initialise the result register.
5819 // We use a mixture of constructive and destructive operations.
5820
5821 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005822 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005823 __ Mov(z30, z31);
5824
5825 // For constructive operations, use a different initial result value.
5826 __ Index(z29.VnB(), 0, -1);
5827
5828 __ Mov(z0, z31);
5829 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
5830 __ Mov(z1, z29);
5831 __ Abs(z1.VnB(), pg, z31.VnB());
5832
5833 __ Mov(z2, z31);
5834 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
5835 __ Mov(z3, z29);
5836 __ Neg(z3.VnS(), pg, z31.VnS());
5837
Jacob Bramleyc0066272019-09-30 16:30:47 +01005838 // The unpredicated form of `Neg` is implemented using `subr`.
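  // (`subr` is a reversed subtract, so something like `subr zd.b, zd.b, #0`
  // computes 0 - zd in each lane, i.e. the negation. The exact expansion the
  // MacroAssembler picks may differ; this is just the idea.)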
5839 __ Mov(z4, z31);
5840 __ Neg(z4.VnB(), z4.VnB()); // destructive
5841 __ Mov(z5, z29);
5842 __ Neg(z5.VnD(), z31.VnD());
5843
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005844 END();
5845
5846 if (CAN_RUN()) {
5847 RUN();
Jacob Bramleyc0066272019-09-30 16:30:47 +01005848
5849 ASSERT_EQUAL_SVE(z30, z31);
5850
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005851 // clang-format off
5852
5853 // Abs (D) destructive
5854 uint64_t expected_z0[] =
5855 // pg: 1 1 0
5856 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
5857 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5858
5859 // Abs (B)
5860 uint64_t expected_z1[] =
5861 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5862 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
5863 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5864
5865 // Neg (H) destructive
5866 uint64_t expected_z2[] =
5867 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5868 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
5869 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5870
5871 // Neg (S)
5872 uint64_t expected_z3[] =
5873 // pg: 0 1 1 1 0 0
5874 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
5875 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5876
Jacob Bramleyc0066272019-09-30 16:30:47 +01005877 // Neg (B) destructive, unpredicated
5878 uint64_t expected_z4[] =
5879 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
5880 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5881
5882 // Neg (D) unpredicated
5883 uint64_t expected_z5[] =
5884 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
5885 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5886
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005887 // clang-format on
5888 }
5889}
5890
Jacob Bramley0093bb92019-10-04 15:54:10 +01005891TEST_SVE(sve_cpy) {
5892 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5893 START();
5894
5895 // For simplicity, we re-use the same pg for various lane sizes.
5896 // For D lanes: 0, 1, 1
5897 // For S lanes: 0, 1, 1, 0, 1
5898 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5899 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5900
5901 PRegisterM pg = p7.Merging();
5902 Initialise(&masm, pg.VnB(), pg_in);
5903
5904 // These are merging operations, so we have to initialise the result registers
5905 // for each operation.
5906 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5907 __ Index(ZRegister(i, kBRegSize), 0, -1);
5908 }
5909
5910 // Recognisable values to copy.
5911 __ Mov(x0, 0xdeadbeefdeadbe42);
5912 __ Mov(x1, 0xdeadbeefdead8421);
5913 __ Mov(x2, 0xdeadbeef80042001);
5914 __ Mov(x3, 0x8000000420000001);
5915
5916 // Use NEON moves, to avoid testing SVE `cpy` against itself.
5917 __ Dup(v28.V2D(), x0);
5918 __ Dup(v29.V2D(), x1);
5919 __ Dup(v30.V2D(), x2);
5920 __ Dup(v31.V2D(), x3);
5921
5922 // Register forms (CPY_z_p_r)
5923 __ Cpy(z0.VnB(), pg, w0);
5924 __ Cpy(z1.VnH(), pg, x1); // X registers are accepted for small lanes.
5925 __ Cpy(z2.VnS(), pg, w2);
5926 __ Cpy(z3.VnD(), pg, x3);
5927
5928 // VRegister forms (CPY_z_p_v)
5929 __ Cpy(z4.VnB(), pg, b28);
5930 __ Cpy(z5.VnH(), pg, h29);
5931 __ Cpy(z6.VnS(), pg, s30);
5932 __ Cpy(z7.VnD(), pg, d31);
5933
5934 // Check that we can copy the stack pointer.
5935 __ Mov(x10, sp);
5936 __ Mov(sp, 0xabcabcabcabcabca); // Set sp to a known value.
5937 __ Cpy(z16.VnB(), pg, sp);
5938 __ Cpy(z17.VnH(), pg, wsp);
5939 __ Cpy(z18.VnS(), pg, wsp);
5940 __ Cpy(z19.VnD(), pg, sp);
5941 __ Mov(sp, x10); // Restore sp.
5942
5943 END();
5944
5945 if (CAN_RUN()) {
5946 RUN();
5947 // clang-format off
5948
5949 uint64_t expected_b[] =
5950 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5951 {0xe9eaebec424242f0, 0x42f2f34242f64242, 0xf942fbfcfdfeff42};
5952 ASSERT_EQUAL_SVE(expected_b, z0.VnD());
5953 ASSERT_EQUAL_SVE(expected_b, z4.VnD());
5954
5955 uint64_t expected_h[] =
5956 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5957 {0xe9eaebec8421eff0, 0xf1f28421f5f68421, 0x8421fbfcfdfe8421};
5958 ASSERT_EQUAL_SVE(expected_h, z1.VnD());
5959 ASSERT_EQUAL_SVE(expected_h, z5.VnD());
5960
5961 uint64_t expected_s[] =
5962 // pg: 0 0 1 1 0 1
5963 {0xe9eaebecedeeeff0, 0x8004200180042001, 0xf9fafbfc80042001};
5964 ASSERT_EQUAL_SVE(expected_s, z2.VnD());
5965 ASSERT_EQUAL_SVE(expected_s, z6.VnD());
5966
5967 uint64_t expected_d[] =
5968 // pg: 0 1 1
5969 {0xe9eaebecedeeeff0, 0x8000000420000001, 0x8000000420000001};
5970 ASSERT_EQUAL_SVE(expected_d, z3.VnD());
5971 ASSERT_EQUAL_SVE(expected_d, z7.VnD());
5972
5973
5974 uint64_t expected_b_sp[] =
5975 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5976 {0xe9eaebeccacacaf0, 0xcaf2f3cacaf6caca, 0xf9cafbfcfdfeffca};
5977 ASSERT_EQUAL_SVE(expected_b_sp, z16.VnD());
5978
5979 uint64_t expected_h_sp[] =
5980 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5981 {0xe9eaebecabcaeff0, 0xf1f2abcaf5f6abca, 0xabcafbfcfdfeabca};
5982 ASSERT_EQUAL_SVE(expected_h_sp, z17.VnD());
5983
5984 uint64_t expected_s_sp[] =
5985 // pg: 0 0 1 1 0 1
5986 {0xe9eaebecedeeeff0, 0xcabcabcacabcabca, 0xf9fafbfccabcabca};
5987 ASSERT_EQUAL_SVE(expected_s_sp, z18.VnD());
5988
5989 uint64_t expected_d_sp[] =
5990 // pg: 0 1 1
5991 {0xe9eaebecedeeeff0, 0xabcabcabcabcabca, 0xabcabcabcabcabca};
5992 ASSERT_EQUAL_SVE(expected_d_sp, z19.VnD());
5993
5994 // clang-format on
5995 }
5996}
5997
Jacob Bramley0f62eab2019-10-23 17:07:47 +01005998TEST_SVE(sve_cpy_imm) {
5999 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6000 START();
6001
6002 // For simplicity, we re-use the same pg for various lane sizes.
6003 // For D lanes: 0, 1, 1
6004 // For S lanes: 0, 1, 1, 0, 1
6005 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6006 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6007
6008 PRegister pg = p7;
6009 Initialise(&masm, pg.VnB(), pg_in);
6010
6011 // These are (mostly) merging operations, so we have to initialise the result
6012 // registers for each operation.
6013 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6014 __ Index(ZRegister(i, kBRegSize), 0, -1);
6015 }
6016
6017 // Encodable integer forms (CPY_z_p_i)
6018 __ Cpy(z0.VnB(), pg.Merging(), 0);
6019 __ Cpy(z1.VnB(), pg.Zeroing(), 42);
6020 __ Cpy(z2.VnB(), pg.Merging(), -42);
6021 __ Cpy(z3.VnB(), pg.Zeroing(), 0xff);
6022 __ Cpy(z4.VnH(), pg.Merging(), 127);
6023 __ Cpy(z5.VnS(), pg.Zeroing(), -128);
6024 __ Cpy(z6.VnD(), pg.Merging(), -1);
6025
6026 // Forms encodable using fcpy.
6027 __ Cpy(z7.VnH(), pg.Merging(), Float16ToRawbits(Float16(-31.0)));
6028 __ Cpy(z8.VnS(), pg.Zeroing(), FloatToRawbits(2.0f));
6029 __ Cpy(z9.VnD(), pg.Merging(), DoubleToRawbits(-4.0));
6030
6031 // Other forms use a scratch register.
6032 __ Cpy(z10.VnH(), pg.Merging(), 0xff);
6033 __ Cpy(z11.VnD(), pg.Zeroing(), 0x0123456789abcdef);
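  // Cpy's immediate form only encodes a signed 8-bit value, optionally shifted
  // left by 8, so 0xff (255 in H lanes) and 0x0123456789abcdef cannot be
  // encoded directly; the MacroAssembler is expected to move them into a
  // general-purpose scratch register and use the register form.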
6034
6035 END();
6036
6037 if (CAN_RUN()) {
6038 RUN();
6039 // clang-format off
6040
6041 uint64_t expected_z0[] =
6042 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6043 {0xe9eaebec000000f0, 0x00f2f30000f60000, 0xf900fbfcfdfeff00};
6044 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6045
6046 uint64_t expected_z1[] =
6047 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6048 {0x000000002a2a2a00, 0x2a00002a2a002a2a, 0x002a00000000002a};
6049 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6050
6051 uint64_t expected_z2[] =
6052 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6053 {0xe9eaebecd6d6d6f0, 0xd6f2f3d6d6f6d6d6, 0xf9d6fbfcfdfeffd6};
6054 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6055
6056 uint64_t expected_z3[] =
6057 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6058 {0x00000000ffffff00, 0xff0000ffff00ffff, 0x00ff0000000000ff};
6059 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6060
6061 uint64_t expected_z4[] =
6062 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6063 {0xe9eaebec007feff0, 0xf1f2007ff5f6007f, 0x007ffbfcfdfe007f};
6064 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6065
6066 uint64_t expected_z5[] =
6067 // pg: 0 0 1 1 0 1
6068 {0x0000000000000000, 0xffffff80ffffff80, 0x00000000ffffff80};
6069 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6070
6071 uint64_t expected_z6[] =
6072 // pg: 0 1 1
6073 {0xe9eaebecedeeeff0, 0xffffffffffffffff, 0xffffffffffffffff};
6074 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6075
6076 uint64_t expected_z7[] =
6077 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6078 {0xe9eaebeccfc0eff0, 0xf1f2cfc0f5f6cfc0, 0xcfc0fbfcfdfecfc0};
6079 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6080
6081 uint64_t expected_z8[] =
6082 // pg: 0 0 1 1 0 1
6083 {0x0000000000000000, 0x4000000040000000, 0x0000000040000000};
6084 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6085
6086 uint64_t expected_z9[] =
6087 // pg: 0 1 1
6088 {0xe9eaebecedeeeff0, 0xc010000000000000, 0xc010000000000000};
6089 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6090
6091 uint64_t expected_z10[] =
6092 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6093 {0xe9eaebec00ffeff0, 0xf1f200fff5f600ff, 0x00fffbfcfdfe00ff};
6094 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6095
6096 uint64_t expected_z11[] =
6097 // pg: 0 1 1
6098 {0x0000000000000000, 0x0123456789abcdef, 0x0123456789abcdef};
6099 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6100
6101 // clang-format on
6102 }
6103}
6104
6105TEST_SVE(sve_fcpy_imm) {
6106 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6107 START();
6108
6109 // For simplicity, we re-use the same pg for various lane sizes.
6110 // For D lanes: 0, 1, 1
6111 // For S lanes: 0, 1, 1, 0, 1
6112 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6113 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6114
6115 PRegister pg = p7;
6116 Initialise(&masm, pg.VnB(), pg_in);
6117
6118 // These are (mostly) merging operations, so we have to initialise the result
6119 // registers for each operation.
6120 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6121 __ Index(ZRegister(i, kBRegSize), 0, -1);
6122 }
6123
6124 // Encodable floating-point forms (FCPY_z_p_i)
6125 __ Fcpy(z1.VnH(), pg.Merging(), Float16(1.0));
6126 __ Fcpy(z2.VnH(), pg.Merging(), -2.0f);
6127 __ Fcpy(z3.VnH(), pg.Merging(), 3.0);
6128 __ Fcpy(z4.VnS(), pg.Merging(), Float16(-4.0));
6129 __ Fcpy(z5.VnS(), pg.Merging(), 5.0f);
6130 __ Fcpy(z6.VnS(), pg.Merging(), 6.0);
6131 __ Fcpy(z7.VnD(), pg.Merging(), Float16(7.0));
6132 __ Fcpy(z8.VnD(), pg.Merging(), 8.0f);
6133 __ Fcpy(z9.VnD(), pg.Merging(), -9.0);
6134
6135 // Unencodable immediates.
6136 __ Fcpy(z10.VnS(), pg.Merging(), 0.0);
6137 __ Fcpy(z11.VnH(), pg.Merging(), Float16(42.0));
6138 __ Fcpy(z12.VnD(), pg.Merging(), RawbitsToDouble(0x7ff0000012340000)); // NaN
6139 __ Fcpy(z13.VnH(), pg.Merging(), kFP64NegativeInfinity);
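  // Fcpy's immediate uses the usual 8-bit floating-point encoding (roughly
  // +/-n/16 * 2^r, with magnitudes from 0.125 to 31.0), so 0.0, 42.0, NaNs and
  // infinities all fall outside it and need a different expansion.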
6140
6141 END();
6142
6143 if (CAN_RUN()) {
6144 RUN();
6145 // clang-format off
6146
6147 // 1.0 as FP16: 0x3c00
6148 uint64_t expected_z1[] =
6149 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6150 {0xe9eaebec3c00eff0, 0xf1f23c00f5f63c00, 0x3c00fbfcfdfe3c00};
6151 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6152
6153 // -2.0 as FP16: 0xc000
6154 uint64_t expected_z2[] =
6155 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6156 {0xe9eaebecc000eff0, 0xf1f2c000f5f6c000, 0xc000fbfcfdfec000};
6157 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6158
6159 // 3.0 as FP16: 0x4200
6160 uint64_t expected_z3[] =
6161 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6162 {0xe9eaebec4200eff0, 0xf1f24200f5f64200, 0x4200fbfcfdfe4200};
6163 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6164
6165 // -4.0 as FP32: 0xc0800000
6166 uint64_t expected_z4[] =
6167 // pg: 0 0 1 1 0 1
6168 {0xe9eaebecedeeeff0, 0xc0800000c0800000, 0xf9fafbfcc0800000};
6169 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6170
6171 // 5.0 as FP32: 0x40a00000
6172 uint64_t expected_z5[] =
6173 // pg: 0 0 1 1 0 1
6174 {0xe9eaebecedeeeff0, 0x40a0000040a00000, 0xf9fafbfc40a00000};
6175 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6176
6177 // 6.0 as FP32: 0x40c00000
6178 uint64_t expected_z6[] =
6179 // pg: 0 0 1 1 0 1
6180 {0xe9eaebecedeeeff0, 0x40c0000040c00000, 0xf9fafbfc40c00000};
6181 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6182
6183 // 7.0 as FP64: 0x401c000000000000
6184 uint64_t expected_z7[] =
6185 // pg: 0 1 1
6186 {0xe9eaebecedeeeff0, 0x401c000000000000, 0x401c000000000000};
6187 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6188
6189 // 8.0 as FP64: 0x4020000000000000
6190 uint64_t expected_z8[] =
6191 // pg: 0 1 1
6192 {0xe9eaebecedeeeff0, 0x4020000000000000, 0x4020000000000000};
6193 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6194
6195 // -9.0 as FP64: 0xc022000000000000
6196 uint64_t expected_z9[] =
6197 // pg: 0 1 1
6198 {0xe9eaebecedeeeff0, 0xc022000000000000, 0xc022000000000000};
6199 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6200
6201 // 0.0 as FP32: 0x00000000
6202 uint64_t expected_z10[] =
6203 // pg: 0 0 1 1 0 1
6204 {0xe9eaebecedeeeff0, 0x0000000000000000, 0xf9fafbfc00000000};
6205 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6206
6207 // 42.0 as FP16: 0x5140
6208 uint64_t expected_z11[] =
6209 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6210 {0xe9eaebec5140eff0, 0xf1f25140f5f65140, 0x5140fbfcfdfe5140};
6211 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6212
6213 // Signalling NaN (with payload): 0x7ff0000012340000
6214 uint64_t expected_z12[] =
6215 // pg: 0 1 1
6216 {0xe9eaebecedeeeff0, 0x7ff0000012340000, 0x7ff0000012340000};
6217 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
6218
6219 // -infinity as FP16: 0xfc00
6220 uint64_t expected_z13[] =
6221 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6222 {0xe9eaebecfc00eff0, 0xf1f2fc00f5f6fc00, 0xfc00fbfcfdfefc00};
6223 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
6224
6225 // clang-format on
6226 }
6227}
6228
TatWai Chong4f28df72019-08-14 17:50:30 -07006229TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
6230 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6231 START();
6232
6233 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
6234
6235 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
6236
6237 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
6238
6239 int index_s[] = {1, 3, 2, 31, -1};
6240
6241 int index_d[] = {31, 1};
6242
 6243 // Initialise the register with a value that doesn't exist in the table.
6244 __ Dup(z9.VnB(), 0x1f);
6245 InsrHelper(&masm, z9.VnD(), table_inputs);
6246
6247 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
6248 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
6249 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
6250 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
6251
6252 InsrHelper(&masm, ind_b, index_b);
6253 InsrHelper(&masm, ind_h, index_h);
6254 InsrHelper(&masm, ind_s, index_s);
6255 InsrHelper(&masm, ind_d, index_d);
6256
6257 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
6258
6259 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
6260
6261 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
6262
6263 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
6264
6265 END();
6266
6267 if (CAN_RUN()) {
6268 RUN();
6269
6270 // clang-format off
6271 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
6272 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
6273
6274 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
6275 0x5544, 0x7766, 0xddcc, 0x9988};
6276
6277 unsigned z28_expected[] =
6278 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
6279
6280 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
6281 // clang-format on
6282
6283 unsigned vl = config->sve_vl_in_bits();
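    // Tbl zeroes a destination lane when its index is outside the vector, so
    // each reference value only applies if the indexed lane exists for this
    // VL; `index * lane_size_in_bits` is the bit offset of that lane. In-range
    // indices beyond the 16 inserted table bytes read the 0x1f filler.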
6284 for (size_t i = 0; i < ArrayLength(index_b); i++) {
6285 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
6286 if (!core.HasSVELane(z26.VnB(), lane)) break;
6287 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
6288 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
6289 }
6290
6291 for (size_t i = 0; i < ArrayLength(index_h); i++) {
6292 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
6293 if (!core.HasSVELane(z27.VnH(), lane)) break;
6294 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
6295 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
6296 }
6297
6298 for (size_t i = 0; i < ArrayLength(index_s); i++) {
6299 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
6300 if (!core.HasSVELane(z28.VnS(), lane)) break;
6301 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
6302 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
6303 }
6304
6305 for (size_t i = 0; i < ArrayLength(index_d); i++) {
6306 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
6307 if (!core.HasSVELane(z29.VnD(), lane)) break;
6308 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
6309 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
6310 }
6311 }
6312}
6313
Jacob Bramley199339d2019-08-05 18:49:13 +01006314TEST_SVE(ldr_str_z_bi) {
6315 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6316 START();
6317
6318 int vl = config->sve_vl_in_bytes();
6319
6320 // The immediate can address [-256, 255] times the VL, so allocate enough
6321 // space to exceed that in both directions.
6322 int data_size = vl * 1024;
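  // With the base placed half-way through the buffer, accesses of up to about
  // +/-512 * VL stay in bounds; the largest offsets used below are +/-314 * VL
  // (plus one vector of data).
  VIXL_ASSERT((data_size / 2) >= (315 * vl));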
6323
6324 uint8_t* data = new uint8_t[data_size];
6325 memset(data, 0, data_size);
6326
6327 // Set the base half-way through the buffer so we can use negative indices.
6328 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6329
6330 __ Index(z1.VnB(), 1, 3);
6331 __ Index(z2.VnB(), 2, 5);
6332 __ Index(z3.VnB(), 3, 7);
6333 __ Index(z4.VnB(), 4, 11);
6334 __ Index(z5.VnB(), 5, 13);
6335 __ Index(z6.VnB(), 6, 2);
6336 __ Index(z7.VnB(), 7, 3);
6337 __ Index(z8.VnB(), 8, 5);
6338 __ Index(z9.VnB(), 9, 7);
6339
6340 // Encodable cases.
6341 __ Str(z1, SVEMemOperand(x0));
6342 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
6343 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
6344 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
6345 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
6346
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006347 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01006348 __ Str(z6, SVEMemOperand(x0, 6 * vl));
6349 __ Str(z7, SVEMemOperand(x0, -7 * vl));
6350 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
6351 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
6352
6353 // Corresponding loads.
6354 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
6355 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
6356 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
6357 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
6358 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
6359
6360 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
6361 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
6362 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
6363 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
6364
6365 END();
6366
6367 if (CAN_RUN()) {
6368 RUN();
6369
6370 uint8_t* expected = new uint8_t[data_size];
6371 memset(expected, 0, data_size);
6372 uint8_t* middle = &expected[data_size / 2];
6373
6374 for (int i = 0; i < vl; i++) {
6375 middle[i] = (1 + (3 * i)) & 0xff; // z1
6376 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
6377 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
6378 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
6379 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
6380 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
6381 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
6382 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
6383 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
6384 }
6385
Jacob Bramley33c99f92019-10-08 15:24:12 +01006386 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01006387
6388 ASSERT_EQUAL_SVE(z1, z11);
6389 ASSERT_EQUAL_SVE(z2, z12);
6390 ASSERT_EQUAL_SVE(z3, z13);
6391 ASSERT_EQUAL_SVE(z4, z14);
6392 ASSERT_EQUAL_SVE(z5, z15);
6393 ASSERT_EQUAL_SVE(z6, z16);
6394 ASSERT_EQUAL_SVE(z7, z17);
6395 ASSERT_EQUAL_SVE(z8, z18);
6396 ASSERT_EQUAL_SVE(z9, z19);
6397
6398 delete[] expected;
6399 }
6400 delete[] data;
6401}
6402
6403TEST_SVE(ldr_str_p_bi) {
6404 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6405 START();
6406
6407 int vl = config->sve_vl_in_bytes();
6408 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
6409 int pl = vl / kZRegBitsPerPRegBit;
6410
6411 // The immediate can address [-256, 255] times the PL, so allocate enough
6412 // space to exceed that in both directions.
6413 int data_size = pl * 1024;
6414
6415 uint8_t* data = new uint8_t[data_size];
6416 memset(data, 0, data_size);
6417
6418 // Set the base half-way through the buffer so we can use negative indices.
6419 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6420
6421 uint64_t pattern[4] = {0x1010101011101111,
6422 0x0010111011000101,
6423 0x1001101110010110,
6424 0x1010110101100011};
6425 for (int i = 8; i <= 15; i++) {
6426 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
6427 Initialise(&masm,
6428 PRegister(i),
6429 pattern[3] * i,
6430 pattern[2] * i,
6431 pattern[1] * i,
6432 pattern[0] * i);
6433 }
6434
6435 // Encodable cases.
6436 __ Str(p8, SVEMemOperand(x0));
6437 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
6438 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
6439 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
6440
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006441 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01006442 __ Str(p12, SVEMemOperand(x0, 6 * pl));
6443 __ Str(p13, SVEMemOperand(x0, -7 * pl));
6444 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
6445 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
6446
6447 // Corresponding loads.
6448 __ Ldr(p0, SVEMemOperand(x0));
6449 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
6450 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
6451 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
6452
6453 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
6454 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
6455 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
6456 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
6457
6458 END();
6459
6460 if (CAN_RUN()) {
6461 RUN();
6462
6463 uint8_t* expected = new uint8_t[data_size];
6464 memset(expected, 0, data_size);
6465 uint8_t* middle = &expected[data_size / 2];
6466
6467 for (int i = 0; i < pl; i++) {
6468 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
6469 size_t index = i / sizeof(pattern[0]);
6470 VIXL_ASSERT(index < ArrayLength(pattern));
6471 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
6472 // Each byte of `pattern` can be multiplied by 15 without carry.
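      // (The largest byte in `pattern` is 0x11, and 0x11 * 15 = 0xff, so
      // scaling by the register numbers 8-15 below never carries between
      // bytes.)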
6473 VIXL_ASSERT((byte * 15) <= 0xff);
6474
6475 middle[i] = byte * 8; // p8
6476 middle[(2 * pl) + i] = byte * 9; // p9
6477 middle[(-3 * pl) + i] = byte * 10; // p10
6478 middle[(255 * pl) + i] = byte * 11; // p11
6479 middle[(6 * pl) + i] = byte * 12; // p12
6480 middle[(-7 * pl) + i] = byte * 13; // p13
6481 middle[(314 * pl) + i] = byte * 14; // p14
6482 middle[(-314 * pl) + i] = byte * 15; // p15
6483 }
6484
Jacob Bramley33c99f92019-10-08 15:24:12 +01006485 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01006486
6487 ASSERT_EQUAL_SVE(p0, p8);
6488 ASSERT_EQUAL_SVE(p1, p9);
6489 ASSERT_EQUAL_SVE(p2, p10);
6490 ASSERT_EQUAL_SVE(p3, p11);
6491 ASSERT_EQUAL_SVE(p4, p12);
6492 ASSERT_EQUAL_SVE(p5, p13);
6493 ASSERT_EQUAL_SVE(p6, p14);
6494 ASSERT_EQUAL_SVE(p7, p15);
6495
6496 delete[] expected;
6497 }
6498 delete[] data;
6499}
6500
Jacob Bramleye668b202019-08-14 17:57:34 +01006501template <typename T>
6502static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
6503 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
6504}
6505
6506TEST_SVE(sve_ld1_st1_contiguous) {
6507 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6508 START();
6509
6510 int vl = config->sve_vl_in_bytes();
6511
6512 // The immediate can address [-8, 7] times the VL, so allocate enough space to
6513 // exceed that in both directions.
6514 int data_size = vl * 128;
6515
6516 uint8_t* data = new uint8_t[data_size];
6517 memset(data, 0, data_size);
6518
 6519 // Set the base half-way through the buffer so we can use negative indices.
6520 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6521
Jacob Bramleye668b202019-08-14 17:57:34 +01006522 // Encodable scalar-plus-immediate cases.
6523 __ Index(z1.VnB(), 1, -3);
6524 __ Ptrue(p1.VnB());
6525 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
6526
6527 __ Index(z2.VnH(), -2, 5);
6528 __ Ptrue(p2.VnH(), SVE_MUL3);
6529 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
6530
6531 __ Index(z3.VnS(), 3, -7);
6532 __ Ptrue(p3.VnS(), SVE_POW2);
6533 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
6534
6535 // Encodable scalar-plus-scalar cases.
6536 __ Index(z4.VnD(), -4, 11);
6537 __ Ptrue(p4.VnD(), SVE_VL3);
6538 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
6539 __ Mov(x2, 17);
6540 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
6541
6542 __ Index(z5.VnD(), 6, -2);
6543 __ Ptrue(p5.VnD(), SVE_VL16);
TatWai Chong6205eb42019-09-24 10:07:20 +01006544 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
6545 __ Mov(x4, 6);
6546 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
Jacob Bramleye668b202019-08-14 17:57:34 +01006547
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006548 // Unencodable cases fall back on `CalculateSVEAddress`.
Jacob Bramleye668b202019-08-14 17:57:34 +01006549 __ Index(z6.VnS(), -7, 3);
6550 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
6551 // predicate bits when handling larger lanes.
6552 __ Ptrue(p6.VnB(), SVE_ALL);
6553 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
6554
TatWai Chong6205eb42019-09-24 10:07:20 +01006555 __ Index(z7.VnD(), 32, -11);
6556 __ Ptrue(p7.VnD(), SVE_MUL4);
6557 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
Jacob Bramleye668b202019-08-14 17:57:34 +01006558
TatWai Chong6205eb42019-09-24 10:07:20 +01006559 // Corresponding loads.
6560 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
6561 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
6562 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
6563 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
6564 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
6565 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
6566
6567 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
6568 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
6569 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
6570 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
6571
6572 // We can test ld1 by comparing the value loaded with the value stored. In
6573 // most cases, there are two complications:
6574 // - Loads have zeroing predication, so we have to clear the inactive
6575 // elements on our reference.
6576 // - We have to replicate any sign- or zero-extension.
6577
6578 // Ld1b(z8.VnB(), ...)
6579 __ Dup(z18.VnB(), 0);
6580 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
6581
6582 // Ld1b(z9.VnH(), ...)
6583 __ Dup(z19.VnH(), 0);
6584 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
6585
6586 // Ld1h(z10.VnS(), ...)
6587 __ Dup(z20.VnS(), 0);
6588 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
6589
6590 // Ld1b(z11.VnD(), ...)
6591 __ Dup(z21.VnD(), 0);
6592 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
6593
6594 // Ld1d(z12.VnD(), ...)
6595 __ Dup(z22.VnD(), 0);
6596 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
6597
6598 // Ld1w(z13.VnS(), ...)
6599 __ Dup(z23.VnS(), 0);
6600 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
6601
6602 // Ld1sb(z14.VnH(), ...)
6603 __ Dup(z24.VnH(), 0);
6604 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
6605
6606 // Ld1sh(z15.VnS(), ...)
6607 __ Dup(z25.VnS(), 0);
6608 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
6609
6610 // Ld1sb(z16.VnD(), ...)
6611 __ Dup(z26.VnD(), 0);
6612 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
6613
6614 // Ld1sw(z17.VnD(), ...)
6615 __ Dup(z27.VnD(), 0);
6616 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
Jacob Bramleye668b202019-08-14 17:57:34 +01006617
6618 END();
6619
6620 if (CAN_RUN()) {
6621 RUN();
6622
6623 uint8_t* expected = new uint8_t[data_size];
6624 memset(expected, 0, data_size);
6625 uint8_t* middle = &expected[data_size / 2];
6626
6627 int vl_b = vl / kBRegSizeInBytes;
6628 int vl_h = vl / kHRegSizeInBytes;
6629 int vl_s = vl / kSRegSizeInBytes;
6630 int vl_d = vl / kDRegSizeInBytes;
6631
6632 // Encodable cases.
6633
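    // As a reminder, the Ptrue patterns used above select: SVE_ALL - every
    // lane, SVE_POW2 - the largest power-of-two number of lanes, SVE_MUL3 and
    // SVE_MUL4 - the largest multiple of three or four, and SVE_VLn - exactly
    // n lanes (or none if fewer than n fit). The loop bounds below mirror
    // this.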
6634 // st1b { z1.b }, SVE_ALL
6635 for (int i = 0; i < vl_b; i++) {
6636 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
6637 }
6638
6639 // st1b { z2.h }, SVE_MUL3
6640 int vl_h_mul3 = vl_h - (vl_h % 3);
6641 for (int i = 0; i < vl_h_mul3; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006642 int64_t offset = 7 * static_cast<int>(vl / (kHRegSize / kBRegSize));
6643 MemoryWrite(middle, offset, i, static_cast<uint8_t>(-2 + (5 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01006644 }
6645
6646 // st1h { z3.s }, SVE_POW2
6647 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
6648 for (int i = 0; i < vl_s_pow2; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006649 int64_t offset = -8 * static_cast<int>(vl / (kSRegSize / kHRegSize));
6650 MemoryWrite(middle, offset, i, static_cast<uint16_t>(3 - (7 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01006651 }
6652
6653 // st1b { z4.d }, SVE_VL3
6654 if (vl_d >= 3) {
6655 for (int i = 0; i < 3; i++) {
6656 MemoryWrite(middle,
6657 (8 * vl) + 17,
6658 i,
6659 static_cast<uint8_t>(-4 + (11 * i)));
6660 }
6661 }
6662
6663 // st1d { z5.d }, SVE_VL16
6664 if (vl_d >= 16) {
6665 for (int i = 0; i < 16; i++) {
6666 MemoryWrite(middle,
6667 (10 * vl) + (6 * kDRegSizeInBytes),
6668 i,
6669 static_cast<uint64_t>(6 - (2 * i)));
6670 }
6671 }
6672
6673 // Unencodable cases.
6674
6675 // st1w { z6.s }, SVE_ALL
6676 for (int i = 0; i < vl_s; i++) {
6677 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
6678 }
6679
TatWai Chong6205eb42019-09-24 10:07:20 +01006680 // st1w { z7.d }, SVE_MUL4
6681 int vl_d_mul4 = vl_d - (vl_d % 4);
6682 for (int i = 0; i < vl_d_mul4; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006683 int64_t offset = 22 * static_cast<int>(vl / (kDRegSize / kWRegSize));
6684 MemoryWrite(middle, offset, i, static_cast<uint32_t>(32 + (-11 * i)));
TatWai Chong6205eb42019-09-24 10:07:20 +01006685 }
6686
Jacob Bramley33c99f92019-10-08 15:24:12 +01006687 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramleye668b202019-08-14 17:57:34 +01006688
TatWai Chong6205eb42019-09-24 10:07:20 +01006689 // Check that we loaded back the expected values.
6690
6691 ASSERT_EQUAL_SVE(z18, z8);
6692 ASSERT_EQUAL_SVE(z19, z9);
6693 ASSERT_EQUAL_SVE(z20, z10);
6694 ASSERT_EQUAL_SVE(z21, z11);
6695 ASSERT_EQUAL_SVE(z22, z12);
6696 ASSERT_EQUAL_SVE(z23, z13);
6697 ASSERT_EQUAL_SVE(z24, z14);
6698 ASSERT_EQUAL_SVE(z25, z15);
6699 ASSERT_EQUAL_SVE(z26, z16);
6700 ASSERT_EQUAL_SVE(z27, z17);
6701
Jacob Bramleye668b202019-08-14 17:57:34 +01006702 delete[] expected;
6703 }
6704 delete[] data;
6705}
6706
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006707TEST_SVE(sve_ld2_st2_scalar_plus_imm) {
6708 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6709 START();
6710
6711 int vl = config->sve_vl_in_bytes();
6712
6713 // The immediate can address [-16, 14] times the VL, so allocate enough space
6714 // to exceed that in both directions.
6715 int data_size = vl * 128;
6716
6717 uint8_t* data = new uint8_t[data_size];
6718 memset(data, 0, data_size);
6719
 6720 // Set the base half-way through the buffer so we can use negative indices.
6721 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6722
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006723 __ Index(z14.VnB(), 1, -3);
6724 __ Index(z15.VnB(), 2, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006725 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006726 __ St2b(z14.VnB(), z15.VnB(), p0, SVEMemOperand(x0));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006727
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006728 __ Index(z16.VnH(), -2, 5);
6729 __ Index(z17.VnH(), -3, 5);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006730 __ Ptrue(p1.VnH(), SVE_MUL3);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006731 __ St2h(z16.VnH(), z17.VnH(), p1, SVEMemOperand(x0, 8, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006732
6733 // Wrap around from z31 to z0.
6734 __ Index(z31.VnS(), 3, -7);
6735 __ Index(z0.VnS(), 4, -7);
6736 __ Ptrue(p2.VnS(), SVE_POW2);
6737 __ St2w(z31.VnS(), z0.VnS(), p2, SVEMemOperand(x0, -12, SVE_MUL_VL));
6738
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006739 __ Index(z18.VnD(), -7, 3);
6740 __ Index(z19.VnD(), -8, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006741 // Sparse predication, including some irrelevant bits (0xe). To make the
6742 // results easy to check, activate each lane <n> where n is a multiple of 5.
6743 Initialise(&masm,
6744 p3,
6745 0xeee10000000001ee,
6746 0xeeeeeee100000000,
6747 0x01eeeeeeeee10000,
6748 0x000001eeeeeeeee1);
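  // For D lanes, each element owns eight predicate bits but only the lowest
  // one is significant, so the 0xe filler nibbles above are ignored; the low
  // `1` nibbles fall on every fifth D lane, which is what the expected-value
  // loop below relies on when it checks `(i % 5) == 0`.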
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006749 __ St2d(z18.VnD(), z19.VnD(), p3, SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006750
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006751 // We can test ld2 by comparing the values loaded with the values stored.
6752 // There are two complications:
6753 // - Loads have zeroing predication, so we have to clear the inactive
6754 // elements on our reference.
6755 // - We want to test both loads and stores that span { z31, z0 }, so we have
6756 // to move some values around.
6757 //
6758 // Registers z4-z11 will hold as-stored values (with inactive elements
6759 // cleared). Registers z20-z27 will hold the values that were loaded.
6760
6761 // Ld2b(z14.VnB(), z15.VnB(), ...)
6762 __ Dup(z4.VnB(), 0);
6763 __ Dup(z5.VnB(), 0);
6764 __ Mov(z4.VnB(), p0.Merging(), z14.VnB());
6765 __ Mov(z5.VnB(), p0.Merging(), z15.VnB());
6766
6767 // Ld2h(z16.VnH(), z17.VnH(), ...)
6768 __ Dup(z6.VnH(), 0);
6769 __ Dup(z7.VnH(), 0);
6770 __ Mov(z6.VnH(), p1.Merging(), z16.VnH());
6771 __ Mov(z7.VnH(), p1.Merging(), z17.VnH());
6772
6773 // Ld2w(z31.VnS(), z0.VnS(), ...)
6774 __ Dup(z8.VnS(), 0);
6775 __ Dup(z9.VnS(), 0);
6776 __ Mov(z8.VnS(), p2.Merging(), z31.VnS());
6777 __ Mov(z9.VnS(), p2.Merging(), z0.VnS());
6778
6779 // Ld2d(z18.VnD(), z19.VnD(), ...)
6780 __ Dup(z10.VnD(), 0);
6781 __ Dup(z11.VnD(), 0);
6782 __ Mov(z10.VnD(), p3.Merging(), z18.VnD());
6783 __ Mov(z11.VnD(), p3.Merging(), z19.VnD());
6784
6785 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
6786 __ Ld2b(z31.VnB(), z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));
6787 __ Mov(z20, z31);
6788 __ Mov(z21, z0);
6789
6790 __ Ld2h(z22.VnH(), z23.VnH(), p1.Zeroing(), SVEMemOperand(x0, 8, SVE_MUL_VL));
6791 __ Ld2w(z24.VnS(),
6792 z25.VnS(),
6793 p2.Zeroing(),
6794 SVEMemOperand(x0, -12, SVE_MUL_VL));
6795 __ Ld2d(z26.VnD(),
6796 z27.VnD(),
6797 p3.Zeroing(),
6798 SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006799
6800 END();
6801
6802 if (CAN_RUN()) {
6803 RUN();
6804
6805 uint8_t* expected = new uint8_t[data_size];
6806 memset(expected, 0, data_size);
6807 uint8_t* middle = &expected[data_size / 2];
6808
6809 int vl_b = vl / kBRegSizeInBytes;
6810 int vl_h = vl / kHRegSizeInBytes;
6811 int vl_s = vl / kSRegSizeInBytes;
6812 int vl_d = vl / kDRegSizeInBytes;
6813
6814 int reg_count = 2;
6815
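    // St2 interleaves its operands in memory: active element i of the first
    // register lands at element index (2 * i) and the second at (2 * i) + 1,
    // which is what the `(i * reg_count) + n` indices below express. The
    // st3/ld3 tests later use the same layout with three registers.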
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006816 // st2b { z14.b, z15.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006817 for (int i = 0; i < vl_b; i++) {
6818 uint8_t lane0 = 1 - (3 * i);
6819 uint8_t lane1 = 2 - (3 * i);
6820 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
6821 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
6822 }
6823
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006824 // st2h { z16.h, z17.h }, SVE_MUL3
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006825 int vl_h_mul3 = vl_h - (vl_h % 3);
6826 for (int i = 0; i < vl_h_mul3; i++) {
6827 int64_t offset = 8 * vl;
6828 uint16_t lane0 = -2 + (5 * i);
6829 uint16_t lane1 = -3 + (5 * i);
6830 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6831 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6832 }
6833
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006834 // st2w { z31.s, z0.s }, SVE_POW2
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006835 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
6836 for (int i = 0; i < vl_s_pow2; i++) {
6837 int64_t offset = -12 * vl;
6838 uint32_t lane0 = 3 - (7 * i);
6839 uint32_t lane1 = 4 - (7 * i);
6840 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6841 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6842 }
6843
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006844 // st2d { z18.d, z19.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006845 for (int i = 0; i < vl_d; i++) {
6846 if ((i % 5) == 0) {
6847 int64_t offset = 14 * vl;
6848 uint64_t lane0 = -7 + (3 * i);
6849 uint64_t lane1 = -8 + (3 * i);
6850 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6851 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6852 }
6853 }
6854
6855 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
6856
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006857 // Check that we loaded back the expected values.
6858
6859 // st2b/ld2b
6860 ASSERT_EQUAL_SVE(z4, z20);
6861 ASSERT_EQUAL_SVE(z5, z21);
6862
6863 // st2h/ld2h
6864 ASSERT_EQUAL_SVE(z6, z22);
6865 ASSERT_EQUAL_SVE(z7, z23);
6866
6867 // st2w/ld2w
6868 ASSERT_EQUAL_SVE(z8, z24);
6869 ASSERT_EQUAL_SVE(z9, z25);
6870
6871 // st2d/ld2d
6872 ASSERT_EQUAL_SVE(z10, z26);
6873 ASSERT_EQUAL_SVE(z11, z27);
6874
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006875 delete[] expected;
6876 }
6877 delete[] data;
6878}
6879
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006880TEST_SVE(sve_ld2_st2_scalar_plus_scalar) {
6881 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6882 START();
6883
6884 int vl = config->sve_vl_in_bytes();
6885
6886 // Allocate plenty of space to enable indexing in both directions.
6887 int data_size = vl * 128;
6888
6889 uint8_t* data = new uint8_t[data_size];
6890 memset(data, 0, data_size);
6891
 6892 // Set the base half-way through the buffer so we can use negative indices.
6893 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6894
Jacob Bramleye483ce52019-11-05 16:52:29 +00006895 __ Index(z10.VnB(), -4, 11);
6896 __ Index(z11.VnB(), -5, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006897 __ Ptrue(p7.VnB(), SVE_MUL4);
6898 __ Mov(x1, 0);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006899 __ St2b(z10.VnB(), z11.VnB(), p7, SVEMemOperand(x0, x1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006900
Jacob Bramleye483ce52019-11-05 16:52:29 +00006901 __ Index(z12.VnH(), 6, -2);
6902 __ Index(z13.VnH(), 7, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006903 __ Ptrue(p6.VnH(), SVE_VL16);
6904 __ Rdvl(x2, 3); // Make offsets VL-dependent so we can avoid overlap.
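  // Rdvl(x2, 3) sets x2 to 3 * VL in bytes; with the `LSL 1` below, the
  // effective byte offset is 6 * VL, matching the expected values later.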
Jacob Bramleye483ce52019-11-05 16:52:29 +00006905 __ St2h(z12.VnH(), z13.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006906
Jacob Bramleye483ce52019-11-05 16:52:29 +00006907 __ Index(z14.VnS(), -7, 3);
6908 __ Index(z15.VnS(), -8, 3);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006909 // Sparse predication, including some irrelevant bits (0xe). To make the
6910 // results easy to check, activate each lane <n> where n is a multiple of 5.
6911 Initialise(&masm,
6912 p5,
6913 0xeee1000010000100,
6914 0x001eeee100001000,
6915 0x0100001eeee10000,
6916 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006917 __ Rdvl(x3, -3);
6918 __ St2w(z14.VnS(), z15.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006919
6920 // Wrap around from z31 to z0.
6921 __ Index(z31.VnD(), 32, -11);
6922 __ Index(z0.VnD(), 33, -11);
6923 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006924 __ Rdvl(x4, 1);
6925 __ St2d(z31.VnD(), z0.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006926
Jacob Bramleye483ce52019-11-05 16:52:29 +00006927 // We can test ld2 by comparing the values loaded with the values stored.
6928 // There are two complications:
6929 // - Loads have zeroing predication, so we have to clear the inactive
6930 // elements on our reference.
6931 // - We want to test both loads and stores that span { z31, z0 }, so we have
6932 // to move some values around.
6933 //
6934 // Registers z4-z11 will hold as-stored values (with inactive elements
6935 // cleared). Registers z20-z27 will hold the values that were loaded.
6936
6937 // Ld2b(z20.VnB(), z21.VnB(), ...)
6938 __ Dup(z4.VnB(), 0);
6939 __ Dup(z5.VnB(), 0);
6940 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
6941 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
6942
6943 // Ld2h(z22.VnH(), z23.VnH(), ...)
6944 __ Dup(z6.VnH(), 0);
6945 __ Dup(z7.VnH(), 0);
6946 __ Mov(z6.VnH(), p6.Merging(), z12.VnH());
6947 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
6948
6949 // Ld2w(z24.VnS(), z25.VnS(), ...)
6950 __ Dup(z8.VnS(), 0);
6951 __ Dup(z9.VnS(), 0);
6952 __ Mov(z8.VnS(), p5.Merging(), z14.VnS());
6953 __ Mov(z9.VnS(), p5.Merging(), z15.VnS());
6954
6955 // Ld2d(z31.VnD(), z0.VnD(), ...)
6956 __ Dup(z10.VnD(), 0);
6957 __ Dup(z11.VnD(), 0);
6958 __ Mov(z10.VnD(), p4.Merging(), z31.VnD());
6959 __ Mov(z11.VnD(), p4.Merging(), z0.VnD());
6960
6961 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
6962 __ Ld2b(z31.VnB(), z0.VnB(), p7.Zeroing(), SVEMemOperand(x0, x1));
6963 __ Mov(z20, z31);
6964 __ Mov(z21, z0);
6965
6966 __ Ld2h(z22.VnH(), z23.VnH(), p6.Zeroing(), SVEMemOperand(x0, x2, LSL, 1));
6967 __ Ld2w(z24.VnS(), z25.VnS(), p5.Zeroing(), SVEMemOperand(x0, x3, LSL, 2));
6968 __ Ld2d(z26.VnD(), z27.VnD(), p4.Zeroing(), SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006969
6970 END();
6971
6972 if (CAN_RUN()) {
6973 RUN();
6974
6975 uint8_t* expected = new uint8_t[data_size];
6976 memset(expected, 0, data_size);
6977 uint8_t* middle = &expected[data_size / 2];
6978
6979 int vl_b = vl / kBRegSizeInBytes;
6980 int vl_h = vl / kHRegSizeInBytes;
6981 int vl_s = vl / kSRegSizeInBytes;
6982 int vl_d = vl / kDRegSizeInBytes;
6983
6984 int reg_count = 2;
6985
Jacob Bramleye483ce52019-11-05 16:52:29 +00006986 // st2b { z10.b, z11.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006987 int vl_b_mul4 = vl_b - (vl_b % 4);
6988 for (int i = 0; i < vl_b_mul4; i++) {
6989 uint8_t lane0 = -4 + (11 * i);
6990 uint8_t lane1 = -5 + (11 * i);
6991 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
6992 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
6993 }
6994
Jacob Bramleye483ce52019-11-05 16:52:29 +00006995 // st2h { z12.h, z13.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006996 if (vl_h >= 16) {
6997 for (int i = 0; i < 16; i++) {
6998 int64_t offset = (3 << kHRegSizeInBytesLog2) * vl;
6999 uint16_t lane0 = 6 - (2 * i);
7000 uint16_t lane1 = 7 - (2 * i);
7001 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7002 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7003 }
7004 }
7005
Jacob Bramleye483ce52019-11-05 16:52:29 +00007006 // st2w { z14.s, z15.s }, ((i % 5) == 0)
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007007 for (int i = 0; i < vl_s; i++) {
7008 if ((i % 5) == 0) {
7009 int64_t offset = -(3 << kSRegSizeInBytesLog2) * vl;
7010 uint32_t lane0 = -7 + (3 * i);
7011 uint32_t lane1 = -8 + (3 * i);
7012 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7013 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7014 }
7015 }
7016
 7017 // st2d { z31.d, z0.d }, SVE_MUL3
7018 int vl_d_mul3 = vl_d - (vl_d % 3);
7019 for (int i = 0; i < vl_d_mul3; i++) {
7020 int64_t offset = (1 << kDRegSizeInBytesLog2) * vl;
7021 uint64_t lane0 = 32 - (11 * i);
7022 uint64_t lane1 = 33 - (11 * i);
7023 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7024 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7025 }
7026
7027 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7028
Jacob Bramleye483ce52019-11-05 16:52:29 +00007029 // Check that we loaded back the expected values.
7030
7031 // st2b/ld2b
7032 ASSERT_EQUAL_SVE(z4, z20);
7033 ASSERT_EQUAL_SVE(z5, z21);
7034
7035 // st2h/ld2h
7036 ASSERT_EQUAL_SVE(z6, z22);
7037 ASSERT_EQUAL_SVE(z7, z23);
7038
7039 // st2w/ld2w
7040 ASSERT_EQUAL_SVE(z8, z24);
7041 ASSERT_EQUAL_SVE(z9, z25);
7042
7043 // st2d/ld2d
7044 ASSERT_EQUAL_SVE(z10, z26);
7045 ASSERT_EQUAL_SVE(z11, z27);
7046
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007047 delete[] expected;
7048 }
7049 delete[] data;
7050}
7051
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007052TEST_SVE(sve_ld3_st3_scalar_plus_imm) {
7053 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7054 START();
7055
7056 int vl = config->sve_vl_in_bytes();
7057
7058 // The immediate can address [-24, 21] times the VL, so allocate enough space
7059 // to exceed that in both directions.
7060 int data_size = vl * 128;
7061
7062 uint8_t* data = new uint8_t[data_size];
7063 memset(data, 0, data_size);
7064
 7065 // Set the base half-way through the buffer so we can use negative indices.
7066 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7067
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007068 // We can test ld3 by comparing the values loaded with the values stored.
7069 // There are two complications:
7070 // - Loads have zeroing predication, so we have to clear the inactive
7071 // elements on our reference.
7072 // - We want to test both loads and stores that span { z31, z0 }, so we have
7073 // to move some values around.
7074 //
7075 // Registers z4-z15 will hold as-stored values (with inactive elements
7076 // cleared). Registers z16-z27 will hold the values that were loaded.
7077
7078 __ Index(z10.VnB(), 1, -3);
7079 __ Index(z11.VnB(), 2, -3);
7080 __ Index(z12.VnB(), 3, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007081 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007082 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p0, SVEMemOperand(x0));
7083 // Save the stored values for ld3 tests.
7084 __ Dup(z4.VnB(), 0);
7085 __ Dup(z5.VnB(), 0);
7086 __ Dup(z6.VnB(), 0);
7087 __ Mov(z4.VnB(), p0.Merging(), z10.VnB());
7088 __ Mov(z5.VnB(), p0.Merging(), z11.VnB());
7089 __ Mov(z6.VnB(), p0.Merging(), z12.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007090
7091 // Wrap around from z31 to z0.
7092 __ Index(z31.VnH(), -2, 5);
7093 __ Index(z0.VnH(), -3, 5);
7094 __ Index(z1.VnH(), -4, 5);
7095 __ Ptrue(p1.VnH(), SVE_MUL3);
7096 __ St3h(z31.VnH(), z0.VnH(), z1.VnH(), p1, SVEMemOperand(x0, 9, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007097 // Save the stored values for ld3 tests.
7098 __ Dup(z7.VnH(), 0);
7099 __ Dup(z8.VnH(), 0);
7100 __ Dup(z9.VnH(), 0);
7101 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
7102 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
7103 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007104
7105 __ Index(z30.VnS(), 3, -7);
7106 __ Index(z31.VnS(), 4, -7);
7107 __ Index(z0.VnS(), 5, -7);
7108 __ Ptrue(p2.VnS(), SVE_POW2);
7109 __ St3w(z30.VnS(),
7110 z31.VnS(),
7111 z0.VnS(),
7112 p2,
7113 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007114 // Save the stored values for ld3 tests.
7115 __ Dup(z10.VnS(), 0);
7116 __ Dup(z11.VnS(), 0);
7117 __ Dup(z12.VnS(), 0);
7118 __ Mov(z10.VnS(), p2.Merging(), z30.VnS());
7119 __ Mov(z11.VnS(), p2.Merging(), z31.VnS());
7120 __ Mov(z12.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007121
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007122 __ Index(z0.VnD(), -7, 3);
7123 __ Index(z1.VnD(), -8, 3);
7124 __ Index(z2.VnD(), -9, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007125 // Sparse predication, including some irrelevant bits (0xee). To make the
7126 // results easy to check, activate each lane <n> where n is a multiple of 5.
7127 Initialise(&masm,
7128 p3,
7129 0xeee10000000001ee,
7130 0xeeeeeee100000000,
7131 0x01eeeeeeeee10000,
7132 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007133 __ St3d(z0.VnD(), z1.VnD(), z2.VnD(), p3, SVEMemOperand(x0, 15, SVE_MUL_VL));
7134 // Save the stored values for ld3 tests.
7135 __ Dup(z13.VnD(), 0);
7136 __ Dup(z14.VnD(), 0);
7137 __ Dup(z15.VnD(), 0);
7138 __ Mov(z13.VnD(), p3.Merging(), z0.VnD());
7139 __ Mov(z14.VnD(), p3.Merging(), z1.VnD());
7140 __ Mov(z15.VnD(), p3.Merging(), z2.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007141
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007142 // Corresponding loads.
7143 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7144 __ Ld3b(z31.VnB(), z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(x0));
7145 __ Mov(z16, z31);
7146 __ Mov(z17, z0);
7147 __ Mov(z18, z1);
7148 __ Ld3h(z30.VnH(),
7149 z31.VnH(),
7150 z0.VnH(),
7151 p1.Zeroing(),
7152 SVEMemOperand(x0, 9, SVE_MUL_VL));
7153 __ Mov(z19, z30);
7154 __ Mov(z20, z31);
7155 __ Mov(z21, z0);
7156 __ Ld3w(z22.VnS(),
7157 z23.VnS(),
7158 z24.VnS(),
7159 p2.Zeroing(),
7160 SVEMemOperand(x0, -12, SVE_MUL_VL));
7161 __ Ld3d(z25.VnD(),
7162 z26.VnD(),
7163 z27.VnD(),
7164 p3.Zeroing(),
7165 SVEMemOperand(x0, 15, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007166
7167 END();
7168
7169 if (CAN_RUN()) {
7170 RUN();
7171
7172 uint8_t* expected = new uint8_t[data_size];
7173 memset(expected, 0, data_size);
7174 uint8_t* middle = &expected[data_size / 2];
7175
7176 int vl_b = vl / kBRegSizeInBytes;
7177 int vl_h = vl / kHRegSizeInBytes;
7178 int vl_s = vl / kSRegSizeInBytes;
7179 int vl_d = vl / kDRegSizeInBytes;
7180
7181 int reg_count = 3;
7182
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007183 // st3b { z10.b, z11.b, z12.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007184 for (int i = 0; i < vl_b; i++) {
7185 uint8_t lane0 = 1 - (3 * i);
7186 uint8_t lane1 = 2 - (3 * i);
7187 uint8_t lane2 = 3 - (3 * i);
7188 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7189 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7190 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
7191 }
7192
7193 // st3h { z31.h, z0.h, z1.h }, SVE_MUL3
7194 int vl_h_mul3 = vl_h - (vl_h % 3);
7195 for (int i = 0; i < vl_h_mul3; i++) {
7196 int64_t offset = 9 * vl;
7197 uint16_t lane0 = -2 + (5 * i);
7198 uint16_t lane1 = -3 + (5 * i);
7199 uint16_t lane2 = -4 + (5 * i);
7200 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7201 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7202 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7203 }
7204
7205 // st3w { z30.s, z31.s, z0.s }, SVE_POW2
7206 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7207 for (int i = 0; i < vl_s_pow2; i++) {
7208 int64_t offset = -12 * vl;
7209 uint32_t lane0 = 3 - (7 * i);
7210 uint32_t lane1 = 4 - (7 * i);
7211 uint32_t lane2 = 5 - (7 * i);
7212 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7213 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7214 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7215 }
7216
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007217 // st3d { z0.d, z1.d, z2.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007218 for (int i = 0; i < vl_d; i++) {
7219 if ((i % 5) == 0) {
7220 int64_t offset = 15 * vl;
7221 uint64_t lane0 = -7 + (3 * i);
7222 uint64_t lane1 = -8 + (3 * i);
7223 uint64_t lane2 = -9 + (3 * i);
7224 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7225 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7226 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7227 }
7228 }
7229
7230 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7231
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007232 // Check that we loaded back the expected values.
7233
7234 // st3b/ld3b
7235 ASSERT_EQUAL_SVE(z4, z16);
7236 ASSERT_EQUAL_SVE(z5, z17);
7237 ASSERT_EQUAL_SVE(z6, z18);
7238
7239 // st3h/ld3h
7240 ASSERT_EQUAL_SVE(z7, z19);
7241 ASSERT_EQUAL_SVE(z8, z20);
7242 ASSERT_EQUAL_SVE(z9, z21);
7243
7244 // st3w/ld3w
7245 ASSERT_EQUAL_SVE(z10, z22);
7246 ASSERT_EQUAL_SVE(z11, z23);
7247 ASSERT_EQUAL_SVE(z12, z24);
7248
7249 // st3d/ld3d
7250 ASSERT_EQUAL_SVE(z13, z25);
7251 ASSERT_EQUAL_SVE(z14, z26);
7252 ASSERT_EQUAL_SVE(z15, z27);
7253
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007254 delete[] expected;
7255 }
7256 delete[] data;
7257}
7258
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007259TEST_SVE(sve_ld3_st3_scalar_plus_scalar) {
7260 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7261 START();
7262
7263 int vl = config->sve_vl_in_bytes();
7264
7265 // Allocate plenty of space to enable indexing in both directions.
7266 int data_size = vl * 128;
7267
7268 uint8_t* data = new uint8_t[data_size];
7269 memset(data, 0, data_size);
7270
 7271 // Set the base half-way through the buffer so we can use negative indices.
7272 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7273
Jacob Bramleye483ce52019-11-05 16:52:29 +00007274 // We can test ld3 by comparing the values loaded with the values stored.
7275 // There are two complications:
7276 // - Loads have zeroing predication, so we have to clear the inactive
7277 // elements on our reference.
7278 // - We want to test both loads and stores that span { z31, z0 }, so we have
7279 // to move some values around.
7280 //
7281 // Registers z4-z15 will hold as-stored values (with inactive elements
7282 // cleared). Registers z16-z27 will hold the values that were loaded.
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007283
Jacob Bramleye483ce52019-11-05 16:52:29 +00007284 __ Index(z10.VnB(), -4, 11);
7285 __ Index(z11.VnB(), -5, 11);
7286 __ Index(z12.VnB(), -6, 11);
7287 __ Ptrue(p7.VnB(), SVE_MUL4);
7288 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
7289 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p7, SVEMemOperand(x0, x1, LSL, 0));
7290 // Save the stored values for ld3 tests.
7291 __ Dup(z4.VnB(), 0);
7292 __ Dup(z5.VnB(), 0);
7293 __ Dup(z6.VnB(), 0);
7294 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
7295 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
7296 __ Mov(z6.VnB(), p7.Merging(), z12.VnB());
7297
7298 __ Index(z13.VnH(), 6, -2);
7299 __ Index(z14.VnH(), 7, -2);
7300 __ Index(z15.VnH(), 8, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007301 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007302 __ Rdvl(x2, 5); // (5 * vl) << 1 = 10 * vl
7303 __ St3h(z13.VnH(), z14.VnH(), z15.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
7304 // Save the stored values for ld3 tests.
7305 __ Dup(z7.VnH(), 0);
7306 __ Dup(z8.VnH(), 0);
7307 __ Dup(z9.VnH(), 0);
7308 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
7309 __ Mov(z8.VnH(), p6.Merging(), z14.VnH());
7310 __ Mov(z9.VnH(), p6.Merging(), z15.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007311
7312 // Wrap around from z31 to z0.
7313 __ Index(z30.VnS(), -7, 3);
7314 __ Index(z31.VnS(), -8, 3);
7315 __ Index(z0.VnS(), -9, 3);
7316 // Sparse predication, including some irrelevant bits (0xe). To make the
7317 // results easy to check, activate each lane <n> where n is a multiple of 5.
7318 Initialise(&masm,
7319 p5,
7320 0xeee1000010000100,
7321 0x001eeee100001000,
7322 0x0100001eeee10000,
7323 0x10000100001eeee1);
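  // Predicate registers hold one bit per byte of the vector, so each .S element
  // is governed by the lowest bit of a four-bit group; the other three bits are
  // the "irrelevant" ones mentioned above. A '1' nibble every 20 bits therefore
  // activates lanes 0, 5, 10, ...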
Jacob Bramleye483ce52019-11-05 16:52:29 +00007324 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
7325 __ St3w(z30.VnS(), z31.VnS(), z0.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
7326 // Save the stored values for ld3 tests.
7327 __ Dup(z10.VnS(), 0);
7328 __ Dup(z11.VnS(), 0);
7329 __ Dup(z12.VnS(), 0);
7330 __ Mov(z10.VnS(), p5.Merging(), z30.VnS());
7331 __ Mov(z11.VnS(), p5.Merging(), z31.VnS());
7332 __ Mov(z12.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007333
7334 __ Index(z31.VnD(), 32, -11);
7335 __ Index(z0.VnD(), 33, -11);
7336 __ Index(z1.VnD(), 34, -11);
7337 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007338 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
7339 __ St3d(z31.VnD(), z0.VnD(), z1.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
7340 // Save the stored values for ld3 tests.
7341 __ Dup(z13.VnD(), 0);
7342 __ Dup(z14.VnD(), 0);
7343 __ Dup(z15.VnD(), 0);
7344 __ Mov(z13.VnD(), p4.Merging(), z31.VnD());
7345 __ Mov(z14.VnD(), p4.Merging(), z0.VnD());
7346 __ Mov(z15.VnD(), p4.Merging(), z1.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007347
Jacob Bramleye483ce52019-11-05 16:52:29 +00007348 // Corresponding loads.
7349 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7350 __ Ld3b(z31.VnB(),
7351 z0.VnB(),
7352 z1.VnB(),
7353 p7.Zeroing(),
7354 SVEMemOperand(x0, x1, LSL, 0));
7355 __ Mov(z16, z31);
7356 __ Mov(z17, z0);
7357 __ Mov(z18, z1);
7358 __ Ld3h(z30.VnH(),
7359 z31.VnH(),
7360 z0.VnH(),
7361 p6.Zeroing(),
7362 SVEMemOperand(x0, x2, LSL, 1));
7363 __ Mov(z19, z30);
7364 __ Mov(z20, z31);
7365 __ Mov(z21, z0);
7366 __ Ld3w(z22.VnS(),
7367 z23.VnS(),
7368 z24.VnS(),
7369 p5.Zeroing(),
7370 SVEMemOperand(x0, x3, LSL, 2));
7371 __ Ld3d(z25.VnD(),
7372 z26.VnD(),
7373 z27.VnD(),
7374 p4.Zeroing(),
7375 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007376
7377 END();
7378
7379 if (CAN_RUN()) {
7380 RUN();
7381
7382 uint8_t* expected = new uint8_t[data_size];
7383 memset(expected, 0, data_size);
7384 uint8_t* middle = &expected[data_size / 2];
7385
7386 int vl_b = vl / kBRegSizeInBytes;
7387 int vl_h = vl / kHRegSizeInBytes;
7388 int vl_s = vl / kSRegSizeInBytes;
7389 int vl_d = vl / kDRegSizeInBytes;
7390
7391 int reg_count = 3;
7392
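  // Build the reference image one lane at a time. MemoryWrite (defined earlier
  // in this file) is assumed to store `value` at
  // `middle + offset + (index * sizeof(value))`, which matches the interleaved
  // st3 layout: element i of register r lands at index (i * reg_count) + r.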
Jacob Bramleye483ce52019-11-05 16:52:29 +00007393 // st3b { z10.b, z11.b, z12.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007394 int vl_b_mul4 = vl_b - (vl_b % 4);
7395 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007396 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007397 uint8_t lane0 = -4 + (11 * i);
7398 uint8_t lane1 = -5 + (11 * i);
7399 uint8_t lane2 = -6 + (11 * i);
7400 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7401 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7402 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7403 }
7404
Jacob Bramleye483ce52019-11-05 16:52:29 +00007405 // st3h { z13.h, z14.h, z15.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007406 if (vl_h >= 16) {
7407 for (int i = 0; i < 16; i++) {
7408 int64_t offset = (5 << kHRegSizeInBytesLog2) * vl;
7409 uint16_t lane0 = 6 - (2 * i);
7410 uint16_t lane1 = 7 - (2 * i);
7411 uint16_t lane2 = 8 - (2 * i);
7412 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7413 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7414 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7415 }
7416 }
7417
7418 // st3w { z30.s, z31.s, z0.s }, ((i % 5) == 0)
7419 for (int i = 0; i < vl_s; i++) {
7420 if ((i % 5) == 0) {
7421 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
7422 uint32_t lane0 = -7 + (3 * i);
7423 uint32_t lane1 = -8 + (3 * i);
7424 uint32_t lane2 = -9 + (3 * i);
7425 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7426 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7427 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7428 }
7429 }
7430
7431 // st3d { z31.d, z0.d, z1.d }, SVE_MUL3
7432 int vl_d_mul3 = vl_d - (vl_d % 3);
7433 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007434 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007435 uint64_t lane0 = 32 - (11 * i);
7436 uint64_t lane1 = 33 - (11 * i);
7437 uint64_t lane2 = 34 - (11 * i);
7438 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7439 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7440 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7441 }
7442
7443 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7444
Jacob Bramleye483ce52019-11-05 16:52:29 +00007445 // Check that we loaded back the expected values.
7446
7447 // st3b/ld3b
7448 ASSERT_EQUAL_SVE(z4, z16);
7449 ASSERT_EQUAL_SVE(z5, z17);
7450 ASSERT_EQUAL_SVE(z6, z18);
7451
7452 // st3h/ld3h
7453 ASSERT_EQUAL_SVE(z7, z19);
7454 ASSERT_EQUAL_SVE(z8, z20);
7455 ASSERT_EQUAL_SVE(z9, z21);
7456
7457 // st3w/ld3w
7458 ASSERT_EQUAL_SVE(z10, z22);
7459 ASSERT_EQUAL_SVE(z11, z23);
7460 ASSERT_EQUAL_SVE(z12, z24);
7461
7462 // st3d/ld3d
7463 ASSERT_EQUAL_SVE(z13, z25);
7464 ASSERT_EQUAL_SVE(z14, z26);
7465 ASSERT_EQUAL_SVE(z15, z27);
7466
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007467 delete[] expected;
7468 }
7469 delete[] data;
7470}
7471
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007472TEST_SVE(sve_ld4_st4_scalar_plus_imm) {
7473 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7474 START();
7475
7476 int vl = config->sve_vl_in_bytes();
7477
7478 // The immediate can address [-32, 28] times the VL, so allocate enough space
7479 // to exceed that in both directions.
7480 int data_size = vl * 128;
7481
7482 uint8_t* data = new uint8_t[data_size];
7483 memset(data, 0, data_size);
7484
7485 // Set the base half-way through the buffer so we can use negative indices.
7486 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7487
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007488 // We can test ld4 by comparing the values loaded with the values stored.
7489 // There are two complications:
7490 // - Loads have zeroing predication, so we have to clear the inactive
7491 // elements on our reference.
7492 // - We want to test both loads and stores that span { z31, z0 }, so we have
7493 // to move some values around.
7494 //
7495 // Registers z3-z18 will hold as-stored values (with inactive elements
7496 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
7497 // loaded.
7498
7499 __ Index(z10.VnB(), 1, -7);
7500 __ Index(z11.VnB(), 2, -7);
7501 __ Index(z12.VnB(), 3, -7);
7502 __ Index(z13.VnB(), 4, -7);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007503 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007504 __ St4b(z10.VnB(), z11.VnB(), z12.VnB(), z13.VnB(), p0, SVEMemOperand(x0));
7505 // Save the stored values for ld4 tests.
7506 __ Dup(z3.VnB(), 0);
7507 __ Dup(z4.VnB(), 0);
7508 __ Dup(z5.VnB(), 0);
7509 __ Dup(z6.VnB(), 0);
7510 __ Mov(z3.VnB(), p0.Merging(), z10.VnB());
7511 __ Mov(z4.VnB(), p0.Merging(), z11.VnB());
7512 __ Mov(z5.VnB(), p0.Merging(), z12.VnB());
7513 __ Mov(z6.VnB(), p0.Merging(), z13.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007514
7515 // Wrap around from z31 to z0.
7516 __ Index(z31.VnH(), -2, 5);
7517 __ Index(z0.VnH(), -3, 5);
7518 __ Index(z1.VnH(), -4, 5);
7519 __ Index(z2.VnH(), -5, 5);
7520 __ Ptrue(p1.VnH(), SVE_MUL3);
7521 __ St4h(z31.VnH(),
7522 z0.VnH(),
7523 z1.VnH(),
7524 z2.VnH(),
7525 p1,
7526 SVEMemOperand(x0, 4, SVE_MUL_VL));
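  // With SVE_MUL_VL the effective address is x0 + (imm * VL). For the
  // four-register forms the immediate must be a multiple of four in [-32, 28],
  // so the offsets 4, -12 and 16 used in this test are all directly encodable.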
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007527 // Save the stored values for ld4 tests.
7528 __ Dup(z7.VnH(), 0);
7529 __ Dup(z8.VnH(), 0);
7530 __ Dup(z9.VnH(), 0);
7531 __ Dup(z10.VnH(), 0);
7532 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
7533 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
7534 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
7535 __ Mov(z10.VnH(), p1.Merging(), z2.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007536
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007537 // Wrap around from z31 to z0.
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007538 __ Index(z29.VnS(), 2, -7);
7539 __ Index(z30.VnS(), 3, -7);
7540 __ Index(z31.VnS(), 4, -7);
7541 __ Index(z0.VnS(), 5, -7);
7542 __ Ptrue(p2.VnS(), SVE_POW2);
7543 __ St4w(z29.VnS(),
7544 z30.VnS(),
7545 z31.VnS(),
7546 z0.VnS(),
7547 p2,
7548 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007549 // Save the stored values for ld4 tests.
7550 __ Dup(z11.VnS(), 0);
7551 __ Dup(z12.VnS(), 0);
7552 __ Dup(z13.VnS(), 0);
7553 __ Dup(z14.VnS(), 0);
7554 __ Mov(z11.VnS(), p2.Merging(), z29.VnS());
7555 __ Mov(z12.VnS(), p2.Merging(), z30.VnS());
7556 __ Mov(z13.VnS(), p2.Merging(), z31.VnS());
7557 __ Mov(z14.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007558
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007559 __ Index(z20.VnD(), -7, 8);
7560 __ Index(z21.VnD(), -8, 8);
7561 __ Index(z22.VnD(), -9, 8);
7562 __ Index(z23.VnD(), -10, 8);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007563 // Sparse predication, including some irrelevant bits (0xee). To make the
7564 // results easy to check, activate each lane <n> where n is a multiple of 5.
7565 Initialise(&masm,
7566 p3,
7567 0xeee10000000001ee,
7568 0xeeeeeee100000000,
7569 0x01eeeeeeeee10000,
7570 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007571 __ St4d(z20.VnD(),
7572 z21.VnD(),
7573 z22.VnD(),
7574 z23.VnD(),
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007575 p3,
7576 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007577 // Save the stored values for ld4 tests.
7578 __ Dup(z15.VnD(), 0);
7579 __ Dup(z16.VnD(), 0);
7580 __ Dup(z17.VnD(), 0);
7581 __ Dup(z18.VnD(), 0);
7582 __ Mov(z15.VnD(), p3.Merging(), z20.VnD());
7583 __ Mov(z16.VnD(), p3.Merging(), z21.VnD());
7584 __ Mov(z17.VnD(), p3.Merging(), z22.VnD());
7585 __ Mov(z18.VnD(), p3.Merging(), z23.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007586
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007587 // Corresponding loads.
7588 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7589 __ Ld4b(z31.VnB(),
7590 z0.VnB(),
7591 z1.VnB(),
7592 z2.VnB(),
7593 p0.Zeroing(),
7594 SVEMemOperand(x0));
7595 __ Mov(z19, z31);
7596 __ Mov(z20, z0);
7597 __ Mov(z21, z1);
7598 __ Mov(z22, z2);
7599 __ Ld4h(z23.VnH(),
7600 z24.VnH(),
7601 z25.VnH(),
7602 z26.VnH(),
7603 p1.Zeroing(),
7604 SVEMemOperand(x0, 4, SVE_MUL_VL));
7605 __ Ld4w(z27.VnS(),
7606 z28.VnS(),
7607 z29.VnS(),
7608 z30.VnS(),
7609 p2.Zeroing(),
7610 SVEMemOperand(x0, -12, SVE_MUL_VL));
7611 // Wrap around from z31 to z0.
7612 __ Ld4d(z31.VnD(),
7613 z0.VnD(),
7614 z1.VnD(),
7615 z2.VnD(),
7616 p3.Zeroing(),
7617 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007618
7619 END();
7620
7621 if (CAN_RUN()) {
7622 RUN();
7623
7624 uint8_t* expected = new uint8_t[data_size];
7625 memset(expected, 0, data_size);
7626 uint8_t* middle = &expected[data_size / 2];
7627
7628 int vl_b = vl / kBRegSizeInBytes;
7629 int vl_h = vl / kHRegSizeInBytes;
7630 int vl_s = vl / kSRegSizeInBytes;
7631 int vl_d = vl / kDRegSizeInBytes;
7632
7633 int reg_count = 4;
7634
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007635 // st4b { z10.b, z11.b, z12.b, z13.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007636 for (int i = 0; i < vl_b; i++) {
7637 uint8_t lane0 = 1 - (7 * i);
7638 uint8_t lane1 = 2 - (7 * i);
7639 uint8_t lane2 = 3 - (7 * i);
7640 uint8_t lane3 = 4 - (7 * i);
7641 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7642 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7643 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
7644 MemoryWrite(middle, 0, (i * reg_count) + 3, lane3);
7645 }
7646
7647 // st4h { z31.h, z0.h, z1.h, z2.h }, SVE_MUL3
7648 int vl_h_mul3 = vl_h - (vl_h % 3);
7649 for (int i = 0; i < vl_h_mul3; i++) {
7650 int64_t offset = 4 * vl;
7651 uint16_t lane0 = -2 + (5 * i);
7652 uint16_t lane1 = -3 + (5 * i);
7653 uint16_t lane2 = -4 + (5 * i);
7654 uint16_t lane3 = -5 + (5 * i);
7655 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7656 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7657 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7658 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7659 }
7660
7661 // st4w { z29.s, z30.s, z31.s, z0.s }, SVE_POW2
7662 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7663 for (int i = 0; i < vl_s_pow2; i++) {
7664 int64_t offset = -12 * vl;
7665 uint32_t lane0 = 2 - (7 * i);
7666 uint32_t lane1 = 3 - (7 * i);
7667 uint32_t lane2 = 4 - (7 * i);
7668 uint32_t lane3 = 5 - (7 * i);
7669 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7670 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7671 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7672 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7673 }
7674
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007675 // st4d { z20.d, z21.d, z22.d, z23.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007676 for (int i = 0; i < vl_d; i++) {
7677 if ((i % 5) == 0) {
7678 int64_t offset = 16 * vl;
7679 uint64_t lane0 = -7 + (8 * i);
7680 uint64_t lane1 = -8 + (8 * i);
7681 uint64_t lane2 = -9 + (8 * i);
7682 uint64_t lane3 = -10 + (8 * i);
7683 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7684 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7685 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7686 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7687 }
7688 }
7689
7690 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7691
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007692 // Check that we loaded back the expected values.
7693
7694 // st4b/ld4b
7695 ASSERT_EQUAL_SVE(z3, z19);
7696 ASSERT_EQUAL_SVE(z4, z20);
7697 ASSERT_EQUAL_SVE(z5, z21);
7698 ASSERT_EQUAL_SVE(z6, z22);
7699
7700 // st4h/ld4h
7701 ASSERT_EQUAL_SVE(z7, z23);
7702 ASSERT_EQUAL_SVE(z8, z24);
7703 ASSERT_EQUAL_SVE(z9, z25);
7704 ASSERT_EQUAL_SVE(z10, z26);
7705
7706 // st4w/ld4w
7707 ASSERT_EQUAL_SVE(z11, z27);
7708 ASSERT_EQUAL_SVE(z12, z28);
7709 ASSERT_EQUAL_SVE(z13, z29);
7710 ASSERT_EQUAL_SVE(z14, z30);
7711
7712 // st4d/ld4d
7713 ASSERT_EQUAL_SVE(z15, z31);
7714 ASSERT_EQUAL_SVE(z16, z0);
7715 ASSERT_EQUAL_SVE(z17, z1);
7716 ASSERT_EQUAL_SVE(z18, z2);
7717
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007718 delete[] expected;
7719 }
7720 delete[] data;
7721}
7722
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007723TEST_SVE(sve_ld4_st4_scalar_plus_scalar) {
7724 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7725 START();
7726
7727 int vl = config->sve_vl_in_bytes();
7728
7729 // Allocate plenty of space to enable indexing in both directions.
7730 int data_size = vl * 128;
7731
7732 uint8_t* data = new uint8_t[data_size];
7733 memset(data, 0, data_size);
7734
7735 // Set the base half-way through the buffer so we can use negative indices.
7736 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7737
Jacob Bramleye483ce52019-11-05 16:52:29 +00007738 // We can test ld4 by comparing the values loaded with the values stored.
7739 // There are two complications:
7740 // - Loads have zeroing predication, so we have to clear the inactive
7741 // elements on our reference.
7742 // - We want to test both loads and stores that span { z31, z0 }, so we have
7743 // to move some values around.
7744 //
7745 // Registers z3-z18 will hold as-stored values (with inactive elements
7746 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
7747 // loaded.
7748
7749 __ Index(z19.VnB(), -4, 11);
7750 __ Index(z20.VnB(), -5, 11);
7751 __ Index(z21.VnB(), -6, 11);
7752 __ Index(z22.VnB(), -7, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007753 __ Ptrue(p7.VnB(), SVE_MUL4);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007754 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
7755 __ St4b(z19.VnB(),
7756 z20.VnB(),
7757 z21.VnB(),
7758 z22.VnB(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007759 p7,
7760 SVEMemOperand(x0, x1, LSL, 0));
Jacob Bramleye483ce52019-11-05 16:52:29 +00007761 // Save the stored values for ld4 tests.
7762 __ Dup(z3.VnB(), 0);
7763 __ Dup(z4.VnB(), 0);
7764 __ Dup(z5.VnB(), 0);
7765 __ Dup(z6.VnB(), 0);
7766 __ Mov(z3.VnB(), p7.Merging(), z19.VnB());
7767 __ Mov(z4.VnB(), p7.Merging(), z20.VnB());
7768 __ Mov(z5.VnB(), p7.Merging(), z21.VnB());
7769 __ Mov(z6.VnB(), p7.Merging(), z22.VnB());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007770
Jacob Bramleye483ce52019-11-05 16:52:29 +00007771 __ Index(z23.VnH(), 6, -2);
7772 __ Index(z24.VnH(), 7, -2);
7773 __ Index(z25.VnH(), 8, -2);
7774 __ Index(z26.VnH(), 9, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007775 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007776 __ Rdvl(x2, 7); // (7 * vl) << 1 = 14 * vl
7777 __ St4h(z23.VnH(),
7778 z24.VnH(),
7779 z25.VnH(),
7780 z26.VnH(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007781 p6,
7782 SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleye483ce52019-11-05 16:52:29 +00007783 // Save the stored values for ld4 tests.
7784 __ Dup(z7.VnH(), 0);
7785 __ Dup(z8.VnH(), 0);
7786 __ Dup(z9.VnH(), 0);
7787 __ Dup(z10.VnH(), 0);
7788 __ Mov(z7.VnH(), p6.Merging(), z23.VnH());
7789 __ Mov(z8.VnH(), p6.Merging(), z24.VnH());
7790 __ Mov(z9.VnH(), p6.Merging(), z25.VnH());
7791 __ Mov(z10.VnH(), p6.Merging(), z26.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007792
7793 // Wrap around from z31 to z0.
7794 __ Index(z29.VnS(), -6, 7);
7795 __ Index(z30.VnS(), -7, 7);
7796 __ Index(z31.VnS(), -8, 7);
7797 __ Index(z0.VnS(), -9, 7);
7798 // Sparse predication, including some irrelevant bits (0xe). To make the
7799 // results easy to check, activate each lane <n> where n is a multiple of 5.
7800 Initialise(&masm,
7801 p5,
7802 0xeee1000010000100,
7803 0x001eeee100001000,
7804 0x0100001eeee10000,
7805 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007806 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007807 __ St4w(z29.VnS(),
7808 z30.VnS(),
7809 z31.VnS(),
7810 z0.VnS(),
7811 p5,
Jacob Bramleye483ce52019-11-05 16:52:29 +00007812 SVEMemOperand(x0, x3, LSL, 2));
7813 // Save the stored values for ld4 tests.
7814 __ Dup(z11.VnS(), 0);
7815 __ Dup(z12.VnS(), 0);
7816 __ Dup(z13.VnS(), 0);
7817 __ Dup(z14.VnS(), 0);
7818 __ Mov(z11.VnS(), p5.Merging(), z29.VnS());
7819 __ Mov(z12.VnS(), p5.Merging(), z30.VnS());
7820 __ Mov(z13.VnS(), p5.Merging(), z31.VnS());
7821 __ Mov(z14.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007822
7823 __ Index(z31.VnD(), 32, -11);
7824 __ Index(z0.VnD(), 33, -11);
7825 __ Index(z1.VnD(), 34, -11);
7826 __ Index(z2.VnD(), 35, -11);
7827 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007828 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007829 __ St4d(z31.VnD(),
7830 z0.VnD(),
7831 z1.VnD(),
7832 z2.VnD(),
7833 p4,
Jacob Bramleye483ce52019-11-05 16:52:29 +00007834 SVEMemOperand(x0, x4, LSL, 3));
7835 // Save the stored values for ld4 tests.
7836 __ Dup(z15.VnD(), 0);
7837 __ Dup(z16.VnD(), 0);
7838 __ Dup(z17.VnD(), 0);
7839 __ Dup(z18.VnD(), 0);
7840 __ Mov(z15.VnD(), p4.Merging(), z31.VnD());
7841 __ Mov(z16.VnD(), p4.Merging(), z0.VnD());
7842 __ Mov(z17.VnD(), p4.Merging(), z1.VnD());
7843 __ Mov(z18.VnD(), p4.Merging(), z2.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007844
Jacob Bramleye483ce52019-11-05 16:52:29 +00007845 // Corresponding loads.
7846 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7847 __ Ld4b(z31.VnB(),
7848 z0.VnB(),
7849 z1.VnB(),
7850 z2.VnB(),
7851 p7.Zeroing(),
7852 SVEMemOperand(x0, x1, LSL, 0));
7853 __ Mov(z19, z31);
7854 __ Mov(z20, z0);
7855 __ Mov(z21, z1);
7856 __ Mov(z22, z2);
7857 __ Ld4h(z23.VnH(),
7858 z24.VnH(),
7859 z25.VnH(),
7860 z26.VnH(),
7861 p6.Zeroing(),
7862 SVEMemOperand(x0, x2, LSL, 1));
7863 __ Ld4w(z27.VnS(),
7864 z28.VnS(),
7865 z29.VnS(),
7866 z30.VnS(),
7867 p5.Zeroing(),
7868 SVEMemOperand(x0, x3, LSL, 2));
7869 // Wrap around from z31 to z0.
7870 __ Ld4d(z31.VnD(),
7871 z0.VnD(),
7872 z1.VnD(),
7873 z2.VnD(),
7874 p4.Zeroing(),
7875 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007876
7877 END();
7878
7879 if (CAN_RUN()) {
7880 RUN();
7881
7882 uint8_t* expected = new uint8_t[data_size];
7883 memset(expected, 0, data_size);
7884 uint8_t* middle = &expected[data_size / 2];
7885
7886 int vl_b = vl / kBRegSizeInBytes;
7887 int vl_h = vl / kHRegSizeInBytes;
7888 int vl_s = vl / kSRegSizeInBytes;
7889 int vl_d = vl / kDRegSizeInBytes;
7890
7891 int reg_count = 4;
7892
Jacob Bramleye483ce52019-11-05 16:52:29 +00007893 // st4b { z19.b, z20.b, z21.b, z22.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007894 int vl_b_mul4 = vl_b - (vl_b % 4);
7895 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007896 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007897 uint8_t lane0 = -4 + (11 * i);
7898 uint8_t lane1 = -5 + (11 * i);
7899 uint8_t lane2 = -6 + (11 * i);
7900 uint8_t lane3 = -7 + (11 * i);
7901 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7902 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7903 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7904 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7905 }
7906
Jacob Bramleye483ce52019-11-05 16:52:29 +00007907 // st4h { z23.h, z24.h, z25.h, z26.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007908 if (vl_h >= 16) {
7909 for (int i = 0; i < 16; i++) {
7910 int64_t offset = (7 << kHRegSizeInBytesLog2) * vl;
7911 uint16_t lane0 = 6 - (2 * i);
7912 uint16_t lane1 = 7 - (2 * i);
7913 uint16_t lane2 = 8 - (2 * i);
7914 uint16_t lane3 = 9 - (2 * i);
7915 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7916 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7917 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7918 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7919 }
7920 }
7921
7922 // st4w { z29.s, z30.s, z31.s, z0.s }, ((i % 5) == 0)
7923 for (int i = 0; i < vl_s; i++) {
7924 if ((i % 5) == 0) {
7925 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
7926 uint32_t lane0 = -6 + (7 * i);
7927 uint32_t lane1 = -7 + (7 * i);
7928 uint32_t lane2 = -8 + (7 * i);
7929 uint32_t lane3 = -9 + (7 * i);
7930 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7931 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7932 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7933 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7934 }
7935 }
7936
7937 // st4d { z31.d, z0.d, z1.d, z2.d }, SVE_MUL3
7938 int vl_d_mul3 = vl_d - (vl_d % 3);
7939 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007940 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007941 uint64_t lane0 = 32 - (11 * i);
7942 uint64_t lane1 = 33 - (11 * i);
7943 uint64_t lane2 = 34 - (11 * i);
7944 uint64_t lane3 = 35 - (11 * i);
7945 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7946 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7947 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7948 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7949 }
7950
7951 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7952
Jacob Bramleye483ce52019-11-05 16:52:29 +00007953 // Check that we loaded back the expected values.
7954
7955 // st4b/ld4b
7956 ASSERT_EQUAL_SVE(z3, z19);
7957 ASSERT_EQUAL_SVE(z4, z20);
7958 ASSERT_EQUAL_SVE(z5, z21);
7959 ASSERT_EQUAL_SVE(z6, z22);
7960
7961 // st4h/ld4h
7962 ASSERT_EQUAL_SVE(z7, z23);
7963 ASSERT_EQUAL_SVE(z8, z24);
7964 ASSERT_EQUAL_SVE(z9, z25);
7965 ASSERT_EQUAL_SVE(z10, z26);
7966
7967 // st4w/ld4w
7968 ASSERT_EQUAL_SVE(z11, z27);
7969 ASSERT_EQUAL_SVE(z12, z28);
7970 ASSERT_EQUAL_SVE(z13, z29);
7971 ASSERT_EQUAL_SVE(z14, z30);
7972
7973 // st4d/ld4d
7974 ASSERT_EQUAL_SVE(z15, z31);
7975 ASSERT_EQUAL_SVE(z16, z0);
7976 ASSERT_EQUAL_SVE(z17, z1);
7977 ASSERT_EQUAL_SVE(z18, z2);
7978
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007979 delete[] expected;
7980 }
7981 delete[] data;
7982}
7983
7984TEST_SVE(sve_ld234_st234_scalar_plus_scalar_sp) {
7985 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7986 START();
7987
7988 // Check that the simulator correctly interprets rn == 31 as sp.
7989 // The indexing logic is the same regardless so we just check one load and
7990 // store of each type.
7991
7992 // There are no pre- or post-indexing modes, so reserve space first.
7993 __ ClaimVL(2 + 3 + 4);
7994
7995 __ Index(z0.VnB(), 42, 2);
7996 __ Index(z1.VnB(), 43, 2);
7997 __ Ptrue(p0.VnB(), SVE_VL7);
7998 __ Rdvl(x0, 0);
7999 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, x0));
8000
8001 __ Index(z4.VnH(), 42, 3);
8002 __ Index(z5.VnH(), 43, 3);
8003 __ Index(z6.VnH(), 44, 3);
8004 __ Ptrue(p1.VnH(), SVE_POW2);
8005 __ Rdvl(x1, 2);
8006 __ Lsr(x1, x1, 1);
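  // x1 = (2 * VL) / 2, so the operand below addresses sp + (x1 << 1) = sp + 2 * VL,
  // just past the two VLs already reserved for the st2b data.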
8007 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, x1, LSL, 1));
8008
8009 __ Index(z8.VnS(), 42, 4);
8010 __ Index(z9.VnS(), 43, 4);
8011 __ Index(z10.VnS(), 44, 4);
8012 __ Index(z11.VnS(), 45, 4);
8013 __ Ptrue(p2.VnS());
8014 __ Rdvl(x2, 2 + 3);
8015 __ Lsr(x2, x2, 2);
8016 __ St4w(z8.VnS(),
8017 z9.VnS(),
8018 z10.VnS(),
8019 z11.VnS(),
8020 p2,
8021 SVEMemOperand(sp, x2, LSL, 2));
8022
Jacob Bramleye483ce52019-11-05 16:52:29 +00008023 // Corresponding loads.
8024 // We have to explicitly zero inactive lanes in the reference values because
8025 // loads have zeroing predication.
8026 __ Dup(z12.VnB(), 0);
8027 __ Dup(z13.VnB(), 0);
8028 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8029 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8030 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, x0));
8031
8032 __ Dup(z16.VnH(), 0);
8033 __ Dup(z17.VnH(), 0);
8034 __ Dup(z18.VnH(), 0);
8035 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8036 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8037 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8038 __ Ld3h(z4.VnH(),
8039 z5.VnH(),
8040 z6.VnH(),
8041 p1.Zeroing(),
8042 SVEMemOperand(sp, x1, LSL, 1));
8043
8044 __ Dup(z20.VnS(), 0);
8045 __ Dup(z21.VnS(), 0);
8046 __ Dup(z22.VnS(), 0);
8047 __ Dup(z23.VnS(), 0);
8048 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8049 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8050 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8051 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8052 __ Ld4w(z8.VnS(),
8053 z9.VnS(),
8054 z10.VnS(),
8055 z11.VnS(),
8056 p2.Zeroing(),
8057 SVEMemOperand(sp, x2, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008058
8059 __ DropVL(2 + 3 + 4);
8060
8061 END();
8062
8063 if (CAN_RUN()) {
8064 RUN();
8065
8066 // The most likely failure mode is that the simulator reads sp as xzr and
8067 // crashes on execution. We already test the address calculations separately
8068 // and sp doesn't change this, so just test that we load the values we
8069 // stored.
Jacob Bramleye483ce52019-11-05 16:52:29 +00008070
8071 // st2b/ld2b
8072 ASSERT_EQUAL_SVE(z0, z12);
8073 ASSERT_EQUAL_SVE(z1, z13);
8074
8075 // st3h/ld3h
8076 ASSERT_EQUAL_SVE(z4, z16);
8077 ASSERT_EQUAL_SVE(z5, z17);
8078 ASSERT_EQUAL_SVE(z6, z18);
8079
8080 // st4w/ld4w
8081 ASSERT_EQUAL_SVE(z8, z20);
8082 ASSERT_EQUAL_SVE(z9, z21);
8083 ASSERT_EQUAL_SVE(z10, z22);
8084 ASSERT_EQUAL_SVE(z11, z23);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008085 }
8086}
8087
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008088TEST_SVE(sve_ld234_st234_scalar_plus_imm_sp) {
8089 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8090 START();
8091
8092 // Check that the simulator correctly interprets rn == 31 as sp.
8093 // The indexing logic is the same regardless so we just check one load and
8094 // store of each type.
8095
8096 // There are no pre- or post-indexing modes, so reserve space first.
8097 // Note that the stores fill in an order that allows each immediate to be a
8098 // multiple of the number of registers.
8099 __ ClaimVL(4 + 2 + 3);
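  // Resulting layout: st4w at sp + 0 (4 VLs), st2b at sp + 4 * VL (2 VLs), and
  // st3h at sp + 6 * VL (3 VLs). Each starting offset, expressed as a multiple
  // of VL, is a multiple of the number of registers stored, as required by the
  // scalar-plus-immediate form.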
8100
8101 __ Index(z0.VnB(), 42, 2);
8102 __ Index(z1.VnB(), 43, 2);
8103 __ Ptrue(p0.VnB(), SVE_POW2);
8104 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, 4, SVE_MUL_VL));
8105
8106 __ Index(z4.VnH(), 42, 3);
8107 __ Index(z5.VnH(), 43, 3);
8108 __ Index(z6.VnH(), 44, 3);
8109 __ Ptrue(p1.VnH(), SVE_VL7);
8110 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, 6, SVE_MUL_VL));
8111
8112 __ Index(z8.VnS(), 42, 4);
8113 __ Index(z9.VnS(), 43, 4);
8114 __ Index(z10.VnS(), 44, 4);
8115 __ Index(z11.VnS(), 45, 4);
8116 __ Ptrue(p2.VnS());
8117 __ St4w(z8.VnS(), z9.VnS(), z10.VnS(), z11.VnS(), p2, SVEMemOperand(sp));
8118
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008119 // Corresponding loads.
8120 // We have to explicitly zero inactive lanes in the reference values because
8121 // loads have zeroing predication.
8122 __ Dup(z12.VnB(), 0);
8123 __ Dup(z13.VnB(), 0);
8124 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8125 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8126 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, 4, SVE_MUL_VL));
8127
8128 __ Dup(z16.VnH(), 0);
8129 __ Dup(z17.VnH(), 0);
8130 __ Dup(z18.VnH(), 0);
8131 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8132 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8133 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8134 __ Ld3h(z4.VnH(),
8135 z5.VnH(),
8136 z6.VnH(),
8137 p1.Zeroing(),
8138 SVEMemOperand(sp, 6, SVE_MUL_VL));
8139
8140 __ Dup(z20.VnS(), 0);
8141 __ Dup(z21.VnS(), 0);
8142 __ Dup(z22.VnS(), 0);
8143 __ Dup(z23.VnS(), 0);
8144 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8145 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8146 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8147 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8148 __ Ld4w(z8.VnS(),
8149 z9.VnS(),
8150 z10.VnS(),
8151 z11.VnS(),
8152 p2.Zeroing(),
8153 SVEMemOperand(sp));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008154
8155 __ DropVL(4 + 2 + 3);
8156
8157 END();
8158
8159 if (CAN_RUN()) {
8160 RUN();
8161
8162 // The most likely failure mode is that the simulator reads sp as xzr and
8163 // crashes on execution. We already test the address calculations separately
8164 // and sp doesn't change this, so just test that we load the values we
8165 // stored.
8166 // TODO: Actually do this, once loads are implemented.
8167 }
8168}
8169
Jacob Bramley85a9c102019-12-09 17:48:29 +00008170TEST_SVE(sve_ldff1_scalar_plus_scalar) {
8171 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8172 START();
8173
8174 int vl = config->sve_vl_in_bytes();
8175 size_t page_size = sysconf(_SC_PAGE_SIZE);
8176 VIXL_ASSERT(page_size > static_cast<size_t>(vl));
8177
8178 // Allocate two pages, then mprotect the second one to make it inaccessible.
8179 uintptr_t data = reinterpret_cast<uintptr_t>(mmap(NULL,
8180 page_size * 2,
8181 PROT_READ | PROT_WRITE,
8182 MAP_PRIVATE | MAP_ANONYMOUS,
8183 -1,
8184 0));
8185 mprotect(reinterpret_cast<void*>(data + page_size), page_size, PROT_NONE);
8186
8187 // Fill the accessible page with arbitrary data.
8188 for (size_t i = 0; i < page_size; i++) {
8189 // Reverse bits so we get a mixture of positive and negative values.
8190 uint8_t byte = ReverseBits(static_cast<uint8_t>(i));
8191 memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
8192 }
8193
8194 __ Mov(x20, data);
8195
8196 PRegister all = p7;
8197 __ Ptrue(all.VnB());
8198
8199 size_t offset_modifier = 0;
8200
8201 // The highest address at which a load stopped. Every FF load should fault at
8202 // `data + page_size`, so this value should never exceed that address. However,
8203 // the architecture allows fault-tolerant loads to fault arbitrarily, so the
8204 // real value may be lower.
8205 //
8206 // This is used to check that the `mprotect` above really does make the second
8207 // page inaccessible, and that the resulting FFR from each load reflects that.
8208 Register limit = x22;
8209 __ Mov(limit, 0);
8210
8211 // If the FFR grows unexpectedly, we increment this register by the
8212 // difference. FFR should never grow, except when explicitly set.
8213 Register ffr_grow_count = x23;
8214 __ Mov(ffr_grow_count, 0);
8215
8216#define VIXL_EMIT_LDFF1_TEST(LDFF1, M_SIZE, Zt, E_SIZE, LD1, ZtRef) \
8217 do { \
8218 /* Set the offset so that the load is guaranteed to start in the */ \
8219 /* accessible page, but end in the inaccessible one. */ \
8220 VIXL_ASSERT((page_size % k##M_SIZE##RegSizeInBytes) == 0); \
8221 VIXL_ASSERT((vl % k##M_SIZE##RegSizeInBytes) == 0); \
8222 size_t elements_per_page = page_size / k##M_SIZE##RegSizeInBytes; \
8223 size_t elements_per_access = vl / k##E_SIZE##RegSizeInBytes; \
8224 size_t min_offset = (elements_per_page - elements_per_access) + 1; \
8225 size_t max_offset = elements_per_page - 1; \
8226 size_t offset = \
8227 min_offset + (offset_modifier % (max_offset - min_offset + 1)); \
8228 offset_modifier++; \
8229 __ Mov(x21, offset); \
8230 __ Setffr(); \
8231 __ LDFF1(Zt.Vn##E_SIZE(), \
8232 all.Zeroing(), \
8233 SVEMemOperand(x20, x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8234 __ Rdffrs(p0.VnB(), all.Zeroing()); \
8235 /* Execute another LDFF1 with no offset, so that every element could be */ \
8236 /* read. It should respect FFR, and load no more than we loaded the */ \
8237 /* first time. */ \
8238 __ LDFF1(ZtRef.Vn##E_SIZE(), all.Zeroing(), SVEMemOperand(x20)); \
8239 __ Rdffrs(p1.VnB(), all.Zeroing()); \
8240 __ Cntp(x0, all, p1.VnB()); \
8241 __ Uqdecp(x0, p0.VnB()); \
8242 __ Add(ffr_grow_count, ffr_grow_count, x0); \
8243 /* Use the FFR to predicate the normal load. If it wasn't properly set, */ \
8244 /* the normal load will abort. */ \
8245 __ LD1(ZtRef.Vn##E_SIZE(), \
8246 p0.Zeroing(), \
8247 SVEMemOperand(x20, x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8248 /* Work out the address after the one that was just accessed. */ \
8249 __ Incp(x21, p0.Vn##E_SIZE()); \
8250 __ Add(x0, x20, Operand(x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8251 __ Cmp(limit, x0); \
8252 __ Csel(limit, limit, x0, hs); \
8253 /* Clear lanes inactive in FFR. These have an undefined result. */ \
8254 /* TODO: Use the 'Not' and 'Mov' aliases once they are implemented. */ \
8255 __ Eor(p0.Vn##E_SIZE(), all.Zeroing(), p0.Vn##E_SIZE(), all.Vn##E_SIZE()); \
8256 __ Cpy(Zt.Vn##E_SIZE(), p0.Merging(), 0); \
8257 } while (0)
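  // A worked example, assuming a 4096-byte page and a 256-bit (32-byte) VL: for
  // Ldff1b with B-sized elements, elements_per_page = 4096 and
  // elements_per_access = 32, so `offset` is drawn from [4065, 4095]. The first
  // (fault-tolerant) access then starts in the accessible page but always runs
  // into the protected one, so FFR should be cleared for the trailing elements.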
8258
8259 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z0, B, Ld1b, z16);
8260 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z1, H, Ld1b, z17);
8261 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z2, S, Ld1b, z18);
8262 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z3, D, Ld1b, z19);
8263
8264 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z4, H, Ld1h, z20);
8265 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z5, S, Ld1h, z21);
8266 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z6, D, Ld1h, z22);
8267
8268 VIXL_EMIT_LDFF1_TEST(Ldff1w, S, z7, S, Ld1w, z23);
8269 VIXL_EMIT_LDFF1_TEST(Ldff1w, S, z8, D, Ld1w, z24);
8270
8271 VIXL_EMIT_LDFF1_TEST(Ldff1d, D, z9, D, Ld1d, z25);
8272
8273 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z10, H, Ld1sb, z26);
8274 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z11, S, Ld1sb, z27);
8275 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z12, D, Ld1sb, z28);
8276
8277 VIXL_EMIT_LDFF1_TEST(Ldff1sh, H, z13, S, Ld1sh, z29);
8278 VIXL_EMIT_LDFF1_TEST(Ldff1sh, H, z14, D, Ld1sh, z30);
8279
8280 VIXL_EMIT_LDFF1_TEST(Ldff1sw, S, z15, D, Ld1sw, z31);
8281
8282#undef VIXL_EMIT_LDFF1_TEST
8283
8284 END();
8285
8286 if (CAN_RUN()) {
8287 RUN();
8288
8289 uintptr_t expected_limit = data + page_size;
8290 uintptr_t measured_limit = core.xreg(limit.GetCode());
8291 VIXL_CHECK(measured_limit <= expected_limit);
8292 if (measured_limit < expected_limit) {
8293 // We can't fail the test for this case, but a warning is helpful for
8294 // manually-run tests.
8295 printf(
8296 "WARNING: All fault-tolerant loads detected faults before the\n"
8297 "expected limit. This is architecturally possible, but improbable,\n"
8298 "and could be a symptom of another problem.\n");
8299 }
8300
8301 ASSERT_EQUAL_64(0, ffr_grow_count);
8302
8303 // Ldff1b
8304 ASSERT_EQUAL_SVE(z0.VnB(), z16.VnB());
8305 ASSERT_EQUAL_SVE(z1.VnH(), z17.VnH());
8306 ASSERT_EQUAL_SVE(z2.VnS(), z18.VnS());
8307 ASSERT_EQUAL_SVE(z3.VnD(), z19.VnD());
8308
8309 // Ldff1h
8310 ASSERT_EQUAL_SVE(z4.VnH(), z20.VnH());
8311 ASSERT_EQUAL_SVE(z5.VnS(), z21.VnS());
8312 ASSERT_EQUAL_SVE(z6.VnD(), z22.VnD());
8313
8314 // Ldff1w
8315 ASSERT_EQUAL_SVE(z7.VnS(), z23.VnS());
8316 ASSERT_EQUAL_SVE(z8.VnD(), z24.VnD());
8317
8318 // Ldff1d
8319 ASSERT_EQUAL_SVE(z9.VnD(), z25.VnD());
8320
8321 // Ldff1sb
8322 ASSERT_EQUAL_SVE(z10.VnH(), z26.VnH());
8323 ASSERT_EQUAL_SVE(z11.VnS(), z27.VnS());
8324 ASSERT_EQUAL_SVE(z12.VnD(), z28.VnD());
8325
8326 // Ldff1sh
8327 ASSERT_EQUAL_SVE(z13.VnS(), z29.VnS());
8328 ASSERT_EQUAL_SVE(z14.VnD(), z30.VnD());
8329
8330 // Ldff1sw
8331 ASSERT_EQUAL_SVE(z15.VnD(), z31.VnD());
8332 }
8333
8334 munmap(reinterpret_cast<void*>(data), page_size * 2);
8335}
8336
Jacob Bramleydcdbd752020-01-20 11:47:36 +00008337// Test gather loads by comparing them with the result of a set of equivalent
8338// scalar loads.
8339template <typename F>
8340static void GatherLoadHelper(Test* config,
8341 unsigned msize_in_bits,
8342 unsigned esize_in_bits,
8343 F sve_ld1,
8344 bool is_signed) {
8345 // SVE supports 32- and 64-bit addressing for gather loads.
8346 VIXL_ASSERT((esize_in_bits == kSRegSize) || (esize_in_bits == kDRegSize));
8347 static const unsigned kMaxLaneCount = kZRegMaxSize / kSRegSize;
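  // The worst case is the smallest element size used for addressing (S): a
  // maximal 2048-bit Z register holds 64 S-sized lanes.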
8348
8349 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8350 START();
8351
8352 unsigned msize_in_bytes = msize_in_bits / kBitsPerByte;
8353 unsigned esize_in_bytes = esize_in_bits / kBitsPerByte;
8354 int vl = config->sve_vl_in_bytes();
8355
8356 // Use a fixed seed for nrand48() so that test runs are reproducible.
8357 unsigned short seed[3] = {1, 2, 3}; // NOLINT(runtime/int)
8358
8359 // Fill a buffer with arbitrary data.
8360 size_t buffer_size = vl * 64;
8361 uint64_t data = reinterpret_cast<uintptr_t>(malloc(buffer_size));
8362 for (size_t i = 0; i < buffer_size; i++) {
8363 uint8_t byte = nrand48(seed) & 0xff;
8364 memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
8365 }
8366
8367 // Vectors of random addresses and offsets into the buffer.
8368 uint64_t addresses[kMaxLaneCount];
8369 uint64_t offsets[kMaxLaneCount];
8370 uint64_t max_address = 0;
8371 for (unsigned i = 0; i < kMaxLaneCount; i++) {
8372 uint64_t rnd = nrand48(seed);
8373 // Limit the range to the set of completely-accessible elements in memory.
8374 offsets[i] = rnd % (buffer_size - msize_in_bytes);
8375 addresses[i] = data + offsets[i];
8376 max_address = std::max(max_address, addresses[i]);
8377 }
8378
8379 // Maximised offsets, to ensure that the address calculation is modulo-2^64,
8380 // and that the vector addresses are not sign-extended.
8381 uint64_t uint_e_max = (esize_in_bits == kDRegSize) ? UINT64_MAX : UINT32_MAX;
8382 uint64_t maxed_offsets[kMaxLaneCount];
8383 uint64_t maxed_offsets_imm = max_address - uint_e_max;
8384 for (unsigned i = 0; i < kMaxLaneCount; i++) {
8385 maxed_offsets[i] = addresses[i] - maxed_offsets_imm;
8386 }
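  // Worked example for 64-bit lanes: maxed_offsets_imm = max_address - UINT64_MAX,
  // which is congruent to max_address + 1 modulo 2^64, and each maxed_offsets[i]
  // wraps to addresses[i] - (max_address + 1). Adding the two back together in the
  // load's address calculation recovers addresses[i] exactly, but only if the
  // lanes are treated as unsigned and the sum is taken modulo 2^64.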
8387
8388 ZRegister zn = z0.WithLaneSize(esize_in_bits);
8389 ZRegister zt_addresses = z1.WithLaneSize(esize_in_bits);
8390 ZRegister zt_offsets = z2.WithLaneSize(esize_in_bits);
8391 ZRegister zt_maxed = z3.WithLaneSize(esize_in_bits);
8392 ZRegister zt_ref = z4.WithLaneSize(esize_in_bits);
8393
8394 PRegisterZ pg = p0.Zeroing();
8395 Initialise(&masm,
8396 pg,
8397 0x9abcdef012345678,
8398 0xabcdef0123456789,
8399 0xf4f3f1f0fefdfcfa,
8400 0xf9f8f6f5f3f2f0ff);
8401
8402 // Execute each load.
8403
8404 if (esize_in_bits == kDRegSize) {
8405 // Only test `addresses` if we can use 64-bit pointers. InsrHelper will fail
8406 // if any value won't fit in a lane of zn.
8407 InsrHelper(&masm, zn, addresses);
8408 (masm.*sve_ld1)(zt_addresses, pg, SVEMemOperand(zn));
8409 }
8410
8411 InsrHelper(&masm, zn, offsets);
8412 (masm.*sve_ld1)(zt_offsets, pg, SVEMemOperand(zn, data));
8413
8414 InsrHelper(&masm, zn, maxed_offsets);
8415 (masm.*sve_ld1)(zt_maxed, pg, SVEMemOperand(zn, maxed_offsets_imm));
8416
8417 // TODO: Also test scalar-plus-vector SVEMemOperands.
8418 // TODO: Also test first-fault loads.
8419
8420 // Generate a reference result using scalar loads.
8421
8422 ZRegister lane_numbers = z10.WithLaneSize(esize_in_bits);
8423 __ Index(lane_numbers, 0, 1);
8424 __ Dup(zt_ref, 0);
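  // Note the reversed indexing below: InsrHelper is assumed to insert array
  // elements in order, and each `insr` shifts the existing lanes up before
  // writing lane 0, so the last element inserted ends up in lane 0. Lane i of
  // zn therefore corresponds to addresses[ArrayLength(addresses) - i - 1].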
8425 for (unsigned i = 0; i < (vl / esize_in_bytes); i++) {
8426 __ Mov(x0, addresses[ArrayLength(addresses) - i - 1]);
8427 Register rt(0, esize_in_bits);
8428 if (is_signed) {
8429 switch (msize_in_bits) {
8430 case kBRegSize:
8431 __ Ldrsb(rt, MemOperand(x0));
8432 break;
8433 case kHRegSize:
8434 __ Ldrsh(rt, MemOperand(x0));
8435 break;
8436 case kWRegSize:
8437 __ Ldrsw(rt, MemOperand(x0));
8438 break;
8439 }
8440 } else {
8441 switch (msize_in_bits) {
8442 case kBRegSize:
8443 __ Ldrb(rt, MemOperand(x0));
8444 break;
8445 case kHRegSize:
8446 __ Ldrh(rt, MemOperand(x0));
8447 break;
8448 case kWRegSize:
8449 __ Ldr(rt.W(), MemOperand(x0));
8450 break;
8451 case kXRegSize:
8452 __ Ldr(rt, MemOperand(x0));
8453 break;
8454 }
8455 }
8456
8457 // Emulate predication.
8458 __ Cmpeq(p7.WithLaneSize(esize_in_bits), pg, lane_numbers, i);
8459 __ Cpy(zt_ref, p7.Merging(), rt);
8460 }
8461
8462 END();
8463
8464 if (CAN_RUN()) {
8465 RUN();
8466
8467 if (esize_in_bits == kDRegSize) {
8468 ASSERT_EQUAL_SVE(zt_ref, zt_addresses);
8469 }
8470 ASSERT_EQUAL_SVE(zt_ref, zt_offsets);
8471 ASSERT_EQUAL_SVE(zt_ref, zt_maxed);
8472 }
8473
8474 free(reinterpret_cast<void*>(data));
8475}
8476
8477TEST_SVE(sve_ld1b_64bit_vector_plus_immediate) {
8478 GatherLoadHelper(config, kBRegSize, kDRegSize, &MacroAssembler::Ld1b, false);
8479}
8480
8481TEST_SVE(sve_ld1h_64bit_vector_plus_immediate) {
8482 GatherLoadHelper(config, kHRegSize, kDRegSize, &MacroAssembler::Ld1h, false);
8483}
8484
8485TEST_SVE(sve_ld1w_64bit_vector_plus_immediate) {
8486 GatherLoadHelper(config, kSRegSize, kDRegSize, &MacroAssembler::Ld1w, false);
8487}
8488
8489TEST_SVE(sve_ld1d_64bit_vector_plus_immediate) {
8490 GatherLoadHelper(config, kDRegSize, kDRegSize, &MacroAssembler::Ld1d, false);
8491}
8492
8493TEST_SVE(sve_ld1sb_64bit_vector_plus_immediate) {
8494 GatherLoadHelper(config, kBRegSize, kDRegSize, &MacroAssembler::Ld1sb, true);
8495}
8496
8497TEST_SVE(sve_ld1sh_64bit_vector_plus_immediate) {
8498 GatherLoadHelper(config, kHRegSize, kDRegSize, &MacroAssembler::Ld1sh, true);
8499}
8500
8501TEST_SVE(sve_ld1sw_64bit_vector_plus_immediate) {
8502 GatherLoadHelper(config, kSRegSize, kDRegSize, &MacroAssembler::Ld1sw, true);
8503}
8504
8505TEST_SVE(sve_ld1b_32bit_vector_plus_immediate) {
8506 GatherLoadHelper(config, kBRegSize, kSRegSize, &MacroAssembler::Ld1b, false);
8507}
8508
8509TEST_SVE(sve_ld1h_32bit_vector_plus_immediate) {
8510 GatherLoadHelper(config, kHRegSize, kSRegSize, &MacroAssembler::Ld1h, false);
8511}
8512
8513TEST_SVE(sve_ld1w_32bit_vector_plus_immediate) {
8514 GatherLoadHelper(config, kSRegSize, kSRegSize, &MacroAssembler::Ld1w, false);
8515}
8516
8517TEST_SVE(sve_ld1sb_32bit_vector_plus_immediate) {
8518 GatherLoadHelper(config, kBRegSize, kSRegSize, &MacroAssembler::Ld1sb, true);
8519}
8520
8521TEST_SVE(sve_ld1sh_32bit_vector_plus_immediate) {
8522 GatherLoadHelper(config, kHRegSize, kSRegSize, &MacroAssembler::Ld1sh, true);
8523}
8524
TatWai Chong6995bfd2019-09-26 10:48:05 +01008525typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
8526 const ZRegister& zn,
8527 const IntegerOperand imm);
8528
8529template <typename F, typename Td, typename Tn>
8530static void IntWideImmHelper(Test* config,
8531 F macro,
8532 unsigned lane_size_in_bits,
8533 const Tn& zn_inputs,
8534 IntegerOperand imm,
8535 const Td& zd_expected) {
8536 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8537 START();
8538
8539 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
8540 InsrHelper(&masm, zd1, zn_inputs);
8541
8542 // Also test with a different zn, to test the movprfx case.
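  // The underlying instructions are destructive (the destination must also be
  // the first source), so when zd2 != zn the MacroAssembler is expected to emit
  // a movprfx to initialise the destination before the operation.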
8543 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
8544 InsrHelper(&masm, zn, zn_inputs);
8545 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
8546 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
8547
8548 // Make a copy so we can check that constructive operations preserve zn.
8549 __ Mov(zn_copy, zn);
8550
8551 {
8552 UseScratchRegisterScope temps(&masm);
8553 // The MacroAssembler needs a P scratch register for some of these macros,
8554 // and it doesn't have one by default.
8555 temps.Include(p3);
8556
8557 (masm.*macro)(zd1, zd1, imm);
8558 (masm.*macro)(zd2, zn, imm);
8559 }
8560
8561 END();
8562
8563 if (CAN_RUN()) {
8564 RUN();
8565
8566 ASSERT_EQUAL_SVE(zd_expected, zd1);
8567
8568 // Check that the result computed via the movprfx path (zd2, from a distinct
8569 // source register) matches the destructive-form result above.
8570 ASSERT_EQUAL_SVE(zd_expected, zd2);
8571
8572 ASSERT_EQUAL_SVE(zn_copy, zn);
8573 }
8574}
8575
8576TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
8577 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
8578 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
8579 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
8580 int64_t in_d[] = {1, 10, 10000, 1000000};
8581
8582 IntWideImmFn fn = &MacroAssembler::Smax;
8583
8584 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
8585 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
8586 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
8587 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
8588
8589 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
8590 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
8591 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
8592 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8593
8594 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
8595 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
8596 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
8597
8598 // The immediate is in the range [-128, 127], but the macro is able to
8599 // synthesise unencodable immediates.
8600 // B-sized lanes cannot take an immediate out of the range [-128, 127].
8601 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8602 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8603 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8604}
8605
8606TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
8607 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
8608 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
8609 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
8610 int64_t in_d[] = {1, 10, 10000, 1000000};
8611
8612 IntWideImmFn fn = &MacroAssembler::Smin;
8613
8614 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
8615 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
8616 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
8617 int64_t exp_d_1[] = {1, 10, 99, 99};
8618
8619 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
8620 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
8621 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
8622 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8623
8624 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
8625 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
8626 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
8627
8628 // The immediate is in the range [-128, 127], but the macro is able to
8629 // synthesise unencodable immediates.
8630 // B-sized lanes cannot take an immediate out of the range [-128, 127].
8631 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8632 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8633 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8634}
8635
8636TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
8637 int in_b[] = {0, 255, 127, 0x80, 1, 55};
8638 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
8639 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
8640 int64_t in_d[] = {1, 10, 10000, 1000000};
8641
8642 IntWideImmFn fn = &MacroAssembler::Umax;
8643
8644 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
8645 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
8646 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
8647 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
8648
8649 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
8650 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
8651 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
8652 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8653
8654 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
8655 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
8656 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
8657
8658 // The immediate is in the range [0, 255], but the macro is able to
8659 // synthesise unencodable immediates.
8660 // B-sized lanes cannot take an immediate out of the range [0, 255].
8661 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
8662 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8663 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8664}
8665
8666TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
8667 int in_b[] = {0, 255, 127, 0x80, 1, 55};
8668 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
8669 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
8670 int64_t in_d[] = {1, 10, 10000, 1000000};
8671
8672 IntWideImmFn fn = &MacroAssembler::Umin;
8673
8674 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
8675 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
8676 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
8677 int64_t exp_d_1[] = {1, 10, 99, 99};
8678
8679 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
8680 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
8681 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
8682 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8683
8684 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
8685 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
8686 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
8687
8688 // The immediate is in the range [0, 255], but the macro is able to
8689 // synthesise unencodable immediates.
8690 // B-sized lanes cannot take an immediate out of the range [0, 255].
8691 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
8692 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8693 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8694}
8695
8696TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
8697 int in_b[] = {11, -1, 7, -3};
8698 int in_h[] = {111, -1, 17, -123};
8699 int in_s[] = {11111, -1, 117, -12345};
8700 int64_t in_d[] = {0x7fffffff, 0x80000000};
8701
8702 IntWideImmFn fn = &MacroAssembler::Mul;
8703
8704 int exp_b_1[] = {66, -6, 42, -18};
8705 int exp_h_1[] = {-14208, 128, -2176, 15744};
8706 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
8707 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
8708
8709 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
8710 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
8711 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8712 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
8713
8714 int exp_h_2[] = {-28305, 255, -4335, 31365};
8715 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
8716 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
8717
8718 // The immediate is in the range [-128, 127], but the macro is able to
8719 // synthesise unencodable immediates.
8720 // B-sized lanes cannot take an immediate out of the range [-128, 127].
8721 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8722 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8723 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
8724
8725 // Integer overflow on multiplication.
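  // For example, 11 * 0x7f = 1397 (0x575), which truncates to 0x75 in a B-sized
  // lane, and -3 * 0x7f = -381, which wraps to 0x83.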
8726 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
8727
8728 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
8729}
8730
8731TEST_SVE(sve_int_wide_imm_unpredicated_add) {
8732 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8733 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8734 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8735 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8736
8737 IntWideImmFn fn = &MacroAssembler::Add;
8738
8739 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
8740 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8741 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8742 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8743
8744 // Encodable with `add` (shift 0).
8745 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8746 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8747 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8748 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
8749
8750 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
8751 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8752 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8753
8754 // Encodable with `add` (shift 8).
8755 // B-sized lanes cannot take a shift of 8.
8756 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8757 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8758 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
8759
8760 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
8761
8762 // The macro is able to synthesise unencodable immediates.
8763 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01008764
8765 unsigned exp_b_4[] = {0x61, 0x5f, 0xf0, 0xdf};
8766 unsigned exp_h_4[] = {0x6181, 0x5f7f, 0xf010, 0x8aaa};
8767 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
8768 uint64_t exp_d_4[] = {0x8000000180018180, 0x7fffffff7fff7f7e};
8769
8770 // Negative immediates use `sub`.
8771 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
8772 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
8773 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
8774 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008775}
8776
8777TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
8778 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8779 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8780 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8781 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8782
8783 IntWideImmFn fn = &MacroAssembler::Sqadd;
8784
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008785 unsigned exp_b_1[] = {0x02, 0x7f, 0x7f, 0x7f};
TatWai Chong6995bfd2019-09-26 10:48:05 +01008786 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8787 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8788 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8789
8790 // Encodable with `sqadd` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008791 // Note that encodable immediates are unsigned, even for signed saturation.
8792 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008793 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8794 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008795 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008796
8797 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
8798 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8799 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8800
8801 // Encodable with `sqadd` (shift 8).
8802 // B-sized lanes cannot take a shift of 8.
8803 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8804 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8805 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008806}
8807
8808TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
8809 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8810 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8811 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8812 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8813
8814 IntWideImmFn fn = &MacroAssembler::Uqadd;
8815
8816 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
8817 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8818 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8819 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8820
8821 // Encodable with `uqadd` (shift 0).
8822 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8823 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8824 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8825 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
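  // For example, on B lanes 0x81 + 0x81 = 0x102 saturates to 0xff, while
  // 0x10 + 0x81 = 0x91 is representable and is left unchanged (see exp_b_1).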
8826
8827 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
8828 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8829 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8830
8831 // Encodable with `uqadd` (shift 8).
8832 // B-sized lanes cannot take a shift of 8.
8833 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8834 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8835 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008836}
8837
8838TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
8839 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8840 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8841 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8842 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8843
8844 IntWideImmFn fn = &MacroAssembler::Sub;
8845
8846 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
8847 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8848 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8849 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8850
8851 // Encodable with `sub` (shift 0).
8852 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8853 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8854 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8855 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
8856
8857 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
8858 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8859 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8860
8861 // Encodable with `sub` (shift 8).
8862 // B-sized lanes cannot take a shift of 8.
8863 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8864 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8865 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
8866
8867 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
8868
8869 // The macro is able to synthesise unencodable immediates.
8870 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01008871
8872 unsigned exp_b_4[] = {0xa1, 0x9f, 0x30, 0x1f};
8873 unsigned exp_h_4[] = {0xa181, 0x9f7f, 0x3010, 0xcaaa};
8874 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
8875 uint64_t exp_d_4[] = {0x8000000180018182, 0x7fffffff7fff7f80};
8876
8877 // Negative immediates use `add`.
8878 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
8879 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
8880 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
8881 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008882}
8883
8884TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
8885 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8886 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8887 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8888 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8889
8890 IntWideImmFn fn = &MacroAssembler::Sqsub;
8891
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008892 unsigned exp_b_1[] = {0x80, 0xfe, 0x8f, 0x80};
TatWai Chong6995bfd2019-09-26 10:48:05 +01008893 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8894 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8895 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8896
8897 // Encodable with `sqsub` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008898 // Note that encodable immediates are unsigned, even for signed saturation.
8899 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008900 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8901 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008902 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008903
8904 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
8905 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8906 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8907
8908 // Encodable with `sqsub` (shift 8).
8909 // B-sized lanes cannot take a shift of 8.
8910 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8911 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8912 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008913}
8914
8915TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
8916 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8917 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8918 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8919 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8920
8921 IntWideImmFn fn = &MacroAssembler::Uqsub;
8922
8923 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
8924 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8925 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8926 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8927
8928 // Encodable with `uqsub` (shift 0).
8929 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8930 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8931 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8932 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
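  // For example, on B lanes 0x10 - 0x81 underflows and saturates to 0x00,
  // while 0xff - 0x81 = 0x7e stays in range (see exp_b_1).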
8933
8934 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
8935 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8936 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8937
8938 // Encodable with `uqsub` (shift 8).
8939 // B-sized lanes cannot take a shift of 8.
8940 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8941 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8942 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008943}
8944
8945TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
8946 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8947 START();
8948
8949 // Encodable with `subr` (shift 0).
8950 __ Index(z0.VnD(), 1, 1);
8951 __ Sub(z0.VnD(), 100, z0.VnD());
8952 __ Index(z1.VnS(), 0x7f, 1);
8953 __ Sub(z1.VnS(), 0xf7, z1.VnS());
8954 __ Index(z2.VnH(), 0xaaaa, 0x2222);
8955 __ Sub(z2.VnH(), 0x80, z2.VnH());
8956 __ Index(z3.VnB(), 133, 1);
8957 __ Sub(z3.VnB(), 255, z3.VnB());
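  // With an immediate first operand, Sub computes (imm - zn) for each lane, so
  // z0 lane i holds 100 - (i + 1); this is the descending sequence checked in
  // expected_z0 below.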
8958
8959 // Encodable with `subr` (shift 8).
8960 __ Index(z4.VnD(), 256, -1);
8961 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
8962 __ Index(z5.VnS(), 0x7878, 1);
8963 __ Sub(z5.VnS(), 0x8000, z5.VnS());
8964 __ Index(z6.VnH(), 0x30f0, -1);
8965 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
8966 // B-sized lanes cannot take a shift of 8.
8967
8968 // Select with movprfx.
8969 __ Index(z31.VnD(), 256, 4001);
8970 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
8971
8972 // Out of immediate encodable range of `sub`.
8973 __ Index(z30.VnS(), 0x11223344, 1);
8974 __ Sub(z8.VnS(), 0x88776655, z30.VnS());
8975
8976 END();
8977
8978 if (CAN_RUN()) {
8979 RUN();
8980
8981 int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
8982 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
8983
8984 int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
8985 ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
8986
8987 int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
8988 ASSERT_EQUAL_SVE(expected_z2, z2.VnH());
8989
8990 int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
8991 ASSERT_EQUAL_SVE(expected_z3, z3.VnB());
8992
8993 int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
8994 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
8995
8996 int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
8997 ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
8998
8999 int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
9000 ASSERT_EQUAL_SVE(expected_z6, z6.VnH());
9001
9002 int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
9003 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
9004
9005 int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
9006 ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
9007 }
9008}
9009
9010TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
9011 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9012 START();
9013
9014 // Immediates which can be encoded in the instructions.
9015 __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
9016 __ Fdup(z1.VnS(), Float16(2.0));
9017 __ Fdup(z2.VnD(), Float16(3.875));
9018 __ Fdup(z3.VnH(), 8.0f);
9019 __ Fdup(z4.VnS(), -4.75f);
9020 __ Fdup(z5.VnD(), 0.5f);
9021 __ Fdup(z6.VnH(), 1.0);
9022 __ Fdup(z7.VnS(), 2.125);
9023 __ Fdup(z8.VnD(), -13.0);
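  // The encodable constants follow the usual 8-bit FP immediate format,
  // (-1)^s * ((16 + m) / 16) * 2^e with m in [0, 15] and e in [-3, 4], which
  // covers values such as 2.0, 3.875, -4.75 and -13.0 but not 0.0 or 255.0.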
9024
9025 // Immediates which cannot be encoded in the instructions.
9026 __ Fdup(z10.VnH(), Float16(0.0));
9027 __ Fdup(z11.VnH(), kFP16PositiveInfinity);
9028 __ Fdup(z12.VnS(), 255.0f);
9029 __ Fdup(z13.VnS(), kFP32NegativeInfinity);
9030 __ Fdup(z14.VnD(), 12.3456);
9031 __ Fdup(z15.VnD(), kFP64PositiveInfinity);
9032
9033 END();
9034
9035 if (CAN_RUN()) {
9036 RUN();
9037
9038 ASSERT_EQUAL_SVE(0xc500, z0.VnH());
9039 ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
9040 ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
9041 ASSERT_EQUAL_SVE(0x4800, z3.VnH());
9042 ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
9043 ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
9044 ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
9045 ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
9046 ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());
9047
9048 ASSERT_EQUAL_SVE(0x0000, z10.VnH());
9049 ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
9050 ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
9051 ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
9052 ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
9053 ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
9054 }
9055}
9056
TatWai Chong6f111bc2019-10-07 09:20:37 +01009057TEST_SVE(sve_andv_eorv_orv) {
9058 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9059 START();
9060
9061 uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
9062 InsrHelper(&masm, z31.VnD(), in);
9063
9064 // For simplicity, we re-use the same pg for various lane sizes.
9065 // For D lanes: 1, 1, 0
9066 // For S lanes: 1, 1, 1, 0, 0
9067 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9068 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9069 Initialise(&masm, p0.VnB(), pg_in);
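  // Only the predicate bit for the lowest-numbered byte of each H, S or D lane
  // determines whether that lane is active, which is how the byte-granular
  // pg_in above yields the three per-size patterns listed in the comments.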
9070
9071 // Make a copy so we can check that constructive operations preserve zn.
9072 __ Mov(z0, z31);
9073 __ Andv(b0, p0, z0.VnB()); // destructive
9074 __ Andv(h1, p0, z31.VnH());
9075 __ Mov(z2, z31);
9076 __ Andv(s2, p0, z2.VnS()); // destructive
9077 __ Andv(d3, p0, z31.VnD());
9078
9079 __ Eorv(b4, p0, z31.VnB());
9080 __ Mov(z5, z31);
9081 __ Eorv(h5, p0, z5.VnH()); // destructive
9082 __ Eorv(s6, p0, z31.VnS());
9083 __ Mov(z7, z31);
9084 __ Eorv(d7, p0, z7.VnD()); // destructive
9085
9086 __ Mov(z8, z31);
9087 __ Orv(b8, p0, z8.VnB()); // destructive
9088 __ Orv(h9, p0, z31.VnH());
9089 __ Mov(z10, z31);
9090 __ Orv(s10, p0, z10.VnS()); // destructive
9091 __ Orv(d11, p0, z31.VnD());
9092
9093 END();
9094
9095 if (CAN_RUN()) {
9096 RUN();
9097
9098 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9099 ASSERT_EQUAL_64(0x10, d0);
9100 ASSERT_EQUAL_64(0x1010, d1);
9101 ASSERT_EQUAL_64(0x33331111, d2);
9102 ASSERT_EQUAL_64(0x7777555533331111, d3);
9103 ASSERT_EQUAL_64(0xbf, d4);
9104 ASSERT_EQUAL_64(0xedcb, d5);
9105 ASSERT_EQUAL_64(0x44444444, d6);
9106 ASSERT_EQUAL_64(0x7777555533331111, d7);
9107 ASSERT_EQUAL_64(0xff, d8);
9108 ASSERT_EQUAL_64(0xffff, d9);
9109 ASSERT_EQUAL_64(0x77775555, d10);
9110 ASSERT_EQUAL_64(0x7777555533331111, d11);
9111 } else {
9112 ASSERT_EQUAL_64(0, d0);
9113 ASSERT_EQUAL_64(0x0010, d1);
9114 ASSERT_EQUAL_64(0x00110011, d2);
9115 ASSERT_EQUAL_64(0x0011001100110011, d3);
9116 ASSERT_EQUAL_64(0x62, d4);
9117 ASSERT_EQUAL_64(0x0334, d5);
9118 ASSERT_EQUAL_64(0x8899aabb, d6);
9119 ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
9120 ASSERT_EQUAL_64(0xff, d8);
9121 ASSERT_EQUAL_64(0xffff, d9);
9122 ASSERT_EQUAL_64(0xffffffff, d10);
9123 ASSERT_EQUAL_64(0xffffffffffffffff, d11);
9124 }
9125
9126 // Check the upper lanes above the top of the V register are all clear.
9127 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9128 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9129 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9130 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9131 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9132 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9133 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9134 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9135 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9136 ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
9137 ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
9138 ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
9139 ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
9140 }
9141 }
9142}
9143
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07009144
9145TEST_SVE(sve_saddv_uaddv) {
9146 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9147 START();
9148
9149 uint64_t in[] = {0x8899aabbccddeeff, 0x8182838485868788, 0x0807060504030201};
9150 InsrHelper(&masm, z31.VnD(), in);
9151
9152 // For simplicity, we re-use the same pg for various lane sizes.
9153 // For D lanes: 1, 1, 0
9154 // For S lanes: 1, 1, 1, 0, 0
9155 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9156 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9157 Initialise(&masm, p0.VnB(), pg_in);
9158
9159 // Make a copy so we can check that constructive operations preserve zn.
9160 __ Mov(z0, z31);
9161 __ Saddv(b0, p0, z0.VnB()); // destructive
9162 __ Saddv(h1, p0, z31.VnH());
9163 __ Mov(z2, z31);
9164 __ Saddv(s2, p0, z2.VnS()); // destructive
9165
9166 __ Uaddv(b4, p0, z31.VnB());
9167 __ Mov(z5, z31);
9168 __ Uaddv(h5, p0, z5.VnH()); // destructive
9169 __ Uaddv(s6, p0, z31.VnS());
9170 __ Mov(z7, z31);
9171 __ Uaddv(d7, p0, z7.VnD()); // destructive
9172
9173 END();
9174
9175 if (CAN_RUN()) {
9176 RUN();
9177
9178 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9179 // Saddv
9180 ASSERT_EQUAL_64(0xfffffffffffffda9, d0);
9181 ASSERT_EQUAL_64(0xfffffffffffe9495, d1);
9182 ASSERT_EQUAL_64(0xffffffff07090b0c, d2);
9183 // Uaddv
9184 ASSERT_EQUAL_64(0x00000000000002a9, d4);
9185 ASSERT_EQUAL_64(0x0000000000019495, d5);
9186 ASSERT_EQUAL_64(0x0000000107090b0c, d6);
9187 ASSERT_EQUAL_64(0x8182838485868788, d7);
9188 } else {
9189 // Saddv
9190 ASSERT_EQUAL_64(0xfffffffffffffd62, d0);
9191 ASSERT_EQUAL_64(0xfffffffffffe8394, d1);
9192 ASSERT_EQUAL_64(0xfffffffed3e6fa0b, d2);
9193 // Uaddv
9194 ASSERT_EQUAL_64(0x0000000000000562, d4);
9195 ASSERT_EQUAL_64(0x0000000000028394, d5);
9196 ASSERT_EQUAL_64(0x00000001d3e6fa0b, d6);
9197 ASSERT_EQUAL_64(0x0a1c2e4052647687, d7);
9198 }
9199
9200 // Check the upper lanes above the top of the V register are all clear.
9201 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9202 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9203 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9204 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9205 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9206 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9207 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9208 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9209 }
9210 }
9211}
9212
9213
9214TEST_SVE(sve_sminv_uminv) {
9215 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9216 START();
9217
9218 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
9219 InsrHelper(&masm, z31.VnD(), in);
9220
9221 // For simplicity, we re-use the same pg for various lane sizes.
9222 // For D lanes: 1, 0, 1
9223 // For S lanes: 1, 1, 0, 0, 1
9224 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
9225 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
9226 Initialise(&masm, p0.VnB(), pg_in);
9227
9228 // Make a copy so we can check that constructive operations preserve zn.
9229 __ Mov(z0, z31);
9230 __ Sminv(b0, p0, z0.VnB()); // destructive
9231 __ Sminv(h1, p0, z31.VnH());
9232 __ Mov(z2, z31);
9233 __ Sminv(s2, p0, z2.VnS()); // destructive
9234 __ Sminv(d3, p0, z31.VnD());
9235
9236 __ Uminv(b4, p0, z31.VnB());
9237 __ Mov(z5, z31);
9238 __ Uminv(h5, p0, z5.VnH()); // destructive
9239 __ Uminv(s6, p0, z31.VnS());
9240 __ Mov(z7, z31);
9241 __ Uminv(d7, p0, z7.VnD()); // destructive
9242
9243 END();
9244
9245 if (CAN_RUN()) {
9246 RUN();
9247
9248 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9249 // Sminv
9250 ASSERT_EQUAL_64(0xaa, d0);
9251 ASSERT_EQUAL_64(0xaabb, d1);
9252 ASSERT_EQUAL_64(0xaabbfc00, d2);
 9253 ASSERT_EQUAL_64(0x00112233aabbfc00, d3); // The lane holding the smaller value is inactive.
9254 // Uminv
9255 ASSERT_EQUAL_64(0, d4);
9256 ASSERT_EQUAL_64(0x2233, d5);
9257 ASSERT_EQUAL_64(0x112233, d6);
 9258 ASSERT_EQUAL_64(0x00112233aabbfc00, d7); // The lane holding the smaller value is inactive.
9259 } else {
9260 // Sminv
9261 ASSERT_EQUAL_64(0xaa, d0);
9262 ASSERT_EQUAL_64(0xaaaa, d1);
9263 ASSERT_EQUAL_64(0xaaaaaaaa, d2);
9264 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d3);
9265 // Uminv
9266 ASSERT_EQUAL_64(0, d4);
9267 ASSERT_EQUAL_64(0x2233, d5);
9268 ASSERT_EQUAL_64(0x112233, d6);
9269 ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
9270 }
9271
9272 // Check the upper lanes above the top of the V register are all clear.
9273 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9274 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9275 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9276 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9277 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9278 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9279 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9280 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9281 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9282 }
9283 }
9284}
9285
9286TEST_SVE(sve_smaxv_umaxv) {
9287 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9288 START();
9289
9290 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
9291 InsrHelper(&masm, z31.VnD(), in);
9292
9293 // For simplicity, we re-use the same pg for various lane sizes.
9294 // For D lanes: 1, 0, 1
9295 // For S lanes: 1, 1, 0, 0, 1
9296 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
9297 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
9298 Initialise(&masm, p0.VnB(), pg_in);
9299
9300 // Make a copy so we can check that constructive operations preserve zn.
9301 __ Mov(z0, z31);
9302 __ Smaxv(b0, p0, z0.VnB()); // destructive
9303 __ Smaxv(h1, p0, z31.VnH());
9304 __ Mov(z2, z31);
9305 __ Smaxv(s2, p0, z2.VnS()); // destructive
9306 __ Smaxv(d3, p0, z31.VnD());
9307
9308 __ Umaxv(b4, p0, z31.VnB());
9309 __ Mov(z5, z31);
9310 __ Umaxv(h5, p0, z5.VnH()); // destructive
9311 __ Umaxv(s6, p0, z31.VnS());
9312 __ Mov(z7, z31);
9313 __ Umaxv(d7, p0, z7.VnD()); // destructive
9314
9315 END();
9316
9317 if (CAN_RUN()) {
9318 RUN();
9319
9320 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9321 // Smaxv
9322 ASSERT_EQUAL_64(0x33, d0);
9323 ASSERT_EQUAL_64(0x44aa, d1);
9324 ASSERT_EQUAL_64(0x112233, d2);
9325 ASSERT_EQUAL_64(0x112233aabbfc00, d3);
9326 // Umaxv
9327 ASSERT_EQUAL_64(0xfe, d4);
9328 ASSERT_EQUAL_64(0xfc00, d5);
9329 ASSERT_EQUAL_64(0xaabbfc00, d6);
9330 ASSERT_EQUAL_64(0x112233aabbfc00, d7);
9331 } else {
9332 // Smaxv
9333 ASSERT_EQUAL_64(0x33, d0);
9334 ASSERT_EQUAL_64(0x44aa, d1);
9335 ASSERT_EQUAL_64(0x112233, d2);
9336 ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
9337 // Umaxv
9338 ASSERT_EQUAL_64(0xfe, d4);
9339 ASSERT_EQUAL_64(0xfc00, d5);
9340 ASSERT_EQUAL_64(0xaabbfc00, d6);
9341 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d7);
9342 }
9343
9344 // Check the upper lanes above the top of the V register are all clear.
9345 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9346 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9347 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9348 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9349 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9350 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9351 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9352 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9353 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9354 }
9355 }
9356}
9357
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009358typedef void (MacroAssembler::*SdotUdotFn)(const ZRegister& zd,
9359 const ZRegister& za,
9360 const ZRegister& zn,
9361 const ZRegister& zm);
9362
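// For each destination lane, the dot-product instructions accumulate the sum
// of four products of quarter-width elements:
//   zd[i] = za[i] + (zn[4i+0] * zm[4i+0]) + ... + (zn[4i+3] * zm[4i+3]),
// with the sources sign-extended (sdot) or zero-extended (udot).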
9363template <typename Td, typename Ts, typename Te>
9364static void SdotUdotHelper(Test* config,
9365 SdotUdotFn macro,
9366 unsigned lane_size_in_bits,
9367 const Td& zd_inputs,
9368 const Td& za_inputs,
9369 const Ts& zn_inputs,
9370 const Ts& zm_inputs,
9371 const Te& zd_expected,
9372 const Te& zdnm_expected) {
9373 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9374 START();
9375
9376 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
9377 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
9378 ZRegister zn = z2.WithLaneSize(lane_size_in_bits / 4);
9379 ZRegister zm = z3.WithLaneSize(lane_size_in_bits / 4);
9380
9381 InsrHelper(&masm, zd, zd_inputs);
9382 InsrHelper(&masm, za, za_inputs);
9383 InsrHelper(&masm, zn, zn_inputs);
9384 InsrHelper(&masm, zm, zm_inputs);
9385
9386 // The Dot macro handles arbitrarily-aliased registers in the argument list.
9387 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
9388 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
9389 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
9390 ZRegister dnm_result = z13.WithLaneSize(lane_size_in_bits);
9391 ZRegister d_result = z14.WithLaneSize(lane_size_in_bits);
9392
9393 __ Mov(da_result, za);
9394 // zda = zda + (zn . zm)
9395 (masm.*macro)(da_result, da_result, zn, zm);
9396
9397 __ Mov(dn_result, zn);
9398 // zdn = za + (zdn . zm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009399 (masm.*macro)(dn_result, za, dn_result.WithSameLaneSizeAs(zn), zm);
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009400
9401 __ Mov(dm_result, zm);
9402 // zdm = za + (zn . zdm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009403 (masm.*macro)(dm_result, za, zn, dm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009404
9405 __ Mov(d_result, zd);
9406 // zd = za + (zn . zm)
9407 (masm.*macro)(d_result, za, zn, zm);
9408
9409 __ Mov(dnm_result, zn);
9410 // zdnm = za + (zdnm . zdnm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009411 (masm.*macro)(dnm_result,
9412 za,
9413 dnm_result.WithSameLaneSizeAs(zn),
9414 dnm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009415
9416 END();
9417
9418 if (CAN_RUN()) {
9419 RUN();
9420
9421 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
9422 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits / 4));
9423 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits / 4));
9424
9425 ASSERT_EQUAL_SVE(zd_expected, da_result);
9426 ASSERT_EQUAL_SVE(zd_expected, dn_result);
9427 ASSERT_EQUAL_SVE(zd_expected, dm_result);
9428 ASSERT_EQUAL_SVE(zd_expected, d_result);
9429
9430 ASSERT_EQUAL_SVE(zdnm_expected, dnm_result);
9431 }
9432}
9433
9434TEST_SVE(sve_sdot) {
9435 int zd_inputs[] = {0x33, 0xee, 0xff};
9436 int za_inputs[] = {INT32_MAX, -3, 2};
9437 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
9438 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
9439
9440 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
9441 int32_t zd_expected_s[] = {-2147418113, -183, 133}; // 0x8000ffff
9442 int64_t zd_expected_d[] = {2147549183, -183, 133}; // 0x8000ffff
9443
9444 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
9445 int32_t zdnm_expected_s[] = {-2147418113, 980, 572};
9446 int64_t zdnm_expected_d[] = {2147549183, 980, 572};
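  // For the highest S lane, zn and zm each hold four copies of -128, so the
  // dot product adds 4 * 16384 = 65536 to INT32_MAX; the 32-bit accumulator
  // wraps to -2147418113 (0x8000ffff) while the 64-bit one holds 2147549183.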
9447
9448 SdotUdotHelper(config,
9449 &MacroAssembler::Sdot,
9450 kSRegSize,
9451 zd_inputs,
9452 za_inputs,
9453 zn_inputs,
9454 zm_inputs,
9455 zd_expected_s,
9456 zdnm_expected_s);
9457 SdotUdotHelper(config,
9458 &MacroAssembler::Sdot,
9459 kDRegSize,
9460 zd_inputs,
9461 za_inputs,
9462 zn_inputs,
9463 zm_inputs,
9464 zd_expected_d,
9465 zdnm_expected_d);
9466}
9467
9468TEST_SVE(sve_udot) {
9469 int zd_inputs[] = {0x33, 0xee, 0xff};
9470 int za_inputs[] = {INT32_MAX, -3, 2};
9471 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
9472 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
9473
9474 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
9475 uint32_t zd_expected_s[] = {0x8000ffff, 0x00001749, 0x0000f085};
9476 uint64_t zd_expected_d[] = {0x000000047c00ffff,
9477 0x000000000017ff49,
9478 0x00000000fff00085};
9479
9480 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
9481 uint32_t zdnm_expected_s[] = {0x8000ffff, 0x000101d4, 0x0001d03c};
9482 uint64_t zdnm_expected_d[] = {0x000000047c00ffff,
9483 0x00000000fffe03d4,
9484 0x00000001ffce023c};
9485
9486 SdotUdotHelper(config,
9487 &MacroAssembler::Udot,
9488 kSRegSize,
9489 zd_inputs,
9490 za_inputs,
9491 zn_inputs,
9492 zm_inputs,
9493 zd_expected_s,
9494 zdnm_expected_s);
9495 SdotUdotHelper(config,
9496 &MacroAssembler::Udot,
9497 kDRegSize,
9498 zd_inputs,
9499 za_inputs,
9500 zn_inputs,
9501 zm_inputs,
9502 zd_expected_d,
9503 zdnm_expected_d);
9504}
9505
TatWai Chong7a0d3672019-10-23 17:35:18 -07009506template <typename T, size_t N>
9507static void FPToRawbitsWithSize(const T (&inputs)[N],
9508 uint64_t* outputs,
9509 unsigned size_in_bits) {
TatWai Chongfe536042019-10-23 16:34:11 -07009510 for (size_t i = 0; i < N; i++) {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009511 outputs[i] = vixl::FPToRawbitsWithSize(size_in_bits, inputs[i]);
TatWai Chongfe536042019-10-23 16:34:11 -07009512 }
9513}
9514
TatWai Chong7a0d3672019-10-23 17:35:18 -07009515template <typename Ti, typename Te, size_t N>
9516static void FPBinArithHelper(Test* config,
9517 ArithFn macro,
9518 int lane_size_in_bits,
9519 const Ti (&zn_inputs)[N],
9520 const Ti (&zm_inputs)[N],
9521 const Te (&zd_expected)[N]) {
TatWai Chongfe536042019-10-23 16:34:11 -07009522 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong7a0d3672019-10-23 17:35:18 -07009523
TatWai Chongfe536042019-10-23 16:34:11 -07009524 START();
9525
9526 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
9527 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
9528 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
9529
9530 uint64_t zn_rawbits[N];
9531 uint64_t zm_rawbits[N];
9532
TatWai Chong7a0d3672019-10-23 17:35:18 -07009533 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
9534 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
TatWai Chongfe536042019-10-23 16:34:11 -07009535
9536 InsrHelper(&masm, zn, zn_rawbits);
9537 InsrHelper(&masm, zm, zm_rawbits);
9538
9539 (masm.*macro)(zd, zn, zm);
9540
9541 END();
9542
9543 if (CAN_RUN()) {
9544 RUN();
9545
9546 ASSERT_EQUAL_SVE(zd_expected, zd);
9547 }
9548}
9549
9550TEST_SVE(sve_fp_arithmetic_unpredicated_fadd) {
9551 double zn_inputs[] = {24.0,
9552 5.5,
9553 0.0,
9554 3.875,
9555 2.125,
9556 kFP64PositiveInfinity,
9557 kFP64NegativeInfinity};
9558
9559 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
9560
TatWai Chong7a0d3672019-10-23 17:35:18 -07009561 ArithFn fn = &MacroAssembler::Fadd;
TatWai Chongfe536042019-10-23 16:34:11 -07009562
9563 uint16_t expected_h[] = {Float16ToRawbits(Float16(1048.0)),
9564 Float16ToRawbits(Float16(2053.5)),
9565 Float16ToRawbits(Float16(0.1)),
9566 Float16ToRawbits(Float16(-0.875)),
9567 Float16ToRawbits(Float16(14.465)),
9568 Float16ToRawbits(kFP16PositiveInfinity),
9569 Float16ToRawbits(kFP16NegativeInfinity)};
9570
TatWai Chong7a0d3672019-10-23 17:35:18 -07009571 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009572
9573 uint32_t expected_s[] = {FloatToRawbits(1048.0f),
9574 FloatToRawbits(2053.5f),
9575 FloatToRawbits(0.1f),
9576 FloatToRawbits(-0.875f),
9577 FloatToRawbits(14.465f),
9578 FloatToRawbits(kFP32PositiveInfinity),
9579 FloatToRawbits(kFP32NegativeInfinity)};
9580
TatWai Chong7a0d3672019-10-23 17:35:18 -07009581 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009582
9583 uint64_t expected_d[] = {DoubleToRawbits(1048.0),
9584 DoubleToRawbits(2053.5),
9585 DoubleToRawbits(0.1),
9586 DoubleToRawbits(-0.875),
9587 DoubleToRawbits(14.465),
9588 DoubleToRawbits(kFP64PositiveInfinity),
9589 DoubleToRawbits(kFP64NegativeInfinity)};
9590
TatWai Chong7a0d3672019-10-23 17:35:18 -07009591 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009592}
9593
9594TEST_SVE(sve_fp_arithmetic_unpredicated_fsub) {
9595 double zn_inputs[] = {24.0,
9596 5.5,
9597 0.0,
9598 3.875,
9599 2.125,
9600 kFP64PositiveInfinity,
9601 kFP64NegativeInfinity};
9602
9603 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
9604
TatWai Chong7a0d3672019-10-23 17:35:18 -07009605 ArithFn fn = &MacroAssembler::Fsub;
TatWai Chongfe536042019-10-23 16:34:11 -07009606
9607 uint16_t expected_h[] = {Float16ToRawbits(Float16(-1000.0)),
9608 Float16ToRawbits(Float16(-2042.5)),
9609 Float16ToRawbits(Float16(-0.1)),
9610 Float16ToRawbits(Float16(8.625)),
9611 Float16ToRawbits(Float16(-10.215)),
9612 Float16ToRawbits(kFP16PositiveInfinity),
9613 Float16ToRawbits(kFP16NegativeInfinity)};
9614
TatWai Chong7a0d3672019-10-23 17:35:18 -07009615 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009616
9617 uint32_t expected_s[] = {FloatToRawbits(-1000.0),
9618 FloatToRawbits(-2042.5),
9619 FloatToRawbits(-0.1),
9620 FloatToRawbits(8.625),
9621 FloatToRawbits(-10.215),
9622 FloatToRawbits(kFP32PositiveInfinity),
9623 FloatToRawbits(kFP32NegativeInfinity)};
9624
TatWai Chong7a0d3672019-10-23 17:35:18 -07009625 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009626
9627 uint64_t expected_d[] = {DoubleToRawbits(-1000.0),
9628 DoubleToRawbits(-2042.5),
9629 DoubleToRawbits(-0.1),
9630 DoubleToRawbits(8.625),
9631 DoubleToRawbits(-10.215),
9632 DoubleToRawbits(kFP64PositiveInfinity),
9633 DoubleToRawbits(kFP64NegativeInfinity)};
9634
TatWai Chong7a0d3672019-10-23 17:35:18 -07009635 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009636}
9637
9638TEST_SVE(sve_fp_arithmetic_unpredicated_fmul) {
9639 double zn_inputs[] = {24.0,
9640 5.5,
9641 0.0,
9642 3.875,
9643 2.125,
9644 kFP64PositiveInfinity,
9645 kFP64NegativeInfinity};
9646
9647 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
9648
TatWai Chong7a0d3672019-10-23 17:35:18 -07009649 ArithFn fn = &MacroAssembler::Fmul;
TatWai Chongfe536042019-10-23 16:34:11 -07009650
9651 uint16_t expected_h[] = {Float16ToRawbits(Float16(24576.0)),
9652 Float16ToRawbits(Float16(11264.0)),
9653 Float16ToRawbits(Float16(0.0)),
9654 Float16ToRawbits(Float16(-18.4)),
9655 Float16ToRawbits(Float16(26.23)),
9656 Float16ToRawbits(kFP16PositiveInfinity),
9657 Float16ToRawbits(kFP16PositiveInfinity)};
9658
TatWai Chong7a0d3672019-10-23 17:35:18 -07009659 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009660
9661 uint32_t expected_s[] = {FloatToRawbits(24576.0),
9662 FloatToRawbits(11264.0),
9663 FloatToRawbits(0.0),
9664 FloatToRawbits(-18.40625),
9665 FloatToRawbits(26.2225),
9666 FloatToRawbits(kFP32PositiveInfinity),
9667 FloatToRawbits(kFP32PositiveInfinity)};
9668
TatWai Chong7a0d3672019-10-23 17:35:18 -07009669 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009670
9671 uint64_t expected_d[] = {DoubleToRawbits(24576.0),
9672 DoubleToRawbits(11264.0),
9673 DoubleToRawbits(0.0),
9674 DoubleToRawbits(-18.40625),
9675 DoubleToRawbits(26.2225),
9676 DoubleToRawbits(kFP64PositiveInfinity),
9677 DoubleToRawbits(kFP64PositiveInfinity)};
9678
TatWai Chong7a0d3672019-10-23 17:35:18 -07009679 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009680}
9681
TatWai Chong7a0d3672019-10-23 17:35:18 -07009682typedef void (MacroAssembler::*FPArithPredicatedFn)(
9683 const ZRegister& zd,
9684 const PRegisterM& pg,
9685 const ZRegister& zn,
9686 const ZRegister& zm,
9687 FPMacroNaNPropagationOption nan_option);
9688
Martyn Capewell37f28182020-01-14 10:15:10 +00009689typedef void (MacroAssembler::*FPArithPredicatedNoNaNOptFn)(
9690 const ZRegister& zd,
9691 const PRegisterM& pg,
9692 const ZRegister& zn,
9693 const ZRegister& zm);
9694
TatWai Chong7a0d3672019-10-23 17:35:18 -07009695template <typename Ti, typename Te, size_t N>
9696static void FPBinArithHelper(
9697 Test* config,
9698 FPArithPredicatedFn macro,
Martyn Capewell37f28182020-01-14 10:15:10 +00009699 FPArithPredicatedNoNaNOptFn macro_nonan,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009700 unsigned lane_size_in_bits,
9701 const Ti (&zd_inputs)[N],
9702 const int (&pg_inputs)[N],
9703 const Ti (&zn_inputs)[N],
9704 const Ti (&zm_inputs)[N],
9705 const Te (&zd_expected)[N],
9706 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
Martyn Capewell37f28182020-01-14 10:15:10 +00009707 VIXL_ASSERT((macro == NULL) ^ (macro_nonan == NULL));
TatWai Chongd316c5e2019-10-16 12:22:10 -07009708 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9709 START();
9710
TatWai Chong7a0d3672019-10-23 17:35:18 -07009711 // Avoid choosing default scratch registers.
9712 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
9713 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
9714 ZRegister zm = z28.WithLaneSize(lane_size_in_bits);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009715
TatWai Chong7a0d3672019-10-23 17:35:18 -07009716 uint64_t zn_inputs_rawbits[N];
9717 uint64_t zm_inputs_rawbits[N];
9718 uint64_t zd_inputs_rawbits[N];
TatWai Chongd316c5e2019-10-16 12:22:10 -07009719
TatWai Chong7a0d3672019-10-23 17:35:18 -07009720 FPToRawbitsWithSize(zn_inputs, zn_inputs_rawbits, lane_size_in_bits);
9721 FPToRawbitsWithSize(zm_inputs, zm_inputs_rawbits, lane_size_in_bits);
9722 FPToRawbitsWithSize(zd_inputs, zd_inputs_rawbits, lane_size_in_bits);
9723
9724 InsrHelper(&masm, zn, zn_inputs_rawbits);
9725 InsrHelper(&masm, zm, zm_inputs_rawbits);
9726 InsrHelper(&masm, zd, zd_inputs_rawbits);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009727
9728 PRegisterWithLaneSize pg = p0.WithLaneSize(lane_size_in_bits);
9729 Initialise(&masm, pg, pg_inputs);
9730
9731 // `instr` zdn, pg, zdn, zm
9732 ZRegister dn_result = z0.WithLaneSize(lane_size_in_bits);
9733 __ Mov(dn_result, zn);
Martyn Capewell37f28182020-01-14 10:15:10 +00009734 if (macro_nonan == NULL) {
9735 (masm.*macro)(dn_result, pg.Merging(), dn_result, zm, nan_option);
9736 } else {
9737 (masm.*macro_nonan)(dn_result, pg.Merging(), dn_result, zm);
9738 }
TatWai Chongd316c5e2019-10-16 12:22:10 -07009739
9740 // Based on whether the zd and zm registers are aliased, the instruction
9741 // macro (`Instr`) swaps the order of operands if the operation is
9742 // commutative; otherwise it falls back to the reversed form, such as fdivr.
9743 // `instr` zdm, pg, zn, zdm
9744 ZRegister dm_result = z1.WithLaneSize(lane_size_in_bits);
9745 __ Mov(dm_result, zm);
Martyn Capewell37f28182020-01-14 10:15:10 +00009746 if (macro_nonan == NULL) {
9747 (masm.*macro)(dm_result, pg.Merging(), zn, dm_result, nan_option);
9748 } else {
9749 (masm.*macro_nonan)(dm_result, pg.Merging(), zn, dm_result);
9750 }
TatWai Chongd316c5e2019-10-16 12:22:10 -07009751
9752 // The macro of instructions (`Instr`) automatically selects between `instr`
9753 // and movprfx + `instr` based on whether zd and zn registers are aliased.
9754 // A generated movprfx instruction is predicated, using the same governing
9755 // predicate register, so to keep the inactive lanes of the result
9756 // deterministic, initialize the destination register first.
9757 // `instr` zd, pg, zn, zm
9758 ZRegister d_result = z2.WithLaneSize(lane_size_in_bits);
9759 __ Mov(d_result, zd);
Martyn Capewell37f28182020-01-14 10:15:10 +00009760 if (macro_nonan == NULL) {
9761 (masm.*macro)(d_result, pg.Merging(), zn, zm, nan_option);
9762 } else {
9763 (masm.*macro_nonan)(d_result, pg.Merging(), zn, zm);
9764 }
TatWai Chongd316c5e2019-10-16 12:22:10 -07009765
9766 END();
9767
9768 if (CAN_RUN()) {
9769 RUN();
9770
9771 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
9772 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
9773 if (!core.HasSVELane(dn_result, lane)) break;
9774 if ((pg_inputs[i] & 1) != 0) {
9775 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dn_result, lane);
9776 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009777 ASSERT_EQUAL_SVE_LANE(zn_inputs_rawbits[i], dn_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009778 }
9779 }
9780
9781 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
9782 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
9783 if (!core.HasSVELane(dm_result, lane)) break;
9784 if ((pg_inputs[i] & 1) != 0) {
9785 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dm_result, lane);
9786 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009787 ASSERT_EQUAL_SVE_LANE(zm_inputs_rawbits[i], dm_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009788 }
9789 }
9790
9791 ASSERT_EQUAL_SVE(zd_expected, d_result);
9792 }
9793}
9794
9795TEST_SVE(sve_binary_arithmetic_predicated_fdiv) {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009796 // The inputs are shared with different precision tests.
TatWai Chongd316c5e2019-10-16 12:22:10 -07009797 double zd_in[] = {0.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
9798
9799 double zn_in[] = {24.0,
9800 24.0,
9801 -2.0,
9802 -2.0,
9803 5.5,
9804 5.5,
9805 kFP64PositiveInfinity,
9806 kFP64PositiveInfinity,
9807 kFP64NegativeInfinity,
9808 kFP64NegativeInfinity};
9809
9810 double zm_in[] = {-2.0, -2.0, 24.0, 24.0, 0.5, 0.5, 0.65, 0.65, 24.0, 24.0};
9811
TatWai Chongd316c5e2019-10-16 12:22:10 -07009812 int pg_in[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
9813
TatWai Chong7a0d3672019-10-23 17:35:18 -07009814 uint16_t exp_h[] = {Float16ToRawbits(Float16(0.1)),
TatWai Chongd316c5e2019-10-16 12:22:10 -07009815 Float16ToRawbits(Float16(-12.0)),
9816 Float16ToRawbits(Float16(2.2)),
9817 Float16ToRawbits(Float16(-0.0833)),
9818 Float16ToRawbits(Float16(4.4)),
9819 Float16ToRawbits(Float16(11.0)),
9820 Float16ToRawbits(Float16(6.6)),
9821 Float16ToRawbits(kFP16PositiveInfinity),
9822 Float16ToRawbits(Float16(8.8)),
9823 Float16ToRawbits(kFP16NegativeInfinity)};
9824
TatWai Chong7a0d3672019-10-23 17:35:18 -07009825 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +00009826 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009827 &MacroAssembler::Fdiv,
9828 kHRegSize,
9829 zd_in,
9830 pg_in,
9831 zn_in,
9832 zm_in,
9833 exp_h);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009834
9835 uint32_t exp_s[] = {FloatToRawbits(0.1),
9836 FloatToRawbits(-12.0),
9837 FloatToRawbits(2.2),
9838 0xbdaaaaab,
9839 FloatToRawbits(4.4),
9840 FloatToRawbits(11.0),
9841 FloatToRawbits(6.6),
9842 FloatToRawbits(kFP32PositiveInfinity),
9843 FloatToRawbits(8.8),
9844 FloatToRawbits(kFP32NegativeInfinity)};
9845
TatWai Chong7a0d3672019-10-23 17:35:18 -07009846 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +00009847 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009848 &MacroAssembler::Fdiv,
9849 kSRegSize,
9850 zd_in,
9851 pg_in,
9852 zn_in,
9853 zm_in,
9854 exp_s);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009855
9856 uint64_t exp_d[] = {DoubleToRawbits(0.1),
9857 DoubleToRawbits(-12.0),
9858 DoubleToRawbits(2.2),
9859 0xbfb5555555555555,
9860 DoubleToRawbits(4.4),
9861 DoubleToRawbits(11.0),
9862 DoubleToRawbits(6.6),
9863 DoubleToRawbits(kFP64PositiveInfinity),
9864 DoubleToRawbits(8.8),
9865 DoubleToRawbits(kFP64NegativeInfinity)};
9866
TatWai Chong7a0d3672019-10-23 17:35:18 -07009867 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +00009868 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009869 &MacroAssembler::Fdiv,
9870 kDRegSize,
9871 zd_in,
9872 pg_in,
9873 zn_in,
9874 zm_in,
9875 exp_d);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009876}
9877
Martyn Capewell9cc3f142019-10-29 14:06:35 +00009878TEST_SVE(sve_select) {
9879 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9880 START();
9881
9882 uint64_t in0[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
9883 uint64_t in1[] = {0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa};
9884
9885 // For simplicity, we re-use the same pg for various lane sizes.
9886 // For D lanes: 1, 1, 0
9887 // For S lanes: 1, 1, 1, 0, 0
9888 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9889 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9890 Initialise(&masm, p0.VnB(), pg_in);
9891 PRegisterM pg = p0.Merging();
9892
9893 InsrHelper(&masm, z30.VnD(), in0);
9894 InsrHelper(&masm, z31.VnD(), in1);
9895
9896 __ Sel(z0.VnB(), pg, z30.VnB(), z31.VnB());
9897 __ Sel(z1.VnH(), pg, z30.VnH(), z31.VnH());
9898 __ Sel(z2.VnS(), pg, z30.VnS(), z31.VnS());
9899 __ Sel(z3.VnD(), pg, z30.VnD(), z31.VnD());
9900
9901 END();
9902
9903 if (CAN_RUN()) {
9904 RUN();
9905
9906 uint64_t expected_z0[] = {0xaaaaaaaa05aa07f8,
9907 0xfeaaaaf0aac3870f,
9908 0xaaaa56aa9abcdeaa};
9909 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
9910
9911 uint64_t expected_z1[] = {0xaaaaaaaaaaaa07f8,
9912 0xaaaaf8f0e1c3870f,
9913 0xaaaaaaaa9abcaaaa};
9914 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
9915
9916 uint64_t expected_z2[] = {0xaaaaaaaa05f607f8,
9917 0xfefcf8f0e1c3870f,
9918 0xaaaaaaaaaaaaaaaa};
9919 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
9920
9921 uint64_t expected_z3[] = {0x01f203f405f607f8,
9922 0xfefcf8f0e1c3870f,
9923 0xaaaaaaaaaaaaaaaa};
9924 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
9925 }
9926}
TatWai Chongd316c5e2019-10-16 12:22:10 -07009927
TatWai Chong7a0d3672019-10-23 17:35:18 -07009928TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_h) {
9929 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
9930 double zn_inputs[] = {-2.1,
9931 8.5,
9932 225.5,
9933 0.0,
9934 8.8,
9935 -4.75,
9936 kFP64PositiveInfinity,
9937 kFP64NegativeInfinity};
9938 double zm_inputs[] = {-2.0,
9939 -13.0,
9940 24.0,
9941 0.01,
9942 0.5,
9943 300.75,
9944 kFP64NegativeInfinity,
9945 kFP64PositiveInfinity};
9946 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
9947
9948 uint16_t zd_expected_max[] = {Float16ToRawbits(Float16(-2.0)),
9949 Float16ToRawbits(Float16(8.5)),
9950 Float16ToRawbits(Float16(3.3)),
9951 Float16ToRawbits(Float16(0.01)),
9952 Float16ToRawbits(Float16(5.5)),
9953 Float16ToRawbits(Float16(300.75)),
9954 Float16ToRawbits(kFP16PositiveInfinity),
9955 Float16ToRawbits(kFP16PositiveInfinity)};
9956 FPBinArithHelper(config,
9957 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +00009958 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009959 kHRegSize,
9960 zd_inputs,
9961 pg_inputs,
9962 zn_inputs,
9963 zm_inputs,
9964 zd_expected_max);
9965
9966 uint16_t zd_expected_min[] = {Float16ToRawbits(Float16(-2.1)),
9967 Float16ToRawbits(Float16(-13.0)),
9968 Float16ToRawbits(Float16(3.3)),
9969 Float16ToRawbits(Float16(0.0)),
9970 Float16ToRawbits(Float16(5.5)),
9971 Float16ToRawbits(Float16(-4.75)),
9972 Float16ToRawbits(kFP16NegativeInfinity),
9973 Float16ToRawbits(kFP16NegativeInfinity)};
9974 FPBinArithHelper(config,
9975 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +00009976 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009977 kHRegSize,
9978 zd_inputs,
9979 pg_inputs,
9980 zn_inputs,
9981 zm_inputs,
9982 zd_expected_min);
9983}
9984
9985TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_s) {
9986 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
9987 double zn_inputs[] = {-2.1,
9988 8.5,
9989 225.5,
9990 0.0,
9991 8.8,
9992 -4.75,
9993 kFP64PositiveInfinity,
9994 kFP64NegativeInfinity};
9995 double zm_inputs[] = {-2.0,
9996 -13.0,
9997 24.0,
9998 0.01,
9999 0.5,
10000 300.75,
10001 kFP64NegativeInfinity,
10002 kFP64PositiveInfinity};
10003 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
10004
10005 uint32_t zd_expected_max[] = {FloatToRawbits(-2.0),
10006 FloatToRawbits(8.5),
10007 FloatToRawbits(3.3),
10008 FloatToRawbits(0.01),
10009 FloatToRawbits(5.5),
10010 FloatToRawbits(300.75),
10011 FloatToRawbits(kFP32PositiveInfinity),
10012 FloatToRawbits(kFP32PositiveInfinity)};
10013 FPBinArithHelper(config,
10014 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000010015 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010016 kSRegSize,
10017 zd_inputs,
10018 pg_inputs,
10019 zn_inputs,
10020 zm_inputs,
10021 zd_expected_max);
10022
10023 uint32_t zd_expected_min[] = {FloatToRawbits(-2.1),
10024 FloatToRawbits(-13.0),
10025 FloatToRawbits(3.3),
10026 FloatToRawbits(0.0),
10027 FloatToRawbits(5.5),
10028 FloatToRawbits(-4.75),
10029 FloatToRawbits(kFP32NegativeInfinity),
10030 FloatToRawbits(kFP32NegativeInfinity)};
10031 FPBinArithHelper(config,
10032 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000010033 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010034 kSRegSize,
10035 zd_inputs,
10036 pg_inputs,
10037 zn_inputs,
10038 zm_inputs,
10039 zd_expected_min);
10040}
10041
10042TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_d) {
10043 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
10044 double zn_inputs[] = {-2.1,
10045 8.5,
10046 225.5,
10047 0.0,
10048 8.8,
10049 -4.75,
10050 kFP64PositiveInfinity,
10051 kFP64NegativeInfinity};
10052 double zm_inputs[] = {-2.0,
10053 -13.0,
10054 24.0,
10055 0.01,
10056 0.5,
10057 300.75,
10058 kFP64NegativeInfinity,
10059 kFP64PositiveInfinity};
10060 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
10061
10062 uint64_t zd_expected_max[] = {DoubleToRawbits(-2.0),
10063 DoubleToRawbits(8.5),
10064 DoubleToRawbits(3.3),
10065 DoubleToRawbits(0.01),
10066 DoubleToRawbits(5.5),
10067 DoubleToRawbits(300.75),
10068 DoubleToRawbits(kFP64PositiveInfinity),
10069 DoubleToRawbits(kFP64PositiveInfinity)};
10070 FPBinArithHelper(config,
10071 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000010072 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010073 kDRegSize,
10074 zd_inputs,
10075 pg_inputs,
10076 zn_inputs,
10077 zm_inputs,
10078 zd_expected_max);
10079
10080 uint64_t zd_expected_min[] = {DoubleToRawbits(-2.1),
10081 DoubleToRawbits(-13.0),
10082 DoubleToRawbits(3.3),
10083 DoubleToRawbits(0.0),
10084 DoubleToRawbits(5.5),
10085 DoubleToRawbits(-4.75),
10086 DoubleToRawbits(kFP64NegativeInfinity),
10087 DoubleToRawbits(kFP64NegativeInfinity)};
10088 FPBinArithHelper(config,
10089 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000010090 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010091 kDRegSize,
10092 zd_inputs,
10093 pg_inputs,
10094 zn_inputs,
10095 zm_inputs,
10096 zd_expected_min);
10097}
TatWai Chong29a0c432019-11-06 22:20:44 -080010098
10099template <typename T, size_t N>
10100static void BitwiseShiftImmHelper(Test* config,
10101 int lane_size_in_bits,
10102 const T (&zn_inputs)[N],
10103 int shift) {
10104 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10105 START();
10106
10107 ZRegister zd_asr = z25.WithLaneSize(lane_size_in_bits);
10108 ZRegister zd_lsr = z26.WithLaneSize(lane_size_in_bits);
10109 ZRegister zd_lsl = z27.WithLaneSize(lane_size_in_bits);
10110 ZRegister zn = z28.WithLaneSize(lane_size_in_bits);
10111
10112 InsrHelper(&masm, zn, zn_inputs);
10113
10114 __ Asr(zd_asr, zn, shift);
10115 __ Lsr(zd_lsr, zn, shift);
10116 __ Lsl(zd_lsl, zn, shift);
10117
10118 END();
10119
10120 if (CAN_RUN()) {
10121 RUN();
10122
10123 const uint64_t mask = GetUintMask(lane_size_in_bits);
10124 for (int i = 0; i < static_cast<int>(N); i++) {
10125 int lane = N - i - 1;
10126 if (!core.HasSVELane(zd_asr, lane)) break;
10127 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
10128 uint64_t result;
10129 if (shift >= lane_size_in_bits) {
10130 result = is_negative ? mask : 0;
10131 } else {
10132 result = zn_inputs[i] >> shift;
10133 if (is_negative) {
10134 result |= mask << (lane_size_in_bits - shift);
10135 result &= mask;
10136 }
10137 }
10138 ASSERT_EQUAL_SVE_LANE(result, zd_asr, lane);
10139 }
10140
10141 for (int i = 0; i < static_cast<int>(N); i++) {
10142 int lane = N - i - 1;
10143 if (!core.HasSVELane(zd_lsr, lane)) break;
10144 uint64_t result =
10145 (shift >= lane_size_in_bits) ? 0 : zn_inputs[i] >> shift;
10146 ASSERT_EQUAL_SVE_LANE(result, zd_lsr, lane);
10147 }
10148
10149 for (int i = 0; i < static_cast<int>(N); i++) {
10150 int lane = N - i - 1;
10151 if (!core.HasSVELane(zd_lsl, lane)) break;
10152 uint64_t result = (shift >= lane_size_in_bits) ? 0 : zn_inputs[i]
10153 << shift;
10154 ASSERT_EQUAL_SVE_LANE(result & mask, zd_lsl, lane);
10155 }
10156 }
10157}
10158
10159TEST_SVE(sve_bitwise_shift_imm_unpredicated) {
10160 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10161 int shift_b[] = {1, 3, 5, 8};
10162 for (size_t i = 0; i < ArrayLength(shift_b); i++) {
10163 BitwiseShiftImmHelper(config, kBRegSize, inputs_b, shift_b[i]);
10164 }
10165
10166 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233};
10167 int shift_h[] = {1, 8, 11, 16};
10168 for (size_t i = 0; i < ArrayLength(shift_h); i++) {
10169 BitwiseShiftImmHelper(config, kHRegSize, inputs_h, shift_h[i]);
10170 }
10171
10172 uint64_t inputs_s[] = {0xfedcba98, 0xfffa55aa, 0x00112233};
10173 int shift_s[] = {1, 9, 17, 32};
10174 for (size_t i = 0; i < ArrayLength(shift_s); i++) {
10175 BitwiseShiftImmHelper(config, kSRegSize, inputs_s, shift_s[i]);
10176 }
10177
10178 uint64_t inputs_d[] = {0xfedcba98fedcba98,
10179 0xfffa5555aaaaaaaa,
10180 0x0011223344aafe80};
10181 int shift_d[] = {1, 23, 45, 64};
10182 for (size_t i = 0; i < ArrayLength(shift_d); i++) {
10183 BitwiseShiftImmHelper(config, kDRegSize, inputs_d, shift_d[i]);
10184 }
10185}
10186
10187template <typename T, typename R, size_t N>
10188static void BitwiseShiftWideElementsHelper(Test* config,
10189 Shift shift_type,
10190 int lane_size_in_bits,
10191 const T (&zn_inputs)[N],
10192 const R& zm_inputs,
10193 const T (&zd_expected)[N]) {
10194 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10195 START();
10196
10197 ArithFn macro;
10198 // Since logical shift left and right by the current lane size width is equal
10199 // to 0, so initialize the array to 0 for convenience.
10200 uint64_t zd_expected_max_shift_amount[N] = {0};
10201 switch (shift_type) {
10202 case ASR: {
10203 macro = &MacroAssembler::Asr;
10204 uint64_t mask = GetUintMask(lane_size_in_bits);
10205 for (size_t i = 0; i < ArrayLength(zn_inputs); i++) {
10206 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
10207 zd_expected_max_shift_amount[i] = is_negative ? mask : 0;
10208 }
10209 break;
10210 }
10211 case LSR:
10212 macro = &MacroAssembler::Lsr;
10213 break;
10214 case LSL:
10215 macro = &MacroAssembler::Lsl;
10216 break;
10217 default:
10218 VIXL_UNIMPLEMENTED();
10219 macro = NULL;
10220 break;
10221 }
10222
10223 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
10224 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
10225 ZRegister zm = z28.WithLaneSize(kDRegSize);
10226
10227 InsrHelper(&masm, zn, zn_inputs);
10228 InsrHelper(&masm, zm, zm_inputs);
10229
10230 (masm.*macro)(zd, zn, zm);
10231
10232 ZRegister zm_max_shift_amount = z25.WithLaneSize(kDRegSize);
10233 ZRegister zd_max_shift_amount = z24.WithLaneSize(lane_size_in_bits);
10234
10235 __ Dup(zm_max_shift_amount, lane_size_in_bits);
10236 (masm.*macro)(zd_max_shift_amount, zn, zm_max_shift_amount);
10237
10238 ZRegister zm_out_of_range = z23.WithLaneSize(kDRegSize);
10239 ZRegister zd_out_of_range = z22.WithLaneSize(lane_size_in_bits);
10240
10241 __ Dup(zm_out_of_range, GetUintMask(lane_size_in_bits));
10242 (masm.*macro)(zd_out_of_range, zn, zm_out_of_range);
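  // Shift amounts of the lane size or more all produce the same result:
  // sign-fill for ASR and 0 for LSR and LSL. The two extra runs above use
  // exactly the lane size and the largest (all-ones) amount, and are expected
  // to match each other.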
10243
10244 END();
10245
10246 if (CAN_RUN()) {
10247 RUN();
10248
10249 ASSERT_EQUAL_SVE(zd_expected, zd);
10250 ASSERT_EQUAL_SVE(zd_expected_max_shift_amount, zd_max_shift_amount);
10251 ASSERT_EQUAL_SVE(zd_max_shift_amount, zd_out_of_range);
10252 }
10253}
10254
10255TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_asr) {
10256 // clang-format off
10257 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10258 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10259 int shift_b[] = {1, 3};
10260 uint64_t expected_b[] = {0xff, 0xee, 0xdd, 0xcc, 0xff, 0x2a, 0xd5, 0xc0,
10261 0xff, 0xfb, 0xf7, 0xf3, 0xff, 0x0a, 0xf5, 0xf0};
10262 BitwiseShiftWideElementsHelper(config,
10263 ASR,
10264 kBRegSize,
10265 inputs_b,
10266 shift_b,
10267 expected_b);
10268
10269 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10270 0xfedc, 0xfa55, 0x0011, 0x2233,
10271 0xfedc, 0xfa55, 0x0011, 0x2233};
10272 int shift_h[] = {1, 8, 11};
10273 uint64_t expected_h[] = {0xff6e, 0xfd2a, 0x0008, 0x1119,
10274 0xfffe, 0xfffa, 0x0000, 0x0022,
10275 0xffff, 0xffff, 0x0000, 0x0004};
10276 BitwiseShiftWideElementsHelper(config,
10277 ASR,
10278 kHRegSize,
10279 inputs_h,
10280 shift_h,
10281 expected_h);
10282
10283 uint64_t inputs_s[] =
10284 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10285 int shift_s[] = {1, 9, 23};
10286 uint64_t expected_s[] =
10287 {0xff6e5d4c, 0xfffd2ad5, 0x00000891, 0x000091a2, 0xffffff55, 0xffffff11};
10288 BitwiseShiftWideElementsHelper(config,
10289 ASR,
10290 kSRegSize,
10291 inputs_s,
10292 shift_s,
10293 expected_s);
10294 // clang-format on
10295}
10296
10297TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsr) {
10298 // clang-format off
10299 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10300 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10301 int shift_b[] = {1, 3};
10302 uint64_t expected_b[] = {0x7f, 0x6e, 0x5d, 0x4c, 0x7f, 0x2a, 0x55, 0x40,
10303 0x1f, 0x1b, 0x17, 0x13, 0x1f, 0x0a, 0x15, 0x10};
10304
10305 BitwiseShiftWideElementsHelper(config,
10306 LSR,
10307 kBRegSize,
10308 inputs_b,
10309 shift_b,
10310 expected_b);
10311
10312 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10313 0xfedc, 0xfa55, 0x0011, 0x2233,
10314 0xfedc, 0xfa55, 0x0011, 0x2233};
10315 int shift_h[] = {1, 8, 11};
10316 uint64_t expected_h[] = {0x7f6e, 0x7d2a, 0x0008, 0x1119,
10317 0x00fe, 0x00fa, 0x0000, 0x0022,
10318 0x001f, 0x001f, 0x0000, 0x0004};
10319 BitwiseShiftWideElementsHelper(config,
10320 LSR,
10321 kHRegSize,
10322 inputs_h,
10323 shift_h,
10324 expected_h);
10325
10326 uint64_t inputs_s[] =
10327 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10328 int shift_s[] = {1, 9, 23};
10329 uint64_t expected_s[] =
10330 {0x7f6e5d4c, 0x7ffd2ad5, 0x00000891, 0x000091a2, 0x00000155, 0x00000111};
10331 BitwiseShiftWideElementsHelper(config,
10332 LSR,
10333 kSRegSize,
10334 inputs_s,
10335 shift_s,
10336 expected_s);
10337 // clang-format on
10338}
10339
10340TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsl) {
10341 // clang-format off
10342 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10343 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10344 int shift_b[] = {1, 5};
10345
10346 uint64_t expected_b[] = {0xfc, 0xb8, 0x74, 0x30, 0xfe, 0xaa, 0x54, 0x00,
10347 0xc0, 0x80, 0x40, 0x00, 0xe0, 0xa0, 0x40, 0x00};
10348
10349 BitwiseShiftWideElementsHelper(config,
10350 LSL,
10351 kBRegSize,
10352 inputs_b,
10353 shift_b,
10354 expected_b);
10355 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10356 0xfedc, 0xfa55, 0x0011, 0x2233,
10357 0xfedc, 0xfa55, 0x0011, 0x2233};
10358 int shift_h[] = {1, 2, 14};
10359
10360 uint64_t expected_h[] = {0xfdb8, 0xf4aa, 0x0022, 0x4466,
10361 0xfb70, 0xe954, 0x0044, 0x88cc,
10362 0x0000, 0x4000, 0x4000, 0xc000};
10363 BitwiseShiftWideElementsHelper(config,
10364 LSL,
10365 kHRegSize,
10366 inputs_h,
10367 shift_h,
10368 expected_h);
10369 uint64_t inputs_s[] =
10370 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10371 int shift_s[] = {1, 19, 26};
10372 uint64_t expected_s[] =
10373 {0xfdb97530, 0xfff4ab54, 0x11980000, 0x2b380000, 0xa8000000, 0x20000000};
10374 BitwiseShiftWideElementsHelper(config,
10375 LSL,
10376 kSRegSize,
10377 inputs_s,
10378 shift_s,
10379 expected_s);
10380 // clang-format on
10381}
10382
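// Setffr sets every bit of the first-fault register (FFR) to true, and
// Rdffr copies the FFR into a predicate register, so p14 should match the
// all-true predicate held in p15.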
10383TEST_SVE(sve_setffr) {
10384 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10385 START();
10386
10387 __ Ptrue(p15.VnB());
10388 __ Setffr();
10389 __ Rdffr(p14.VnB());
10390
10391 END();
10392
10393 if (CAN_RUN()) {
10394 RUN();
10395
10396 ASSERT_EQUAL_SVE(p14.VnB(), p15.VnB());
10397 }
10398}
10399
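// Wrffr copies a predicate into the FFR, and the unpredicated Rdffr reads
// it back unchanged. The helper writes a prefix-style mask (the lowest
// `active_lanes` lanes set), the shape that first-fault loads naturally
// leave in the FFR.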
10400static void WrffrHelper(Test* config, unsigned active_lanes) {
10401 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10402 START();
10403
10404 int inputs[kPRegMaxSize] = {0};
10405 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
10406 for (unsigned i = 0; i < active_lanes; i++) {
10407 // The rightmost (highest-indexed) array element maps to the lowest-numbered
10408 // lane.
10409 inputs[kPRegMaxSize - i - 1] = 1;
10410 }
10411
10412 Initialise(&masm, p1.VnB(), inputs);
10413 __ Wrffr(p1.VnB());
10414 __ Rdffr(p2.VnB());
10415
10416 END();
10417
10418 if (CAN_RUN()) {
10419 RUN();
10420
10421 ASSERT_EQUAL_SVE(p1.VnB(), p2.VnB());
10422 }
10423}
10424
10425TEST_SVE(sve_wrffr) {
10426 int active_lanes_inputs[] = {0, 1, 7, 10, 32, 48, kPRegMaxSize};
10427 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
10428 WrffrHelper(config, active_lanes_inputs[i]);
10429 }
10430}
10431
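// This helper checks the predicated (zeroing) forms: Rdffr ANDs the FFR
// with the governing predicate, and Rdffrs additionally sets NZCV from the
// usual SVE predicate test of the result.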
10432template <size_t N>
10433static void RdffrHelper(Test* config,
10434 size_t active_lanes,
10435 const int (&pg_inputs)[N]) {
10436 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10437 START();
10438
10439 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
10440
10441 // The rightmost (highest-indexed) array element maps to the lowest-numbered
10442 // lane.
10443 int pd[kPRegMaxSize] = {0};
10444 for (unsigned i = 0; i < active_lanes; i++) {
10445 pd[kPRegMaxSize - i - 1] = 1;
10446 }
10447
10448 int pg[kPRegMaxSize] = {0};
10449 for (unsigned i = 0; i < N; i++) {
10450 pg[kPRegMaxSize - i - 1] = pg_inputs[i];
10451 }
10452
10453 int pd_expected[kPRegMaxSize] = {0};
10454 for (unsigned i = 0; i < std::min(active_lanes, N); i++) {
10455 int lane = kPRegMaxSize - i - 1;
10456 pd_expected[lane] = pd[lane] & pg[lane];
10457 }
10458
10459 Initialise(&masm, p0.VnB(), pg);
10460 Initialise(&masm, p1.VnB(), pd);
10461
10462 // The unpredicated form of rdffr has been tested in `WrffrHelper`.
10463 __ Wrffr(p1.VnB());
10464 __ Rdffr(p14.VnB(), p0.Zeroing());
10465 __ Rdffrs(p13.VnB(), p0.Zeroing());
10466 __ Mrs(x8, NZCV);
10467
10468 END();
10469
10470 if (CAN_RUN()) {
10471 RUN();
10472
10473 ASSERT_EQUAL_SVE(pd_expected, p14.VnB());
10474 ASSERT_EQUAL_SVE(pd_expected, p13.VnB());
10475 StatusFlags nzcv_expected =
10476 GetPredTestFlags(pd_expected, pg, core.GetSVELaneCount(kBRegSize));
10477 ASSERT_EQUAL_64(nzcv_expected, x8);
10478 }
10479}
10480
10481TEST_SVE(sve_rdffr_rdffrs) {
10482 // clang-format off
10483 int active_lanes_inputs[] = {0, 1, 15, 26, 39, 47, kPRegMaxSize};
10484 int pg_inputs_0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10485 int pg_inputs_1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
10486 int pg_inputs_2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
10487 int pg_inputs_3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
10488 int pg_inputs_4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10489 // clang-format on
10490
10491 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
10492 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_0);
10493 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_1);
10494 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_2);
10495 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_3);
10496 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_4);
10497 }
10498}
10499
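// Brkpa and Brkpb compute a "break" mask from a pair of predicates: if the
// last active element of pn is true, active result elements are true up to
// the first active true element of pm (inclusive for Brkpa, exclusive for
// Brkpb) and false afterwards; otherwise the result is all-false. The "s"
// forms also set NZCV from a predicate test of the result.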
10500typedef void (MacroAssembler::*BrkpFn)(const PRegisterWithLaneSize& pd,
10501 const PRegisterZ& pg,
10502 const PRegisterWithLaneSize& pn,
10503 const PRegisterWithLaneSize& pm);
10504
10505template <typename Tg, typename Tn, typename Td>
10506static void BrkpaBrkpbHelper(Test* config,
10507 BrkpFn macro,
10508 BrkpFn macro_set_flags,
10509 const Tg& pg_inputs,
10510 const Tn& pn_inputs,
10511 const Tn& pm_inputs,
10512 const Td& pd_expected) {
10513 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10514 START();
10515
10516 PRegister pg = p15;
10517 PRegister pn = p14;
10518 PRegister pm = p13;
10519 Initialise(&masm, pg.VnB(), pg_inputs);
10520 Initialise(&masm, pn.VnB(), pn_inputs);
10521 Initialise(&masm, pm.VnB(), pm_inputs);
10522
10523 // Initialise NZCV to an impossible value, to check that we actually write it.
10524 __ Mov(x10, NZCVFlag);
10525 __ Msr(NZCV, x10);
10526
10527 (masm.*macro_set_flags)(p0.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
10528 __ Mrs(x0, NZCV);
10529
10530 (masm.*macro)(p1.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
10531
10532 END();
10533
10534 if (CAN_RUN()) {
10535 RUN();
10536
10537 ASSERT_EQUAL_SVE(pd_expected, p0.VnB());
10538
10539 // Check that the flags were properly set.
10540 StatusFlags nzcv_expected =
10541 GetPredTestFlags(pd_expected,
10542 pg_inputs,
10543 core.GetSVELaneCount(kBRegSize));
10544 ASSERT_EQUAL_64(nzcv_expected, x0);
10545 ASSERT_EQUAL_SVE(p0.VnB(), p1.VnB());
10546 }
10547}
10548
10549template <typename Tg, typename Tn, typename Td>
10550static void BrkpaHelper(Test* config,
10551 const Tg& pg_inputs,
10552 const Tn& pn_inputs,
10553 const Tn& pm_inputs,
10554 const Td& pd_expected) {
10555 BrkpaBrkpbHelper(config,
10556 &MacroAssembler::Brkpa,
10557 &MacroAssembler::Brkpas,
10558 pg_inputs,
10559 pn_inputs,
10560 pm_inputs,
10561 pd_expected);
10562}
10563
10564template <typename Tg, typename Tn, typename Td>
10565static void BrkpbHelper(Test* config,
10566 const Tg& pg_inputs,
10567 const Tn& pn_inputs,
10568 const Tn& pm_inputs,
10569 const Td& pd_expected) {
10570 BrkpaBrkpbHelper(config,
10571 &MacroAssembler::Brkpb,
10572 &MacroAssembler::Brkpbs,
10573 pg_inputs,
10574 pn_inputs,
10575 pm_inputs,
10576 pd_expected);
10577}
10578
10579TEST_SVE(sve_brkpb) {
10580 // clang-format off
10581 // The last active element of each `pn` input is `true` in all vector length configurations.
10582 // | boundary of 128-bits VL.
10583 // v
10584 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
10585 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10586 int pg_3[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10587
10588 // | highest-numbered lane lowest-numbered lane |
10589 // v v
10590 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
10591 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
10592 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1};
10593
10594 int pm_1[] = {1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
10595 int pm_2[] = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10596 int pm_3[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
10597
10598 // | first active
10599 // v
10600 int exp_1_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
10601 // | first active
10602 // v
10603 int exp_1_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
10604 // | first active
10605 // v
10606 int exp_1_3_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
10607
10608 BrkpbHelper(config, pg_1, pn_1, pm_1, exp_1_1_1);
10609 BrkpbHelper(config, pg_1, pn_2, pm_2, exp_1_2_2);
10610 BrkpbHelper(config, pg_1, pn_3, pm_3, exp_1_3_3);
10611
10612 // | first active
10613 // v
10614 int exp_2_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10615 // | first active
10616 // v
10617 int exp_2_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10618 // | first active
10619 // v
10620 int exp_2_3_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
10621 BrkpbHelper(config, pg_2, pn_1, pm_2, exp_2_1_2);
10622 BrkpbHelper(config, pg_2, pn_2, pm_3, exp_2_2_3);
10623 BrkpbHelper(config, pg_2, pn_3, pm_1, exp_2_3_1);
10624
10625 // | first active
10626 // v
10627 int exp_3_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
10628 // | first active
10629 // v
10630 int exp_3_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
10631 // | first active
10632 // v
10633 int exp_3_3_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10634 BrkpbHelper(config, pg_3, pn_1, pm_3, exp_3_1_3);
10635 BrkpbHelper(config, pg_3, pn_2, pm_1, exp_3_2_1);
10636 BrkpbHelper(config, pg_3, pn_3, pm_2, exp_3_3_2);
10637
10638 // The last active element of each `pn` input is `false` in all vector length configurations.
10639 // | last active lane when VL > 128 bits.
10640 // v
10641 // | last active lane when VL == 128 bits.
10642 // v
10643 int pg_4[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10644 int exp_4_x_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10645 BrkpbHelper(config, pg_4, pn_1, pm_1, exp_4_x_x);
10646 BrkpbHelper(config, pg_4, pn_2, pm_2, exp_4_x_x);
10647 BrkpbHelper(config, pg_4, pn_3, pm_3, exp_4_x_x);
10648 // clang-format on
10649}
10650
10651TEST_SVE(sve_brkpa) {
10652 // clang-format off
10653 // The last active element of each `pn` input is `true` in all vector length configurations.
10654 // | boundary of 128-bits VL.
10655 // v
10656 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
10657 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10658 int pg_3[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10659
10660 // | highest-numbered lane lowest-numbered lane |
10661 // v v
10662 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
10663 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
10664 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1};
10665
10666 int pm_1[] = {1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
10667 int pm_2[] = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10668 int pm_3[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
10669
10670 // | first active
10671 // v
10672 int exp_1_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
10673 // | first active
10674 // v
10675 int exp_1_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
10676 // | first active
10677 // v
10678 int exp_1_3_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
10679
10680 BrkpaHelper(config, pg_1, pn_1, pm_1, exp_1_1_1);
10681 BrkpaHelper(config, pg_1, pn_2, pm_2, exp_1_2_2);
10682 BrkpaHelper(config, pg_1, pn_3, pm_3, exp_1_3_3);
10683
10684 // | first active
10685 // v
10686 int exp_2_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10687 // | first active
10688 // v
10689 int exp_2_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10690 // | first active
10691 // v
10692 int exp_2_3_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1};
10693 BrkpaHelper(config, pg_2, pn_1, pm_2, exp_2_1_2);
10694 BrkpaHelper(config, pg_2, pn_2, pm_3, exp_2_2_3);
10695 BrkpaHelper(config, pg_2, pn_3, pm_1, exp_2_3_1);
10696
10697 // | first active
10698 // v
10699 int exp_3_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
10700 // | first active
10701 // v
10702 int exp_3_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
10703 // | first active
10704 // v
10705 int exp_3_3_2[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10706 BrkpaHelper(config, pg_3, pn_1, pm_3, exp_3_1_3);
10707 BrkpaHelper(config, pg_3, pn_2, pm_1, exp_3_2_1);
10708 BrkpaHelper(config, pg_3, pn_3, pm_2, exp_3_3_2);
10709
10710 // The last active element of each `pn` input is `false` in all vector length configurations.
10711 // | last active lane when VL > 128 bits.
10712 // v
10713 // | last active lane when VL == 128 bits.
10714 // v
10715 int pg_4[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10716 int exp_4_x_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10717 BrkpaHelper(config, pg_4, pn_1, pm_1, exp_4_x_x);
10718 BrkpaHelper(config, pg_4, pn_2, pm_2, exp_4_x_x);
10719 BrkpaHelper(config, pg_4, pn_3, pm_3, exp_4_x_x);
10720 // clang-format on
10721}
10722
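// Rbit reverses the bit order within each element. Applying it twice is
// the identity, which is why z0 is compared against the unmodified inputs;
// under merging predication, inactive lanes keep the 0x42 fill value.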
10723TEST_SVE(sve_rbit) {
10724 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10725 START();
10726
10727 uint64_t inputs[] = {0xaaaaaaaa55555555, 0xaaaa5555aa55aa55};
10728 InsrHelper(&masm, z0.VnD(), inputs);
10729
10730 __ Ptrue(p1.VnB());
10731 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
10732 Initialise(&masm, p2.VnB(), pred);
10733
10734 __ Rbit(z0.VnB(), p1.Merging(), z0.VnB());
10735 __ Rbit(z0.VnB(), p1.Merging(), z0.VnB());
10736
10737 __ Rbit(z1.VnB(), p1.Merging(), z0.VnB());
10738 __ Rbit(z2.VnH(), p1.Merging(), z0.VnH());
10739 __ Rbit(z3.VnS(), p1.Merging(), z0.VnS());
10740 __ Rbit(z4.VnD(), p1.Merging(), z0.VnD());
10741
10742 __ Dup(z5.VnB(), 0x42);
10743 __ Rbit(z5.VnB(), p2.Merging(), z0.VnB());
10744 __ Dup(z6.VnB(), 0x42);
10745 __ Rbit(z6.VnS(), p2.Merging(), z0.VnS());
10746
10747 END();
10748
10749 if (CAN_RUN()) {
10750 RUN();
10751
10752 ASSERT_EQUAL_SVE(inputs, z0.VnD());
10753
10754 uint64_t expected_z1[] = {0x55555555aaaaaaaa, 0x5555aaaa55aa55aa};
10755 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
10756 uint64_t expected_z2[] = {0x55555555aaaaaaaa, 0x5555aaaaaa55aa55};
10757 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
10758 uint64_t expected_z3[] = {0x55555555aaaaaaaa, 0xaaaa5555aa55aa55};
10759 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
10760 uint64_t expected_z4[] = {0xaaaaaaaa55555555, 0xaa55aa55aaaa5555};
10761 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
10762 uint64_t expected_z5[] = {0x4255425542aa42aa, 0x4255424242aa42aa};
10763 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
10764 uint64_t expected_z6[] = {0x55555555aaaaaaaa, 0x42424242aa55aa55};
10765 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
10766 }
10767}
10768
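// Revb, Revh and Revw reverse the order of the bytes, halfwords and words
// (respectively) within each element of the given size; inactive lanes
// keep the destination value under merging predication.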
10769TEST_SVE(sve_rev_bhw) {
10770 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10771 START();
10772
10773 uint64_t inputs[] = {0xaaaaaaaa55555555, 0xaaaa5555aa55aa55};
10774 InsrHelper(&masm, z0.VnD(), inputs);
10775
10776 __ Ptrue(p1.VnB());
10777 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
10778 Initialise(&masm, p2.VnB(), pred);
10779
10780 __ Revb(z1.VnH(), p1.Merging(), z0.VnH());
10781 __ Revb(z2.VnS(), p1.Merging(), z0.VnS());
10782 __ Revb(z3.VnD(), p1.Merging(), z0.VnD());
10783 __ Revh(z4.VnS(), p1.Merging(), z0.VnS());
10784 __ Revh(z5.VnD(), p1.Merging(), z0.VnD());
10785 __ Revw(z6.VnD(), p1.Merging(), z0.VnD());
10786
10787 __ Dup(z7.VnB(), 0x42);
10788 __ Revb(z7.VnH(), p2.Merging(), z0.VnH());
10789 __ Dup(z8.VnB(), 0x42);
10790 __ Revh(z8.VnS(), p2.Merging(), z0.VnS());
10791
10792 END();
10793
10794 if (CAN_RUN()) {
10795 RUN();
10796
10797 uint64_t expected_z1[] = {0xaaaaaaaa55555555, 0xaaaa555555aa55aa};
10798 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
10799 uint64_t expected_z2[] = {0xaaaaaaaa55555555, 0x5555aaaa55aa55aa};
10800 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
10801 uint64_t expected_z3[] = {0x55555555aaaaaaaa, 0x55aa55aa5555aaaa};
10802 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
10803 uint64_t expected_z4[] = {0xaaaaaaaa55555555, 0x5555aaaaaa55aa55};
10804 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
10805 uint64_t expected_z5[] = {0x55555555aaaaaaaa, 0xaa55aa555555aaaa};
10806 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
10807 uint64_t expected_z6[] = {0x55555555aaaaaaaa, 0xaa55aa55aaaa5555};
10808 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
10809 uint64_t expected_z7[] = {0xaaaaaaaa55555555, 0xaaaa424255aa55aa};
10810 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
10811 uint64_t expected_z8[] = {0xaaaaaaaa55555555, 0x42424242aa55aa55};
10812 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
10813 }
10814}
10815
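// Ftssel selects the starting value for a trigonometric series: the result
// is the first source element when bit 0 of the corresponding "quadrant"
// element is clear, or 1.0 when it is set, and the result is negated when
// bit 1 of the quadrant is set.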
10816TEST_SVE(sve_ftssel) {
10817 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10818 START();
10819
10820 uint64_t in[] = {0x1111777766665555, 0xaaaabbbbccccdddd};
10821 uint64_t q[] = {0x0001000300000002, 0x0001000200000003};
10822 InsrHelper(&masm, z0.VnD(), in);
10823 InsrHelper(&masm, z1.VnD(), q);
10824
10825 __ Ftssel(z2.VnH(), z0.VnH(), z1.VnH());
10826 __ Ftssel(z3.VnS(), z0.VnS(), z1.VnS());
10827 __ Ftssel(z4.VnD(), z0.VnD(), z1.VnD());
10828
10829 END();
10830
10831 if (CAN_RUN()) {
10832 RUN();
10833
10834 uint64_t expected_z2[] = {0x3c00bc006666d555, 0x3c003bbbccccbc00};
10835 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
10836 uint64_t expected_z3[] = {0xbf800000e6665555, 0x2aaabbbbbf800000};
10837 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
10838 uint64_t expected_z4[] = {0x9111777766665555, 0xbff0000000000000};
10839 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
10840 }
10841}
10842
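// Fexpa is the exponential accelerator: broadly, the low bits of each
// element index a table of fractional powers of two and the next bits
// supply the result's exponent, giving a coarse 2^x for software to refine.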
10843TEST_SVE(sve_fexpa) {
10844 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10845 START();
10846
10847 uint64_t in0[] = {0x3ff0000000000000, 0x3ff0000000011001};
10848 uint64_t in1[] = {0x3ff000000002200f, 0xbff000000003301f};
10849 uint64_t in2[] = {0xbff000000004403f, 0x3ff0000000055040};
10850 uint64_t in3[] = {0x3f800000bf800001, 0x3f80000f3f80001f};
10851 uint64_t in4[] = {0x3f80002f3f82203f, 0xbf8000403f833041};
10852 uint64_t in5[] = {0x3c003c01bc00bc07, 0x3c08bc0f3c1fbc20};
10853 InsrHelper(&masm, z0.VnD(), in0);
10854 InsrHelper(&masm, z1.VnD(), in1);
10855 InsrHelper(&masm, z2.VnD(), in2);
10856 InsrHelper(&masm, z3.VnD(), in3);
10857 InsrHelper(&masm, z4.VnD(), in4);
10858 InsrHelper(&masm, z5.VnD(), in5);
10859
10860 __ Fexpa(z6.VnD(), z0.VnD());
10861 __ Fexpa(z7.VnD(), z1.VnD());
10862 __ Fexpa(z8.VnD(), z2.VnD());
10863 __ Fexpa(z9.VnS(), z3.VnS());
10864 __ Fexpa(z10.VnS(), z4.VnS());
10865 __ Fexpa(z11.VnH(), z5.VnH());
10866
10867 END();
10868
10869 if (CAN_RUN()) {
10870 RUN();
10871 uint64_t expected_z6[] = {0x0000000000000000, 0x44002c9a3e778061};
10872 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
10873 uint64_t expected_z7[] = {0x0802d285a6e4030b, 0x4c06623882552225};
10874 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
10875 uint64_t expected_z8[] = {0x100fa7c1819e90d8, 0x5410000000000000};
10876 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
10877 uint64_t expected_z9[] = {0x00000000000164d2, 0x0016942d003311c4};
10878 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
10879 uint64_t expected_z10[] = {0x0054f35b407d3e0c, 0x00800000608164d2};
10880 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
10881 uint64_t expected_z11[] = {0x00000016000000a8, 0x00c2018903d40400};
10882 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
10883 }
10884}
10885
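// The predicate form of Rev reverses the order of the elements at the
// specified granularity, so the lowest lane of the result comes from the
// highest-numbered lane of the source.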
10886TEST_SVE(sve_rev_p) {
10887 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10888 START();
10889
10890 Initialise(&masm,
10891 p0.VnB(),
10892 0xabcdabcdabcdabcd,
10893 0xabcdabcdabcdabcd,
10894 0xabcdabcdabcdabcd,
10895 0xabcdabcdabcdabcd);
10896
10897 __ Rev(p1.VnB(), p0.VnB());
10898 __ Rev(p2.VnH(), p0.VnH());
10899 __ Rev(p3.VnS(), p0.VnS());
10900 __ Rev(p4.VnD(), p0.VnD());
10901
10902 END();
10903
10904 if (CAN_RUN()) {
10905 RUN();
10906
10907 int p1_expected[] = {1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1};
10908 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
10909 int p2_expected[] = {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0};
10910 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
10911 int p3_expected[] = {1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0};
10912 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
10913 int p4_expected[] = {1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1};
10914 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
10915 }
10916}
10917
10918TEST_SVE(sve_trn_p_bh) {
10919 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10920 START();
10921
10922 Initialise(&masm, p0.VnB(), 0xa5a55a5a);
10923 __ Pfalse(p1.VnB());
10924
10925 __ Trn1(p2.VnB(), p0.VnB(), p0.VnB());
10926 __ Trn2(p3.VnB(), p0.VnB(), p0.VnB());
10927 __ Trn1(p4.VnB(), p1.VnB(), p0.VnB());
10928 __ Trn2(p5.VnB(), p1.VnB(), p0.VnB());
10929 __ Trn1(p6.VnB(), p0.VnB(), p1.VnB());
10930 __ Trn2(p7.VnB(), p0.VnB(), p1.VnB());
10931
10932 __ Trn1(p8.VnH(), p0.VnH(), p0.VnH());
10933 __ Trn2(p9.VnH(), p0.VnH(), p0.VnH());
10934 __ Trn1(p10.VnH(), p1.VnH(), p0.VnH());
10935 __ Trn2(p11.VnH(), p1.VnH(), p0.VnH());
10936 __ Trn1(p12.VnH(), p0.VnH(), p1.VnH());
10937 __ Trn2(p13.VnH(), p0.VnH(), p1.VnH());
10938
10939 END();
10940
10941 if (CAN_RUN()) {
10942 RUN();
10943 int p2_expected[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
10944 int p3_expected[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
10945 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
10946 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
10947
10948 int p4_expected[] = {1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
10949 int p5_expected[] = {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0};
10950 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
10951 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
10952
10953 int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0};
10954 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1};
10955 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
10956 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
10957
10958 int p8_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
10959 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
10960 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
10961 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
10962
10963 int p10_expected[] = {0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0};
10964 int p11_expected[] = {0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0};
10965 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
10966 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
10967
10968 int p12_expected[] = {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0};
10969 int p13_expected[] = {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0};
10970 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
10971 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
10972 }
10973}
10974
10975TEST_SVE(sve_trn_p_sd) {
10976 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10977 START();
10978
10979 Initialise(&masm, p0.VnB(), 0x55a55aaa);
10980 __ Pfalse(p1.VnB());
10981
10982 __ Trn1(p2.VnS(), p0.VnS(), p0.VnS());
10983 __ Trn2(p3.VnS(), p0.VnS(), p0.VnS());
10984 __ Trn1(p4.VnS(), p1.VnS(), p0.VnS());
10985 __ Trn2(p5.VnS(), p1.VnS(), p0.VnS());
10986 __ Trn1(p6.VnS(), p0.VnS(), p1.VnS());
10987 __ Trn2(p7.VnS(), p0.VnS(), p1.VnS());
10988
10989 __ Trn1(p8.VnD(), p0.VnD(), p0.VnD());
10990 __ Trn2(p9.VnD(), p0.VnD(), p0.VnD());
10991 __ Trn1(p10.VnD(), p1.VnD(), p0.VnD());
10992 __ Trn2(p11.VnD(), p1.VnD(), p0.VnD());
10993 __ Trn1(p12.VnD(), p0.VnD(), p1.VnD());
10994 __ Trn2(p13.VnD(), p0.VnD(), p1.VnD());
10995
10996 END();
10997
10998 if (CAN_RUN()) {
10999 RUN();
11000 int p2_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11001 int p3_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11002 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11003 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11004
11005 int p4_expected[] = {1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11006 int p5_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11007 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11008 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11009
11010 int p6_expected[] = {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0};
11011 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11012 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11013 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11014
11015 int p8_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11016 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11017 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11018 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11019
11020 int p10_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11021 int p11_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11022 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11023 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11024
11025 int p12_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11026 int p13_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11027 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11028 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11029 }
11030}
11031
11032TEST_SVE(sve_zip_p_bh) {
11033 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11034 START();
11035
11036 Initialise(&masm,
11037 p0.VnB(),
11038 0x5a5a5a5a5a5a5a5a,
11039 0x5a5a5a5a5a5a5a5a,
11040 0x5a5a5a5a5a5a5a5a,
11041 0x5a5a5a5a5a5a5a5a);
11042 __ Pfalse(p1.VnB());
11043
11044 __ Zip1(p2.VnB(), p0.VnB(), p0.VnB());
11045 __ Zip2(p3.VnB(), p0.VnB(), p0.VnB());
11046 __ Zip1(p4.VnB(), p1.VnB(), p0.VnB());
11047 __ Zip2(p5.VnB(), p1.VnB(), p0.VnB());
11048 __ Zip1(p6.VnB(), p0.VnB(), p1.VnB());
11049 __ Zip2(p7.VnB(), p0.VnB(), p1.VnB());
11050
11051 __ Zip1(p8.VnH(), p0.VnH(), p0.VnH());
11052 __ Zip2(p9.VnH(), p0.VnH(), p0.VnH());
11053 __ Zip1(p10.VnH(), p1.VnH(), p0.VnH());
11054 __ Zip2(p11.VnH(), p1.VnH(), p0.VnH());
11055 __ Zip1(p12.VnH(), p0.VnH(), p1.VnH());
11056 __ Zip2(p13.VnH(), p0.VnH(), p1.VnH());
11057
11058 END();
11059
11060 if (CAN_RUN()) {
11061 RUN();
11062 int p2_expected[] = {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0};
11063 int p3_expected[] = {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0};
11064 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11065 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11066
11067 int p4_expected[] = {0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11068 int p5_expected[] = {0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11069 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11070 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11071
11072 int p6_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0};
11073 int p7_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0};
11074 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11075 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11076
11077 int p8_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11078 int p9_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11079 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11080 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11081
11082 int p10_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11083 int p11_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11084 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11085 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11086
11087 int p12_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0};
11088 int p13_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0};
11089 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11090 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11091 }
11092}
11093
11094TEST_SVE(sve_zip_p_sd) {
11095 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11096 START();
11097
11098 Initialise(&masm,
11099 p0.VnB(),
11100 0x5a5a5a5a5a5a5a5a,
11101 0x5a5a5a5a5a5a5a5a,
11102 0x5a5a5a5a5a5a5a5a,
11103 0x5a5a5a5a5a5a5a5a);
11104 __ Pfalse(p1.VnB());
11105
11106 __ Zip1(p2.VnS(), p0.VnS(), p0.VnS());
11107 __ Zip2(p3.VnS(), p0.VnS(), p0.VnS());
11108 __ Zip1(p4.VnS(), p1.VnS(), p0.VnS());
11109 __ Zip2(p5.VnS(), p1.VnS(), p0.VnS());
11110 __ Zip1(p6.VnS(), p0.VnS(), p1.VnS());
11111 __ Zip2(p7.VnS(), p0.VnS(), p1.VnS());
11112
11113 __ Zip1(p8.VnD(), p0.VnD(), p0.VnD());
11114 __ Zip2(p9.VnD(), p0.VnD(), p0.VnD());
11115 __ Zip1(p10.VnD(), p1.VnD(), p0.VnD());
11116 __ Zip2(p11.VnD(), p1.VnD(), p0.VnD());
11117 __ Zip1(p12.VnD(), p0.VnD(), p1.VnD());
11118 __ Zip2(p13.VnD(), p0.VnD(), p1.VnD());
11119
11120 END();
11121
11122 if (CAN_RUN()) {
11123 RUN();
11124 int p2_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11125 int p3_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11126 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11127 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11128
11129 int p4_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11130 int p5_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11131 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11132 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11133
11134 int p6_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11135 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11136 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11137 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11138
11139 int p8_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11140 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11141 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11142 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11143
11144 int p10_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11145 int p11_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11146 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11147 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11148
11149 int p12_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11150 int p13_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11151 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11152 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11153 }
11154}
11155
11156TEST_SVE(sve_uzp_p) {
11157 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11158 START();
11159
11160 Initialise(&masm,
11161 p0.VnB(),
11162 0xf0f0ff00ffff0000,
11163 0x4242424242424242,
11164 0x5a5a5a5a5a5a5a5a,
11165 0x0123456789abcdef);
11166 __ Rev(p1.VnB(), p0.VnB());
11167
11168 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
11169 __ Zip2(p3.VnB(), p0.VnB(), p1.VnB());
11170 __ Uzp1(p4.VnB(), p2.VnB(), p3.VnB());
11171 __ Uzp2(p5.VnB(), p2.VnB(), p3.VnB());
11172
11173 __ Zip1(p2.VnH(), p0.VnH(), p1.VnH());
11174 __ Zip2(p3.VnH(), p0.VnH(), p1.VnH());
11175 __ Uzp1(p6.VnH(), p2.VnH(), p3.VnH());
11176 __ Uzp2(p7.VnH(), p2.VnH(), p3.VnH());
11177
11178 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
11179 __ Zip2(p3.VnS(), p0.VnS(), p1.VnS());
11180 __ Uzp1(p8.VnS(), p2.VnS(), p3.VnS());
11181 __ Uzp2(p9.VnS(), p2.VnS(), p3.VnS());
11182
11183 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
11184 __ Zip2(p3.VnD(), p0.VnD(), p1.VnD());
11185 __ Uzp1(p10.VnD(), p2.VnD(), p3.VnD());
11186 __ Uzp2(p11.VnD(), p2.VnD(), p3.VnD());
11187
11188 END();
11189
11190 if (CAN_RUN()) {
11191 RUN();
11192
11193 ASSERT_EQUAL_SVE(p0, p4);
11194 ASSERT_EQUAL_SVE(p1, p5);
11195 ASSERT_EQUAL_SVE(p0, p6);
11196 ASSERT_EQUAL_SVE(p1, p7);
11197 ASSERT_EQUAL_SVE(p0, p8);
11198 ASSERT_EQUAL_SVE(p1, p9);
11199 ASSERT_EQUAL_SVE(p0, p10);
11200 ASSERT_EQUAL_SVE(p1, p11);
11201 }
11202}
11203
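// Punpklo and Punpkhi unpack the low or high half of a byte-granularity
// predicate into halfword granularity, spreading each source bit into
// every other bit position of the destination.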
11204TEST_SVE(sve_punpk) {
11205 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11206 START();
11207
11208 Initialise(&masm,
11209 p0.VnB(),
11210 0xf0a0f0a0f0a0f0a0,
11211 0xf0a0f0a0f0a0f0a0,
11212 0xa0f0a0f0a0f0a0f0,
11213 0xa0f0a0f0a0f0a0f0);
11214 __ Punpklo(p1.VnH(), p0.VnB());
11215 __ Punpkhi(p2.VnH(), p0.VnB());
11216
11217 END();
11218
11219 if (CAN_RUN()) {
11220 RUN();
11221
11222 int p1_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
11223 int p2_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11224 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
11225 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11226 }
11227}
11228
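// Brka and Brkb are the single-source break instructions: active result
// elements are true up to the first active true element of pn (inclusive
// for Brka, exclusive for Brkb) and false afterwards. Zeroing clears the
// inactive lanes while merging preserves the destination there; the "s"
// forms also set NZCV.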
11229typedef void (MacroAssembler::*BrkFn)(const PRegisterWithLaneSize& pd,
11230 const PRegister& pg,
11231 const PRegisterWithLaneSize& pn);
11232
11233typedef void (MacroAssembler::*BrksFn)(const PRegisterWithLaneSize& pd,
11234 const PRegisterZ& pg,
11235 const PRegisterWithLaneSize& pn);
11236
11237template <typename T, size_t N>
11238static void BrkaBrkbHelper(Test* config,
11239 BrkFn macro,
11240 BrksFn macro_set_flags,
11241 const T (&pd_inputs)[N],
11242 const T (&pg_inputs)[N],
11243 const T (&pn_inputs)[N],
11244 const T (&pd_z_expected)[N]) {
11245 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11246 START();
11247
11248 PRegister pg = p10;
11249 PRegister pn = p9;
11250 PRegister pd_z = p0;
11251 PRegister pd_z_s = p1;
11252 PRegister pd_m = p2;
11253 Initialise(&masm, pg.VnB(), pg_inputs);
11254 Initialise(&masm, pn.VnB(), pn_inputs);
11255 Initialise(&masm, pd_m.VnB(), pd_inputs);
11256
11257 // Initialise NZCV to an impossible value, to check that we actually write it.
11258 __ Mov(x10, NZCVFlag);
11259 __ Msr(NZCV, x10);
11260
11261 (masm.*macro)(pd_z.VnB(), pg.Zeroing(), pn.VnB());
11262 (masm.*macro_set_flags)(pd_z_s.VnB(), pg.Zeroing(), pn.VnB());
11263 __ Mrs(x0, NZCV);
11264
11265 (masm.*macro)(pd_m.VnB(), pg.Merging(), pn.VnB());
11266
11267 END();
11268
11269 if (CAN_RUN()) {
11270 RUN();
11271
11272 ASSERT_EQUAL_SVE(pd_z_expected, pd_z.VnB());
11273
11274 // Check that the flags were properly set.
11275 StatusFlags nzcv_expected =
11276 GetPredTestFlags(pd_z_expected,
11277 pg_inputs,
11278 core.GetSVELaneCount(kBRegSize));
11279 ASSERT_EQUAL_64(nzcv_expected, x0);
11280 ASSERT_EQUAL_SVE(pd_z.VnB(), pd_z_s.VnB());
11281
11282 T pd_m_expected[N];
11283 // Set the expected `pd` result for merging predication.
11284 for (size_t i = 0; i < N; i++) {
11285 pd_m_expected[i] = pg_inputs[i] ? pd_z_expected[i] : pd_inputs[i];
11286 }
11287 ASSERT_EQUAL_SVE(pd_m_expected, pd_m.VnB());
11288 }
11289}
11290
11291template <typename T>
11292static void BrkaHelper(Test* config,
11293 const T& pd_inputs,
11294 const T& pg_inputs,
11295 const T& pn_inputs,
11296 const T& pd_expected) {
11297 BrkaBrkbHelper(config,
11298 &MacroAssembler::Brka,
11299 &MacroAssembler::Brkas,
11300 pd_inputs,
11301 pg_inputs,
11302 pn_inputs,
11303 pd_expected);
11304}
11305
11306TEST_SVE(sve_brka) {
11307 // clang-format off
11308 // | boundary of 128-bits VL.
11309 // v
11310 int pd[] = {1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11311
11312 // | highest-numbered lane lowest-numbered lane |
11313 // v v
11314 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11315 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11316
11317 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
11318 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11319 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1};
11320
11321 // | first break
11322 // v
11323 int exp_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
11324 // | first break
11325 // v
11326 int exp_1_2[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11327 // | first break
11328 // v
11329 int exp_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11330
11331 BrkaHelper(config, pd, pg_1, pn_1, exp_1_1);
11332 BrkaHelper(config, pd, pg_1, pn_2, exp_1_2);
11333 BrkaHelper(config, pd, pg_1, pn_3, exp_1_3);
11334
11335 // | first break
11336 // v
11337 int exp_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1};
11338 // | first break
11339 // v
11340 int exp_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11341 // | first break
11342 // v
11343 int exp_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
11344 BrkaHelper(config, pd, pg_2, pn_1, exp_2_1);
11345 BrkaHelper(config, pd, pg_2, pn_2, exp_2_2);
11346 BrkaHelper(config, pd, pg_2, pn_3, exp_2_3);
11347
11348 // An all-inactive zeroing predicate sets the destination predicate to all-false.
11349 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11350 int exp_3_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11351 BrkaHelper(config, pd, pg_3, pn_1, exp_3_x);
11352 BrkaHelper(config, pd, pg_3, pn_2, exp_3_x);
11353 BrkaHelper(config, pd, pg_3, pn_3, exp_3_x);
11354 // clang-format on
11355}
11356
11357template <typename T>
11358static void BrkbHelper(Test* config,
11359 const T& pd_inputs,
11360 const T& pg_inputs,
11361 const T& pn_inputs,
11362 const T& pd_expected) {
11363 BrkaBrkbHelper(config,
11364 &MacroAssembler::Brkb,
11365 &MacroAssembler::Brkbs,
11366 pd_inputs,
11367 pg_inputs,
11368 pn_inputs,
11369 pd_expected);
11370}
11371
11372TEST_SVE(sve_brkb) {
11373 // clang-format off
11374 // | boundary of 128-bits VL.
11375 // v
11376 int pd[] = {1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11377
11378 // | highest-numbered lane lowest-numbered lane |
11379 // v v
11380 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11381 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11382
11383 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
11384 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11385 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1};
11386
11387 // | first break
11388 // v
11389 int exp_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
11390 // | first break
11391 // v
11392 int exp_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11393 // | first break
11394 // v
11395 int exp_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0};
11396
11397 BrkbHelper(config, pd, pg_1, pn_1, exp_1_1);
11398 BrkbHelper(config, pd, pg_1, pn_2, exp_1_2);
11399 BrkbHelper(config, pd, pg_1, pn_3, exp_1_3);
11400
11401 // | first break
11402 // v
11403 int exp_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
11404 // | first break
11405 // v
11406 int exp_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11407 // | first break
11408 // v
11409 int exp_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11410 BrkbHelper(config, pd, pg_2, pn_1, exp_2_1);
11411 BrkbHelper(config, pd, pg_2, pn_2, exp_2_2);
11412 BrkbHelper(config, pd, pg_2, pn_3, exp_2_3);
11413
11414 // An all-inactive zeroing predicate sets the destination predicate to all-false.
11415 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11416 int exp_3_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11417 BrkbHelper(config, pd, pg_3, pn_1, exp_3_x);
11418 BrkbHelper(config, pd, pg_3, pn_2, exp_3_x);
11419 BrkbHelper(config, pd, pg_3, pn_3, exp_3_x);
11420 // clang-format on
11421}
11422
11423typedef void (MacroAssembler::*BrknFn)(const PRegisterWithLaneSize& pd,
11424 const PRegisterZ& pg,
11425 const PRegisterWithLaneSize& pn,
11426 const PRegisterWithLaneSize& pm);
11427
11428typedef void (MacroAssembler::*BrknsFn)(const PRegisterWithLaneSize& pd,
11429 const PRegisterZ& pg,
11430 const PRegisterWithLaneSize& pn,
11431 const PRegisterWithLaneSize& pm);
11432
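// Brkn propagates a break to the next partition: if the last active
// element of pn is true the destination keeps the value of pm, otherwise
// it becomes all-false. The enum below names these two outcomes.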
11433enum BrknDstPredicateState { kAllFalse, kUnchanged };
11434
11435template <typename T, size_t N>
11436static void BrknHelper(Test* config,
11437 BrknFn macro,
11438 BrknsFn macro_set_flags,
11439 const T (&pd_inputs)[N],
11440 const T (&pg_inputs)[N],
11441 const T (&pn_inputs)[N],
11442 const T (&pm_inputs)[N],
11443 BrknDstPredicateState expected_pd_state) {
11444 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11445 START();
11446
11447 PRegister pg = p10;
11448 PRegister pn = p9;
11449 PRegister pm = p8;
11450 PRegister pdm = p0;
11451 PRegister pd = p1;
11452 PRegister pd_s = p2;
11453 Initialise(&masm, pg.VnB(), pg_inputs);
11454 Initialise(&masm, pn.VnB(), pn_inputs);
11455 Initialise(&masm, pm.VnB(), pm_inputs);
11456 Initialise(&masm, pdm.VnB(), pm_inputs);
11457 Initialise(&masm, pd.VnB(), pd_inputs);
11458 Initialise(&masm, pd_s.VnB(), pd_inputs);
11459
11460 // Initialise NZCV to an impossible value, to check that we actually write it.
11461 __ Mov(x10, NZCVFlag);
11462 __ Msr(NZCV, x10);
11463
11464 (masm.*macro)(pdm.VnB(), pg.Zeroing(), pn.VnB(), pdm.VnB());
11465 // The non-aliasing form, where `pd` is distinct from `pm`.
11466 (masm.*macro)(pd.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
11467 (masm.*macro_set_flags)(pd_s.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
11468 __ Mrs(x0, NZCV);
11469
11470 END();
11471
11472 if (CAN_RUN()) {
11473 RUN();
11474
11475 T all_false[N] = {0};
11476 if (expected_pd_state == kAllFalse) {
11477 ASSERT_EQUAL_SVE(all_false, pd.VnB());
11478 } else {
11479 ASSERT_EQUAL_SVE(pm_inputs, pd.VnB());
11480 }
11481 ASSERT_EQUAL_SVE(pm_inputs, pm.VnB());
11482
11483 // Check that the flags were properly set.
11484 StatusFlags nzcv_expected =
11485 GetPredTestFlags((expected_pd_state == kAllFalse) ? all_false
11486 : pm_inputs,
11487 pg_inputs,
11488 core.GetSVELaneCount(kBRegSize));
11489 ASSERT_EQUAL_64(nzcv_expected, x0);
11490 ASSERT_EQUAL_SVE(pd.VnB(), pdm.VnB());
11491 ASSERT_EQUAL_SVE(pd.VnB(), pd_s.VnB());
11492 }
11493}
11494
11495TEST_SVE(sve_brkn) {
11496 // clang-format off
11497 int pd[] = {1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11498 int pm[] = {0, 1, 1, 1, 1, 0, 0, 1, 0, 1};
11499
11500 int pg_1[] = {1, 1, 0, 0, 1, 0, 1, 1, 0, 0};
11501 int pg_2[] = {0, 0, 0, 1, 1, 1, 0, 0, 1, 1};
11502 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // all-false
11503
11504 int pn_1[] = {1, 0, 0, 0, 0, 1, 1, 0, 0, 0};
11505 int pn_2[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
11506 int pn_3[] = {0, 0, 0, 0, 1, 1, 0, 0, 1, 1};
11507
11508 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_1, pm, kUnchanged);
11509 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_2, pm, kAllFalse);
11510 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_3, pm, kAllFalse);
11511
11512 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_1, pm, kAllFalse);
11513 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_2, pm, kUnchanged);
11514 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_3, pm, kAllFalse);
11515
11516 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_1, pm, kAllFalse);
11517 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_2, pm, kAllFalse);
11518 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_3, pm, kAllFalse);
11519 // clang-format on
11520}
11521
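// Trn1 interleaves the even-numbered elements of the two sources and Trn2
// the odd-numbered elements, taking corresponding elements alternately
// from each operand.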
11522TEST_SVE(sve_trn) {
11523 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11524 START();
11525
11526 uint64_t in0[] = {0xffeeddccbbaa9988, 0x7766554433221100};
11527 uint64_t in1[] = {0xaa55aa55aa55aa55, 0x55aa55aa55aa55aa};
11528 InsrHelper(&masm, z0.VnD(), in0);
11529 InsrHelper(&masm, z1.VnD(), in1);
11530
11531 __ Trn1(z2.VnB(), z0.VnB(), z1.VnB());
11532 __ Trn2(z3.VnB(), z0.VnB(), z1.VnB());
11533 __ Trn1(z4.VnH(), z0.VnH(), z1.VnH());
11534 __ Trn2(z5.VnH(), z0.VnH(), z1.VnH());
11535 __ Trn1(z6.VnS(), z0.VnS(), z1.VnS());
11536 __ Trn2(z7.VnS(), z0.VnS(), z1.VnS());
11537 __ Trn1(z8.VnD(), z0.VnD(), z1.VnD());
11538 __ Trn2(z9.VnD(), z0.VnD(), z1.VnD());
11539
11540 END();
11541
11542 if (CAN_RUN()) {
11543 RUN();
11544 uint64_t expected_z2[] = {0x55ee55cc55aa5588, 0xaa66aa44aa22aa00};
11545 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11546 uint64_t expected_z3[] = {0xaaffaaddaabbaa99, 0x5577555555335511};
11547 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11548 uint64_t expected_z4[] = {0xaa55ddccaa559988, 0x55aa554455aa1100};
11549 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11550 uint64_t expected_z5[] = {0xaa55ffeeaa55bbaa, 0x55aa776655aa3322};
11551 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11552 uint64_t expected_z6[] = {0xaa55aa55bbaa9988, 0x55aa55aa33221100};
11553 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11554 uint64_t expected_z7[] = {0xaa55aa55ffeeddcc, 0x55aa55aa77665544};
11555 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11556 uint64_t expected_z8[] = {0x55aa55aa55aa55aa, 0x7766554433221100};
11557 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11558 uint64_t expected_z9[] = {0xaa55aa55aa55aa55, 0xffeeddccbbaa9988};
11559 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11560 }
11561}
11562
11563TEST_SVE(sve_zip_uzp) {
11564 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11565 START();
11566
11567 __ Dup(z0.VnD(), 0xffeeddccbbaa9988);
11568 __ Insr(z0.VnD(), 0x7766554433221100);
11569 __ Dup(z1.VnD(), 0xaa55aa55aa55aa55);
11570 __ Insr(z1.VnD(), 0x55aa55aa55aa55aa);
11571
11572 __ Zip1(z2.VnB(), z0.VnB(), z1.VnB());
11573 __ Zip2(z3.VnB(), z0.VnB(), z1.VnB());
11574 __ Zip1(z4.VnH(), z0.VnH(), z1.VnH());
11575 __ Zip2(z5.VnH(), z0.VnH(), z1.VnH());
11576 __ Zip1(z6.VnS(), z0.VnS(), z1.VnS());
11577 __ Zip2(z7.VnS(), z0.VnS(), z1.VnS());
11578 __ Zip1(z8.VnD(), z0.VnD(), z1.VnD());
11579 __ Zip2(z9.VnD(), z0.VnD(), z1.VnD());
11580
11581 __ Uzp1(z10.VnB(), z2.VnB(), z3.VnB());
11582 __ Uzp2(z11.VnB(), z2.VnB(), z3.VnB());
11583 __ Uzp1(z12.VnH(), z4.VnH(), z5.VnH());
11584 __ Uzp2(z13.VnH(), z4.VnH(), z5.VnH());
11585 __ Uzp1(z14.VnS(), z6.VnS(), z7.VnS());
11586 __ Uzp2(z15.VnS(), z6.VnS(), z7.VnS());
11587 __ Uzp1(z16.VnD(), z8.VnD(), z9.VnD());
11588 __ Uzp2(z17.VnD(), z8.VnD(), z9.VnD());
11589
11590 END();
11591
11592 if (CAN_RUN()) {
11593 RUN();
11594 uint64_t expected_z2[] = {0x5577aa665555aa44, 0x5533aa225511aa00};
11595 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11596 uint64_t expected_z3[] = {0xaaff55eeaadd55cc, 0xaabb55aaaa995588};
11597 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11598 uint64_t expected_z4[] = {0x55aa776655aa5544, 0x55aa332255aa1100};
11599 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11600 uint64_t expected_z5[] = {0xaa55ffeeaa55ddcc, 0xaa55bbaaaa559988};
11601 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11602 uint64_t expected_z6[] = {0x55aa55aa77665544, 0x55aa55aa33221100};
11603 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11604 uint64_t expected_z7[] = {0xaa55aa55ffeeddcc, 0xaa55aa55bbaa9988};
11605 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11606 uint64_t expected_z8[] = {0x55aa55aa55aa55aa, 0x7766554433221100};
11607 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11608 uint64_t expected_z9[] = {0xaa55aa55aa55aa55, 0xffeeddccbbaa9988};
11609 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11610
11611 // Check that Uzp1/Uzp2 undo the corresponding Zip1/Zip2.
11612 ASSERT_EQUAL_SVE(z0.VnD(), z10.VnD());
11613 ASSERT_EQUAL_SVE(z1.VnD(), z11.VnD());
11614 ASSERT_EQUAL_SVE(z0.VnD(), z12.VnD());
11615 ASSERT_EQUAL_SVE(z1.VnD(), z13.VnD());
11616 ASSERT_EQUAL_SVE(z0.VnD(), z14.VnD());
11617 ASSERT_EQUAL_SVE(z1.VnD(), z15.VnD());
11618 ASSERT_EQUAL_SVE(z0.VnD(), z16.VnD());
11619 ASSERT_EQUAL_SVE(z1.VnD(), z17.VnD());
11620 }
11621}
11622
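// Fmul (indexed) multiplies each element of the first source by an element
// of the second source selected by the immediate index; it is checked here
// against a reference computed with Dup (indexed) followed by an
// unpredicated Fmul.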
11623TEST_SVE(sve_fpmul_index) {
11624 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11625 START();
11626
11627 uint64_t in0[] = {0x3ff000003f803c00, 0xbff00000bf80bc00};
11628 uint64_t in1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76};
11629
11630 InsrHelper(&masm, z0.VnD(), in0);
11631 InsrHelper(&masm, z1.VnD(), in1);
11632
11633 __ Fmul(z2.VnH(), z1.VnH(), z0.VnH(), 0);
11634 __ Fmul(z3.VnH(), z1.VnH(), z0.VnH(), 1);
11635 __ Fmul(z4.VnH(), z1.VnH(), z0.VnH(), 4);
11636 __ Fmul(z5.VnH(), z1.VnH(), z0.VnH(), 7);
11637
11638 __ Fmul(z6.VnS(), z1.VnS(), z0.VnS(), 0);
11639 __ Fmul(z7.VnS(), z1.VnS(), z0.VnS(), 1);
11640 __ Fmul(z8.VnS(), z1.VnS(), z0.VnS(), 2);
11641 __ Fmul(z9.VnS(), z1.VnS(), z0.VnS(), 3);
11642
11643 __ Fmul(z10.VnD(), z1.VnD(), z0.VnD(), 0);
11644 __ Fmul(z11.VnD(), z1.VnD(), z0.VnD(), 1);
11645
11646 // Compute the results using other instructions.
11647 __ Dup(z12.VnH(), z0.VnH(), 0);
11648 __ Fmul(z12.VnH(), z1.VnH(), z12.VnH());
11649 __ Dup(z13.VnH(), z0.VnH(), 1);
11650 __ Fmul(z13.VnH(), z1.VnH(), z13.VnH());
11651 __ Dup(z14.VnH(), z0.VnH(), 4);
11652 __ Fmul(z14.VnH(), z1.VnH(), z14.VnH());
11653 __ Dup(z15.VnH(), z0.VnH(), 7);
11654 __ Fmul(z15.VnH(), z1.VnH(), z15.VnH());
11655
11656 __ Dup(z16.VnS(), z0.VnS(), 0);
11657 __ Fmul(z16.VnS(), z1.VnS(), z16.VnS());
11658 __ Dup(z17.VnS(), z0.VnS(), 1);
11659 __ Fmul(z17.VnS(), z1.VnS(), z17.VnS());
11660 __ Dup(z18.VnS(), z0.VnS(), 2);
11661 __ Fmul(z18.VnS(), z1.VnS(), z18.VnS());
11662 __ Dup(z19.VnS(), z0.VnS(), 3);
11663 __ Fmul(z19.VnS(), z1.VnS(), z19.VnS());
11664
11665 __ Dup(z20.VnD(), z0.VnD(), 0);
11666 __ Fmul(z20.VnD(), z1.VnD(), z20.VnD());
11667 __ Dup(z21.VnD(), z0.VnD(), 1);
11668 __ Fmul(z21.VnD(), z1.VnD(), z21.VnD());
11669
11670 END();
11671
11672 if (CAN_RUN()) {
11673 RUN();
11674 ASSERT_EQUAL_SVE(z12.VnH(), z2.VnH());
11675 ASSERT_EQUAL_SVE(z13.VnH(), z3.VnH());
11676 ASSERT_EQUAL_SVE(z14.VnH(), z4.VnH());
11677 ASSERT_EQUAL_SVE(z15.VnH(), z5.VnH());
11678 ASSERT_EQUAL_SVE(z16.VnS(), z6.VnS());
11679 ASSERT_EQUAL_SVE(z17.VnS(), z7.VnS());
11680 ASSERT_EQUAL_SVE(z18.VnS(), z8.VnS());
11681 ASSERT_EQUAL_SVE(z19.VnS(), z9.VnS());
11682 ASSERT_EQUAL_SVE(z20.VnD(), z10.VnD());
11683 ASSERT_EQUAL_SVE(z21.VnD(), z11.VnD());
11684 }
11685}
11686
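// Ftmad performs one step of a sine/cosine series evaluation: in essence a
// fused multiply-add against a hard-wired table of coefficients, selected
// by the immediate index and by the sign of the second operand.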
11687TEST_SVE(sve_ftmad) {
11688 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11689 START();
11690
11691 uint64_t in_h0[] = {0x7c027e01fc02fe01,
11692 0x3c003c00bc00bc00,
11693 0x3c003c00bc00bc00};
11694 uint64_t in_h1[] = {0xfe01fc027e017e01,
11695 0x3c00bc003c00bc00,
11696 0x3c00bc003c00bc00};
11697 uint64_t in_s0[] = {0x7f800002ffc00001,
11698 0x3f8000003f800000,
11699 0xbf800000bf800000};
11700 uint64_t in_s1[] = {0xffc00001ffc00001,
11701 0x3f800000bf800000,
11702 0x3f800000bf800000};
11703 uint64_t in_d0[] = {0x7ff8000000000001,
11704 0x3ff0000000000000,
11705 0xbff0000000000000};
11706 uint64_t in_d1[] = {0xfff0000000000002,
11707 0xbff0000000000000,
11708 0x3ff0000000000000};
11709 InsrHelper(&masm, z0.VnD(), in_h0);
11710 InsrHelper(&masm, z1.VnD(), in_h1);
11711 InsrHelper(&masm, z2.VnD(), in_s0);
11712 InsrHelper(&masm, z3.VnD(), in_s1);
11713 InsrHelper(&masm, z4.VnD(), in_d0);
11714 InsrHelper(&masm, z5.VnD(), in_d1);
11715
11716 __ Mov(z6, z0);
11717 __ Ftmad(z6.VnH(), z6.VnH(), z1.VnH(), 0);
11718 __ Mov(z7, z0);
11719 __ Ftmad(z7.VnH(), z7.VnH(), z1.VnH(), 1);
11720 __ Mov(z8, z0);
11721 __ Ftmad(z8.VnH(), z8.VnH(), z1.VnH(), 2);
11722
11723 __ Mov(z9, z2);
11724 __ Ftmad(z9.VnS(), z9.VnS(), z3.VnS(), 0);
11725 __ Mov(z10, z2);
11726 __ Ftmad(z10.VnS(), z10.VnS(), z3.VnS(), 3);
11727 __ Mov(z11, z2);
11728 __ Ftmad(z11.VnS(), z11.VnS(), z3.VnS(), 4);
11729
11730 __ Mov(z12, z4);
11731 __ Ftmad(z12.VnD(), z12.VnD(), z5.VnD(), 0);
11732 __ Mov(z13, z4);
11733 __ Ftmad(z13.VnD(), z13.VnD(), z5.VnD(), 5);
11734 __ Mov(z14, z4);
11735 __ Ftmad(z14.VnD(), z14.VnD(), z5.VnD(), 7);
11736
11737 END();
11738
11739 if (CAN_RUN()) {
11740 RUN();
11741 uint64_t expected_z6[] = {0x7e027e02fe02fe01,
11742 0x4000400000000000,
11743 0x4000400000000000};
11744 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11745 uint64_t expected_z7[] = {0x7e027e02fe02fe01,
11746 0x3aab3800bcabbe00,
11747 0x3aab3800bcabbe00};
11748 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11749 uint64_t expected_z8[] = {0x7e027e02fe02fe01,
11750 0x3c083c2abbefbbac,
11751 0x3c083c2abbefbbac};
11752 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11753 uint64_t expected_z9[] = {0x7fc00002ffc00001,
11754 0x4000000040000000,
11755 0x0000000000000000};
11756 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11757 uint64_t expected_z10[] = {0x7fc00002ffc00001,
11758 0x3f7ff2ff3f7fa4fc,
11759 0xbf800680bf802d82};
11760 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11761 uint64_t expected_z11[] = {0x7fc00002ffc00001,
11762 0x3f8000173f8000cd,
11763 0xbf7fffd2bf7ffe66};
11764 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
11765 uint64_t expected_z12[] = {0x7ff8000000000002,
11766 0x4000000000000000,
11767 0x0000000000000000};
11768 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
11769 uint64_t expected_z13[] = {0x7ff8000000000002,
11770 0x3fefffff6c0d846c,
11771 0xbff0000006b978ae};
11772 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
11773 uint64_t expected_z14[] = {0x7ff8000000000002,
11774 0x3feffffffffe708a,
11775 0xbff0000000000000};
11776 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
11777 }
11778}
11779
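// This helper exercises the predicated FP arithmetic forms on a vector and
// its element-reversed copy. The inputs_fmulx values pair zeroes with
// infinities: unlike Fmul, Fmulx returns 2.0 (with the appropriate sign)
// for 0 * infinity. The inputs_nans values check that Fminnm and Fmaxnm
// prefer the numeric operand when the other operand is a quiet NaN.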
11780static void BasicFPArithHelper(MacroAssembler* masm,
11781 int lane_size_in_bits,
11782 const uint64_t (&inputs)[2],
11783 const uint64_t (&inputs_fmulx)[2],
11784 const uint64_t (&inputs_nans)[2]) {
11785 int ls = lane_size_in_bits;
11786
11787 for (int i = 0; i < 16; i++) {
11788 InsrHelper(masm, z0.VnD(), inputs);
11789 }
11790 ZRegister rvrs = z1.WithLaneSize(ls);
11791 masm->Rev(rvrs, z0.WithLaneSize(ls));
11792
11793 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
11794 Initialise(masm, p2.VnB(), pred);
11795 PRegisterM p2m = p2.Merging();
11796
11797 masm->Mov(z2, z0);
11798 masm->Fadd(z2.WithLaneSize(ls),
11799 p2m,
11800 z2.WithLaneSize(ls),
11801 rvrs,
11802 FastNaNPropagation);
11803 masm->Mov(z3, z0);
11804 masm->Fsub(z3.WithLaneSize(ls), p2m, z3.WithLaneSize(ls), rvrs);
11805 masm->Mov(z4, z0);
11806 masm->Fsub(z4.WithLaneSize(ls), p2m, rvrs, z4.WithLaneSize(ls));
11807 masm->Mov(z5, z0);
11808 masm->Fabd(z5.WithLaneSize(ls),
11809 p2m,
11810 z5.WithLaneSize(ls),
11811 rvrs,
11812 FastNaNPropagation);
11813 masm->Mov(z6, z0);
11814 masm->Fmul(z6.WithLaneSize(ls),
11815 p2m,
11816 z6.WithLaneSize(ls),
11817 rvrs,
11818 FastNaNPropagation);
11819
11820 for (int i = 0; i < 16; i++) {
11821 InsrHelper(masm, z7.VnD(), inputs_fmulx);
11822 }
11823 masm->Rev(z8.WithLaneSize(ls), z7.WithLaneSize(ls));
11824 masm->Fmulx(z7.WithLaneSize(ls),
11825 p2m,
11826 z7.WithLaneSize(ls),
11827 z8.WithLaneSize(ls),
11828 FastNaNPropagation);
11829
11830 InsrHelper(masm, z8.VnD(), inputs_nans);
11831 masm->Mov(z9, z8);
11832 masm->Fminnm(z9.WithLaneSize(ls),
11833 p2m,
11834 z9.WithLaneSize(ls),
11835 rvrs,
11836 FastNaNPropagation);
11837 masm->Mov(z10, z8);
11838 masm->Fmaxnm(z10.WithLaneSize(ls),
11839 p2m,
11840 z10.WithLaneSize(ls),
11841 rvrs,
11842 FastNaNPropagation);
11843}
11844
11845TEST_SVE(sve_fp_arith_pred_h) {
11846 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11847 START();
11848
11849 uint64_t inputs[] = {0x4800470046004500, 0x4400420040003c00};
11850 uint64_t inputs_fmulx[] = {0x7c00fc007c00fc00, 0x0000800000008000};
11851 uint64_t inputs_nans[] = {0x7fffffff7fffffff, 0x7bfffbff7fbbfbff};
11852
11853 BasicFPArithHelper(&masm, kHRegSize, inputs, inputs_fmulx, inputs_nans);
11854
11855 END();
11856
11857 if (CAN_RUN()) {
11858 RUN();
11859 uint64_t expected_z2[] = {0x4880488048804880, 0x4880420048804880};
11860 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11861 uint64_t expected_z3[] = {0x4700450042003c00, 0xbc004200c500c700};
11862 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11863 uint64_t expected_z4[] = {0xc700c500c200bc00, 0x3c00420045004700};
11864 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11865 uint64_t expected_z5[] = {0x4700450042003c00, 0x3c00420045004700};
11866 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11867 uint64_t expected_z6[] = {0x48004b004c804d00, 0x4d0042004b004800};
11868 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11869 uint64_t expected_z7[] = {0xc000c000c000c000, 0xc0008000c000c000};
11870 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11871 uint64_t expected_z9[] = {0x3c00400042004400, 0x4500fbff4700fbff};
11872 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11873 uint64_t expected_z10[] = {0x3c00400042004400, 0x7bfffbff47004800};
11874 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11875 }
11876}
11877
11878TEST_SVE(sve_fp_arith_pred_s) {
11879 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11880 START();
11881
11882 uint64_t inputs[] = {0x4080000040400000, 0x400000003f800000};
11883 uint64_t inputs_fmulx[] = {0x7f800000ff800000, 0x0000000080000000};
11884 uint64_t inputs_nans[] = {0x7fffffffffffffff, 0x41000000c1000000};
11885
11886 BasicFPArithHelper(&masm, kSRegSize, inputs, inputs_fmulx, inputs_nans);
11887
11888 END();
11889
11890 if (CAN_RUN()) {
11891 RUN();
11892 uint64_t expected_z2[] = {0x40a0000040a00000, 0x4000000040a00000};
11893 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11894 uint64_t expected_z3[] = {0x404000003f800000, 0x40000000c0400000};
11895 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11896 uint64_t expected_z4[] = {0xc0400000bf800000, 0x4000000040400000};
11897 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11898 uint64_t expected_z5[] = {0x404000003f800000, 0x4000000040400000};
11899 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11900 uint64_t expected_z6[] = {0x4080000040c00000, 0x4000000040800000};
11901 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11902 uint64_t expected_z7[] = {0xc0000000c0000000, 0x00000000c0000000};
11903 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11904 uint64_t expected_z9[] = {0x3f80000040000000, 0x41000000c1000000};
11905 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11906 uint64_t expected_z10[] = {0x3f80000040000000, 0x4100000040800000};
11907 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11908 }
11909}
11910
11911TEST_SVE(sve_fp_arith_pred_d) {
11912 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11913 START();
11914
11915 uint64_t inputs[] = {0x4000000000000000, 0x3ff0000000000000};
11916 uint64_t inputs_fmulx[] = {0x7ff0000000000000, 0x8000000000000000};
11917 uint64_t inputs_nans[] = {0x7fffffffffffffff, 0x4100000000000000};
11918
11919 BasicFPArithHelper(&masm, kDRegSize, inputs, inputs_fmulx, inputs_nans);
11920
11921 END();
11922
11923 if (CAN_RUN()) {
11924 RUN();
11925 uint64_t expected_z2[] = {0x4008000000000000, 0x4008000000000000};
11926 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11927 uint64_t expected_z3[] = {0x3ff0000000000000, 0xbff0000000000000};
11928 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11929 uint64_t expected_z4[] = {0xbff0000000000000, 0x3ff0000000000000};
11930 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11931 uint64_t expected_z5[] = {0x3ff0000000000000, 0x3ff0000000000000};
11932 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11933 uint64_t expected_z6[] = {0x4000000000000000, 0x4000000000000000};
11934 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11935 uint64_t expected_z7[] = {0xc000000000000000, 0xc000000000000000};
11936 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11937 uint64_t expected_z9[] = {0x3ff0000000000000, 0x4000000000000000};
11938 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11939 uint64_t expected_z10[] = {0x3ff0000000000000, 0x4100000000000000};
11940 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11941 }
11942}
11943
11944TEST_SVE(sve_fp_arith_pred_imm) {
11945 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11946 START();
11947
11948 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
11949 Initialise(&masm, p0.VnB(), pred);
11950 PRegisterM p0m = p0.Merging();
11951 __ Ptrue(p1.VnB());
11952
11953 __ Fdup(z0.VnD(), 0.0);
11954
11955 __ Mov(z1, z0);
11956 __ Fdiv(z1.VnH(), p1.Merging(), z1.VnH(), z1.VnH());
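  // 0.0 / 0.0 fills z1 with the default NaN; z6 and z7 below use it to check
  // the NaN handling of Fminnm and Fmaxnm with immediate operands.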
11957 __ Mov(z2, z0);
11958 __ Fadd(z2.VnH(), p0m, z2.VnH(), 0.5);
11959 __ Mov(z3, z2);
11960 __ Fsub(z3.VnH(), p0m, z3.VnH(), 1.0);
11961 __ Mov(z4, z3);
11962 __ Fsub(z4.VnH(), p0m, 1.0, z4.VnH());
11963 __ Mov(z5, z4);
11964 __ Fmul(z5.VnH(), p0m, z5.VnH(), 2.0);
11965 __ Mov(z6, z1);
11966 __ Fminnm(z6.VnH(), p0m, z6.VnH(), 0.0);
11967 __ Mov(z7, z1);
11968 __ Fmaxnm(z7.VnH(), p0m, z7.VnH(), 1.0);
11969 __ Mov(z8, z5);
11970 __ Fmin(z8.VnH(), p0m, z8.VnH(), 1.0);
11971 __ Mov(z9, z5);
11972 __ Fmax(z9.VnH(), p0m, z9.VnH(), 0.0);
11973
11974 __ Mov(z11, z0);
11975 __ Fdiv(z11.VnS(), p1.Merging(), z11.VnS(), z11.VnS());
11976 __ Mov(z12, z0);
11977 __ Fadd(z12.VnS(), p0m, z12.VnS(), 0.5);
11978 __ Mov(z13, z12);
11979 __ Fsub(z13.VnS(), p0m, z13.VnS(), 1.0);
11980 __ Mov(z14, z13);
11981 __ Fsub(z14.VnS(), p0m, 1.0, z14.VnS());
11982 __ Mov(z15, z14);
11983 __ Fmul(z15.VnS(), p0m, z15.VnS(), 2.0);
11984 __ Mov(z16, z11);
11985 __ Fminnm(z16.VnS(), p0m, z16.VnS(), 0.0);
11986 __ Mov(z17, z11);
11987 __ Fmaxnm(z17.VnS(), p0m, z17.VnS(), 1.0);
11988 __ Mov(z18, z15);
11989 __ Fmin(z18.VnS(), p0m, z18.VnS(), 1.0);
11990 __ Mov(z19, z15);
11991 __ Fmax(z19.VnS(), p0m, z19.VnS(), 0.0);
11992
11993 __ Mov(z21, z0);
11994 __ Fdiv(z21.VnD(), p1.Merging(), z21.VnD(), z21.VnD());
11995 __ Mov(z22, z0);
11996 __ Fadd(z22.VnD(), p0m, z22.VnD(), 0.5);
11997 __ Mov(z23, z22);
11998 __ Fsub(z23.VnD(), p0m, z23.VnD(), 1.0);
11999 __ Mov(z24, z23);
12000 __ Fsub(z24.VnD(), p0m, 1.0, z24.VnD());
12001 __ Mov(z25, z24);
12002 __ Fmul(z25.VnD(), p0m, z25.VnD(), 2.0);
12003 __ Mov(z26, z21);
12004 __ Fminnm(z26.VnD(), p0m, z26.VnD(), 0.0);
12005 __ Mov(z27, z21);
12006 __ Fmaxnm(z27.VnD(), p0m, z27.VnD(), 1.0);
12007 __ Mov(z28, z25);
12008 __ Fmin(z28.VnD(), p0m, z28.VnD(), 1.0);
12009 __ Mov(z29, z25);
12010 __ Fmax(z29.VnD(), p0m, z29.VnD(), 0.0);
12011
12012 __ Index(z0.VnH(), -3, 1);
12013 __ Scvtf(z0.VnH(), p1.Merging(), z0.VnH());
12014 __ Fmax(z0.VnH(), p1.Merging(), z0.VnH(), 0.0);
12015 __ Index(z1.VnS(), -4, 2);
12016 __ Scvtf(z1.VnS(), p1.Merging(), z1.VnS());
12017 __ Fadd(z1.VnS(), p1.Merging(), z1.VnS(), 1.0);
12018
12019 END();
12020
12021 if (CAN_RUN()) {
12022 RUN();
12023 uint64_t expected_z2[] = {0x3800380038003800, 0x3800000038003800};
12024 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12025 uint64_t expected_z3[] = {0xb800b800b800b800, 0xb8000000b800b800};
12026 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12027 uint64_t expected_z4[] = {0x3e003e003e003e00, 0x3e0000003e003e00};
12028 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12029 uint64_t expected_z5[] = {0x4200420042004200, 0x4200000042004200};
12030 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12031 uint64_t expected_z6[] = {0x0000000000000000, 0x00007e0000000000};
12032 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12033 uint64_t expected_z7[] = {0x3c003c003c003c00, 0x3c007e003c003c00};
12034 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12035 uint64_t expected_z8[] = {0x3c003c003c003c00, 0x3c0000003c003c00};
12036 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12037 uint64_t expected_z9[] = {0x4200420042004200, 0x4200000042004200};
12038 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12039
12040 uint64_t expected_z12[] = {0x3f0000003f000000, 0x000000003f000000};
12041 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
12042 uint64_t expected_z13[] = {0xbf000000bf000000, 0x00000000bf000000};
12043 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
12044 uint64_t expected_z14[] = {0x3fc000003fc00000, 0x000000003fc00000};
12045 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
12046 uint64_t expected_z15[] = {0x4040000040400000, 0x0000000040400000};
12047 ASSERT_EQUAL_SVE(expected_z15, z15.VnD());
12048 uint64_t expected_z16[] = {0x0000000000000000, 0x7fc0000000000000};
12049 ASSERT_EQUAL_SVE(expected_z16, z16.VnD());
12050 uint64_t expected_z17[] = {0x3f8000003f800000, 0x7fc000003f800000};
12051 ASSERT_EQUAL_SVE(expected_z17, z17.VnD());
12052 uint64_t expected_z18[] = {0x3f8000003f800000, 0x000000003f800000};
12053 ASSERT_EQUAL_SVE(expected_z18, z18.VnD());
12054 uint64_t expected_z19[] = {0x4040000040400000, 0x0000000040400000};
12055 ASSERT_EQUAL_SVE(expected_z19, z19.VnD());
12056
12057 uint64_t expected_z22[] = {0x3fe0000000000000, 0x3fe0000000000000};
12058 ASSERT_EQUAL_SVE(expected_z22, z22.VnD());
12059 uint64_t expected_z23[] = {0xbfe0000000000000, 0xbfe0000000000000};
12060 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
12061 uint64_t expected_z24[] = {0x3ff8000000000000, 0x3ff8000000000000};
12062 ASSERT_EQUAL_SVE(expected_z24, z24.VnD());
12063 uint64_t expected_z25[] = {0x4008000000000000, 0x4008000000000000};
12064 ASSERT_EQUAL_SVE(expected_z25, z25.VnD());
12065 uint64_t expected_z26[] = {0x0000000000000000, 0x0000000000000000};
12066 ASSERT_EQUAL_SVE(expected_z26, z26.VnD());
12067 uint64_t expected_z27[] = {0x3ff0000000000000, 0x3ff0000000000000};
12068 ASSERT_EQUAL_SVE(expected_z27, z27.VnD());
12069 uint64_t expected_z28[] = {0x3ff0000000000000, 0x3ff0000000000000};
12070 ASSERT_EQUAL_SVE(expected_z28, z28.VnD());
12071 uint64_t expected_z29[] = {0x4008000000000000, 0x4008000000000000};
12072 ASSERT_EQUAL_SVE(expected_z29, z29.VnD());
12073 uint64_t expected_z0[] = {0x4400420040003c00, 0x0000000000000000};
12074 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
12075 uint64_t expected_z1[] = {0x404000003f800000, 0xbf800000c0400000};
12076 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
12077 }
12078}
12079
12080TEST_SVE(sve_fscale) {
12081 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12082 START();
12083
12084 uint64_t inputs_h[] = {0x4800470046004500, 0x4400420040003c00};
12085 InsrHelper(&masm, z0.VnD(), inputs_h);
12086 uint64_t inputs_s[] = {0x4080000040400000, 0x400000003f800000};
12087 InsrHelper(&masm, z1.VnD(), inputs_s);
12088 uint64_t inputs_d[] = {0x40f0000000000000, 0x4000000000000000};
12089 InsrHelper(&masm, z2.VnD(), inputs_d);
12090
12091 uint64_t scales[] = {0x00080002fff8fffe, 0x00100001fff0ffff};
12092 InsrHelper(&masm, z3.VnD(), scales);
12093
12094 __ Ptrue(p0.VnB());
12095 int pred[] = {0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1};
12096 Initialise(&masm, p1.VnB(), pred);
12097
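  // Fscale multiplies each element of the first source by two raised to the
  // power of the corresponding (signed integer) element of the second source.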
12098 __ Mov(z4, z0);
12099 __ Fscale(z4.VnH(), p0.Merging(), z4.VnH(), z3.VnH());
12100 __ Mov(z5, z0);
12101 __ Fscale(z5.VnH(), p1.Merging(), z5.VnH(), z3.VnH());
12102
12103 __ Sunpklo(z3.VnS(), z3.VnH());
12104 __ Mov(z6, z1);
12105 __ Fscale(z6.VnS(), p0.Merging(), z6.VnS(), z3.VnS());
12106 __ Mov(z7, z1);
12107 __ Fscale(z7.VnS(), p1.Merging(), z7.VnS(), z3.VnS());
12108
12109 __ Sunpklo(z3.VnD(), z3.VnS());
12110 __ Mov(z8, z2);
12111 __ Fscale(z8.VnD(), p0.Merging(), z8.VnD(), z3.VnD());
12112 __ Mov(z9, z2);
12113 __ Fscale(z9.VnD(), p1.Merging(), z9.VnD(), z3.VnD());
12114
12115 // Test full double precision range scaling.
12116 __ Dup(z10.VnD(), 2045);
12117 __ Dup(z11.VnD(), 0x0010000000000000); // 2^-1022
12118 __ Fscale(z11.VnD(), p0.Merging(), z11.VnD(), z10.VnD());
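  // 2^-1022 scaled by 2^2045 gives 2^1023 (0x7fe0000000000000), the largest
  // power of two representable in a double.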
12119
12120 END();
12121
12122 if (CAN_RUN()) {
12123 RUN();
12124
12125 uint64_t expected_z4[] = {0x68004f0026003d00, 0x7c00460002003800};
12126 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12127 uint64_t expected_z5[] = {0x68004f0026004500, 0x7c00420002003800};
12128 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12129
12130 uint64_t expected_z6[] = {0x4880000040c00000, 0x380000003f000000};
12131 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12132 uint64_t expected_z7[] = {0x4880000040400000, 0x400000003f000000};
12133 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12134
12135 uint64_t expected_z8[] = {0x3ff0000000000000, 0x3ff0000000000000};
12136 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12137 uint64_t expected_z9[] = {0x40f0000000000000, 0x3ff0000000000000};
12138 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12139
12140 uint64_t expected_z11[] = {0x7fe0000000000000, 0x7fe0000000000000};
12141 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
12142 }
12143}
12144
12145typedef void (MacroAssembler::*FcvtFn)(const ZRegister& zd,
12146 const PRegisterM& pg,
12147 const ZRegister& zn);
12148
12149template <typename F, size_t N>
12150static void TestFcvtzsFcvtzuHelper(
12151 Test* config,
12152 FcvtFn macro,
12153 int dst_type_size_in_bits,
12154 int src_type_size_in_bits,
12155 const F (&zn_inputs)[N],
12156 const int (&pg_inputs)[N],
12157 const uint64_t (&zd_expected_all_active)[N]) {
12158 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12159 START();
12160
12161 // If the input and result types have a different size, the instruction
12162  // operates on elements of the largest specified type; the lane size used
12163  // below is therefore the larger of the two sizes.
12164 int lane_size_in_bits =
12165 std::max(dst_type_size_in_bits, src_type_size_in_bits);
12166
12167 ZRegister zd_all_active = z25;
12168 ZRegister zd_merged = z26;
12169 ZRegister zn = z27;
12170
12171 uint64_t zn_rawbits[N];
12172 FPToRawbitsWithSize(zn_inputs, zn_rawbits, src_type_size_in_bits);
12173 InsrHelper(&masm, zn.WithLaneSize(lane_size_in_bits), zn_rawbits);
12174
12175 PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
12176 __ Ptrue(pg_all_active);
12177
12178  // Test floating-point conversions with all lanes active.
12179 (masm.*macro)(zd_all_active.WithLaneSize(dst_type_size_in_bits),
12180 pg_all_active.Merging(),
12181 zn.WithLaneSize(src_type_size_in_bits));
12182
12183 PRegisterWithLaneSize pg_merged = p1.WithLaneSize(lane_size_in_bits);
12184 Initialise(&masm, pg_merged, pg_inputs);
12185
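  // Pre-fill the merged destination with a distinctive pattern so that inactive
  // lanes can be identified in the result.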
12186 __ Dup(zd_merged.VnD(), 0x0bad0bad0bad0bad);
12187
12188  // Use the same `zn` inputs to test floating-point conversions, but with some
12189  // lanes set inactive.
12190 (masm.*macro)(zd_merged.WithLaneSize(dst_type_size_in_bits),
12191 pg_merged.Merging(),
12192 zn.WithLaneSize(src_type_size_in_bits));
12193
12194 END();
12195
12196 if (CAN_RUN()) {
12197 RUN();
12198
12199 ASSERT_EQUAL_SVE(zd_expected_all_active,
12200 zd_all_active.WithLaneSize(lane_size_in_bits));
12201
12202 uint64_t zd_expected_merged[N];
12203 for (unsigned i = 0; i < N; i++) {
12204 zd_expected_merged[i] =
12205 pg_inputs[i] ? zd_expected_all_active[i]
12206 : 0x0bad0bad0bad0bad & GetUintMask(lane_size_in_bits);
12207 }
12208 ASSERT_EQUAL_SVE(zd_expected_merged,
12209 zd_merged.WithLaneSize(lane_size_in_bits));
12210 }
12211}
12212
12213TEST_SVE(fcvtzs_fcvtzu_float16) {
12214 // clang-format off
12215 const double h_max_float16 = kHMaxInt; // Largest float16 == INT16_MAX.
12216 const double h_min_float16 = -h_max_float16; // Smallest float16 > INT16_MIN.
12217 const double largest_float16 = 0xffe0; // 65504
12218 const double smallest_float16 = -largest_float16;
12219 const double h_max_int_sub_one = kHMaxInt - 1;
12220 const double h_min_int_add_one = kHMinInt + 1;
12221
12222 double zn_inputs[] = {1.0,
12223 1.1,
12224 1.5,
12225 -1.5,
12226 h_max_float16,
12227 h_min_float16,
12228 largest_float16,
12229 smallest_float16,
12230 kFP64PositiveInfinity,
12231 kFP64NegativeInfinity,
12232 h_max_int_sub_one,
12233 h_min_int_add_one};
12234
12235 int pg_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
12236
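  // The conversions saturate: out-of-range values and infinities produce
  // INT16_MAX or INT16_MIN for fcvtzs, and UINT16_MAX or 0 for fcvtzu.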
12237 uint64_t expected_fcvtzs_fp162h[] = {1,
12238 1,
12239 1,
12240 0xffff,
12241 0x7fff,
12242 0x8000,
12243 0x7fff,
12244 0x8000,
12245 0x7fff,
12246 0x8000,
12247 0x7fff,
12248 0x8000};
12249
12250 uint64_t expected_fcvtzu_fp162h[] = {1,
12251 1,
12252 1,
12253 0,
12254 0x8000,
12255 0,
12256 0xffe0,
12257 0,
12258 0xffff,
12259 0,
12260 0x8000,
12261 0};
12262
12263 // Float16 to 16-bit integers.
12264 TestFcvtzsFcvtzuHelper(config,
12265 &MacroAssembler::Fcvtzs,
12266 kHRegSize,
12267 kHRegSize,
12268 zn_inputs,
12269 pg_inputs,
12270 expected_fcvtzs_fp162h);
12271
12272 TestFcvtzsFcvtzuHelper(config,
12273 &MacroAssembler::Fcvtzu,
12274 kHRegSize,
12275 kHRegSize,
12276 zn_inputs,
12277 pg_inputs,
12278 expected_fcvtzu_fp162h);
12279
12280 uint64_t expected_fcvtzs_fp162w[] = {1,
12281 1,
12282 1,
12283 0xffffffff,
12284 0x8000,
12285 0xffff8000,
12286 0xffe0,
12287 0xffff0020,
12288 0x7fffffff,
12289 0x80000000,
12290 0x8000,
12291 0xffff8000};
12292
12293 uint64_t expected_fcvtzu_fp162w[] = {1,
12294 1,
12295 1,
12296 0,
12297 0x8000,
12298 0,
12299 0xffe0,
12300 0,
12301 0xffffffff,
12302 0,
12303 0x8000,
12304 0};
12305
12306 // Float16 to 32-bit integers.
12307 TestFcvtzsFcvtzuHelper(config,
12308 &MacroAssembler::Fcvtzs,
12309 kSRegSize,
12310 kHRegSize,
12311 zn_inputs,
12312 pg_inputs,
12313 expected_fcvtzs_fp162w);
12314
12315 TestFcvtzsFcvtzuHelper(config,
12316 &MacroAssembler::Fcvtzu,
12317 kSRegSize,
12318 kHRegSize,
12319 zn_inputs,
12320 pg_inputs,
12321 expected_fcvtzu_fp162w);
12322
12323 uint64_t expected_fcvtzs_fp162x[] = {1,
12324 1,
12325 1,
12326 0xffffffffffffffff,
12327 0x8000,
12328 0xffffffffffff8000,
12329 0xffe0,
12330 0xffffffffffff0020,
12331 0x7fffffffffffffff,
12332 0x8000000000000000,
12333 0x8000,
12334 0xffffffffffff8000};
12335
12336 uint64_t expected_fcvtzu_fp162x[] = {1,
12337 1,
12338 1,
12339 0,
12340 0x8000,
12341 0,
12342 0xffe0,
12343 0,
12344 0xffffffffffffffff,
12345 0,
12346 0x8000,
12347 0};
12348
12349 // Float16 to 64-bit integers.
12350 TestFcvtzsFcvtzuHelper(config,
12351 &MacroAssembler::Fcvtzs,
12352 kDRegSize,
12353 kHRegSize,
12354 zn_inputs,
12355 pg_inputs,
12356 expected_fcvtzs_fp162x);
12357
12358 TestFcvtzsFcvtzuHelper(config,
12359 &MacroAssembler::Fcvtzu,
12360 kDRegSize,
12361 kHRegSize,
12362 zn_inputs,
12363 pg_inputs,
12364 expected_fcvtzu_fp162x);
12365 // clang-format on
12366}
12367
12368TEST_SVE(fcvtzs_fcvtzu_float) {
12369 const double w_max_float = 0x7fffff80; // Largest float < INT32_MAX.
12370 const double w_min_float = -w_max_float; // Smallest float > INT32_MIN.
12371 const double x_max_float = 0x7fffff8000000000; // Largest float < INT64_MAX.
12372 const double x_min_float = -x_max_float; // Smallest float > INT64_MIN.
12373 const double w_max_int_sub_one = kWMaxInt - 1;
12374 const double w_min_int_add_one = kWMinInt + 1;
12375 const double x_max_int_sub_one = kXMaxInt - 1;
12376 const double x_min_int_add_one = kXMinInt + 1;
12377
12378 // clang-format off
12379 double zn_inputs[] = {1.0,
12380 1.1,
12381 1.5,
12382 -1.5,
12383 w_max_float,
12384 w_min_float,
12385 x_max_float,
12386 x_min_float,
12387 kFP64PositiveInfinity,
12388 kFP64NegativeInfinity,
12389 w_max_int_sub_one,
12390 w_min_int_add_one,
12391 x_max_int_sub_one,
12392 x_min_int_add_one};
12393
12394 int pg_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0};
12395
12396 uint64_t expected_fcvtzs_s2w[] = {1,
12397 1,
12398 1,
12399 0xffffffff,
12400 0x7fffff80,
12401 0x80000080,
12402 0x7fffffff,
12403 0x80000000,
12404 0x7fffffff,
12405 0x80000000,
12406 0x7fffffff,
12407 0x80000000,
12408 0x7fffffff,
12409 0x80000000};
12410
12411 uint64_t expected_fcvtzu_s2w[] = {1,
12412 1,
12413 1,
12414 0,
12415 0x7fffff80,
12416 0,
12417 0xffffffff,
12418 0,
12419 0xffffffff,
12420 0,
12421 0x80000000,
12422 0,
12423 0xffffffff,
12424 0};
12425
12426 // Float to 32-bit integers.
12427 TestFcvtzsFcvtzuHelper(config,
12428 &MacroAssembler::Fcvtzs,
12429 kSRegSize,
12430 kSRegSize,
12431 zn_inputs,
12432 pg_inputs,
12433 expected_fcvtzs_s2w);
12434
12435 TestFcvtzsFcvtzuHelper(config,
12436 &MacroAssembler::Fcvtzu,
12437 kSRegSize,
12438 kSRegSize,
12439 zn_inputs,
12440 pg_inputs,
12441 expected_fcvtzu_s2w);
12442
12443 uint64_t expected_fcvtzs_s2x[] = {1,
12444 1,
12445 1,
12446 0xffffffffffffffff,
12447 0x7fffff80,
12448 0xffffffff80000080,
12449 0x7fffff8000000000,
12450 0x8000008000000000,
12451 0x7fffffffffffffff,
12452 0x8000000000000000,
12453 0x80000000,
12454 0xffffffff80000000,
12455 0x7fffffffffffffff,
12456 0x8000000000000000};
12457
12458 uint64_t expected_fcvtzu_s2x[] = {1,
12459 1,
12460 1,
12461 0,
12462 0x7fffff80,
12463 0,
12464 0x7fffff8000000000,
12465 0,
12466 0xffffffffffffffff,
12467 0,
12468 0x0000000080000000,
12469 0,
12470 0x8000000000000000,
12471 0};
12472
12473 // Float to 64-bit integers.
12474 TestFcvtzsFcvtzuHelper(config,
12475 &MacroAssembler::Fcvtzs,
12476 kDRegSize,
12477 kSRegSize,
12478 zn_inputs,
12479 pg_inputs,
12480 expected_fcvtzs_s2x);
12481
12482 TestFcvtzsFcvtzuHelper(config,
12483 &MacroAssembler::Fcvtzu,
12484 kDRegSize,
12485 kSRegSize,
12486 zn_inputs,
12487 pg_inputs,
12488 expected_fcvtzu_s2x);
12489 // clang-format on
12490}
12491
12492TEST_SVE(fcvtzs_fcvtzu_double) {
12493 // clang-format off
12494 const double w_max_float = 0x7fffff80; // Largest float < INT32_MAX.
12495 const double w_min_float = -w_max_float; // Smallest float > INT32_MIN.
12496 const double x_max_float = 0x7fffff8000000000; // Largest float < INT64_MAX.
12497 const double x_min_float = -x_max_float; // Smallest float > INT64_MIN.
12498 const double w_max_double = kWMaxInt; // Largest double == INT32_MAX.
12499 const double w_min_double = -w_max_double; // Smallest double > INT32_MIN.
12500 const double x_max_double = 0x7ffffffffffffc00; // Largest double < INT64_MAX.
12501 const double x_min_double = -x_max_double; // Smallest double > INT64_MIN.
12502 const double w_max_int_sub_one = kWMaxInt - 1;
12503 const double w_min_int_add_one = kWMinInt + 1;
12504 const double x_max_int_sub_one = kXMaxInt - 1;
12505 const double x_min_int_add_one = kXMinInt + 1;
12506
12507 double zn_inputs[] = {1.0,
12508 1.1,
12509 1.5,
12510 -1.5,
12511 w_max_float,
12512 w_min_float,
12513 x_max_float,
12514 x_min_float,
12515 w_max_double,
12516 w_min_double,
12517 x_max_double,
12518 x_min_double,
12519 kFP64PositiveInfinity,
12520 kFP64NegativeInfinity,
12521 w_max_int_sub_one,
12522 w_min_int_add_one,
12523 x_max_int_sub_one,
12524 x_min_int_add_one};
12525
12526 int pg_inputs[] = {1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12527
12528 uint64_t expected_fcvtzs_d2w[] = {1,
12529 1,
12530 1,
12531 0xffffffffffffffff,
12532 0x7fffff80,
12533 0xffffffff80000080,
12534 0x7fffffff,
12535 0xffffffff80000000,
12536 0x7fffffff,
12537 0xffffffff80000001,
12538 0x7fffffff,
12539 0xffffffff80000000,
12540 0x7fffffff,
12541 0xffffffff80000000,
12542 0x7ffffffe,
12543 0xffffffff80000001,
12544 0x7fffffff,
12545 0xffffffff80000000};
12546
12547 uint64_t expected_fcvtzu_d2w[] = {1,
12548 1,
12549 1,
12550 0,
12551 0x7fffff80,
12552 0,
12553 0xffffffff,
12554 0,
12555 0x7fffffff,
12556 0,
12557 0xffffffff,
12558 0,
12559 0xffffffff,
12560 0,
12561 0x7ffffffe,
12562 0,
12563 0xffffffff,
12564 0};
12565
12566 // Double to 32-bit integers.
12567 TestFcvtzsFcvtzuHelper(config,
12568 &MacroAssembler::Fcvtzs,
12569 kSRegSize,
12570 kDRegSize,
12571 zn_inputs,
12572 pg_inputs,
12573 expected_fcvtzs_d2w);
12574
12575 TestFcvtzsFcvtzuHelper(config,
12576 &MacroAssembler::Fcvtzu,
12577 kSRegSize,
12578 kDRegSize,
12579 zn_inputs,
12580 pg_inputs,
12581 expected_fcvtzu_d2w);
12582
12583 uint64_t expected_fcvtzs_d2x[] = {1,
12584 1,
12585 1,
12586 0xffffffffffffffff,
12587 0x7fffff80,
12588 0xffffffff80000080,
12589 0x7fffff8000000000,
12590 0x8000008000000000,
12591 0x7fffffff,
12592 0xffffffff80000001,
12593 0x7ffffffffffffc00,
12594 0x8000000000000400,
12595 0x7fffffffffffffff,
12596 0x8000000000000000,
12597 0x7ffffffe,
12598 0xffffffff80000001,
12599 0x7fffffffffffffff,
12600 0x8000000000000000};
12601
12602 uint64_t expected_fcvtzu_d2x[] = {1,
12603 1,
12604 1,
12605 0,
12606 0x7fffff80,
12607 0,
12608 0x7fffff8000000000,
12609 0,
12610 0x7fffffff,
12611 0,
12612 0x7ffffffffffffc00,
12613 0,
12614 0xffffffffffffffff,
12615 0,
12616 0x000000007ffffffe,
12617 0,
12618 0x8000000000000000,
12619 0};
12620
12621 // Double to 64-bit integers.
12622 TestFcvtzsFcvtzuHelper(config,
12623 &MacroAssembler::Fcvtzs,
12624 kDRegSize,
12625 kDRegSize,
12626 zn_inputs,
12627 pg_inputs,
12628 expected_fcvtzs_d2x);
12629
12630 TestFcvtzsFcvtzuHelper(config,
12631 &MacroAssembler::Fcvtzu,
12632 kDRegSize,
12633 kDRegSize,
12634 zn_inputs,
12635 pg_inputs,
12636 expected_fcvtzu_d2x);
12637 // clang-format on
12638}
12639
12640struct CvtfTestDataSet {
12641 uint64_t int_value;
12642 uint64_t scvtf_result;
12643 uint64_t ucvtf_result;
12644};
12645
12646template <size_t N>
12647static void TestUScvtfHelper(Test* config,
12648 int dst_type_size_in_bits,
12649 int src_type_size_in_bits,
12650 const int (&pg_inputs)[N],
12651 const CvtfTestDataSet (&data_set)[N]) {
12652 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12653 START();
12654
12655  // Unpack the data from the array of structs into individual arrays, to
12656  // simplify the testing below.
12657 uint64_t zn_inputs[N];
12658 uint64_t expected_zd_scvtf_all_active[N];
12659 uint64_t expected_zd_ucvtf_all_active[N];
12660 for (size_t i = 0; i < N; i++) {
12661 zn_inputs[i] = data_set[i].int_value;
12662 expected_zd_scvtf_all_active[i] = data_set[i].scvtf_result;
12663 expected_zd_ucvtf_all_active[i] = data_set[i].ucvtf_result;
12664 }
12665
12666 // If the input and result types have a different size, the instruction
12667 // operates on elements of the largest specified type.
12668 int lane_size_in_bits =
12669 std::max(dst_type_size_in_bits, src_type_size_in_bits);
12670
12671 ZRegister zd_scvtf_all_active = z25;
12672 ZRegister zd_ucvtf_all_active = z26;
12673 ZRegister zn = z27;
12674 InsrHelper(&masm, zn.WithLaneSize(lane_size_in_bits), zn_inputs);
12675
12676 PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
12677 __ Ptrue(pg_all_active);
12678
12679  // Test integer conversions with all lanes active.
12680 __ Scvtf(zd_scvtf_all_active.WithLaneSize(dst_type_size_in_bits),
12681 pg_all_active.Merging(),
12682 zn.WithLaneSize(src_type_size_in_bits));
12683 __ Ucvtf(zd_ucvtf_all_active.WithLaneSize(dst_type_size_in_bits),
12684 pg_all_active.Merging(),
12685 zn.WithLaneSize(src_type_size_in_bits));
12686
12687 ZRegister zd_scvtf_merged = z23;
12688 ZRegister zd_ucvtf_merged = z24;
12689
12690 PRegisterWithLaneSize pg_merged = p1.WithLaneSize(lane_size_in_bits);
12691 Initialise(&masm, pg_merged, pg_inputs);
12692
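  // Pre-fill the merged destinations with a signalling NaN pattern so that
  // inactive lanes are easy to identify in the results.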
12693 uint64_t snan;
12694 switch (lane_size_in_bits) {
12695 case kHRegSize:
12696 snan = 0x7c11;
12697 break;
12698 case kSRegSize:
12699 snan = 0x7f951111;
12700 break;
12701 case kDRegSize:
12702 snan = 0x7ff5555511111111;
12703 break;
12704 }
12705 __ Dup(zd_scvtf_merged.WithLaneSize(lane_size_in_bits), snan);
12706 __ Dup(zd_ucvtf_merged.WithLaneSize(lane_size_in_bits), snan);
12707
12708  // Use the same `zn` inputs to test integer conversions, but with some lanes
12709  // set inactive.
12710 __ Scvtf(zd_scvtf_merged.WithLaneSize(dst_type_size_in_bits),
12711 pg_merged.Merging(),
12712 zn.WithLaneSize(src_type_size_in_bits));
12713 __ Ucvtf(zd_ucvtf_merged.WithLaneSize(dst_type_size_in_bits),
12714 pg_merged.Merging(),
12715 zn.WithLaneSize(src_type_size_in_bits));
12716
12717 END();
12718
12719 if (CAN_RUN()) {
12720 RUN();
12721
12722 ASSERT_EQUAL_SVE(expected_zd_scvtf_all_active,
12723 zd_scvtf_all_active.WithLaneSize(lane_size_in_bits));
12724 ASSERT_EQUAL_SVE(expected_zd_ucvtf_all_active,
12725 zd_ucvtf_all_active.WithLaneSize(lane_size_in_bits));
12726
12727 uint64_t expected_zd_scvtf_merged[N];
12728 for (size_t i = 0; i < N; i++) {
12729 expected_zd_scvtf_merged[i] =
12730 pg_inputs[i] ? expected_zd_scvtf_all_active[i] : snan;
12731 }
12732 ASSERT_EQUAL_SVE(expected_zd_scvtf_merged,
12733 zd_scvtf_merged.WithLaneSize(lane_size_in_bits));
12734
12735 uint64_t expected_zd_ucvtf_merged[N];
12736 for (size_t i = 0; i < N; i++) {
12737 expected_zd_ucvtf_merged[i] =
12738 pg_inputs[i] ? expected_zd_ucvtf_all_active[i] : snan;
12739 }
12740 ASSERT_EQUAL_SVE(expected_zd_ucvtf_merged,
12741 zd_ucvtf_merged.WithLaneSize(lane_size_in_bits));
12742 }
12743}
12744
12745TEST_SVE(scvtf_ucvtf_h_s_d_to_float16) {
12746 // clang-format off
12747 CvtfTestDataSet data_set_1[] = {
12748 // Simple conversions of positive numbers which require no rounding; the
12749    // results should not depend on the rounding mode, and ucvtf and scvtf should
12750 // produce the same result.
12751 {0x0000, 0x0000, 0x0000},
12752 {0x0001, 0x3c00, 0x3c00},
12753 {0x0010, 0x4c00, 0x4c00},
12754 {0x0080, 0x5800, 0x5800},
12755 {0x0400, 0x6400, 0x6400},
12756 // Conversions which require rounding.
12757 {0x4000, 0x7400, 0x7400},
12758 {0x4001, 0x7400, 0x7400},
12759 // Round up to produce a result that's too big for the input to represent.
12760 {0x7ff0, 0x77ff, 0x77ff},
12761 {0x7ff1, 0x77ff, 0x77ff},
12762 {0x7ffe, 0x7800, 0x7800},
12763 {0x7fff, 0x7800, 0x7800}};
12764 int pg_1[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
12765 TestUScvtfHelper(config, kHRegSize, kDRegSize, pg_1, data_set_1);
12766 TestUScvtfHelper(config, kHRegSize, kSRegSize, pg_1, data_set_1);
12767 TestUScvtfHelper(config, kHRegSize, kHRegSize, pg_1, data_set_1);
12768
12769 CvtfTestDataSet data_set_2[] = {
12770 // Test mantissa extremities.
12771 {0x0401, 0x6401, 0x6401},
12772 {0x4020, 0x7402, 0x7402},
12773    // The largest uint16_t that converts to a finite float16.
12774 {0xffef, 0xcc40, 0x7bff},
12775 // Values that would be negative if treated as an int16_t.
12776 {0xff00, 0xdc00, 0x7bf8},
12777 {0x8000, 0xf800, 0x7800},
12778 {0x8100, 0xf7f0, 0x7808},
12779 // Check for bit pattern reproduction.
12780 {0x0123, 0x5c8c, 0x5c8c},
12781 {0x0cde, 0x6a6f, 0x6a6f},
12782    // Simple conversions of negative int16_t values. These require no rounding,
12783 // and the results should not depend on the rounding mode.
12784 {0xf800, 0xe800, 0x7bc0},
12785 {0xfc00, 0xe400, 0x7be0},
12786 {0xc000, 0xf400, 0x7a00},
12787 // Check rounding of negative int16_t values.
12788 {0x8ffe, 0xf700, 0x7880},
12789 {0x8fff, 0xf700, 0x7880},
12790 {0xffee, 0xcc80, 0x7bff},
12791 {0xffef, 0xcc40, 0x7bff}};
12792 int pg_2[] = {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1};
12793  // The `32-bit to float16` and `64-bit to float16` variants of the above tests
12794  // have already been covered by the `16-bit to float16` `ucvtf` cases.
12795 TestUScvtfHelper(config, kHRegSize, kHRegSize, pg_2, data_set_2);
12796 // clang-format on
12797}
12798
12799TEST_SVE(scvtf_ucvtf_s_to_float) {
12800 // clang-format off
12801 int dst_lane_size = kSRegSize;
12802 int src_lane_size = kSRegSize;
12803
12804 // Simple conversions of positive numbers which require no rounding; the
12805  // results should not depend on the rounding mode, and ucvtf and scvtf should
12806 // produce the same result.
12807 CvtfTestDataSet data_set_1[] = {
12808 {0x00000000, 0x00000000, 0x00000000},
12809 {0x00000001, 0x3f800000, 0x3f800000},
12810 {0x00004000, 0x46800000, 0x46800000},
12811 {0x00010000, 0x47800000, 0x47800000},
12812 {0x40000000, 0x4e800000, 0x4e800000}};
12813 int pg_1[] = {1, 0, 1, 0, 0};
12814 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
12815
12816 CvtfTestDataSet data_set_2[] = {
12817 // Test mantissa extremities.
12818 {0x00800001, 0x4b000001, 0x4b000001},
12819 {0x40400000, 0x4e808000, 0x4e808000},
12820    // The largest int32_t that fits in a float.
12821 {0x7fffff80, 0x4effffff, 0x4effffff},
12822 // Values that would be negative if treated as an int32_t.
12823 {0xffffffff, 0xbf800000, 0x4f800000},
12824 {0xffffff00, 0xc3800000, 0x4f7fffff},
12825 {0x80000000, 0xcf000000, 0x4f000000},
12826 {0x80000001, 0xcf000000, 0x4f000000},
12827 // Check for bit pattern reproduction.
12828 {0x089abcde, 0x4d09abce, 0x4d09abce},
12829 {0x12345678, 0x4d91a2b4, 0x4d91a2b4}};
12830 int pg_2[] = {1, 0, 1, 0, 1, 1, 1, 0, 0};
12831 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
12832
12833 // Simple conversions of negative int32_t values. These require no rounding,
12834 // and the results should not depend on the rounding mode.
12835 CvtfTestDataSet data_set_3[] = {
12836 {0xffffc000, 0xc6800000, 0x4f7fffc0},
12837 {0xffff0000, 0xc7800000, 0x4f7fff00},
12838 {0xc0000000, 0xce800000, 0x4f400000},
12839 // Conversions which require rounding.
12840 {0x72800000, 0x4ee50000, 0x4ee50000},
12841 {0x72800001, 0x4ee50000, 0x4ee50000},
12842 {0x73000000, 0x4ee60000, 0x4ee60000},
12843 // Check rounding of negative int32_t values.
12844 {0x80000140, 0xcefffffe, 0x4f000001},
12845 {0x80000141, 0xcefffffd, 0x4f000001},
12846 {0x80000180, 0xcefffffd, 0x4f000002},
12847 // Round up to produce a result that's too big for the input to represent.
12848 {0x7fffffc0, 0x4f000000, 0x4f000000},
12849 {0x7fffffff, 0x4f000000, 0x4f000000}};
12850 int pg_3[] = {1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0};
12851 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
12852 // clang-format on
12853}
12854
12855TEST_SVE(scvtf_ucvtf_d_to_float) {
12856 // clang-format off
12857 int dst_lane_size = kSRegSize;
12858 int src_lane_size = kDRegSize;
12859
12860 // Simple conversions of positive numbers which require no rounding; the
12861  // results should not depend on the rounding mode, and ucvtf and scvtf should
12862 // produce the same result.
12863 CvtfTestDataSet data_set_1[] = {
12864 {0x0000000000000000, 0x00000000, 0x00000000},
12865 {0x0000000000000001, 0x3f800000, 0x3f800000},
12866 {0x0000000040000000, 0x4e800000, 0x4e800000},
12867 {0x0000000100000000, 0x4f800000, 0x4f800000},
12868 {0x4000000000000000, 0x5e800000, 0x5e800000}};
12869 int pg_1[] = {1, 1, 0, 1, 0};
12870 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
12871
12872 CvtfTestDataSet data_set_2[] = {
12873 // Test mantissa extremities.
12874 {0x0010000000000001, 0x59800000, 0x59800000},
12875 {0x4008000000000000, 0x5e801000, 0x5e801000},
12876 // The largest int32_t that fits in a float.
12877 {0x000000007fffff80, 0x4effffff, 0x4effffff},
12878 // Values that would be negative if treated as an int32_t.
12879 {0x00000000ffffffff, 0x4f800000, 0x4f800000},
12880 {0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff},
12881 {0x0000000080000000, 0x4f000000, 0x4f000000},
12882 {0x0000000080000100, 0x4f000001, 0x4f000001},
12883 // The largest int64_t that fits in a float.
12884 {0x7fffff8000000000, 0x5effffff, 0x5effffff},
12885 // Check for bit pattern reproduction.
12886 {0x0123456789abcde0, 0x5b91a2b4, 0x5b91a2b4},
12887 {0x0000000000876543, 0x4b076543, 0x4b076543}};
12888 int pg_2[] = {1, 0, 0, 0, 1, 0, 0, 0, 0, 1};
12889 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
12890
12891 CvtfTestDataSet data_set_3[] = {
12892 // Simple conversions of negative int64_t values. These require no rounding,
12893 // and the results should not depend on the rounding mode.
12894 {0xffffffffc0000000, 0xce800000, 0x5f800000},
12895 {0xffffffff00000000, 0xcf800000, 0x5f800000},
12896 {0xc000000000000000, 0xde800000, 0x5f400000},
12897 // Conversions which require rounding.
12898 {0x0000800002800000, 0x57000002, 0x57000002},
12899 {0x0000800002800001, 0x57000003, 0x57000003},
12900 {0x0000800003000000, 0x57000003, 0x57000003},
12901 // Check rounding of negative int64_t values.
12902 {0x8000014000000000, 0xdefffffe, 0x5f000001},
12903 {0x8000014000000001, 0xdefffffd, 0x5f000001},
12904 {0x8000018000000000, 0xdefffffd, 0x5f000002},
12905 // Round up to produce a result that's too big for the input to represent.
12906 {0x00000000ffffff80, 0x4f800000, 0x4f800000},
12907 {0x00000000ffffffff, 0x4f800000, 0x4f800000},
12908 {0xffffff8000000000, 0xd3000000, 0x5f800000},
12909 {0xffffffffffffffff, 0xbf800000, 0x5f800000}};
12910 int pg_3[] = {0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1};
12911 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
12912 // clang-format on
12913}
12914
12915TEST_SVE(scvtf_ucvtf_d_to_double) {
12916 // clang-format off
12917 int dst_lane_size = kDRegSize;
12918 int src_lane_size = kDRegSize;
12919
12920 // Simple conversions of positive numbers which require no rounding; the
12921  // results should not depend on the rounding mode, and ucvtf and scvtf should
12922 // produce the same result.
12923 CvtfTestDataSet data_set_1[] = {
12924 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
12925 {0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000},
12926 {0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000},
12927 {0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000},
12928 {0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000}};
12929 int pg_1[] = {0, 1, 1, 0, 0};
12930 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
12931
12932 CvtfTestDataSet data_set_2[] = {
12933 // Test mantissa extremities.
12934 {0x0010000000000001, 0x4330000000000001, 0x4330000000000001},
12935 {0x4008000000000000, 0x43d0020000000000, 0x43d0020000000000},
12936 // The largest int32_t that fits in a double.
12937 {0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000},
12938 // Values that would be negative if treated as an int32_t.
12939 {0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000},
12940 {0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000},
12941 {0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000},
12942 // The largest int64_t that fits in a double.
12943 {0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff},
12944 // Check for bit pattern reproduction.
12945 {0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde},
12946 {0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000}};
12947 int pg_2[] = {1, 1, 1, 1, 1, 0, 0, 0, 0};
12948 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
12949
12950 CvtfTestDataSet data_set_3[] = {
12951 // Simple conversions of negative int64_t values. These require no rounding,
12952 // and the results should not depend on the rounding mode.
12953 {0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000},
12954 {0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000},
12955 {0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000},
12956 // Conversions which require rounding.
12957 {0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002},
12958 {0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003},
12959 {0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003},
12960 // Check rounding of negative int64_t values.
12961 {0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001},
12962 {0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001},
12963 {0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002},
12964 // Round up to produce a result that's too big for the input to represent.
12965 {0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000},
12966 {0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000},
12967 {0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000},
12968 {0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000}};
12969 int pg_3[] = {1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0};
12970 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
12971 // clang-format on
12972}
12973
12974TEST_SVE(scvtf_ucvtf_s_to_double) {
12975 // clang-format off
12976 int dst_lane_size = kDRegSize;
12977 int src_lane_size = kSRegSize;
12978
12979 // Simple conversions of positive numbers which require no rounding; the
12980  // results should not depend on the rounding mode, and ucvtf and scvtf should
12981 // produce the same result.
12982 CvtfTestDataSet data_set_1[] = {
12983 {0x00000000, 0x0000000000000000, 0x0000000000000000},
12984 {0x00000001, 0x3ff0000000000000, 0x3ff0000000000000},
12985 {0x00004000, 0x40d0000000000000, 0x40d0000000000000},
12986 {0x00010000, 0x40f0000000000000, 0x40f0000000000000},
12987 {0x40000000, 0x41d0000000000000, 0x41d0000000000000}};
12988 int pg_1[] = {1, 0, 0, 0, 1};
12989 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
12990
12991 CvtfTestDataSet data_set_2[] = {
12992 // Test mantissa extremities.
12993 {0x40000400, 0x41d0000100000000, 0x41d0000100000000},
12994 // The largest int32_t that fits in a double.
12995 {0x7fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000},
12996 // Values that would be negative if treated as an int32_t.
12997 {0xffffffff, 0xbff0000000000000, 0x41efffffffe00000},
12998 {0x80000000, 0xc1e0000000000000, 0x41e0000000000000},
12999 {0x80000001, 0xc1dfffffffc00000, 0x41e0000000200000},
13000 // Check for bit pattern reproduction.
13001 {0x089abcde, 0x41a13579bc000000, 0x41a13579bc000000},
13002 {0x12345678, 0x41b2345678000000, 0x41b2345678000000},
13003 // Simple conversions of negative int32_t values. These require no rounding,
13004 // and the results should not depend on the rounding mode.
13005 {0xffffc000, 0xc0d0000000000000, 0x41effff800000000},
13006 {0xffff0000, 0xc0f0000000000000, 0x41efffe000000000},
13007 {0xc0000000, 0xc1d0000000000000, 0x41e8000000000000}};
13008 int pg_2[] = {1, 0, 1, 0, 0, 1, 1, 0, 1, 1};
13009 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
13010
13011  // Note that the IEEE 754 double-precision format has a 52-bit fraction, so
13012  // all 32-bit integers are exactly representable as doubles.
13013 // clang-format on
13014}
13015
13016TEST_SVE(sve_fadda) {
13017 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
13018 CPUFeatures::kFP,
13019 CPUFeatures::kFPHalf);
13020 START();
13021
13022 __ Ptrue(p0.VnB());
13023 __ Pfalse(p1.VnB());
13024 __ Zip1(p1.VnH(), p0.VnH(), p1.VnH());
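  // p1 now selects alternate H lanes (0, 2, 4, ...); with the Index(1, 1)
  // sequence below, these lanes hold the odd values 1, 3, 5, ...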
13025
13026 __ Index(z0.VnS(), 3, 3);
13027 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
13028 __ Fmov(s2, 2.0);
13029 __ Fadda(s2, p0, s2, z0.VnS());
13030
13031 __ Index(z0.VnD(), -7, -7);
13032 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
13033 __ Fmov(d3, 3.0);
13034 __ Fadda(d3, p0, d3, z0.VnD());
13035
13036 __ Index(z0.VnH(), 1, 1);
13037 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
13038 __ Fmov(h4, 0);
13039 __ Fadda(h4, p1, h4, z0.VnH());
13040 END();
13041
13042 if (CAN_RUN()) {
13043 RUN();
13044    // Sum of 1 .. n is n(n+1)/2, computed as (n + 1) * (n / 2) since n is even.
13045 int n = core.GetSVELaneCount(kSRegSize);
13046 ASSERT_EQUAL_FP32(2 + 3 * ((n + 1) * (n / 2)), s2);
13047
13048 n /= 2; // Half as many lanes.
13049 ASSERT_EQUAL_FP64(3 + -7 * ((n + 1) * (n / 2)), d3);
13050
13051 // Sum of first n odd numbers is n^2.
13052 n = core.GetSVELaneCount(kHRegSize) / 2; // Half are odd numbers.
13053 ASSERT_EQUAL_FP16(Float16(n * n), h4);
13054 }
13055}
13056
13057TEST_SVE(sve_extract) {
13058 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13059 START();
13060
13061 __ Index(z0.VnB(), 0, 1);
13062
13063 __ Mov(z1, z0);
13064 __ Mov(z2, z0);
13065 __ Mov(z3, z0);
13066 __ Mov(z4, z0);
13067 __ Mov(z5, z0);
13068 __ Mov(z6, z0);
13069
13070 __ Ext(z1, z1, z0, 0);
13071 __ Ext(z2, z2, z0, 1);
13072 __ Ext(z3, z3, z0, 15);
13073 __ Ext(z4, z4, z0, 31);
13074 __ Ext(z5, z5, z0, 47);
13075 __ Ext(z6, z6, z0, 255);
13076
13077 END();
13078
13079 if (CAN_RUN()) {
13080 RUN();
13081
13082 ASSERT_EQUAL_SVE(z1, z0);
13083
13084 int lane_count = core.GetSVELaneCount(kBRegSize);
13085 if (lane_count == 16) {
13086 uint64_t z2_expected[] = {0x000f0e0d0c0b0a09, 0x0807060504030201};
13087 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
13088 } else {
13089 uint64_t z2_expected[] = {0x100f0e0d0c0b0a09, 0x0807060504030201};
13090 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
13091 }
13092
13093 if (lane_count == 16) {
13094 uint64_t z3_expected[] = {0x0e0d0c0b0a090807, 0x060504030201000f};
13095 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13096 } else {
13097 uint64_t z3_expected[] = {0x1e1d1c1b1a191817, 0x161514131211100f};
13098 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13099 }
13100
13101 if (lane_count < 32) {
13102 ASSERT_EQUAL_SVE(z4, z0);
13103 } else if (lane_count == 32) {
13104 uint64_t z4_expected[] = {0x0e0d0c0b0a090807, 0x060504030201001f};
13105 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13106 } else {
13107 uint64_t z4_expected[] = {0x2e2d2c2b2a292827, 0x262524232221201f};
13108 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13109 }
13110
13111 if (lane_count < 48) {
13112 ASSERT_EQUAL_SVE(z5, z0);
13113 } else if (lane_count == 48) {
13114 uint64_t z5_expected[] = {0x0e0d0c0b0a090807, 0x060504030201002f};
13115 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13116 } else {
13117 uint64_t z5_expected[] = {0x3e3d3c3b3a393837, 0x363534333231302f};
13118 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13119 }
13120
13121 if (lane_count < 256) {
13122 ASSERT_EQUAL_SVE(z6, z0);
13123 } else {
13124 uint64_t z6_expected[] = {0x0e0d0c0b0a090807, 0x06050403020100ff};
13125 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
13126 }
13127 }
13128}
13129
13130TEST_SVE(sve_fp_paired_across) {
13131 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13132
13133 START();
13134
13135 __ Ptrue(p0.VnB());
13136 __ Pfalse(p1.VnB());
13137 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
13138 __ Zip1(p3.VnD(), p0.VnD(), p1.VnD());
13139 __ Zip1(p4.VnH(), p0.VnH(), p1.VnH());
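  // p2, p3 and p4 select alternate S, D and H lanes respectively.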
13140
13141 __ Index(z0.VnS(), 3, 3);
13142 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
13143 __ Faddv(s1, p0, z0.VnS());
13144 __ Fminv(s2, p2, z0.VnS());
13145 __ Fmaxv(s3, p2, z0.VnS());
13146
13147 __ Index(z0.VnD(), -7, -7);
13148 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
13149 __ Faddv(d4, p0, z0.VnD());
13150 __ Fminv(d5, p3, z0.VnD());
13151 __ Fmaxv(d6, p3, z0.VnD());
13152
13153 __ Index(z0.VnH(), 1, 1);
13154 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
13155 __ Faddv(h7, p4, z0.VnH());
13156 __ Fminv(h8, p4, z0.VnH());
13157 __ Fmaxv(h9, p4, z0.VnH());
13158
13159 __ Dup(z10.VnH(), 0);
13160 __ Fdiv(z10.VnH(), p0.Merging(), z10.VnH(), z10.VnH());
13161 __ Insr(z10.VnH(), 0x5140);
13162 __ Insr(z10.VnH(), 0xd140);
13163 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 2);
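  // z10 now holds +42.0 in the lowest lane, -42.0 in the highest lane, and the
  // default NaN in every other lane.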
13164 __ Fmaxnmv(h11, p0, z10.VnH());
13165 __ Fmaxnmv(h12, p4, z10.VnH());
13166 __ Fminnmv(h13, p0, z10.VnH());
13167 __ Fminnmv(h14, p4, z10.VnH());
13168
13169 __ Dup(z10.VnS(), 0);
13170 __ Fdiv(z10.VnS(), p0.Merging(), z10.VnS(), z10.VnS());
13171 __ Insr(z10.VnS(), 0x42280000);
13172 __ Insr(z10.VnS(), 0xc2280000);
13173 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 4);
13174 __ Fmaxnmv(s15, p0, z10.VnS());
13175 __ Fmaxnmv(s16, p2, z10.VnS());
13176 __ Fminnmv(s17, p0, z10.VnS());
13177 __ Fminnmv(s18, p2, z10.VnS());
13178
13179 __ Dup(z10.VnD(), 0);
13180 __ Fdiv(z10.VnD(), p0.Merging(), z10.VnD(), z10.VnD());
13181 __ Insr(z10.VnD(), 0x4045000000000000);
13182 __ Insr(z10.VnD(), 0xc045000000000000);
13183 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 8);
13184 __ Fmaxnmv(d19, p0, z10.VnD());
13185 __ Fmaxnmv(d20, p3, z10.VnD());
13186 __ Fminnmv(d21, p0, z10.VnD());
13187 __ Fminnmv(d22, p3, z10.VnD());
13188 END();
13189
13190 if (CAN_RUN()) {
13191 RUN();
13192    // Sum of 1 .. n is n(n+1)/2, computed as (n + 1) * (n / 2) since n is even.
13193 int n = core.GetSVELaneCount(kSRegSize);
13194 ASSERT_EQUAL_FP32(3 * ((n + 1) * (n / 2)), s1);
13195 ASSERT_EQUAL_FP32(3, s2);
13196 ASSERT_EQUAL_FP32(3 * n - 3, s3);
13197
13198 n /= 2; // Half as many lanes.
13199 ASSERT_EQUAL_FP64(-7 * ((n + 1) * (n / 2)), d4);
13200 ASSERT_EQUAL_FP64(-7 * (n - 1), d5);
13201 ASSERT_EQUAL_FP64(-7, d6);
13202
13203 // Sum of first n odd numbers is n^2.
13204 n = core.GetSVELaneCount(kHRegSize) / 2; // Half are odd numbers.
13205 ASSERT_EQUAL_FP16(Float16(n * n), h7);
13206 ASSERT_EQUAL_FP16(Float16(1), h8);
13207
13208 n = core.GetSVELaneCount(kHRegSize);
13209 ASSERT_EQUAL_FP16(Float16(n - 1), h9);
13210
13211 ASSERT_EQUAL_FP16(Float16(42), h11);
13212 ASSERT_EQUAL_FP16(Float16(42), h12);
13213 ASSERT_EQUAL_FP16(Float16(-42), h13);
13214 ASSERT_EQUAL_FP16(Float16(42), h14);
13215 ASSERT_EQUAL_FP32(42, s15);
13216 ASSERT_EQUAL_FP32(42, s16);
13217 ASSERT_EQUAL_FP32(-42, s17);
13218 ASSERT_EQUAL_FP32(42, s18);
13219 ASSERT_EQUAL_FP64(42, d19);
13220 ASSERT_EQUAL_FP64(42, d20);
13221 ASSERT_EQUAL_FP64(-42, d21);
13222 ASSERT_EQUAL_FP64(42, d22);
13223 }
13224}
13225
13226TEST_SVE(sve_frecpe_frsqrte) {
13227 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13228
13229 START();
13230
13231 __ Ptrue(p0.VnB());
13232
13233 __ Index(z0.VnH(), 0, 1);
13234 __ Fdup(z1.VnH(), Float16(1));
13235 __ Fscale(z1.VnH(), p0.Merging(), z1.VnH(), z0.VnH());
13236 __ Insr(z1.VnH(), 0);
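  // z1 now holds +0.0 in the lowest lane, followed by ascending powers of two
  // (1.0, 2.0, 4.0, ...).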
13237 __ Frsqrte(z2.VnH(), z1.VnH());
13238 __ Frecpe(z1.VnH(), z1.VnH());
13239
13240 __ Index(z0.VnS(), 0, 1);
13241 __ Fdup(z3.VnS(), Float16(1));
13242 __ Fscale(z3.VnS(), p0.Merging(), z3.VnS(), z0.VnS());
13243 __ Insr(z3.VnS(), 0);
13244 __ Frsqrte(z4.VnS(), z3.VnS());
13245 __ Frecpe(z3.VnS(), z3.VnS());
13246
13247 __ Index(z0.VnD(), 0, 1);
13248 __ Fdup(z5.VnD(), Float16(1));
13249 __ Fscale(z5.VnD(), p0.Merging(), z5.VnD(), z0.VnD());
13250 __ Insr(z5.VnD(), 0);
13251 __ Frsqrte(z6.VnD(), z5.VnD());
13252 __ Frecpe(z5.VnD(), z5.VnD());
13253 END();
13254
13255 if (CAN_RUN()) {
13256 RUN();
13257 uint64_t z1_expected[] = {0x23fc27fc2bfc2ffc, 0x33fc37fc3bfc7c00};
13258 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
13259 uint64_t z2_expected[] = {0x2ffc31a433fc35a4, 0x37fc39a43bfc7c00};
13260 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
13261
13262 uint64_t z3_expected[] = {0x3e7f80003eff8000, 0x3f7f80007f800000};
13263 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13264 uint64_t z4_expected[] = {0x3eff80003f348000, 0x3f7f80007f800000};
13265 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13266
13267 uint64_t z5_expected[] = {0x3feff00000000000, 0x7ff0000000000000};
13268 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13269 uint64_t z6_expected[] = {0x3feff00000000000, 0x7ff0000000000000};
13270 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
13271 }
13272}
13273
13274TEST_SVE(sve_frecps_frsqrts) {
13275 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13276
13277 START();
13278 __ Ptrue(p0.VnB());
13279
13280 __ Index(z0.VnH(), 0, -1);
13281 __ Fdup(z1.VnH(), Float16(1));
13282 __ Fscale(z1.VnH(), p0.Merging(), z1.VnH(), z0.VnH());
13283 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
13284 __ Insr(z1.VnH(), 0);
13285 __ Frsqrts(z2.VnH(), z1.VnH(), z0.VnH());
13286 __ Frecps(z1.VnH(), z1.VnH(), z0.VnH());
13287
13288 __ Index(z0.VnS(), 0, -1);
13289 __ Fdup(z3.VnS(), Float16(1));
13290 __ Fscale(z3.VnS(), p0.Merging(), z3.VnS(), z0.VnS());
13291 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
13292 __ Insr(z3.VnS(), 0);
13293 __ Frsqrts(z4.VnS(), z3.VnS(), z0.VnS());
13294 __ Frecps(z3.VnS(), z3.VnS(), z0.VnS());
13295
13296 __ Index(z0.VnD(), 0, -1);
13297 __ Fdup(z5.VnD(), Float16(1));
13298 __ Fscale(z5.VnD(), p0.Merging(), z5.VnD(), z0.VnD());
13299 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
13300 __ Insr(z5.VnD(), 0);
13301 __ Frsqrts(z6.VnD(), z5.VnD(), z0.VnD());
13302 __ Frecps(z5.VnD(), z5.VnD(), z0.VnD());
13303 END();
13304
13305 if (CAN_RUN()) {
13306 RUN();
13307 uint64_t z1_expected[] = {0x4038406040a04100, 0x4180420042004000};
13308 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
13309 uint64_t z2_expected[] = {0x3e383e603ea03f00, 0x3f80400040003e00};
13310 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
13311
13312 uint64_t z3_expected[] = {0x4030000040400000, 0x4040000040000000};
13313 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13314 uint64_t z4_expected[] = {0x3ff0000040000000, 0x400000003fc00000};
13315 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13316
13317 uint64_t z5_expected[] = {0x4008000000000000, 0x4000000000000000};
13318 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13319 uint64_t z6_expected[] = {0x4000000000000000, 0x3ff8000000000000};
13320 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
13321 }
13322}
13323
13324TEST_SVE(sve_ftsmul) {
13325 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13326
13327 START();
13328 __ Ptrue(p0.VnB());
13329
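  // Ftsmul computes the square of each element of the first source, negating
  // the result if bit 0 of the corresponding element of the second source is
  // set; NaN elements propagate through to the result.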
13330 __ Index(z0.VnH(), 0, 1);
13331 __ Rev(z1.VnH(), z0.VnH());
13332 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
13333 __ Dup(z2.VnH(), 0);
13334 __ Fdiv(z2.VnH(), p0.Merging(), z2.VnH(), z2.VnH());
13335 __ Ftsmul(z3.VnH(), z0.VnH(), z1.VnH());
13336 __ Ftsmul(z4.VnH(), z2.VnH(), z1.VnH());
13337
13338 __ Index(z0.VnS(), -7, 1);
13339 __ Rev(z1.VnS(), z0.VnS());
13340 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
13341 __ Dup(z2.VnS(), 0);
13342 __ Fdiv(z2.VnS(), p0.Merging(), z2.VnS(), z2.VnS());
13343 __ Ftsmul(z5.VnS(), z0.VnS(), z1.VnS());
13344 __ Ftsmul(z6.VnS(), z2.VnS(), z1.VnS());
13345
13346 __ Index(z0.VnD(), 2, -1);
13347 __ Rev(z1.VnD(), z0.VnD());
13348 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
13349 __ Dup(z2.VnD(), 0);
13350 __ Fdiv(z2.VnD(), p0.Merging(), z2.VnD(), z2.VnD());
13351 __ Ftsmul(z7.VnD(), z0.VnD(), z1.VnD());
13352 __ Ftsmul(z8.VnD(), z2.VnD(), z1.VnD());
13353 END();
13354
13355 if (CAN_RUN()) {
13356 RUN();
13357 uint64_t z3_expected[] = {0x5220d0804e40cc00, 0x4880c4003c008000};
13358 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13359 uint64_t z4_expected[] = {0x7e007e007e007e00, 0x7e007e007e007e00};
13360 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13361
13362 uint64_t z5_expected[] = {0x41800000c1c80000, 0x42100000c2440000};
13363 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13364 uint64_t z6_expected[] = {0x7fc000007fc00000, 0x7fc000007fc00000};
13365 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
13366
13367 uint64_t z7_expected[] = {0x3ff0000000000000, 0xc010000000000000};
13368 ASSERT_EQUAL_SVE(z7_expected, z7.VnD());
13369 uint64_t z8_expected[] = {0x7ff8000000000000, 0x7ff8000000000000};
13370 ASSERT_EQUAL_SVE(z8_expected, z8.VnD());
13371 }
13372}
13373
13374typedef void (MacroAssembler::*FPMulAccFn)(
13375 const ZRegister& zd,
13376 const PRegisterM& pg,
13377 const ZRegister& za,
13378 const ZRegister& zn,
13379 const ZRegister& zm,
13380 FPMacroNaNPropagationOption nan_option);
13381
13382// `pg_inputs` is used to check that predication is applied correctly; it does
13383// not determine the `result` argument. `result` holds the expected values for
13384// an all-true predicate.
13385template <typename T, size_t N>
13386static void FPMulAccHelper(
13387 Test* config,
13388 FPMulAccFn macro,
13389 unsigned lane_size_in_bits,
13390 const int (&pg_inputs)[N],
13391 const T (&za_inputs)[N],
13392 const T (&zn_inputs)[N],
13393 const T (&zm_inputs)[N],
13394 const uint64_t (&result)[N],
13395 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
13396 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13397 START();
13398
13399 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
13400 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
13401 ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
13402 ZRegister zm = z3.WithLaneSize(lane_size_in_bits);
13403
13404 uint64_t za_rawbits[N];
13405 uint64_t zn_rawbits[N];
13406 uint64_t zm_rawbits[N];
13407
13408 FPToRawbitsWithSize(za_inputs, za_rawbits, lane_size_in_bits);
13409 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
13410 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
13411
13412 InsrHelper(&masm, za, za_rawbits);
13413 InsrHelper(&masm, zn, zn_rawbits);
13414 InsrHelper(&masm, zm, zm_rawbits);
13415
13416 uint64_t zd_rawbits[N];
13417 for (size_t i = 0; i < N; i++) {
13418 // Initialize `zd` with a signalling NaN.
13419 switch (lane_size_in_bits) {
13420 case kHRegSize:
13421 zd_rawbits[i] = 0x7c99;
13422 break;
13423 case kSRegSize:
13424 zd_rawbits[i] = 0x7f959999;
13425 break;
13426 case kDRegSize:
13427 zd_rawbits[i] = 0x7ff5555599999999;
13428 break;
13429 default:
13430 VIXL_UNIMPLEMENTED();
13431 break;
13432 }
13433 }
13434 InsrHelper(&masm, zd, zd_rawbits);
13435
13436 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
13437
13438 // Fmla macro automatically selects between fmla, fmad and movprfx + fmla
13439 // Fmls `ditto` fmls, fmsb and movprfx + fmls
13440 // Fnmla `ditto` fnmla, fnmad and movprfx + fnmla
13441 // Fnmls `ditto` fnmls, fnmsb and movprfx + fnmls
13442 // based on what registers are aliased.
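  // The four results below alias the accumulator (za), the first multiplicand
  // (zn), the second multiplicand (zm), and none of the inputs, respectively.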
13443 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
13444 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
13445 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
13446 ZRegister d_result = z13.WithLaneSize(lane_size_in_bits);
13447
13448 __ Mov(da_result, za);
13449 (masm.*macro)(da_result, p0.Merging(), da_result, zn, zm, nan_option);
13450
13451 __ Mov(dn_result, zn);
13452 (masm.*macro)(dn_result, p0.Merging(), za, dn_result, zm, nan_option);
13453
13454 __ Mov(dm_result, zm);
13455 (masm.*macro)(dm_result, p0.Merging(), za, zn, dm_result, nan_option);
13456
13457 __ Mov(d_result, zd);
13458 (masm.*macro)(d_result, p0.Merging(), za, zn, zm, nan_option);
13459
13460 END();
13461
13462 if (CAN_RUN()) {
13463 RUN();
13464
13465 ASSERT_EQUAL_SVE(za_rawbits, za);
13466 ASSERT_EQUAL_SVE(zn_rawbits, zn);
13467 ASSERT_EQUAL_SVE(zm_rawbits, zm);
13468
13469 uint64_t da_expected[N];
13470 uint64_t dn_expected[N];
13471 uint64_t dm_expected[N];
13472 uint64_t d_expected[N];
13473 for (size_t i = 0; i < N; i++) {
13474 da_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : za_rawbits[i];
13475 dn_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zn_rawbits[i];
13476 dm_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zm_rawbits[i];
13477 d_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zd_rawbits[i];
13478 }
13479
13480 ASSERT_EQUAL_SVE(da_expected, da_result);
13481 ASSERT_EQUAL_SVE(dn_expected, dn_result);
13482 ASSERT_EQUAL_SVE(dm_expected, dm_result);
13483 ASSERT_EQUAL_SVE(d_expected, d_result);
13484 }
13485}
13486
13487TEST_SVE(sve_fmla_fmad) {
13488 // fmla : zd = za + zn * zm
13489 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
13490 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
13491 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
13492 int pg_inputs[] = {1, 1, 0, 1};
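  // For example, element 0: -39.0 + (-5.0 * 9.0) = -84.0.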
13493
13494 uint64_t fmla_result_h[] = {Float16ToRawbits(Float16(-84.0)),
13495 Float16ToRawbits(Float16(101.0)),
13496 Float16ToRawbits(Float16(33.0)),
13497 Float16ToRawbits(Float16(42.0))};
13498
13499 // `fmad` has been tested in the helper.
13500 FPMulAccHelper(config,
13501 &MacroAssembler::Fmla,
13502 kHRegSize,
13503 pg_inputs,
13504 za_inputs,
13505 zn_inputs,
13506 zm_inputs,
13507 fmla_result_h);
13508
13509 uint64_t fmla_result_s[] = {FloatToRawbits(-84.0f),
13510 FloatToRawbits(101.0f),
13511 FloatToRawbits(33.0f),
13512 FloatToRawbits(42.0f)};
13513
13514 FPMulAccHelper(config,
13515 &MacroAssembler::Fmla,
13516 kSRegSize,
13517 pg_inputs,
13518 za_inputs,
13519 zn_inputs,
13520 zm_inputs,
13521 fmla_result_s);
13522
13523 uint64_t fmla_result_d[] = {DoubleToRawbits(-84.0),
13524 DoubleToRawbits(101.0),
13525 DoubleToRawbits(33.0),
13526 DoubleToRawbits(42.0)};
13527
13528 FPMulAccHelper(config,
13529 &MacroAssembler::Fmla,
13530 kDRegSize,
13531 pg_inputs,
13532 za_inputs,
13533 zn_inputs,
13534 zm_inputs,
13535 fmla_result_d);
13536}
13537
13538TEST_SVE(sve_fmls_fmsb) {
13539 // fmls : zd = za - zn * zm
13540 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
13541 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
13542 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
13543 int pg_inputs[] = {1, 0, 1, 1};
13544
13545 uint64_t fmls_result_h[] = {Float16ToRawbits(Float16(6.0)),
13546 Float16ToRawbits(Float16(-99.0)),
13547 Float16ToRawbits(Float16(-39.0)),
13548 Float16ToRawbits(Float16(-38.0))};
13549
13550  // The `fmsb` form is also exercised by the helper, via register aliasing.
13551 FPMulAccHelper(config,
13552 &MacroAssembler::Fmls,
13553 kHRegSize,
13554 pg_inputs,
13555 za_inputs,
13556 zn_inputs,
13557 zm_inputs,
13558 fmls_result_h);
13559
13560 uint64_t fmls_result_s[] = {FloatToRawbits(6.0f),
13561 FloatToRawbits(-99.0f),
13562 FloatToRawbits(-39.0f),
13563 FloatToRawbits(-38.0f)};
13564
13565 FPMulAccHelper(config,
13566 &MacroAssembler::Fmls,
13567 kSRegSize,
13568 pg_inputs,
13569 za_inputs,
13570 zn_inputs,
13571 zm_inputs,
13572 fmls_result_s);
13573
13574 uint64_t fmls_result_d[] = {DoubleToRawbits(6.0),
13575 DoubleToRawbits(-99.0),
13576 DoubleToRawbits(-39.0),
13577 DoubleToRawbits(-38.0)};
13578
13579 FPMulAccHelper(config,
13580 &MacroAssembler::Fmls,
13581 kDRegSize,
13582 pg_inputs,
13583 za_inputs,
13584 zn_inputs,
13585 zm_inputs,
13586 fmls_result_d);
13587}
13588
13589TEST_SVE(sve_fnmla_fnmad) {
13590 // fnmla : zd = -za - zn * zm
13591 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
13592 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
13593 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
13594 int pg_inputs[] = {0, 1, 1, 1};
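  // For example, lane 1 computes -(1.0) - (-20.0 * -5.0) = -101.0, and lane 0
  // is masked off (pg = 0).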
13595
13596 uint64_t fnmla_result_h[] = {Float16ToRawbits(Float16(84.0)),
13597 Float16ToRawbits(Float16(-101.0)),
13598 Float16ToRawbits(Float16(-33.0)),
13599 Float16ToRawbits(Float16(-42.0))};
13600
13601  // The `fnmad` form is also exercised by the helper, via register aliasing.
13602 FPMulAccHelper(config,
13603 &MacroAssembler::Fnmla,
13604 kHRegSize,
13605 pg_inputs,
13606 za_inputs,
13607 zn_inputs,
13608 zm_inputs,
13609 fnmla_result_h);
13610
13611 uint64_t fnmla_result_s[] = {FloatToRawbits(84.0f),
13612 FloatToRawbits(-101.0f),
13613 FloatToRawbits(-33.0f),
13614 FloatToRawbits(-42.0f)};
13615
13616 FPMulAccHelper(config,
13617 &MacroAssembler::Fnmla,
13618 kSRegSize,
13619 pg_inputs,
13620 za_inputs,
13621 zn_inputs,
13622 zm_inputs,
13623 fnmla_result_s);
13624
13625 uint64_t fnmla_result_d[] = {DoubleToRawbits(84.0),
13626 DoubleToRawbits(-101.0),
13627 DoubleToRawbits(-33.0),
13628 DoubleToRawbits(-42.0)};
13629
13630 FPMulAccHelper(config,
13631 &MacroAssembler::Fnmla,
13632 kDRegSize,
13633 pg_inputs,
13634 za_inputs,
13635 zn_inputs,
13636 zm_inputs,
13637 fnmla_result_d);
13638}
13639
13640TEST_SVE(sve_fnmls_fnmsb) {
13641 // fnmls : zd = -za + zn * zm
13642 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
13643 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
13644 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
13645 int pg_inputs[] = {1, 1, 1, 0};
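  // For example, lane 0 computes -(-39.0) + (-5.0 * 9.0) = -6.0, and lane 3 is
  // masked off (pg = 0).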
13646
13647 uint64_t fnmls_result_h[] = {Float16ToRawbits(Float16(-6.0)),
13648 Float16ToRawbits(Float16(99.0)),
13649 Float16ToRawbits(Float16(39.0)),
13650 Float16ToRawbits(Float16(38.0))};
13651
13652  // The `fnmsb` form is also exercised by the helper, via register aliasing.
13653 FPMulAccHelper(config,
13654 &MacroAssembler::Fnmls,
13655 kHRegSize,
13656 pg_inputs,
13657 za_inputs,
13658 zn_inputs,
13659 zm_inputs,
13660 fnmls_result_h);
13661
13662 uint64_t fnmls_result_s[] = {FloatToRawbits(-6.0f),
13663 FloatToRawbits(99.0f),
13664 FloatToRawbits(39.0f),
13665 FloatToRawbits(38.0f)};
13666
13667 FPMulAccHelper(config,
13668 &MacroAssembler::Fnmls,
13669 kSRegSize,
13670 pg_inputs,
13671 za_inputs,
13672 zn_inputs,
13673 zm_inputs,
13674 fnmls_result_s);
13675
13676 uint64_t fnmls_result_d[] = {DoubleToRawbits(-6.0),
13677 DoubleToRawbits(99.0),
13678 DoubleToRawbits(39.0),
13679 DoubleToRawbits(38.0)};
13680
13681 FPMulAccHelper(config,
13682 &MacroAssembler::Fnmls,
13683 kDRegSize,
13684 pg_inputs,
13685 za_inputs,
13686 zn_inputs,
13687 zm_inputs,
13688 fnmls_result_d);
13689}
13690
13691typedef void (MacroAssembler::*FPMulAccIdxFn)(const ZRegister& zd,
13692 const ZRegister& za,
13693 const ZRegister& zn,
13694 const ZRegister& zm,
13695 int index);
13696
13697template <typename T, size_t N>
13698static void FPMulAccIdxHelper(Test* config,
13699 FPMulAccFn macro,
13700 FPMulAccIdxFn macro_idx,
13701 const T (&za_inputs)[N],
13702 const T (&zn_inputs)[N],
13703 const T (&zm_inputs)[N]) {
13704 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13705 START();
13706
13707 InsrHelper(&masm, z0.VnD(), zm_inputs);
13708 InsrHelper(&masm, z1.VnD(), zn_inputs);
13709 InsrHelper(&masm, z2.VnD(), za_inputs);
13710
13711 __ Mov(z3, z0);
13712 (masm.*macro_idx)(z3.VnH(), z2.VnH(), z1.VnH(), z3.VnH(), 0); // zd == zm
13713 __ Mov(z4, z1);
13714 (masm.*macro_idx)(z4.VnH(), z2.VnH(), z4.VnH(), z0.VnH(), 1); // zd == zn
13715 __ Mov(z5, z2);
13716 (masm.*macro_idx)(z5.VnH(), z5.VnH(), z1.VnH(), z0.VnH(), 4); // zd == za
13717 (masm.*macro_idx)(z6.VnH(), z2.VnH(), z1.VnH(), z0.VnH(), 7);
13718
13719 __ Mov(z7, z0);
13720 (masm.*macro_idx)(z7.VnS(), z2.VnS(), z1.VnS(), z7.VnS(), 0); // zd == zm
13721 __ Mov(z8, z1);
13722 (masm.*macro_idx)(z8.VnS(), z2.VnS(), z8.VnS(), z0.VnS(), 1); // zd == zn
13723 __ Mov(z9, z2);
13724 (masm.*macro_idx)(z9.VnS(), z9.VnS(), z1.VnS(), z0.VnS(), 2); // zd == za
13725 (masm.*macro_idx)(z10.VnS(), z2.VnS(), z1.VnS(), z0.VnS(), 3);
13726
13727 __ Mov(z11, z0);
13728 (masm.*macro_idx)(z11.VnD(), z2.VnD(), z1.VnD(), z11.VnD(), 0); // zd == zm
13729 __ Mov(z12, z1);
13730 (masm.*macro_idx)(z12.VnD(), z2.VnD(), z12.VnD(), z0.VnD(), 1); // zd == zn
13731 __ Mov(z13, z2);
13732 (masm.*macro_idx)(z13.VnD(), z13.VnD(), z1.VnD(), z0.VnD(), 0); // zd == za
13733 __ Mov(z14, z0);
13734 // zd == zn == zm
13735 (masm.*macro_idx)(z14.VnD(), z2.VnD(), z14.VnD(), z14.VnD(), 1);
13736
13737 __ Ptrue(p0.VnB());
13738
13739  // The indexed forms of Fmla and Fmls never swap their arguments, so pass
13740  // strict NaN propagation mode to ensure that the vector-form macros below
13741  // never swap their arguments either.
13742 FPMacroNaNPropagationOption option = StrictNaNPropagation;
13743 // Compute the results using other instructions.
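  // Broadcasting the indexed lane of zm with Dup and then applying the vector
  // form should produce the same result as the indexed form; the assertions
  // below compare the two.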
13744 __ Dup(z31.VnH(), z0.VnH(), 0);
13745 (masm.*macro)(z15.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
13746 __ Dup(z31.VnH(), z0.VnH(), 1);
13747 (masm.*macro)(z16.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
13748 __ Dup(z31.VnH(), z0.VnH(), 4);
13749 (masm.*macro)(z17.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
13750 __ Dup(z31.VnH(), z0.VnH(), 7);
13751 (masm.*macro)(z18.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
13752
13753 __ Dup(z31.VnS(), z0.VnS(), 0);
13754 (masm.*macro)(z19.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
13755 __ Dup(z31.VnS(), z0.VnS(), 1);
13756 (masm.*macro)(z20.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
13757 __ Dup(z31.VnS(), z0.VnS(), 2);
13758 (masm.*macro)(z21.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
13759 __ Dup(z31.VnS(), z0.VnS(), 3);
13760 (masm.*macro)(z22.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
13761
13762 __ Dup(z31.VnD(), z0.VnD(), 0);
13763 (masm.*macro)(z23.VnD(), p0.Merging(), z2.VnD(), z1.VnD(), z31.VnD(), option);
13764 __ Dup(z31.VnD(), z0.VnD(), 1);
13765 (masm.*macro)(z24.VnD(), p0.Merging(), z2.VnD(), z1.VnD(), z31.VnD(), option);
13766 __ Dup(z31.VnD(), z0.VnD(), 1);
13767 (masm.*macro)(z25.VnD(), p0.Merging(), z2.VnD(), z0.VnD(), z31.VnD(), option);
13768
13769 END();
13770
13771 if (CAN_RUN()) {
13772 RUN();
13773
13774 ASSERT_EQUAL_SVE(z15.VnH(), z3.VnH());
13775 ASSERT_EQUAL_SVE(z16.VnH(), z4.VnH());
13776 ASSERT_EQUAL_SVE(z17.VnH(), z5.VnH());
13777 ASSERT_EQUAL_SVE(z18.VnH(), z6.VnH());
13778
13779 ASSERT_EQUAL_SVE(z19.VnS(), z7.VnS());
13780 ASSERT_EQUAL_SVE(z20.VnS(), z8.VnS());
13781 ASSERT_EQUAL_SVE(z21.VnS(), z9.VnS());
13782 ASSERT_EQUAL_SVE(z22.VnS(), z10.VnS());
13783
13784 ASSERT_EQUAL_SVE(z23.VnD(), z11.VnD());
13785 ASSERT_EQUAL_SVE(z24.VnD(), z12.VnD());
13786 ASSERT_EQUAL_SVE(z11.VnD(), z13.VnD());
13787 ASSERT_EQUAL_SVE(z25.VnD(), z14.VnD());
13788 }
13789}
13790
13791TEST_SVE(sve_fmla_fmls_index) {
13792 uint64_t zm_inputs_1[] = {0x3ff000003f803c00, 0xbff00000bf80bc00};
13793 uint64_t zn_inputs_1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76};
13794 uint64_t za_inputs_1[] = {0x3c004000bc00c000, 0x64006800e400e800};
13795
13796 // Using the vector form of Fmla and Fmls to verify the indexed form.
13797 FPMulAccIdxHelper(config,
13798 &MacroAssembler::Fmla, // vector form
13799 &MacroAssembler::Fmla, // indexed form
13800 za_inputs_1,
13801 zn_inputs_1,
13802 zm_inputs_1);
13803
13804 FPMulAccIdxHelper(config,
13805 &MacroAssembler::Fmls, // vector form
13806 &MacroAssembler::Fmls, // indexed form
13807 za_inputs_1,
13808 zn_inputs_1,
13809 zm_inputs_1);
13810
13811 uint64_t zm_inputs_2[] = {0x7ff5555511111111, // NaN
13812 0xfff0000000000000}; // Infinity
13813 uint64_t zn_inputs_2[] = {0x7f9511117fc00000, // NaN
13814 0x7f800000ff800000}; // Infinity
13815 uint64_t za_inputs_2[] = {0x7c11000000007e00, // NaN
13816 0x000000007c00fc00}; // Infinity
13817 FPMulAccIdxHelper(config,
13818 &MacroAssembler::Fmla, // vector form
13819 &MacroAssembler::Fmla, // indexed form
13820 za_inputs_2,
13821 zn_inputs_2,
13822 zm_inputs_2);
13823
13824 FPMulAccIdxHelper(config,
13825 &MacroAssembler::Fmls, // vector form
13826 &MacroAssembler::Fmls, // indexed form
13827 za_inputs_2,
13828 zn_inputs_2,
13829 zm_inputs_2);
13830}
13831
13832// Execute a number of instructions which all use ProcessNaNs, and check that
13833// they all propagate NaNs correctly.
13834template <typename Ti, typename Td, size_t N>
13835static void ProcessNaNsHelper(Test* config,
13836 int lane_size_in_bits,
13837 const Ti (&zn_inputs)[N],
13838 const Ti (&zm_inputs)[N],
13839 const Td (&zd_expected)[N],
13840 FPMacroNaNPropagationOption nan_option) {
13841 ArithFn arith_unpredicated_macro[] = {&MacroAssembler::Fadd,
13842 &MacroAssembler::Fsub,
13843 &MacroAssembler::Fmul};
13844
13845 for (size_t i = 0; i < ArrayLength(arith_unpredicated_macro); i++) {
13846 FPBinArithHelper(config,
13847 arith_unpredicated_macro[i],
13848 lane_size_in_bits,
13849 zn_inputs,
13850 zm_inputs,
13851 zd_expected);
13852 }
13853
13854 FPArithPredicatedFn arith_predicated_macro[] = {&MacroAssembler::Fmax,
13855 &MacroAssembler::Fmin};
13856 int pg_inputs[N];
13857  // Use an all-true predicate: this helper checks how special values are
13858  // handled, so no lane should be masked off.
13859 for (size_t i = 0; i < N; i++) {
13860 pg_inputs[i] = 1;
13861 }
13862
13863 // fdivr propagates the quotient (Zm) preferentially, so we don't actually
13864 // need any special handling for StrictNaNPropagation.
13865 FPBinArithHelper(config,
13866 NULL,
13867 &MacroAssembler::Fdiv,
13868 lane_size_in_bits,
13869 // With an all-true predicate, the value in zd is
13870 // irrelevant to the operations.
13871 zn_inputs,
13872 pg_inputs,
13873 zn_inputs,
13874 zm_inputs,
13875 zd_expected);
13876
13877 for (size_t i = 0; i < ArrayLength(arith_predicated_macro); i++) {
13878 FPBinArithHelper(config,
13879 arith_predicated_macro[i],
13880 NULL,
13881 lane_size_in_bits,
13882 // With an all-true predicate, the value in zd is
13883 // irrelevant to the operations.
13884 zn_inputs,
13885 pg_inputs,
13886 zn_inputs,
13887 zm_inputs,
13888 zd_expected,
13889 nan_option);
13890 }
13891}
13892
13893template <typename Ti, typename Td, size_t N>
13894static void ProcessNaNsHelper3(Test* config,
13895 int lane_size_in_bits,
13896 const Ti (&za_inputs)[N],
13897 const Ti (&zn_inputs)[N],
13898 const Ti (&zm_inputs)[N],
13899 const Td (&zd_expected_fmla)[N],
13900 const Td (&zd_expected_fmls)[N],
13901 const Td (&zd_expected_fnmla)[N],
13902 const Td (&zd_expected_fnmls)[N],
13903 FPMacroNaNPropagationOption nan_option) {
13904 int pg_inputs[N];
13905  // Use an all-true predicate: this helper checks how special values are
13906  // handled, so no lane should be masked off.
13907 for (size_t i = 0; i < N; i++) {
13908 pg_inputs[i] = 1;
13909 }
13910
13911 FPMulAccHelper(config,
13912 &MacroAssembler::Fmla,
13913 lane_size_in_bits,
13914 pg_inputs,
13915 za_inputs,
13916 zn_inputs,
13917 zm_inputs,
13918 zd_expected_fmla,
13919 nan_option);
13920
13921 FPMulAccHelper(config,
13922 &MacroAssembler::Fmls,
13923 lane_size_in_bits,
13924 pg_inputs,
13925 za_inputs,
13926 zn_inputs,
13927 zm_inputs,
13928 zd_expected_fmls,
13929 nan_option);
13930
13931 FPMulAccHelper(config,
13932 &MacroAssembler::Fnmla,
13933 lane_size_in_bits,
13934 pg_inputs,
13935 za_inputs,
13936 zn_inputs,
13937 zm_inputs,
13938 zd_expected_fnmla,
13939 nan_option);
13940
13941 FPMulAccHelper(config,
13942 &MacroAssembler::Fnmls,
13943 lane_size_in_bits,
13944 pg_inputs,
13945 za_inputs,
13946 zn_inputs,
13947 zm_inputs,
13948 zd_expected_fnmls,
13949 nan_option);
13950}
13951
13952TEST_SVE(sve_process_nans_double) {
13953 // Use non-standard NaNs to check that the payload bits are preserved.
13954 double sa = RawbitsToDouble(0x7ff5555511111111);
13955 double sn = RawbitsToDouble(0x7ff5555522222222);
13956 double sm = RawbitsToDouble(0x7ff5555533333333);
13957 double qa = RawbitsToDouble(0x7ffaaaaa11111111);
13958 double qn = RawbitsToDouble(0x7ffaaaaa22222222);
13959 double qm = RawbitsToDouble(0x7ffaaaaa33333333);
13960 VIXL_ASSERT(IsSignallingNaN(sa));
13961 VIXL_ASSERT(IsSignallingNaN(sn));
13962 VIXL_ASSERT(IsSignallingNaN(sm));
13963 VIXL_ASSERT(IsQuietNaN(qa));
13964 VIXL_ASSERT(IsQuietNaN(qn));
13965 VIXL_ASSERT(IsQuietNaN(qm));
13966
13967 // The input NaNs after passing through ProcessNaN.
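  // For example, the signalling sa (0x7ff5555511111111) becomes
  // 0x7ffd555511111111: the topmost mantissa bit is set to quieten it and the
  // payload is preserved. Quiet NaNs pass through unchanged.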
13968 uint64_t sa_proc = 0x7ffd555511111111;
13969 uint64_t sn_proc = 0x7ffd555522222222;
13970 uint64_t sm_proc = 0x7ffd555533333333;
13971 uint64_t qa_proc = DoubleToRawbits(qa);
13972 uint64_t qn_proc = DoubleToRawbits(qn);
13973 uint64_t qm_proc = DoubleToRawbits(qm);
13974 uint64_t sa_proc_n = sa_proc ^ kDSignMask;
13975 uint64_t sn_proc_n = sn_proc ^ kDSignMask;
13976 uint64_t qa_proc_n = qa_proc ^ kDSignMask;
13977 uint64_t qn_proc_n = qn_proc ^ kDSignMask;
13978
13979 // Quiet NaNs are propagated.
13980 double zn_inputs_1[] = {qn, 0.0, 0.0, qm, qn, qm};
13981 double zm_inputs_1[] = {0.0, qn, qm, 0.0, qm, qn};
13982 uint64_t zd_expected_1[] =
13983 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
13984
13985 ProcessNaNsHelper(config,
13986 kDRegSize,
13987 zn_inputs_1,
13988 zm_inputs_1,
13989 zd_expected_1,
13990 StrictNaNPropagation);
13991
13992 // Signalling NaNs are propagated.
13993 double zn_inputs_2[] = {sn, 0.0, 0.0, sm, sn, sm};
13994 double zm_inputs_2[] = {0.0, sn, sm, 0.0, sm, sn};
13995 uint64_t zd_expected_2[] =
13996 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
13997 ProcessNaNsHelper(config,
13998 kDRegSize,
13999 zn_inputs_2,
14000 zm_inputs_2,
14001 zd_expected_2,
14002 StrictNaNPropagation);
14003
14004 // Signalling NaNs take precedence over quiet NaNs.
14005 double zn_inputs_3[] = {sn, qn, sn, sn, qn};
14006 double zm_inputs_3[] = {qm, sm, sm, qn, sn};
14007 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
14008 ProcessNaNsHelper(config,
14009 kDRegSize,
14010 zn_inputs_3,
14011 zm_inputs_3,
14012 zd_expected_3,
14013 StrictNaNPropagation);
14014
14015 double za_inputs_4[] = {qa, qa, 0.0, 0.0, qa, qa};
14016 double zn_inputs_4[] = {qn, 0.0, 0.0, qn, qn, qn};
14017 double zm_inputs_4[] = {0.0, qm, qm, qm, qm, 0.0};
14018
14019 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
14020 // If `n` is propagated, its sign is inverted by fmls and fnmla.
14021 // If `m` is propagated, its sign is never inverted.
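  // For example, in lane 3 `n` is propagated: fmla and fnmls produce qn_proc,
  // while fmls and fnmla produce qn_proc_n (the sign-flipped encoding).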
14022 uint64_t zd_expected_fmla_4[] =
14023 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
14024 uint64_t zd_expected_fmls_4[] =
14025 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
14026 uint64_t zd_expected_fnmla_4[] =
14027 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
14028 uint64_t zd_expected_fnmls_4[] =
14029 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
14030
14031 ProcessNaNsHelper3(config,
14032 kDRegSize,
14033 za_inputs_4,
14034 zn_inputs_4,
14035 zm_inputs_4,
14036 zd_expected_fmla_4,
14037 zd_expected_fmls_4,
14038 zd_expected_fnmla_4,
14039 zd_expected_fnmls_4,
14040 StrictNaNPropagation);
14041
14042 // Signalling NaNs take precedence over quiet NaNs.
14043 double za_inputs_5[] = {qa, qa, sa, sa, sa};
14044 double zn_inputs_5[] = {qn, sn, sn, sn, qn};
14045 double zm_inputs_5[] = {sm, qm, sm, qa, sm};
14046 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
14047 uint64_t zd_expected_fmls_5[] = {sm_proc,
14048 sn_proc_n,
14049 sa_proc,
14050 sa_proc,
14051 sa_proc};
14052 uint64_t zd_expected_fnmla_5[] = {sm_proc,
14053 sn_proc_n,
14054 sa_proc_n,
14055 sa_proc_n,
14056 sa_proc_n};
14057 uint64_t zd_expected_fnmls_5[] = {sm_proc,
14058 sn_proc,
14059 sa_proc_n,
14060 sa_proc_n,
14061 sa_proc_n};
14062
14063 ProcessNaNsHelper3(config,
14064 kDRegSize,
14065 za_inputs_5,
14066 zn_inputs_5,
14067 zm_inputs_5,
14068 zd_expected_fmla_5,
14069 zd_expected_fmls_5,
14070 zd_expected_fnmla_5,
14071 zd_expected_fnmls_5,
14072 StrictNaNPropagation);
14073
14074 const double inf = kFP64PositiveInfinity;
14075 const double inf_n = kFP64NegativeInfinity;
14076 uint64_t inf_proc = DoubleToRawbits(inf);
14077 uint64_t inf_proc_n = DoubleToRawbits(inf_n);
14078 uint64_t d_inf_proc = DoubleToRawbits(kFP64DefaultNaN);
14079
14080  double za_inputs_6[] = {qa, qa, 0.0, -0.0, qa, sa};
14081  double zn_inputs_6[] = {inf, -0.0, -0.0, inf, inf_n, inf};
14082  double zm_inputs_6[] = {0.0, inf_n, inf, inf, inf, 0.0};
14083
14084 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
14085 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
14086 // quiet_nan.
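  // For example, lane 0 computes qa + (inf * 0.0) and so produces the default
  // NaN for every form, whereas lane 4 computes qa + (-inf * inf) and
  // propagates qa (negated by fnmla and fnmls).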
14087 uint64_t zd_expected_fmla_6[] =
14088 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
14089 uint64_t zd_expected_fmls_6[] =
14090 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
14091 uint64_t zd_expected_fnmla_6[] =
14092 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
14093 uint64_t zd_expected_fnmls_6[] =
14094 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
14095
14096 ProcessNaNsHelper3(config,
14097 kDRegSize,
14098 za_inputs_6,
14099 zn_inputs_6,
14100 zm_inputs_6,
14101 zd_expected_fmla_6,
14102 zd_expected_fmls_6,
14103 zd_expected_fnmla_6,
14104 zd_expected_fnmls_6,
14105 StrictNaNPropagation);
14106}
14107
14108TEST_SVE(sve_process_nans_float) {
14109 // Use non-standard NaNs to check that the payload bits are preserved.
14110 float sa = RawbitsToFloat(0x7f951111);
14111 float sn = RawbitsToFloat(0x7f952222);
14112 float sm = RawbitsToFloat(0x7f953333);
14113 float qa = RawbitsToFloat(0x7fea1111);
14114 float qn = RawbitsToFloat(0x7fea2222);
14115 float qm = RawbitsToFloat(0x7fea3333);
14116 VIXL_ASSERT(IsSignallingNaN(sa));
14117 VIXL_ASSERT(IsSignallingNaN(sn));
14118 VIXL_ASSERT(IsSignallingNaN(sm));
14119 VIXL_ASSERT(IsQuietNaN(qa));
14120 VIXL_ASSERT(IsQuietNaN(qn));
14121 VIXL_ASSERT(IsQuietNaN(qm));
14122
14123 // The input NaNs after passing through ProcessNaN.
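  // For example, the signalling sa (0x7f951111) is quietened to 0x7fd51111 by
  // setting the topmost mantissa bit; quiet NaNs are left unchanged.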
14124 uint32_t sa_proc = 0x7fd51111;
14125 uint32_t sn_proc = 0x7fd52222;
14126 uint32_t sm_proc = 0x7fd53333;
14127 uint32_t qa_proc = FloatToRawbits(qa);
14128 uint32_t qn_proc = FloatToRawbits(qn);
14129 uint32_t qm_proc = FloatToRawbits(qm);
14130 uint32_t sa_proc_n = sa_proc ^ kSSignMask;
14131 uint32_t sn_proc_n = sn_proc ^ kSSignMask;
14132 uint32_t qa_proc_n = qa_proc ^ kSSignMask;
14133 uint32_t qn_proc_n = qn_proc ^ kSSignMask;
14134
14135 // Quiet NaNs are propagated.
14136 float zn_inputs_1[] = {qn, 0.0f, 0.0f, qm, qn, qm};
14137 float zm_inputs_1[] = {0.0f, qn, qm, 0.0f, qm, qn};
14138 uint64_t zd_expected_1[] =
14139 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
14140
14141 ProcessNaNsHelper(config,
14142 kSRegSize,
14143 zn_inputs_1,
14144 zm_inputs_1,
14145 zd_expected_1,
14146 StrictNaNPropagation);
14147
14148 // Signalling NaNs are propagated.
14149 float zn_inputs_2[] = {sn, 0.0f, 0.0f, sm, sn, sm};
14150 float zm_inputs_2[] = {0.0f, sn, sm, 0.0f, sm, sn};
14151 uint64_t zd_expected_2[] =
14152 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
14153 ProcessNaNsHelper(config,
14154 kSRegSize,
14155 zn_inputs_2,
14156 zm_inputs_2,
14157 zd_expected_2,
14158 StrictNaNPropagation);
14159
14160 // Signalling NaNs take precedence over quiet NaNs.
14161 float zn_inputs_3[] = {sn, qn, sn, sn, qn};
14162 float zm_inputs_3[] = {qm, sm, sm, qn, sn};
14163 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
14164 ProcessNaNsHelper(config,
14165 kSRegSize,
14166 zn_inputs_3,
14167 zm_inputs_3,
14168 zd_expected_3,
14169 StrictNaNPropagation);
14170
14171 float za_inputs_4[] = {qa, qa, 0.0f, 0.0f, qa, qa};
14172 float zn_inputs_4[] = {qn, 0.0f, 0.0f, qn, qn, qn};
14173 float zm_inputs_4[] = {0.0f, qm, qm, qm, qm, 0.0f};
14174
14175 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
14176 // If `n` is propagated, its sign is inverted by fmls and fnmla.
14177 // If `m` is propagated, its sign is never inverted.
14178 uint64_t zd_expected_fmla_4[] =
14179 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
14180 uint64_t zd_expected_fmls_4[] =
14181 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
14182 uint64_t zd_expected_fnmla_4[] =
14183 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
14184 uint64_t zd_expected_fnmls_4[] =
14185 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
14186
14187 ProcessNaNsHelper3(config,
14188 kSRegSize,
14189 za_inputs_4,
14190 zn_inputs_4,
14191 zm_inputs_4,
14192 zd_expected_fmla_4,
14193 zd_expected_fmls_4,
14194 zd_expected_fnmla_4,
14195 zd_expected_fnmls_4,
14196 StrictNaNPropagation);
14197
14198 // Signalling NaNs take precedence over quiet NaNs.
14199 float za_inputs_5[] = {qa, qa, sa, sa, sa};
14200 float zn_inputs_5[] = {qn, sn, sn, sn, qn};
14201 float zm_inputs_5[] = {sm, qm, sm, qa, sm};
14202 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
14203 uint64_t zd_expected_fmls_5[] = {sm_proc,
14204 sn_proc_n,
14205 sa_proc,
14206 sa_proc,
14207 sa_proc};
14208 uint64_t zd_expected_fnmla_5[] = {sm_proc,
14209 sn_proc_n,
14210 sa_proc_n,
14211 sa_proc_n,
14212 sa_proc_n};
14213 uint64_t zd_expected_fnmls_5[] = {sm_proc,
14214 sn_proc,
14215 sa_proc_n,
14216 sa_proc_n,
14217 sa_proc_n};
14218
14219 ProcessNaNsHelper3(config,
14220 kSRegSize,
14221 za_inputs_5,
14222 zn_inputs_5,
14223 zm_inputs_5,
14224 zd_expected_fmla_5,
14225 zd_expected_fmls_5,
14226 zd_expected_fnmla_5,
14227 zd_expected_fnmls_5,
14228 StrictNaNPropagation);
14229
14230 const float inf = kFP32PositiveInfinity;
14231 const float inf_n = kFP32NegativeInfinity;
14232 uint32_t inf_proc = FloatToRawbits(inf);
14233 uint32_t inf_proc_n = FloatToRawbits(inf_n);
14234 uint32_t d_inf_proc = FloatToRawbits(kFP32DefaultNaN);
14235
14236 float za_inputs_6[] = {qa, qa, 0.0f, 0.0f, qa, sa};
14237 float zn_inputs_6[] = {inf, 0.0f, 0.0f, inf, inf_n, inf};
14238 float zm_inputs_6[] = {0.0f, inf_n, inf, inf, inf, 0.0f};
14239
14240 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
14241 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
14242 // quiet_nan.
14243 uint64_t zd_expected_fmla_6[] =
14244 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
14245 uint64_t zd_expected_fmls_6[] =
14246 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
14247 uint64_t zd_expected_fnmla_6[] =
14248 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
14249 uint64_t zd_expected_fnmls_6[] =
14250 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
14251
14252 ProcessNaNsHelper3(config,
14253 kSRegSize,
14254 za_inputs_6,
14255 zn_inputs_6,
14256 zm_inputs_6,
14257 zd_expected_fmla_6,
14258 zd_expected_fmls_6,
14259 zd_expected_fnmla_6,
14260 zd_expected_fnmls_6,
14261 StrictNaNPropagation);
14262}
14263
14264TEST_SVE(sve_process_nans_half) {
14265 // Use non-standard NaNs to check that the payload bits are preserved.
14266 Float16 sa(RawbitsToFloat16(0x7c11));
14267 Float16 sn(RawbitsToFloat16(0x7c22));
14268 Float16 sm(RawbitsToFloat16(0x7c33));
14269 Float16 qa(RawbitsToFloat16(0x7e44));
14270 Float16 qn(RawbitsToFloat16(0x7e55));
14271 Float16 qm(RawbitsToFloat16(0x7e66));
14272 VIXL_ASSERT(IsSignallingNaN(sa));
14273 VIXL_ASSERT(IsSignallingNaN(sn));
14274 VIXL_ASSERT(IsSignallingNaN(sm));
14275 VIXL_ASSERT(IsQuietNaN(qa));
14276 VIXL_ASSERT(IsQuietNaN(qn));
14277 VIXL_ASSERT(IsQuietNaN(qm));
14278
14279 // The input NaNs after passing through ProcessNaN.
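  // For example, the signalling sa (0x7c11) is quietened to 0x7e11 by setting
  // the topmost mantissa bit; quiet NaNs are left unchanged.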
14280 uint16_t sa_proc = 0x7e11;
14281 uint16_t sn_proc = 0x7e22;
14282 uint16_t sm_proc = 0x7e33;
14283 uint16_t qa_proc = Float16ToRawbits(qa);
14284 uint16_t qn_proc = Float16ToRawbits(qn);
14285 uint16_t qm_proc = Float16ToRawbits(qm);
14286 uint16_t sa_proc_n = sa_proc ^ kHSignMask;
14287 uint16_t sn_proc_n = sn_proc ^ kHSignMask;
14288 uint16_t qa_proc_n = qa_proc ^ kHSignMask;
14289 uint16_t qn_proc_n = qn_proc ^ kHSignMask;
14290 Float16 zero(0.0);
14291
14292 // Quiet NaNs are propagated.
14293 Float16 zn_inputs_1[] = {qn, zero, zero, qm, qn, qm};
14294 Float16 zm_inputs_1[] = {zero, qn, qm, zero, qm, qn};
14295 uint64_t zd_expected_1[] =
14296 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
14297
14298 ProcessNaNsHelper(config,
14299 kHRegSize,
14300 zn_inputs_1,
14301 zm_inputs_1,
14302 zd_expected_1,
14303 StrictNaNPropagation);
14304
14305 // Signalling NaNs are propagated.
14306 Float16 zn_inputs_2[] = {sn, zero, zero, sm, sn, sm};
14307 Float16 zm_inputs_2[] = {zero, sn, sm, zero, sm, sn};
14308 uint64_t zd_expected_2[] =
14309 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
14310 ProcessNaNsHelper(config,
14311 kHRegSize,
14312 zn_inputs_2,
14313 zm_inputs_2,
14314 zd_expected_2,
14315 StrictNaNPropagation);
14316
14317 // Signalling NaNs take precedence over quiet NaNs.
14318 Float16 zn_inputs_3[] = {sn, qn, sn, sn, qn};
14319 Float16 zm_inputs_3[] = {qm, sm, sm, qn, sn};
14320 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
14321 ProcessNaNsHelper(config,
14322 kHRegSize,
14323 zn_inputs_3,
14324 zm_inputs_3,
14325 zd_expected_3,
14326 StrictNaNPropagation);
14327
14328 Float16 za_inputs_4[] = {qa, qa, zero, zero, qa, qa};
14329 Float16 zn_inputs_4[] = {qn, zero, zero, qn, qn, qn};
14330 Float16 zm_inputs_4[] = {zero, qm, qm, qm, qm, zero};
14331
14332 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
14333 // If `n` is propagated, its sign is inverted by fmls and fnmla.
14334 // If `m` is propagated, its sign is never inverted.
14335 uint64_t zd_expected_fmla_4[] =
14336 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
14337 uint64_t zd_expected_fmls_4[] =
14338 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
14339 uint64_t zd_expected_fnmla_4[] =
14340 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
14341 uint64_t zd_expected_fnmls_4[] =
14342 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
14343
14344 ProcessNaNsHelper3(config,
14345 kHRegSize,
14346 za_inputs_4,
14347 zn_inputs_4,
14348 zm_inputs_4,
14349 zd_expected_fmla_4,
14350 zd_expected_fmls_4,
14351 zd_expected_fnmla_4,
14352 zd_expected_fnmls_4,
14353 StrictNaNPropagation);
14354
14355 // Signalling NaNs take precedence over quiet NaNs.
14356 Float16 za_inputs_5[] = {qa, qa, sa, sa, sa};
14357 Float16 zn_inputs_5[] = {qn, sn, sn, sn, qn};
14358 Float16 zm_inputs_5[] = {sm, qm, sm, qa, sm};
14359 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
14360 uint64_t zd_expected_fmls_5[] = {sm_proc,
14361 sn_proc_n,
14362 sa_proc,
14363 sa_proc,
14364 sa_proc};
14365 uint64_t zd_expected_fnmla_5[] = {sm_proc,
14366 sn_proc_n,
14367 sa_proc_n,
14368 sa_proc_n,
14369 sa_proc_n};
14370 uint64_t zd_expected_fnmls_5[] = {sm_proc,
14371 sn_proc,
14372 sa_proc_n,
14373 sa_proc_n,
14374 sa_proc_n};
14375
14376 ProcessNaNsHelper3(config,
14377 kHRegSize,
14378 za_inputs_5,
14379 zn_inputs_5,
14380 zm_inputs_5,
14381 zd_expected_fmla_5,
14382 zd_expected_fmls_5,
14383 zd_expected_fnmla_5,
14384 zd_expected_fnmls_5,
14385 StrictNaNPropagation);
14386
14387 const Float16 inf = kFP16PositiveInfinity;
14388 const Float16 inf_n = kFP16NegativeInfinity;
14389 uint64_t inf_proc = Float16ToRawbits(inf);
14390 uint64_t inf_proc_n = Float16ToRawbits(inf_n);
14391 uint64_t d_inf_proc = Float16ToRawbits(kFP16DefaultNaN);
14392
14393 Float16 za_inputs_6[] = {qa, qa, zero, zero, qa, sa};
14394 Float16 zn_inputs_6[] = {inf, zero, zero, inf, inf_n, inf};
14395 Float16 zm_inputs_6[] = {zero, inf_n, inf, inf, inf, zero};
14396
14397 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
14398 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
14399 // quiet_nan.
14400 uint64_t zd_expected_fmla_6[] =
14401 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
14402 uint64_t zd_expected_fmls_6[] =
14403 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
14404 uint64_t zd_expected_fnmla_6[] =
14405 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
14406 uint64_t zd_expected_fnmls_6[] =
14407 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
14408
14409 ProcessNaNsHelper3(config,
14410 kHRegSize,
14411 za_inputs_6,
14412 zn_inputs_6,
14413 zm_inputs_6,
14414 zd_expected_fmla_6,
14415 zd_expected_fmls_6,
14416 zd_expected_fnmla_6,
14417 zd_expected_fnmls_6,
14418 StrictNaNPropagation);
14419}
14420
14421}  // namespace aarch64
14422} // namespace vixl