// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>
#include <unistd.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
  // We never free this memory, but we need it to live for as long as the
  // static linked list of tests, and this is the easiest way to do it.
  Test* test = new Test(name, fn);
  test->set_sve_vl_in_bits(vl);
  return test;
}

// The TEST_SVE macro works just like the usual TEST macro, but the resulting
// function receives a `Test* config` argument, to allow it to query the
// vector length.
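//
// For illustration only (the name below is made up, not a real test in this
// file), a typical test body looks something like:
//
//   TEST_SVE(sve_example) {
//     SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
//     // On the Simulator, config->sve_vl_in_bits() holds the configured
//     // vector length.
//     ...
//   }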
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// On the Simulator, run SVE tests with several vector lengths, including the
// extreme values and an intermediate value that isn't a power of two.

#define TEST_SVE(name)                                                   \
  void Test##name(Test* config);                                         \
  Test* test_##name##_list[] =                                           \
      {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name),     \
       MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name),     \
       MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)};  \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...)  \
  SETUP_WITH_FEATURES(__VA_ARGS__);   \
  simulator.SetVectorLengthInBits(config->sve_vl_in_bits())

#else
// Otherwise, just use whatever the hardware provides.
static const int kSVEVectorLengthInBits =
    CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
        ? CPU::ReadSVEVectorLengthInBits()
        : 0;

#define TEST_SVE(name)                                                      \
  void Test##name(Test* config);                                            \
  Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits,          \
                                           "AARCH64_ASM_" #name "_vlauto",  \
                                           &Test##name);                    \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...)  \
  SETUP_WITH_FEATURES(__VA_ARGS__);   \
  USE(config)

#endif

// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
// is optimised for call-site clarity, not generated code quality, so it
// doesn't exist in the MacroAssembler itself.
//
// Usage:
//
//    int values[] = { 42, 43, 44 };
//    InsrHelper(&masm, z0.VnS(), values);  // Sets z0.S = { ..., 42, 43, 44 }
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane.
template <typename T, size_t N>
void InsrHelper(MacroAssembler* masm,
                const ZRegister& zdn,
                const T (&values)[N]) {
  for (size_t i = 0; i < N; i++) {
    masm->Insr(zdn, values[i]);
  }
}

// Conveniently initialise P registers with scalar bit patterns. The
// destination lane size is ignored. This is optimised for call-site clarity,
// not generated code quality.
//
// Usage:
//
//    Initialise(&masm, p0, 0x1234);  // Sets p0 = 0b'0001'0010'0011'0100
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value3,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  // Generate a literal pool, as in the array form.
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  masm->Adr(temp, &data);
  masm->Ldr(pd, SVEMemOperand(temp));
  masm->B(&done);
  {
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    masm->dc64(value0);
    masm->dc64(value1);
    masm->dc64(value2);
    masm->dc64(value3);
  }
  masm->Bind(&done);
}

void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, value2, value1, value0);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, 0, value1, value0);
}
void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
  Initialise(masm, pd, 0, 0, 0, value0);
}

// Conveniently initialise P registers by lane. This is optimised for call-site
// clarity, not generated code quality.
//
// Usage:
//
//    int values[] = { 0x0, 0x1, 0x2 };
//    Initialise(&masm, p0.VnS(), values);  // Sets p0 = 0b'0000'0001'0010
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane. Unspecified lanes are set to 0 (inactive).
//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lower bit, and writes zero to the upper
// bits, but other (encodable) values can be specified if required by the test.
template <typename T, size_t N>
void Initialise(MacroAssembler* masm,
                const PRegisterWithLaneSize& pd,
                const T (&values)[N]) {
  // Turn the array into 64-bit chunks.
  uint64_t chunks[4] = {0, 0, 0, 0};
  VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);

  int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT((64 % p_bits_per_lane) == 0);
  VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);

  uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);

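  // Pack each (masked) value into its predicate lane, with the lowest-numbered
  // lane in the lowest-order bits. For example (not taken from a test below):
  // with pd.VnS() there are four predicate bits per lane, so values {0xc, 0x7}
  // would pack as chunks[0] = 0xc7.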
  VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
  size_t bit = 0;
  for (int n = static_cast<int>(N - 1); n >= 0; n--) {
    VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
    uint64_t value = values[n] & p_lane_mask;
    chunks[bit / 64] |= value << (bit % 64);
    bit += p_bits_per_lane;
  }

  Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  __ Mov(x0, 0x0123456789abcdef);

  // Test basic `Insr` behaviour.
  __ Insr(z0.VnB(), 1);
  __ Insr(z0.VnB(), 2);
  __ Insr(z0.VnB(), x0);
  __ Insr(z0.VnB(), -42);
  __ Insr(z0.VnB(), 0);

  // Test array inputs.
  int z1_inputs[] = {3, 4, 5, -42, 0};
  InsrHelper(&masm, z1.VnH(), z1_inputs);

  // Test that sign-extension works as intended for various lane sizes.
  __ Dup(z2.VnD(), 0);                    // Clear the register first.
  __ Insr(z2.VnB(), -42);                 // 0xd6
  __ Insr(z2.VnB(), 0xfe);                // 0xfe
  __ Insr(z2.VnH(), -42);                 // 0xffd6
  __ Insr(z2.VnH(), 0xfedc);              // 0xfedc
  __ Insr(z2.VnS(), -42);                 // 0xffffffd6
  __ Insr(z2.VnS(), 0xfedcba98);          // 0xfedcba98
  // Use another register for VnD(), so we can support 128-bit Z registers.
  __ Insr(z3.VnD(), -42);                 // 0xffffffffffffffd6
  __ Insr(z3.VnD(), 0xfedcba9876543210);  // 0xfedcba9876543210

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that array checks work properly on a register initialised
    // lane-by-lane.
    int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
    ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());

    // Test that lane-by-lane checks work properly on a register initialised
    // by array.
    for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
      // The rightmost (highest-indexed) array element maps to the
      // lowest-numbered lane.
      int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
    }

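    // Each `Insr` above shifts the existing contents up by one lane and writes
    // the new value into lane 0, so the z2 sequence packs (from the low end)
    // 0xfedcba98, 0xffffffd6, 0xfedc, 0xffd6, 0xfe, 0xd6, giving the two
    // D-sized lanes checked below.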
    uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
    ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
    uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
    ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
  }
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_p) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Simple cases: move boolean (0 or 1) values.

  int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
  Initialise(&masm, p1.VnH(), p1_inputs);

  int p2_inputs[] = {1, 1, 0, 1};
  Initialise(&masm, p2.VnS(), p2_inputs);

  int p3_inputs[] = {0, 1};
  Initialise(&masm, p3.VnD(), p3_inputs);

  // Advanced cases: move numeric value into architecturally-ignored bits.

  // B-sized lanes get one bit in a P register, so there are no ignored bits.

  // H-sized lanes get two bits in a P register.
  int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
  Initialise(&masm, p4.VnH(), p4_inputs);

  // S-sized lanes get four bits in a P register.
  int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
  Initialise(&masm, p5.VnS(), p5_inputs);

  // D-sized lanes get eight bits in a P register.
  int p6_inputs[] = {0x81, 0xcc, 0x55};
  Initialise(&masm, p6.VnD(), p6_inputs);

  // The largest possible P register has 32 bytes.
  int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
                     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                     0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
  Initialise(&masm, p7.VnD(), p7_inputs);

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that lane-by-lane checks work properly. The rightmost
    // (highest-indexed) array element maps to the lowest-numbered lane.
    for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
    }

    // Test that array checks work properly on predicates initialised with a
    // possibly-different lane size.
    // 0b...11'10'01'00'01'10'11
    int p4_expected[] = {0x39, 0x1b};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnD());

    ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());

    // 0b...10000001'11001100'01010101
    int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnH());

    // 0b...10011100'10011101'10011110'10011111
    int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
                         1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
  }
}

// Test that writes to V registers clear the high bits of the corresponding Z
// register.
TEST_SVE(sve_v_write_clear) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                          CPUFeatures::kFP,
                          CPUFeatures::kSVE);
  START();

  // The Simulator has two mechanisms for writing V registers:
  //  - Write*Register, calling through to SimRegisterBase::Write.
  //  - LogicVRegister::ClearForWrite followed by one or more lane updates.
  // Try to cover both variants.

  // Prepare some known inputs.
  uint8_t data[kQRegSizeInBytes];
  for (size_t i = 0; i < kQRegSizeInBytes; i++) {
    data[i] = 42 + i;
  }
  __ Mov(x10, reinterpret_cast<uintptr_t>(data));
  __ Fmov(d30, 42.0);

  // Use Index to label the lane indices, so failures are easy to detect and
  // diagnose.
  __ Index(z0.VnB(), 0, 1);
  __ Index(z1.VnB(), 0, 1);
  __ Index(z2.VnB(), 0, 1);
  __ Index(z3.VnB(), 0, 1);
  __ Index(z4.VnB(), 0, 1);

  __ Index(z10.VnB(), 0, -1);
  __ Index(z11.VnB(), 0, -1);
  __ Index(z12.VnB(), 0, -1);
  __ Index(z13.VnB(), 0, -1);
  __ Index(z14.VnB(), 0, -1);

  // Instructions using Write*Register (and SimRegisterBase::Write).
  __ Ldr(b0, MemOperand(x10));
  __ Fcvt(h1, d30);
  __ Fmov(s2, 1.5f);
  __ Fmov(d3, d30);
  __ Ldr(q4, MemOperand(x10));

  // Instructions using LogicVRegister::ClearForWrite.
  // These also (incidentally) test that across-lane instructions correctly
  // ignore the high-order Z register lanes.
  __ Sminv(b10, v10.V16B());
  __ Addv(h11, v11.V4H());
  __ Saddlv(s12, v12.V8H());
  __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
  __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Check the Q part first.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1);  // 42.0 (f16)
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2);  // 1.5 (f32)
    ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3);  // 42.0 (f64)
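    // q4 holds data[15:0], i.e. the ascending bytes 0x2a (42) up to 0x39
    // prepared above.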
    ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10);  // -15
    // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
    // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13);  // [-8] x 8
    // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
    ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);

    // Check that the upper lanes are all clear.
    for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
      ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
    }
  }
}

static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
  int za_inputs[] = {-39, 1, -3, 2};
  int zn_inputs[] = {-5, -20, 9, 8};
  int zm_inputs[] = {9, -5, 4, 5};

  ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
  ZRegister za = z1.WithLaneSize(lane_size_in_bits);
  ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
  ZRegister zm = z3.WithLaneSize(lane_size_in_bits);

  // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
  InsrHelper(&masm, zd, zd_inputs);
  InsrHelper(&masm, za, za_inputs);
  InsrHelper(&masm, zn, zn_inputs);
  InsrHelper(&masm, zm, zm_inputs);

  int p0_inputs[] = {1, 1, 0, 1};
  int p1_inputs[] = {1, 0, 1, 1};
  int p2_inputs[] = {0, 1, 1, 1};
  int p3_inputs[] = {1, 1, 1, 0};

  Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
  Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
  Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
  Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);

  // The Mla macro automatically selects between mla, mad and movprfx + mla
  // based on what registers are aliased.
  ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
  ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);

  __ Mov(mla_da_result, za);
  __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);

  __ Mov(mla_dn_result, zn);
  __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);

  __ Mov(mla_dm_result, zm);
  __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);

  __ Mov(mla_d_result, zd);
  __ Mla(mla_d_result, p3.Merging(), za, zn, zm);

  // The Mls macro automatically selects between mls, msb and movprfx + mls
  // based on what registers are aliased.
  ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
  ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);

  __ Mov(mls_da_result, za);
  __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);

  __ Mov(mls_dn_result, zn);
  __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);

  __ Mov(mls_dm_result, zm);
  __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);

  __ Mov(mls_d_result, zd);
  __ Mls(mls_d_result, p3.Merging(), za, zn, zm);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));

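    // Per active lane, mla is za + (zn * zm) and mls is za - (zn * zm). For
    // example, lane 0 (the rightmost input) is 2 + (8 * 5) = 42 for mla and
    // 2 - (8 * 5) = -38 for mls.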
    int mla[] = {-84, 101, 33, 42};
    int mls[] = {6, -99, -39, -38};

    int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);

    int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);

    int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);

    int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);

    int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);

    int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);

    int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);

    int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
  }
}

TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }

TEST_SVE(sve_bitwise_unpredicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  InsrHelper(&masm, z8.VnD(), z8_inputs);
  uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
  InsrHelper(&masm, z15.VnD(), z15_inputs);

  __ And(z1.VnD(), z8.VnD(), z15.VnD());
  __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
  __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
  __ Orr(z4.VnD(), z8.VnD(), z15.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
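    // The expected values are simply the corresponding bitwise operations
    // applied lane-by-lane to z8 and z15 (with Bic computing z8 & ~z15).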
    uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
    uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
    uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
    uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
  }
}

TEST_SVE(sve_predicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // 0b...01011010'10110111
  int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1};  // Pm
  // 0b...11011001'01010010
  int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0};  // Pn
  // 0b...01010101'10110010
  int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};  // pg

  Initialise(&masm, p10.VnB(), p10_inputs);
  Initialise(&masm, p11.VnB(), p11_inputs);
  Initialise(&masm, p12.VnB(), p12_inputs);

  __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x0, NZCV);
  __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x1, NZCV);
  __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0b...01010000'00010010
    int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...00000001'00000000
    int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...00000001'10100000
    int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000101'10100000
    int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000100'00000000
    int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...01010101'00010010
    int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...01010001'10110010
    int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
    // 0b...01011011'00010111
    int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};

    ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
    ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
    ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
    ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
    ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());

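    // For the flag-setting forms (Ands, Bics), the SVE 'first' (N) flag is set
    // when the first active lane of the result is true, and the 'not last' (C)
    // flag is set when the last active lane is false; 'active' is defined by
    // the governing predicate (p12 here).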
    ASSERT_EQUAL_32(SVEFirstFlag, w0);
    ASSERT_EQUAL_32(SVENotLastFlag, w1);
  }
}

TEST_SVE(sve_int_compare_vectors) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
  int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
  int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z10.VnB(), z10_inputs);
  InsrHelper(&masm, z11.VnB(), z11_inputs);
  Initialise(&masm, p0.VnB(), p0_inputs);

  __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
  __ Mrs(x6, NZCV);

  uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
  uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
  int p1_inputs[] = {1, 1};
  InsrHelper(&masm, z12.VnD(), z12_inputs);
  InsrHelper(&masm, z13.VnD(), z13_inputs);
  Initialise(&masm, p1.VnD(), p1_inputs);

  __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
  __ Mrs(x7, NZCV);

  int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
  int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};

  int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z14.VnH(), z14_inputs);
  InsrHelper(&masm, z15.VnH(), z15_inputs);
  Initialise(&masm, p2.VnH(), p2_inputs);

  __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x8, NZCV);

  __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x9, NZCV);

  int z16_inputs[] = {0, -1, 0, 0};
  int z17_inputs[] = {0, 0, 2147483647, -2147483648};
  int p3_inputs[] = {1, 1, 1, 1};
  InsrHelper(&masm, z16.VnS(), z16_inputs);
  InsrHelper(&masm, z17.VnS(), z17_inputs);
  Initialise(&masm, p3.VnS(), p3_inputs);

  __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x10, NZCV);

  __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x11, NZCV);

  // Architectural aliases testing.
  __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB());  // HS
  __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD());  // HI
  __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH());  // GE
  __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS());  // GT

  END();

  if (CAN_RUN()) {
    RUN();

    int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
    for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
      int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
    }

    int p7_expected[] = {1, 0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnD());

    int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnH());

    int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnH());

    int p10_expected[] = {0, 0, 0, 1};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0, 1, 1, 1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    // Reuse the expected results to verify the architectural aliases.
    ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
    ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
    ASSERT_EQUAL_SVE(p10_expected, p15.VnS());

    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(NoFlag, w7);
    ASSERT_EQUAL_32(NoFlag, w8);
    ASSERT_EQUAL_32(NoFlag, w9);
    ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
  }
}

TEST_SVE(sve_int_compare_vectors_wide_elements) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

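  // In these wide-element compare forms, the second source operand always has
  // D-sized lanes; each narrower lane of the first operand is compared with
  // the 64-bit lane of z19 that occupies the same 64-bit segment of the
  // vector.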
  int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
  int src2_inputs_1[] = {0, -1};
  int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_1);
  InsrHelper(&masm, z19.VnD(), src2_inputs_1);
  Initialise(&masm, p0.VnB(), mask_inputs_1);

  __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x2, NZCV);
  __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x3, NZCV);

  int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
  int src2_inputs_2[] = {0, -32767};
  int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z13.VnH(), src1_inputs_2);
  InsrHelper(&masm, z19.VnD(), src2_inputs_2);
  Initialise(&masm, p0.VnH(), mask_inputs_2);

  __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x4, NZCV);
  __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x5, NZCV);

  int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
  int src2_inputs_3[] = {0, -2147483648};
  int mask_inputs_3[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_3);
  InsrHelper(&masm, z19.VnD(), src2_inputs_3);
  Initialise(&masm, p0.VnS(), mask_inputs_3);

  __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x6, NZCV);
  __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x7, NZCV);

  int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
  int src2_inputs_4[] = {0x00, 0x7f};
  int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_4);
  InsrHelper(&masm, z19.VnD(), src2_inputs_4);
  Initialise(&masm, p0.VnB(), mask_inputs_4);

  __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x8, NZCV);
  __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x9, NZCV);

  int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
  int src2_inputs_5[] = {0x8000, 0xffff};
  int mask_inputs_5[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_5);
  InsrHelper(&masm, z19.VnD(), src2_inputs_5);
  Initialise(&masm, p0.VnS(), mask_inputs_5);

  __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x10, NZCV);
  __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x11, NZCV);

  END();

  if (CAN_RUN()) {
    RUN();
    int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());

    int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());

    int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnH());

    int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p5_expected, p5.VnH());

    int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnS());

    int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnS());

    int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnB());

    int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnB());

    int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    ASSERT_EQUAL_32(NoFlag, w2);
    ASSERT_EQUAL_32(NoFlag, w3);
    ASSERT_EQUAL_32(NoFlag, w4);
    ASSERT_EQUAL_32(SVENotLastFlag, w5);
    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(SVENotLastFlag, w7);
    ASSERT_EQUAL_32(SVEFirstFlag, w8);
    ASSERT_EQUAL_32(SVEFirstFlag, w9);
    ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
    ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
  }
}

TEST_SVE(sve_bitwise_imm) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // clang-format off
  uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
  uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
                           0x0123, 0x4567, 0x89ab, 0xcdef};
  uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
                          0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
  // clang-format on

  InsrHelper(&masm, z1.VnD(), z21_inputs);
  InsrHelper(&masm, z2.VnS(), z22_inputs);
  InsrHelper(&masm, z3.VnH(), z23_inputs);
  InsrHelper(&masm, z4.VnB(), z24_inputs);

  __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
  __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
  __ And(z3.VnH(), z3.VnH(), 0x0ff0);
  __ And(z4.VnB(), z4.VnB(), 0x3f);

  InsrHelper(&masm, z5.VnD(), z21_inputs);
  InsrHelper(&masm, z6.VnS(), z22_inputs);
  InsrHelper(&masm, z7.VnH(), z23_inputs);
  InsrHelper(&masm, z8.VnB(), z24_inputs);

  __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
  __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
  __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
  __ Eor(z8.VnB(), z8.VnB(), 0x3f);

  InsrHelper(&masm, z9.VnD(), z21_inputs);
  InsrHelper(&masm, z10.VnS(), z22_inputs);
  InsrHelper(&masm, z11.VnH(), z23_inputs);
  InsrHelper(&masm, z12.VnB(), z24_inputs);

  __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
  __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
  __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
  __ Orr(z12.VnB(), z12.VnB(), 0x3f);

  {
    // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
    // so here we test `dupm` directly.
    ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
    __ dupm(z13.VnD(), 0x7ffffff800000000);
    __ dupm(z14.VnS(), 0x7ffc7ffc);
    __ dupm(z15.VnH(), 0x3ffc);
    __ dupm(z16.VnB(), 0xc3);
  }

  END();

  if (CAN_RUN()) {
    RUN();

    // clang-format off
    uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
    uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
    uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
                              0x0120, 0x0560, 0x09a0, 0x0de0};
    uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
                             0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnB());

    uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
    uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
    uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
                              0x0ed3, 0x4a97, 0x865b, 0xc21f};
    uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
                             0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};

    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
    ASSERT_EQUAL_SVE(z8_expected, z8.VnB());

    uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
    uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
    uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
                               0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
    uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
                              0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};

    ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
    ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
    ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
    ASSERT_EQUAL_SVE(z12_expected, z12.VnB());

    uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
    uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
    uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
                               0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc};
    ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
    ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
    ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
    // clang-format on
  }
}

TEST_SVE(sve_dup_imm) {
  // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
  // unencodable immediates.

  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Encodable with `dup` (shift 0).
  __ Dup(z0.VnD(), -1);
  __ Dup(z1.VnS(), 0x7f);
  __ Dup(z2.VnH(), -0x80);
  __ Dup(z3.VnB(), 42);

  // Encodable with `dup` (shift 8).
  __ Dup(z4.VnD(), -42 * 256);
  __ Dup(z5.VnS(), -0x8000);
  __ Dup(z6.VnH(), 0x7f00);
  // B-sized lanes cannot take a shift of 8.

  // Encodable with `dupm` (but not `dup`).
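  // (`dupm` takes a bitmask immediate, using the same encoding as the
  // logical-immediate instructions, so only repeating bit patterns such as
  // these are encodable.)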
  __ Dup(z10.VnD(), 0x3fc);
  __ Dup(z11.VnS(), -516097);  // 0xfff81fff, as a signed int.
  __ Dup(z12.VnH(), 0x0001);
  // All values that fit B-sized lanes are encodable with `dup`.

  // Cases that require immediate synthesis.
  __ Dup(z20.VnD(), 0x1234);
  __ Dup(z21.VnD(), -4242);
  __ Dup(z22.VnD(), 0xfedcba9876543210);
  __ Dup(z23.VnS(), 0x01020304);
  __ Dup(z24.VnS(), -0x01020304);
  __ Dup(z25.VnH(), 0x3c38);
  // All values that fit B-sized lanes are directly encodable.

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
    ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
    ASSERT_EQUAL_SVE(0xff80, z2.VnH());
    ASSERT_EQUAL_SVE(0x2a, z3.VnB());

    ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
    ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
    ASSERT_EQUAL_SVE(0x7f00, z6.VnH());

    ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
    ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
    ASSERT_EQUAL_SVE(0x0001, z12.VnH());

    ASSERT_EQUAL_SVE(0x1234, z20.VnD());
    ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
    ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
    ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
    ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
    ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
  }
}

TEST_SVE(sve_inc_dec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

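  // Counts of active (true) lanes in p0 when it is interpreted at each lane
  // size: B-sized lanes use every predicate bit, H-sized lanes every second
  // bit, S-sized lanes every fourth and D-sized lanes every eighth.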
  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, 0x123456780000002a);
  __ Decp(x0, p0.VnB());

  __ Mov(x1, 0x123456780000002a);
  __ Incp(x1, p0.VnH());

  // Check that saturation does not occur.
  __ Mov(x10, 1);
  __ Decp(x10, p0.VnS());

  __ Mov(x11, UINT64_MAX);
  __ Incp(x11, p0.VnD());

  __ Mov(x12, INT64_MAX);
  __ Incp(x12, p0.VnB());

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Decp(x20, p15.VnB());

  __ Mov(x21, 0x4000000000000000);
  __ Incp(x21, p15.VnH());

  END();
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
    ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);

    ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
    ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
    ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);

    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
  }
}

TEST_SVE(sve_sqinc_sqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Sqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Sqincp(x1, p0.VnH());

  // 32-bit operations sign-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Sqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Sqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 1);
  __ Sqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high - 1);
  __ Sqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 0x8000000000000001);  // INT64_MIN + 1
  __ Sqdecp(x10, p0.VnB());

  __ Mov(x11, dummy_high + 0x80000001);  // INT32_MIN + 1
  __ Sqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 1);
  __ Sqdecp(x12, p0.VnS());

  __ Mov(x13, dummy_high + 1);
  __ Sqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Sqincp(x14, p0.VnB());

  __ Mov(x15, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Sqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0xffffffffffffffff);
  __ Sqincp(x18, p0.VnS());

  __ Mov(x19, dummy_high + 0xffffffff);
  __ Sqincp(x19, p0.VnD(), w19);

  __ Mov(x20, dummy_high + 0xffffffff);
  __ Sqdecp(x20, p0.VnB(), w20);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x21, 0);
  __ Sqdecp(x21, p15.VnB());

  __ Mov(x22, 0);
  __ Sqincp(x22, p15.VnH());

  __ Mov(x23, dummy_high);
  __ Sqdecp(x23, p15.VnS(), w23);

  __ Mov(x24, dummy_high);
  __ Sqincp(x24, p15.VnD(), w24);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations sign-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
    ASSERT_EQUAL_64(p0_d_count - 1, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(INT64_MIN, x10);
    ASSERT_EQUAL_64(INT32_MIN, x11);
    ASSERT_EQUAL_64(1 - p0_s_count, x12);
    ASSERT_EQUAL_64(1 - p0_d_count, x13);
    ASSERT_EQUAL_64(INT64_MAX, x14);
    ASSERT_EQUAL_64(INT32_MAX, x15);
    ASSERT_EQUAL_64(p0_s_count - 1, x18);
    ASSERT_EQUAL_64(p0_d_count - 1, x19);
    ASSERT_EQUAL_64(-1 - p0_b_count, x20);

    // Check all-true predicates.
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
  }
}

TEST_SVE(sve_uqinc_uqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Uqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Uqincp(x1, p0.VnH());

  // 32-bit operations zero-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Uqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Uqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 0x80000001);
  __ Uqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high + 0x7fffffff);
  __ Uqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 1);
  __ Uqdecp(x10, p0.VnB(), x10);

  __ Mov(x11, dummy_high + 1);
  __ Uqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 0x8000000000000000);  // INT64_MAX + 1
  __ Uqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 0x80000000);  // INT32_MAX + 1
  __ Uqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0xfffffffffffffffe);  // UINT64_MAX - 1
  __ Uqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0xfffffffe);  // UINT32_MAX - 1
  __ Uqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Uqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Uqincp(x19, p0.VnD(), w19);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Uqdecp(x20, p15.VnB(), x20);

  __ Mov(x21, 0x4000000000000000);
  __ Uqincp(x21, p15.VnH(), x21);

  __ Mov(x22, dummy_high + 0x40000000);
  __ Uqdecp(x22, p15.VnS(), w22);

  __ Mov(x23, dummy_high + 0x40000000);
  __ Uqincp(x23, p15.VnD(), w23);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations zero-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
    ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(0, x10);
    ASSERT_EQUAL_64(0, x11);
    ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
    ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
    ASSERT_EQUAL_64(UINT64_MAX, x14);
    ASSERT_EQUAL_64(UINT32_MAX, x15);
    ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
    ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);

    // Check all-true predicates.
    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
    ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
    ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
  }
}

TEST_SVE(sve_inc_dec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  // Check that saturation does not occur.

  int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
  InsrHelper(&masm, z0.VnD(), z0_inputs);

  int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
  InsrHelper(&masm, z1.VnD(), z1_inputs);

  int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
  InsrHelper(&masm, z2.VnS(), z2_inputs);

  int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
  InsrHelper(&masm, z3.VnH(), z3_inputs);

  // The MacroAssembler implements non-destructive operations using movprfx.
  __ Decp(z10.VnD(), p0, z0.VnD());
  __ Decp(z11.VnD(), p0, z1.VnD());
  __ Decp(z12.VnS(), p0, z2.VnS());
  __ Decp(z13.VnH(), p0, z3.VnH());

  __ Incp(z14.VnD(), p0, z0.VnD());
  __ Incp(z15.VnD(), p0, z1.VnD());
  __ Incp(z16.VnS(), p0, z2.VnS());
  __ Incp(z17.VnH(), p0, z3.VnH());

  // Also test destructive forms.
  __ Mov(z4, z0);
  __ Mov(z5, z1);
  __ Mov(z6, z2);
  __ Mov(z7, z3);

  __ Decp(z0.VnD(), p0);
  __ Decp(z1.VnD(), p0);
  __ Decp(z2.VnS(), p0);
  __ Decp(z3.VnH(), p0);

  __ Incp(z4.VnD(), p0);
  __ Incp(z5.VnD(), p0);
  __ Incp(z6.VnS(), p0);
  __ Incp(z7.VnH(), p0);

  END();
  if (CAN_RUN()) {
    RUN();

    // z0_inputs[...] - number of active D lanes (2)
    int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());

    // z1_inputs[...] - number of active D lanes (2)
    int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());

    // z2_inputs[...] - number of active S lanes (3)
    int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());

    // z3_inputs[...] - number of active H lanes (5)
    int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());

    // z0_inputs[...] + number of active D lanes (2)
    uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());

    // z1_inputs[...] + number of active D lanes (2)
    uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());

    // z2_inputs[...] + number of active S lanes (3)
    uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());

    // z3_inputs[...] + number of active H lanes (5)
    uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());

    // Check that the non-destructive macros produced the same results.
    ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
  }
}

TEST_SVE(sve_inc_dec_ptrue_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Dup(z0.VnD(), 0);
  __ Decp(z0.VnD(), p15);

  __ Dup(z1.VnS(), 0);
  __ Decp(z1.VnS(), p15);

  __ Dup(z2.VnH(), 0);
  __ Decp(z2.VnH(), p15);

  __ Dup(z3.VnD(), 0);
  __ Incp(z3.VnD(), p15);

  __ Dup(z4.VnS(), 0);
  __ Incp(z4.VnS(), p15);

  __ Dup(z5.VnH(), 0);
  __ Incp(z5.VnH(), p15);

  END();
  if (CAN_RUN()) {
    RUN();

    int d_lane_count = core.GetSVELaneCount(kDRegSize);
    int s_lane_count = core.GetSVELaneCount(kSRegSize);
    int h_lane_count = core.GetSVELaneCount(kHRegSize);

    for (int i = 0; i < d_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
    }

    for (int i = 0; i < s_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
      ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
    }

    for (int i = 0; i < h_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
      ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
    }
  }
}

Jacob Bramleye8289202019-07-31 11:25:23 +01001438TEST_SVE(sve_sqinc_sqdec_p_vector) {
1439 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001440 START();
1441
1442 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1443 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1444 Initialise(&masm, p0.VnB(), p0_inputs);
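  // Added note: the right-most value above is lane 0, and a wider lane is
  // active only if the predicate bit of its lowest-numbered byte is set.
  // Reading every 2nd, 4th and 8th value from the right gives
  // {1, 1, 0, 1, 1, 0, 1, 0}, {1, 0, 1, 1} and {1, 1}: 5 H, 3 S and 2 D
  // active lanes, as stated above.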
1445
1446 // Check that saturation behaves correctly.
1447
1448 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1449 InsrHelper(&masm, z0.VnD(), z0_inputs);
1450
1451 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1452 InsrHelper(&masm, z1.VnD(), z1_inputs);
1453
1454 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1455 InsrHelper(&masm, z2.VnS(), z2_inputs);
1456
1457 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1458 InsrHelper(&masm, z3.VnH(), z3_inputs);
1459
1460 // The MacroAssembler implements non-destructive operations using movprfx.
1461 __ Sqdecp(z10.VnD(), p0, z0.VnD());
1462 __ Sqdecp(z11.VnD(), p0, z1.VnD());
1463 __ Sqdecp(z12.VnS(), p0, z2.VnS());
1464 __ Sqdecp(z13.VnH(), p0, z3.VnH());
1465
1466 __ Sqincp(z14.VnD(), p0, z0.VnD());
1467 __ Sqincp(z15.VnD(), p0, z1.VnD());
1468 __ Sqincp(z16.VnS(), p0, z2.VnS());
1469 __ Sqincp(z17.VnH(), p0, z3.VnH());
1470
1471 // Also test destructive forms.
1472 __ Mov(z4, z0);
1473 __ Mov(z5, z1);
1474 __ Mov(z6, z2);
1475 __ Mov(z7, z3);
1476
1477 __ Sqdecp(z0.VnD(), p0);
1478 __ Sqdecp(z1.VnD(), p0);
1479 __ Sqdecp(z2.VnS(), p0);
1480 __ Sqdecp(z3.VnH(), p0);
1481
1482 __ Sqincp(z4.VnD(), p0);
1483 __ Sqincp(z5.VnD(), p0);
1484 __ Sqincp(z6.VnS(), p0);
1485 __ Sqincp(z7.VnH(), p0);
1486
1487 END();
1488 if (CAN_RUN()) {
1489 RUN();
1490
1491 // z0_inputs[...] - number of active D lanes (2)
1492 int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
1493 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1494
1495 // z1_inputs[...] - number of active D lanes (2)
1496 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1497 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1498
1499 // z2_inputs[...] - number of active S lanes (3)
1500 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
1501 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1502
1503 // z3_inputs[...] - number of active H lanes (5)
1504 int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
1505 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1506
1507 // z0_inputs[...] + number of active D lanes (2)
1508 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1509 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1510
1511 // z1_inputs[...] + number of active D lanes (2)
1512 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
1513 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1514
1515 // z2_inputs[...] + number of active S lanes (3)
1516 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
1517 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1518
1519 // z3_inputs[...] + number of active H lanes (5)
1520 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
1521 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1522
1523 // Check that the non-destructive macros produced the same results.
1524 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1525 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1526 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1527 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1528 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1529 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1530 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1531 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1532 }
1533}
1534
1535TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
1536 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1537 START();
1538
1539 // With an all-true predicate, these instructions increment or decrement by
1540 // the vector length.
1541 __ Ptrue(p15.VnB());
1542
1543 __ Dup(z0.VnD(), 0);
1544 __ Sqdecp(z0.VnD(), p15);
1545
1546 __ Dup(z1.VnS(), 0);
1547 __ Sqdecp(z1.VnS(), p15);
1548
1549 __ Dup(z2.VnH(), 0);
1550 __ Sqdecp(z2.VnH(), p15);
1551
1552 __ Dup(z3.VnD(), 0);
1553 __ Sqincp(z3.VnD(), p15);
1554
1555 __ Dup(z4.VnS(), 0);
1556 __ Sqincp(z4.VnS(), p15);
1557
1558 __ Dup(z5.VnH(), 0);
1559 __ Sqincp(z5.VnH(), p15);
1560
1561 END();
1562 if (CAN_RUN()) {
1563 RUN();
1564
1565 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1566 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1567 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1568
1569 for (int i = 0; i < d_lane_count; i++) {
1570 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1571 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1572 }
1573
1574 for (int i = 0; i < s_lane_count; i++) {
1575 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1576 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1577 }
1578
1579 for (int i = 0; i < h_lane_count; i++) {
1580 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1581 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1582 }
1583 }
1584}
1585
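// Editorial sketch (not part of the original VIXL test suite): the Uqincp and
// Uqdecp expectations below use unsigned saturation, so decrementing clamps
// at zero and incrementing clamps at the all-ones value for the lane. A
// minimal 64-bit model, for illustration only (the helper names are invented
// here):
inline uint64_t UnsignedSaturatingAddForIllustration(uint64_t value,
                                                     uint64_t count) {
  return (value > (UINT64_MAX - count)) ? UINT64_MAX : (value + count);
}
inline uint64_t UnsignedSaturatingSubForIllustration(uint64_t value,
                                                     uint64_t count) {
  return (value < count) ? 0 : (value - count);
}
// For example, UnsignedSaturatingSubForIllustration(1, 2) returns 0 and
// UnsignedSaturatingAddForIllustration(UINT64_MAX, 2) returns UINT64_MAX,
// matching the z0 and z5 expectations in the next test.
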
1586TEST_SVE(sve_uqinc_uqdec_p_vector) {
1587 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1588 START();
1589
1590 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1591 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1592 Initialise(&masm, p0.VnB(), p0_inputs);
1593
1594 // Check that saturation behaves correctly.
1595
1596 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
1597 InsrHelper(&masm, z0.VnD(), z0_inputs);
1598
1599 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
1600 InsrHelper(&masm, z1.VnD(), z1_inputs);
1601
1602 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
1603 InsrHelper(&masm, z2.VnS(), z2_inputs);
1604
1605 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
1606 InsrHelper(&masm, z3.VnH(), z3_inputs);
1607
1608 // The MacroAssembler implements non-destructive operations using movprfx.
1609 __ Uqdecp(z10.VnD(), p0, z0.VnD());
1610 __ Uqdecp(z11.VnD(), p0, z1.VnD());
1611 __ Uqdecp(z12.VnS(), p0, z2.VnS());
1612 __ Uqdecp(z13.VnH(), p0, z3.VnH());
1613
1614 __ Uqincp(z14.VnD(), p0, z0.VnD());
1615 __ Uqincp(z15.VnD(), p0, z1.VnD());
1616 __ Uqincp(z16.VnS(), p0, z2.VnS());
1617 __ Uqincp(z17.VnH(), p0, z3.VnH());
1618
1619 // Also test destructive forms.
1620 __ Mov(z4, z0);
1621 __ Mov(z5, z1);
1622 __ Mov(z6, z2);
1623 __ Mov(z7, z3);
1624
1625 __ Uqdecp(z0.VnD(), p0);
1626 __ Uqdecp(z1.VnD(), p0);
1627 __ Uqdecp(z2.VnS(), p0);
1628 __ Uqdecp(z3.VnH(), p0);
1629
1630 __ Uqincp(z4.VnD(), p0);
1631 __ Uqincp(z5.VnD(), p0);
1632 __ Uqincp(z6.VnS(), p0);
1633 __ Uqincp(z7.VnH(), p0);
1634
1635 END();
1636 if (CAN_RUN()) {
1637 RUN();
1638
1639 // z0_inputs[...] - number of active D lanes (2)
1640 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
1641 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1642
1643 // z1_inputs[...] - number of active D lanes (2)
1644 uint64_t z1_expected[] = {0x12345678ffffff28,
1645 0,
1646 0xfffffffffffffffd,
1647 0x7ffffffffffffffd};
1648 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1649
1650 // z2_inputs[...] - number of active S lanes (3)
1651 uint32_t z2_expected[] =
1652 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
1653 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1654
1655 // z3_inputs[...] - number of active H lanes (5)
1656 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
1657 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1658
1659 // z0_inputs[...] + number of active D lanes (2)
1660 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1661 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1662
1663 // z1_inputs[...] + number of active D lanes (2)
1664 uint64_t z5_expected[] = {0x12345678ffffff2c,
1665 2,
1666 UINT64_MAX,
1667 0x8000000000000001};
1668 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1669
1670 // z2_inputs[...] + number of active S lanes (3)
1671 uint32_t z6_expected[] =
1672 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
1673 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1674
1675 // z3_inputs[...] + number of active H lanes (5)
1676 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
1677 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1678
1679 // Check that the non-destructive macros produced the same results.
1680 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1681 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1682 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1683 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1684 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1685 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1686 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1687 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1688 }
1689}
1690
1691TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
1692 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1693 START();
1694
1695 // With an all-true predicate, these instructions increment or decrement by
1696 // the vector length.
1697 __ Ptrue(p15.VnB());
1698
1699 __ Mov(x0, 0x1234567800000000);
1700 __ Mov(x1, 0x12340000);
1701 __ Mov(x2, 0x1200);
1702
1703 __ Dup(z0.VnD(), x0);
1704 __ Uqdecp(z0.VnD(), p15);
1705
1706 __ Dup(z1.VnS(), x1);
1707 __ Uqdecp(z1.VnS(), p15);
1708
1709 __ Dup(z2.VnH(), x2);
1710 __ Uqdecp(z2.VnH(), p15);
1711
1712 __ Dup(z3.VnD(), x0);
1713 __ Uqincp(z3.VnD(), p15);
1714
1715 __ Dup(z4.VnS(), x1);
1716 __ Uqincp(z4.VnS(), p15);
1717
1718 __ Dup(z5.VnH(), x2);
1719 __ Uqincp(z5.VnH(), p15);
1720
1721 END();
1722 if (CAN_RUN()) {
1723 RUN();
1724
1725 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1726 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1727 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1728
1729 for (int i = 0; i < d_lane_count; i++) {
1730 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
1731 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
1732 }
1733
1734 for (int i = 0; i < s_lane_count; i++) {
1735 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
1736 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
1737 }
1738
1739 for (int i = 0; i < h_lane_count; i++) {
1740 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
1741 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
1742 }
1743 }
1744}
1745
1746TEST_SVE(sve_index) {
1747 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1748 START();
1749
1750 // Simple cases.
1751 __ Index(z0.VnB(), 0, 1);
1752 __ Index(z1.VnH(), 1, 1);
1753 __ Index(z2.VnS(), 2, 1);
1754 __ Index(z3.VnD(), 3, 1);
1755
1756 // Synthesised immediates.
1757 __ Index(z4.VnB(), 42, -1);
1758 __ Index(z5.VnH(), -1, 42);
1759 __ Index(z6.VnS(), 42, 42);
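  // Added note: the immediate form of `index` only encodes 5-bit signed
  // values (-16 to 15), so out-of-range immediates such as 42 are expected to
  // be materialised into scratch registers by the MacroAssembler.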
1760
1761 // Register arguments.
1762 __ Mov(x0, 42);
1763 __ Mov(x1, -3);
1764 __ Index(z10.VnD(), x0, x1);
1765 __ Index(z11.VnB(), w0, w1);
1766 // The register size should correspond to the lane size, but VIXL allows any
1767 // register at least as big as the lane size.
1768 __ Index(z12.VnB(), x0, x1);
1769 __ Index(z13.VnH(), w0, x1);
1770 __ Index(z14.VnS(), x0, w1);
1771
1772 // Integer overflow.
1773 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
1774 __ Index(z21.VnH(), 7, -3);
1775 __ Index(z22.VnS(), INT32_MAX - 2, 1);
1776 __ Index(z23.VnD(), INT64_MIN + 6, -7);
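  // Added note: each lane holds (start + i * step) truncated to the lane
  // width, so z20's B lanes are 0xfd, 0xff, 0x01, 0x03, 0x05, ..., wrapping
  // through zero; the expected arrays below list the highest checked lane
  // first.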
1777
1778 END();
1779
1780 if (CAN_RUN()) {
1781 RUN();
1782
1783 int b_lane_count = core.GetSVELaneCount(kBRegSize);
1784 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1785 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1786 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1787
1788 uint64_t b_mask = GetUintMask(kBRegSize);
1789 uint64_t h_mask = GetUintMask(kHRegSize);
1790 uint64_t s_mask = GetUintMask(kSRegSize);
1791 uint64_t d_mask = GetUintMask(kDRegSize);
1792
1793 // Simple cases.
1794 for (int i = 0; i < b_lane_count; i++) {
1795 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
1796 }
1797 for (int i = 0; i < h_lane_count; i++) {
1798 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
1799 }
1800 for (int i = 0; i < s_lane_count; i++) {
1801 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
1802 }
1803 for (int i = 0; i < d_lane_count; i++) {
1804 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
1805 }
1806
1807 // Synthesised immediates.
1808 for (int i = 0; i < b_lane_count; i++) {
1809 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
1810 }
1811 for (int i = 0; i < h_lane_count; i++) {
1812 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
1813 }
1814 for (int i = 0; i < s_lane_count; i++) {
1815 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
1816 }
1817
1818 // Register arguments.
1819 for (int i = 0; i < d_lane_count; i++) {
1820 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
1821 }
1822 for (int i = 0; i < b_lane_count; i++) {
1823 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
1824 }
1825 for (int i = 0; i < b_lane_count; i++) {
1826 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
1827 }
1828 for (int i = 0; i < h_lane_count; i++) {
1829 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
1830 }
1831 for (int i = 0; i < s_lane_count; i++) {
1832 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
1833 }
1834
1835 // Integer overflow.
1836 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
1837 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
1838 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
1839 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
1840 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
1841 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
1842 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
1843 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
1844 }
1845}
1846
1847TEST(sve_int_compare_count_and_limit_scalars) {
1848 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1849 START();
1850
1851 __ Mov(w20, 0xfffffffd);
1852 __ Mov(w21, 0xffffffff);
1853
1854 __ Whilele(p0.VnB(), w20, w21);
1855 __ Mrs(x0, NZCV);
1856 __ Whilele(p1.VnH(), w20, w21);
1857 __ Mrs(x1, NZCV);
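  // Added note: with signed inputs w20 is -3 and w21 is -1, so the `whilele`
  // condition (-3 + n) <= -1 holds for n = 0, 1 and 2. Three lanes are active
  // regardless of the lane size, giving the 0b...0111 (B) and 0b...010101 (H)
  // patterns in the expected results.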
1858
1859 __ Mov(w20, 0xffffffff);
1860 __ Mov(w21, 0x00000000);
1861
1862 __ Whilelt(p2.VnS(), w20, w21);
1863 __ Mrs(x2, NZCV);
1864 __ Whilelt(p3.VnD(), w20, w21);
1865 __ Mrs(x3, NZCV);
1866
1867 __ Mov(w20, 0xfffffffd);
1868 __ Mov(w21, 0xffffffff);
1869
1870 __ Whilels(p4.VnB(), w20, w21);
1871 __ Mrs(x4, NZCV);
1872 __ Whilels(p5.VnH(), w20, w21);
1873 __ Mrs(x5, NZCV);
1874
1875 __ Mov(w20, 0xffffffff);
1876 __ Mov(w21, 0x00000000);
1877
1878 __ Whilelo(p6.VnS(), w20, w21);
1879 __ Mrs(x6, NZCV);
1880 __ Whilelo(p7.VnD(), w20, w21);
1881 __ Mrs(x7, NZCV);
1882
1883 __ Mov(x20, 0xfffffffffffffffd);
1884 __ Mov(x21, 0xffffffffffffffff);
1885
1886 __ Whilele(p8.VnB(), x20, x21);
1887 __ Mrs(x8, NZCV);
1888 __ Whilele(p9.VnH(), x20, x21);
1889 __ Mrs(x9, NZCV);
1890
1891 __ Mov(x20, 0xffffffffffffffff);
1892 __ Mov(x21, 0x0000000000000000);
1893
1894 __ Whilelt(p10.VnS(), x20, x21);
1895 __ Mrs(x10, NZCV);
1896 __ Whilelt(p11.VnD(), x20, x21);
1897 __ Mrs(x11, NZCV);
1898
1899 __ Mov(x20, 0xfffffffffffffffd);
1900 __ Mov(x21, 0xffffffffffffffff);
1901
1902 __ Whilels(p12.VnB(), x20, x21);
1903 __ Mrs(x12, NZCV);
1904 __ Whilels(p13.VnH(), x20, x21);
1905 __ Mrs(x13, NZCV);
1906
1907 __ Mov(x20, 0xffffffffffffffff);
1908 __ Mov(x21, 0x0000000000000000);
1909
1910 __ Whilelo(p14.VnS(), x20, x21);
1911 __ Mrs(x14, NZCV);
1912 __ Whilelo(p15.VnD(), x20, x21);
1913 __ Mrs(x15, NZCV);
1914
1915 END();
1916
1917 if (CAN_RUN()) {
1918 RUN();
1919
1920 // 0b...00000000'00000111
1921 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1922 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1923
1924 // 0b...00000000'00010101
1925 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1926 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
1927
1928 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
1929 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
1930
1931 int p3_expected[] = {0x00, 0x01};
1932 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
1933
1934 // 0b...11111111'11111111
1935 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1936 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1937
1938 // 0b...01010101'01010101
1939 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1940 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1941
1942 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
1943 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1944
1945 int p7_expected[] = {0x00, 0x00};
1946 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1947
1948 // 0b...00000000'00000111
1949 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1950 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1951
1952 // 0b...00000000'00010101
1953 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1954 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1955
1956 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
1957 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1958
1959 int p11_expected[] = {0x00, 0x01};
1960 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
1961
1962 // 0b...11111111'11111111
1963 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1964 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
1965
1966 // 0b...01010101'01010101
1967 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1968 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
1969
1970 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
1971 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
1972
1973 int p15_expected[] = {0x00, 0x00};
1974 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
1975
1976 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
1977 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
1978 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
1979 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
1980 ASSERT_EQUAL_32(SVEFirstFlag, w4);
1981 ASSERT_EQUAL_32(SVEFirstFlag, w5);
1982 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
1983 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
1984 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
1985 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
1986 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1987 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
1988 ASSERT_EQUAL_32(SVEFirstFlag, w12);
1989 ASSERT_EQUAL_32(SVEFirstFlag, w13);
1990 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
1991 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
1992 }
1993}
1994
1995TEST(sve_int_compare_vectors_signed_imm) {
1996 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1997 START();
1998
1999 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
2000 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2001 InsrHelper(&masm, z13.VnB(), z13_inputs);
2002 Initialise(&masm, p0.VnB(), mask_inputs1);
2003
2004 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2005 __ Mrs(x2, NZCV);
2006 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2007
2008 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2009 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2010 InsrHelper(&masm, z14.VnH(), z14_inputs);
2011 Initialise(&masm, p0.VnH(), mask_inputs2);
2012
2013 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2014 __ Mrs(x4, NZCV);
2015 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2016
2017 int z15_inputs[] = {0, 1, -1, INT_MIN};
2018 int mask_inputs3[] = {0, 1, 1, 1};
2019 InsrHelper(&masm, z15.VnS(), z15_inputs);
2020 Initialise(&masm, p0.VnS(), mask_inputs3);
2021
2022 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2023 __ Mrs(x6, NZCV);
2024 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2025
2026 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2027 __ Mrs(x8, NZCV);
2028 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2029
2030 int64_t z16_inputs[] = {0, -1};
2031 int mask_inputs4[] = {1, 1};
2032 InsrHelper(&masm, z16.VnD(), z16_inputs);
2033 Initialise(&masm, p0.VnD(), mask_inputs4);
2034
2035 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2036 __ Mrs(x10, NZCV);
2037 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2038
2039 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2040 __ Mrs(x12, NZCV);
2041 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2042
2043 END();
2044
2045 if (CAN_RUN()) {
2046 RUN();
2047
2048 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2049 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2050
2051 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2052 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2053
2054 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2055 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2056
2057 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2058 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2059
2060 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2061 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2062
2063 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2064 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2065
2066 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2067 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2068
2069 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2070 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2071
2072 int p10_expected[] = {0x00, 0x01};
2073 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2074
2075 int p11_expected[] = {0x00, 0x00};
2076 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2077
2078 int p12_expected[] = {0x01, 0x00};
2079 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2080
2081 int p13_expected[] = {0x01, 0x01};
2082 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2083
2084 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2085 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2086 ASSERT_EQUAL_32(NoFlag, w6);
2087 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2088 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2089 ASSERT_EQUAL_32(NoFlag, w12);
2090 }
2091}
2092
2093TEST(sve_int_compare_vectors_unsigned_imm) {
2094 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2095 START();
2096
2097 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2098 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2099 InsrHelper(&masm, z13.VnB(), src1_inputs);
2100 Initialise(&masm, p0.VnB(), mask_inputs1);
2101
2102 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2103 __ Mrs(x2, NZCV);
2104 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2105
2106 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2107 int mask_inputs2[] = {1, 1, 1, 1, 0};
2108 InsrHelper(&masm, z13.VnH(), src2_inputs);
2109 Initialise(&masm, p0.VnH(), mask_inputs2);
2110
2111 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2112 __ Mrs(x4, NZCV);
2113 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2114
2115 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2116 int mask_inputs3[] = {1, 1, 1, 1};
2117 InsrHelper(&masm, z13.VnS(), src3_inputs);
2118 Initialise(&masm, p0.VnS(), mask_inputs3);
2119
2120 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2121 __ Mrs(x6, NZCV);
2122 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2123
2124 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2125 int mask_inputs4[] = {1, 1};
2126 InsrHelper(&masm, z13.VnD(), src4_inputs);
2127 Initialise(&masm, p0.VnD(), mask_inputs4);
2128
2129 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2130 __ Mrs(x8, NZCV);
2131 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2132
2133 END();
2134
2135 if (CAN_RUN()) {
2136 RUN();
2137
2138 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2139 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2140
2141 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2142 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2143
2144 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2145 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2146
2147 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2148 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2149
2150 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2151 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2152
2153 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2154 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2155
2156 int p8_expected[] = {0x00, 0x01};
2157 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2158
2159 int p9_expected[] = {0x00, 0x01};
2160 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2161
2162 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2163 ASSERT_EQUAL_32(NoFlag, w4);
2164 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2165 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2166 }
2167}
2168
2169TEST(sve_int_compare_conditionally_terminate_scalars) {
2170 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2171 START();
2172
2173 __ Mov(x0, 0xfedcba9887654321);
2174 __ Mov(x1, 0x1000100010001000);
2175
2176 // Initialise Z and C. These are preserved by cterm*, and the V flag is set to
2177 // !C if the condition does not hold.
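  // Added note: conversely, the N flag (SVEFirstFlag in the checks below) is
  // set when the termination condition does hold.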
2178 __ Mov(x10, NoFlag);
2179 __ Msr(NZCV, x10);
2180
2181 __ Ctermeq(w0, w0);
2182 __ Mrs(x2, NZCV);
2183 __ Ctermeq(x0, x1);
2184 __ Mrs(x3, NZCV);
2185 __ Ctermne(x0, x0);
2186 __ Mrs(x4, NZCV);
2187 __ Ctermne(w0, w1);
2188 __ Mrs(x5, NZCV);
2189
2190 // As above, but with all flags initially set.
2191 __ Mov(x10, NZCVFlag);
2192 __ Msr(NZCV, x10);
2193
2194 __ Ctermeq(w0, w0);
2195 __ Mrs(x6, NZCV);
2196 __ Ctermeq(x0, x1);
2197 __ Mrs(x7, NZCV);
2198 __ Ctermne(x0, x0);
2199 __ Mrs(x8, NZCV);
2200 __ Ctermne(w0, w1);
2201 __ Mrs(x9, NZCV);
2202
2203 END();
2204
2205 if (CAN_RUN()) {
2206 RUN();
2207
2208 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2209 ASSERT_EQUAL_32(VFlag, w3);
2210 ASSERT_EQUAL_32(VFlag, w4);
2211 ASSERT_EQUAL_32(SVEFirstFlag, w5);
2212
2213 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w6);
2214 ASSERT_EQUAL_32(ZCFlag, w7);
2215 ASSERT_EQUAL_32(ZCFlag, w8);
2216 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w9);
2217 }
2218}
2219
2220// Work out what the architectural `PredTest` pseudocode should produce for the
2221// given result and governing predicate.
2222template <typename Tg, typename Td, int N>
2223static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2224 const Tg (&pg)[N],
2225 int vl) {
2226 int first = -1;
2227 int last = -1;
2228 bool any_active = false;
2229
2230 // Only consider potentially-active lanes.
2231 int start = (N > vl) ? (N - vl) : 0;
2232 for (int i = start; i < N; i++) {
2233 if ((pg[i] & 1) == 1) {
2234 // Look for the first and last active lanes.
2235 // Note that in these arrays the 'first' lane is the one with the highest index.
2236 if (last < 0) last = i;
2237 first = i;
2238 // Look for any active lanes that are also active in pd.
2239 if ((pd[i] & 1) == 1) any_active = true;
2240 }
2241 }
2242
2243 uint32_t flags = 0;
2244 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2245 if (!any_active) flags |= SVENoneFlag;
2246 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2247 return static_cast<StatusFlags>(flags);
2248}
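
// Added note: SVEFirstFlag, SVENoneFlag and SVENotLastFlag are the SVE views
// of the N, Z and C condition flags respectively, which is why the tests can
// compare them against NZCV values read back with Mrs.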
2249
2250typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2251 const PRegister& pg,
2252 const PRegisterWithLaneSize& pn);
2253template <typename Tg, typename Tn, typename Td>
2254static void PfirstPnextHelper(Test* config,
2255 PfirstPnextFn macro,
2256 unsigned lane_size_in_bits,
2257 const Tg& pg_inputs,
2258 const Tn& pn_inputs,
2259 const Td& pd_expected) {
2260 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2261 START();
2262
2263 PRegister pg = p15;
2264 PRegister pn = p14;
2265 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2266 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2267
2268 // Initialise NZCV to an impossible value, to check that we actually write it.
2269 __ Mov(x10, NZCVFlag);
2270
2271 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2272 // the Assembler.
2273 __ Msr(NZCV, x10);
2274 __ Mov(p0, pn);
2275 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2276 pg,
2277 p0.WithLaneSize(lane_size_in_bits));
2278 __ Mrs(x0, NZCV);
2279
2280 // The MacroAssembler supports non-destructive use.
2281 __ Msr(NZCV, x10);
2282 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2283 pg,
2284 pn.WithLaneSize(lane_size_in_bits));
2285 __ Mrs(x1, NZCV);
2286
2287 // If pd.Aliases(pg) the macro requires a scratch register.
2288 {
2289 UseScratchRegisterScope temps(&masm);
2290 temps.Include(p13);
2291 __ Msr(NZCV, x10);
2292 __ Mov(p2, p15);
2293 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2294 p2,
2295 pn.WithLaneSize(lane_size_in_bits));
2296 __ Mrs(x2, NZCV);
2297 }
2298
2299 END();
2300
2301 if (CAN_RUN()) {
2302 RUN();
2303
2304 // Check that the inputs weren't modified.
2305 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2306 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2307
2308 // Check the primary operation.
2309 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2310 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2311 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2312
2313 // Check that the flags were properly set.
2314 StatusFlags nzcv_expected =
2315 GetPredTestFlags(pd_expected,
2316 pg_inputs,
2317 core.GetSVELaneCount(kBRegSize));
2318 ASSERT_EQUAL_64(nzcv_expected, x0);
2319 ASSERT_EQUAL_64(nzcv_expected, x1);
2320 ASSERT_EQUAL_64(nzcv_expected, x2);
2321 }
2322}
2323
2324template <typename Tg, typename Tn, typename Td>
2325static void PfirstHelper(Test* config,
2326 const Tg& pg_inputs,
2327 const Tn& pn_inputs,
2328 const Td& pd_expected) {
2329 PfirstPnextHelper(config,
2330 &MacroAssembler::Pfirst,
2331 kBRegSize, // pfirst only accepts B-sized lanes.
2332 pg_inputs,
2333 pn_inputs,
2334 pd_expected);
2335}
2336
2337template <typename Tg, typename Tn, typename Td>
2338static void PnextHelper(Test* config,
2339 unsigned lane_size_in_bits,
2340 const Tg& pg_inputs,
2341 const Tn& pn_inputs,
2342 const Td& pd_expected) {
2343 PfirstPnextHelper(config,
2344 &MacroAssembler::Pnext,
2345 lane_size_in_bits,
2346 pg_inputs,
2347 pn_inputs,
2348 pd_expected);
2349}
2350
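// Editorial sketch (not part of the original VIXL test suite): a rough
// reference model of `pfirst`, matching the expectations used in the test
// below. The input arrays in these tests list the highest-numbered lane
// first, so the architectural "first" active lane of pg is the right-most
// set entry.
template <int N>
void PfirstModelForIllustration(const int (&pg)[N],
                                const int (&pn)[N],
                                int (&pd)[N]) {
  // pd starts as a copy of pn; only the flag (lowest) bit of each lane
  // matters.
  for (int i = 0; i < N; i++) pd[i] = pn[i] & 1;
  // Activate the lane corresponding to the first active lane of pg, if any.
  for (int i = N - 1; i >= 0; i--) {
    if ((pg[i] & 1) == 1) {
      pd[i] = 1;
      break;
    }
  }
}
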
2351TEST_SVE(sve_pfirst) {
2352 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2353 // large VL), but few enough to make the test easy to read.
2354 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2355 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2356 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2357 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2358 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2359 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2360
2361 // Pfirst finds the first active lane in pg, and activates the corresponding
2362 // lane in pn (if it isn't already active).
2363
2364 // The first active lane in in1 is here. |
2365 // v
2366 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2367 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2368 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2369 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2370 PfirstHelper(config, in1, in0, exp10);
2371 PfirstHelper(config, in1, in2, exp12);
2372 PfirstHelper(config, in1, in3, exp13);
2373 PfirstHelper(config, in1, in4, exp14);
2374
2375 // The first active lane in in2 is here. |
2376 // v
2377 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2378 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2379 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2380 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2381 PfirstHelper(config, in2, in0, exp20);
2382 PfirstHelper(config, in2, in1, exp21);
2383 PfirstHelper(config, in2, in3, exp23);
2384 PfirstHelper(config, in2, in4, exp24);
2385
2386 // The first active lane in in3 is here. |
2387 // v
2388 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2389 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2390 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2391 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2392 PfirstHelper(config, in3, in0, exp30);
2393 PfirstHelper(config, in3, in1, exp31);
2394 PfirstHelper(config, in3, in2, exp32);
2395 PfirstHelper(config, in3, in4, exp34);
2396
2397 // | The first active lane in in4 is here.
2398 // v
2399 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2400 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2401 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2402 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2403 PfirstHelper(config, in4, in0, exp40);
2404 PfirstHelper(config, in4, in1, exp41);
2405 PfirstHelper(config, in4, in2, exp42);
2406 PfirstHelper(config, in4, in3, exp43);
2407
2408 // If pg is all inactive, the input is passed through unchanged.
2409 PfirstHelper(config, in0, in0, in0);
2410 PfirstHelper(config, in0, in1, in1);
2411 PfirstHelper(config, in0, in2, in2);
2412 PfirstHelper(config, in0, in3, in3);
2413
2414 // If the values of pg and pn match, the value is passed through unchanged.
2415 PfirstHelper(config, in0, in0, in0);
2416 PfirstHelper(config, in1, in1, in1);
2417 PfirstHelper(config, in2, in2, in2);
2418 PfirstHelper(config, in3, in3, in3);
2419}
2420
2421TEST_SVE(sve_pfirst_alias) {
2422 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2423 START();
2424
2425 // Check that the Simulator behaves correctly when all arguments are aliased.
2426 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2427 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2428 int in_s[] = {0, 1, 1, 0};
2429 int in_d[] = {1, 1};
2430
2431 Initialise(&masm, p0.VnB(), in_b);
2432 Initialise(&masm, p1.VnH(), in_h);
2433 Initialise(&masm, p2.VnS(), in_s);
2434 Initialise(&masm, p3.VnD(), in_d);
2435
2436 // Initialise NZCV to an impossible value, to check that we actually write it.
2437 __ Mov(x10, NZCVFlag);
2438
2439 __ Msr(NZCV, x10);
2440 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2441 __ Mrs(x0, NZCV);
2442
2443 __ Msr(NZCV, x10);
2444 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
2445 __ Mrs(x1, NZCV);
2446
2447 __ Msr(NZCV, x10);
2448 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
2449 __ Mrs(x2, NZCV);
2450
2451 __ Msr(NZCV, x10);
2452 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
2453 __ Mrs(x3, NZCV);
2454
2455 END();
2456
2457 if (CAN_RUN()) {
2458 RUN();
2459
2460 // The first lane from pg is already active in pdn, so the P register should
2461 // be unchanged.
2462 ASSERT_EQUAL_SVE(in_b, p0.VnB());
2463 ASSERT_EQUAL_SVE(in_h, p1.VnH());
2464 ASSERT_EQUAL_SVE(in_s, p2.VnS());
2465 ASSERT_EQUAL_SVE(in_d, p3.VnD());
2466
2467 ASSERT_EQUAL_64(SVEFirstFlag, x0);
2468 ASSERT_EQUAL_64(SVEFirstFlag, x1);
2469 ASSERT_EQUAL_64(SVEFirstFlag, x2);
2470 ASSERT_EQUAL_64(SVEFirstFlag, x3);
2471 }
2472}
2473
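// Editorial sketch (not part of the original VIXL test suite): a rough
// reference model of `pnext`, matching the expectations used in the tests
// below. At most one lane of the result is active: the first active lane of
// pg that is strictly after the last active lane of pn. Lane 0 is the
// right-most entry of these input arrays.
template <int N>
void PnextModelForIllustration(const int (&pg)[N],
                               const int (&pn)[N],
                               int (&pd)[N]) {
  // Find the last (highest-numbered) active lane of pn, or -1 if none.
  int last_active = -1;
  for (int lane = N - 1; lane >= 0; lane--) {
    if ((pn[N - 1 - lane] & 1) == 1) {
      last_active = lane;
      break;
    }
  }
  // Clear the result, then set the next active lane of pg, if there is one.
  for (int i = 0; i < N; i++) pd[i] = 0;
  for (int lane = last_active + 1; lane < N; lane++) {
    if ((pg[N - 1 - lane] & 1) == 1) {
      pd[N - 1 - lane] = 1;
      break;
    }
  }
}
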
2474TEST_SVE(sve_pnext_b) {
2475 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2476 // (to check propagation if we have a large VL), but few enough to make the
2477 // test easy to read.
2478 // For now, we just use kPRegMinSize so that the test works anywhere.
2479 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2480 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2481 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2482 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
2483 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2484
2485 // Pnext activates the next element that is true in pg, after the last-active
2486 // element in pn. If all pn elements are false (as in in0), it starts looking
2487 // at element 0.
2488
2489 // There are no active lanes in in0, so the result is simply the first active
2490 // lane from pg.
2491 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2492 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2493 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2494 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2495 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2496
2497 // The last active lane in in1 is here. |
2498 // v
2499 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2500 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2501 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2502 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2503 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2504
2505 // | The last active lane in in2 is here.
2506 // v
2507 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2508 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2509 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2510 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2511 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2512
2513 // | The last active lane in in3 is here.
2514 // v
2515 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2516 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2517 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2518 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2519 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2520
2521 // | The last active lane in in4 is here.
2522 // v
2523 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2524 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2525 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2526 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2527 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2528
2529 PnextHelper(config, kBRegSize, in0, in0, exp00);
2530 PnextHelper(config, kBRegSize, in1, in0, exp10);
2531 PnextHelper(config, kBRegSize, in2, in0, exp20);
2532 PnextHelper(config, kBRegSize, in3, in0, exp30);
2533 PnextHelper(config, kBRegSize, in4, in0, exp40);
2534
2535 PnextHelper(config, kBRegSize, in0, in1, exp01);
2536 PnextHelper(config, kBRegSize, in1, in1, exp11);
2537 PnextHelper(config, kBRegSize, in2, in1, exp21);
2538 PnextHelper(config, kBRegSize, in3, in1, exp31);
2539 PnextHelper(config, kBRegSize, in4, in1, exp41);
2540
2541 PnextHelper(config, kBRegSize, in0, in2, exp02);
2542 PnextHelper(config, kBRegSize, in1, in2, exp12);
2543 PnextHelper(config, kBRegSize, in2, in2, exp22);
2544 PnextHelper(config, kBRegSize, in3, in2, exp32);
2545 PnextHelper(config, kBRegSize, in4, in2, exp42);
2546
2547 PnextHelper(config, kBRegSize, in0, in3, exp03);
2548 PnextHelper(config, kBRegSize, in1, in3, exp13);
2549 PnextHelper(config, kBRegSize, in2, in3, exp23);
2550 PnextHelper(config, kBRegSize, in3, in3, exp33);
2551 PnextHelper(config, kBRegSize, in4, in3, exp43);
2552
2553 PnextHelper(config, kBRegSize, in0, in4, exp04);
2554 PnextHelper(config, kBRegSize, in1, in4, exp14);
2555 PnextHelper(config, kBRegSize, in2, in4, exp24);
2556 PnextHelper(config, kBRegSize, in3, in4, exp34);
2557 PnextHelper(config, kBRegSize, in4, in4, exp44);
2558}
2559
2560TEST_SVE(sve_pnext_h) {
2561 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2562 // (to check propagation if we have a large VL), but few enough to make the
2563 // test easy to read.
2564 // For now, we just use kPRegMinSize so that the test works anywhere.
2565 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
2566 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
2567 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
2568 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
2569 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
2570
2571 // Pnext activates the next element that is true in pg, after the last-active
2572 // element in pn. If all pn elements are false (as in in0), it starts looking
2573 // at element 0.
2574 //
2575 // As for other SVE instructions, elements are only considered to be active if
2576 // the _first_ bit in each field is one. Other bits are ignored.
2577
2578 // There are no active lanes in in0, so the result is simply the first active
2579 // lane from pg.
2580 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
2581 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
2582 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
2583 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
2584 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
2585
2586 // | The last active lane in in1 is here.
2587 // v
2588 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
2589 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
2590 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
2591 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
2592 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
2593
2594 // | The last active lane in in2 is here.
2595 // v
2596 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
2597 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
2598 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
2599 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
2600 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
2601
2602 // | The last active lane in in3 is here.
2603 // v
2604 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
2605 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
2606 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
2607 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
2608 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
2609
2610 // | The last active lane in in4 is here.
2611 // v
2612 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
2613 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
2614 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
2615 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
2616 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
2617
2618 PnextHelper(config, kHRegSize, in0, in0, exp00);
2619 PnextHelper(config, kHRegSize, in1, in0, exp10);
2620 PnextHelper(config, kHRegSize, in2, in0, exp20);
2621 PnextHelper(config, kHRegSize, in3, in0, exp30);
2622 PnextHelper(config, kHRegSize, in4, in0, exp40);
2623
2624 PnextHelper(config, kHRegSize, in0, in1, exp01);
2625 PnextHelper(config, kHRegSize, in1, in1, exp11);
2626 PnextHelper(config, kHRegSize, in2, in1, exp21);
2627 PnextHelper(config, kHRegSize, in3, in1, exp31);
2628 PnextHelper(config, kHRegSize, in4, in1, exp41);
2629
2630 PnextHelper(config, kHRegSize, in0, in2, exp02);
2631 PnextHelper(config, kHRegSize, in1, in2, exp12);
2632 PnextHelper(config, kHRegSize, in2, in2, exp22);
2633 PnextHelper(config, kHRegSize, in3, in2, exp32);
2634 PnextHelper(config, kHRegSize, in4, in2, exp42);
2635
2636 PnextHelper(config, kHRegSize, in0, in3, exp03);
2637 PnextHelper(config, kHRegSize, in1, in3, exp13);
2638 PnextHelper(config, kHRegSize, in2, in3, exp23);
2639 PnextHelper(config, kHRegSize, in3, in3, exp33);
2640 PnextHelper(config, kHRegSize, in4, in3, exp43);
2641
2642 PnextHelper(config, kHRegSize, in0, in4, exp04);
2643 PnextHelper(config, kHRegSize, in1, in4, exp14);
2644 PnextHelper(config, kHRegSize, in2, in4, exp24);
2645 PnextHelper(config, kHRegSize, in3, in4, exp34);
2646 PnextHelper(config, kHRegSize, in4, in4, exp44);
2647}
2648
2649TEST_SVE(sve_pnext_s) {
2650 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2651 // (to check propagation if we have a large VL), but few enough to make the
2652 // test easy to read.
2653 // For now, we just use kPRegMinSize so that the test works anywhere.
2654 int in0[] = {0xe, 0xc, 0x8, 0x0};
2655 int in1[] = {0x0, 0x2, 0x0, 0x1};
2656 int in2[] = {0x0, 0x1, 0xf, 0x0};
2657 int in3[] = {0xf, 0x0, 0x0, 0x0};
2658
2659 // Pnext activates the next element that is true in pg, after the last-active
2660 // element in pn. If all pn elements are false (as in in0), it starts looking
2661 // at element 0.
2662 //
2663 // As for other SVE instructions, elements are only considered to be active if
2664 // the _first_ bit in each field is one. Other bits are ignored.
2665
2666 // There are no active lanes in in0, so the result is simply the first active
2667 // lane from pg.
2668 int exp00[] = {0, 0, 0, 0};
2669 int exp10[] = {0, 0, 0, 1};
2670 int exp20[] = {0, 0, 1, 0};
2671 int exp30[] = {1, 0, 0, 0};
2672
2673 // | The last active lane in in1 is here.
2674 // v
2675 int exp01[] = {0, 0, 0, 0};
2676 int exp11[] = {0, 0, 0, 0};
2677 int exp21[] = {0, 0, 1, 0};
2678 int exp31[] = {1, 0, 0, 0};
2679
2680 // | The last active lane in in2 is here.
2681 // v
2682 int exp02[] = {0, 0, 0, 0};
2683 int exp12[] = {0, 0, 0, 0};
2684 int exp22[] = {0, 0, 0, 0};
2685 int exp32[] = {1, 0, 0, 0};
2686
2687 // | The last active lane in in3 is here.
2688 // v
2689 int exp03[] = {0, 0, 0, 0};
2690 int exp13[] = {0, 0, 0, 0};
2691 int exp23[] = {0, 0, 0, 0};
2692 int exp33[] = {0, 0, 0, 0};
2693
2694 PnextHelper(config, kSRegSize, in0, in0, exp00);
2695 PnextHelper(config, kSRegSize, in1, in0, exp10);
2696 PnextHelper(config, kSRegSize, in2, in0, exp20);
2697 PnextHelper(config, kSRegSize, in3, in0, exp30);
2698
2699 PnextHelper(config, kSRegSize, in0, in1, exp01);
2700 PnextHelper(config, kSRegSize, in1, in1, exp11);
2701 PnextHelper(config, kSRegSize, in2, in1, exp21);
2702 PnextHelper(config, kSRegSize, in3, in1, exp31);
2703
2704 PnextHelper(config, kSRegSize, in0, in2, exp02);
2705 PnextHelper(config, kSRegSize, in1, in2, exp12);
2706 PnextHelper(config, kSRegSize, in2, in2, exp22);
2707 PnextHelper(config, kSRegSize, in3, in2, exp32);
2708
2709 PnextHelper(config, kSRegSize, in0, in3, exp03);
2710 PnextHelper(config, kSRegSize, in1, in3, exp13);
2711 PnextHelper(config, kSRegSize, in2, in3, exp23);
2712 PnextHelper(config, kSRegSize, in3, in3, exp33);
2713}
2714
2715TEST_SVE(sve_pnext_d) {
2716 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2717 // (to check propagation if we have a large VL), but few enough to make the
2718 // test easy to read.
2719 // For now, we just use kPRegMinSize so that the test works anywhere.
2720 int in0[] = {0xfe, 0xf0};
2721 int in1[] = {0x00, 0x55};
2722 int in2[] = {0x33, 0xff};
2723
2724 // Pnext activates the next element that is true in pg, after the last-active
2725 // element in pn. If all pn elements are false (as in in0), it starts looking
2726 // at element 0.
2727 //
2728 // As for other SVE instructions, elements are only considered to be active if
2729 // the _first_ bit in each field is one. Other bits are ignored.
2730
2731 // There are no active lanes in in0, so the result is simply the first active
2732 // lane from pg.
2733 int exp00[] = {0, 0};
2734 int exp10[] = {0, 1};
2735 int exp20[] = {0, 1};
2736
2737 // | The last active lane in in1 is here.
2738 // v
2739 int exp01[] = {0, 0};
2740 int exp11[] = {0, 0};
2741 int exp21[] = {1, 0};
2742
2743 // | The last active lane in in2 is here.
2744 // v
2745 int exp02[] = {0, 0};
2746 int exp12[] = {0, 0};
2747 int exp22[] = {0, 0};
2748
2749 PnextHelper(config, kDRegSize, in0, in0, exp00);
2750 PnextHelper(config, kDRegSize, in1, in0, exp10);
2751 PnextHelper(config, kDRegSize, in2, in0, exp20);
2752
2753 PnextHelper(config, kDRegSize, in0, in1, exp01);
2754 PnextHelper(config, kDRegSize, in1, in1, exp11);
2755 PnextHelper(config, kDRegSize, in2, in1, exp21);
2756
2757 PnextHelper(config, kDRegSize, in0, in2, exp02);
2758 PnextHelper(config, kDRegSize, in1, in2, exp12);
2759 PnextHelper(config, kDRegSize, in2, in2, exp22);
2760}
2761
2762TEST_SVE(sve_pnext_alias) {
2763 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2764 START();
2765
2766 // Check that the Simulator behaves correctly when all arguments are aliased.
2767 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2768 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2769 int in_s[] = {0, 1, 1, 0};
2770 int in_d[] = {1, 1};
2771
2772 Initialise(&masm, p0.VnB(), in_b);
2773 Initialise(&masm, p1.VnH(), in_h);
2774 Initialise(&masm, p2.VnS(), in_s);
2775 Initialise(&masm, p3.VnD(), in_d);
2776
2777 // Initialise NZCV to an impossible value, to check that we actually write it.
2778 __ Mov(x10, NZCVFlag);
2779
2780 __ Msr(NZCV, x10);
2781 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
2782 __ Mrs(x0, NZCV);
2783
2784 __ Msr(NZCV, x10);
2785 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
2786 __ Mrs(x1, NZCV);
2787
2788 __ Msr(NZCV, x10);
2789 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
2790 __ Mrs(x2, NZCV);
2791
2792 __ Msr(NZCV, x10);
2793 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
2794 __ Mrs(x3, NZCV);
2795
2796 END();
2797
2798 if (CAN_RUN()) {
2799 RUN();
2800
2801 // Since pg.Is(pdn), there can be no active lanes in pg above the last
2802 // active lane in pdn, so the result should always be zero.
2803 ASSERT_EQUAL_SVE(0, p0.VnB());
2804 ASSERT_EQUAL_SVE(0, p1.VnH());
2805 ASSERT_EQUAL_SVE(0, p2.VnS());
2806 ASSERT_EQUAL_SVE(0, p3.VnD());
2807
2808 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
2809 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
2810 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
2811 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
2812 }
2813}
2814
2815static void PtrueHelper(Test* config,
2816 unsigned lane_size_in_bits,
2817 FlagsUpdate s = LeaveFlags) {
2818 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2819 START();
2820
2821 PRegisterWithLaneSize p[kNumberOfPRegisters];
2822 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
2823 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
2824 }
2825
2826 // Initialise NZCV to an impossible value, to check that we actually write it.
2827 StatusFlags nzcv_unmodified = NZCVFlag;
2828 __ Mov(x20, nzcv_unmodified);
2829
2830 // We don't have enough registers to conveniently test every pattern, so take
2831 // samples from each group.
2832 __ Msr(NZCV, x20);
2833 __ Ptrue(p[0], SVE_POW2, s);
2834 __ Mrs(x0, NZCV);
2835
2836 __ Msr(NZCV, x20);
2837 __ Ptrue(p[1], SVE_VL1, s);
2838 __ Mrs(x1, NZCV);
2839
2840 __ Msr(NZCV, x20);
2841 __ Ptrue(p[2], SVE_VL2, s);
2842 __ Mrs(x2, NZCV);
2843
2844 __ Msr(NZCV, x20);
2845 __ Ptrue(p[3], SVE_VL5, s);
2846 __ Mrs(x3, NZCV);
2847
2848 __ Msr(NZCV, x20);
2849 __ Ptrue(p[4], SVE_VL6, s);
2850 __ Mrs(x4, NZCV);
2851
2852 __ Msr(NZCV, x20);
2853 __ Ptrue(p[5], SVE_VL8, s);
2854 __ Mrs(x5, NZCV);
2855
2856 __ Msr(NZCV, x20);
2857 __ Ptrue(p[6], SVE_VL16, s);
2858 __ Mrs(x6, NZCV);
2859
2860 __ Msr(NZCV, x20);
2861 __ Ptrue(p[7], SVE_VL64, s);
2862 __ Mrs(x7, NZCV);
2863
2864 __ Msr(NZCV, x20);
2865 __ Ptrue(p[8], SVE_VL256, s);
2866 __ Mrs(x8, NZCV);
2867
2868 {
2869 // We have to use the Assembler to test values not defined by
2870 // SVEPredicateConstraint, so call `ptrue` or `ptrues` directly.
2871 typedef void (
2872 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
2873 int pattern);
2874 AssemblePtrueFn assemble =
2875 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
2876
2877 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
2878 __ msr(NZCV, x20);
2879 (masm.*assemble)(p[9], 0xe);
2880 __ mrs(x9, NZCV);
2881
2882 __ msr(NZCV, x20);
2883 (masm.*assemble)(p[10], 0x16);
2884 __ mrs(x10, NZCV);
2885
2886 __ msr(NZCV, x20);
2887 (masm.*assemble)(p[11], 0x1a);
2888 __ mrs(x11, NZCV);
2889
2890 __ msr(NZCV, x20);
2891 (masm.*assemble)(p[12], 0x1c);
2892 __ mrs(x12, NZCV);
2893 }
2894
2895 __ Msr(NZCV, x20);
2896 __ Ptrue(p[13], SVE_MUL4, s);
2897 __ Mrs(x13, NZCV);
2898
2899 __ Msr(NZCV, x20);
2900 __ Ptrue(p[14], SVE_MUL3, s);
2901 __ Mrs(x14, NZCV);
2902
2903 __ Msr(NZCV, x20);
2904 __ Ptrue(p[15], SVE_ALL, s);
2905 __ Mrs(x15, NZCV);
2906
2907 END();
2908
2909 if (CAN_RUN()) {
2910 RUN();
2911
2912 int all = core.GetSVELaneCount(lane_size_in_bits);
2913 int pow2 = 1 << HighestSetBitPosition(all);
2914 int mul4 = all - (all % 4);
2915 int mul3 = all - (all % 3);
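    // Added example (not in the original test): with B-sized lanes at a
    // 384-bit VL, `all` is 48, so `pow2` is 32 and both `mul4` and `mul3`
    // are 48, while patterns such as SVE_VL64 and SVE_VL256 request more
    // lanes than exist and so produce an all-false predicate.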
2916
2917 // Check P register results.
2918 for (int i = 0; i < all; i++) {
2919 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
2920 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
2921 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
2922 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
2923 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
2924 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
2925 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
2926 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
2927 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
2928 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
2929 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
2930 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
2931 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
2932 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
2933 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
2934 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
2935 }
2936
2937 // Check NZCV results.
2938 if (s == LeaveFlags) {
2939 // No flags should have been updated.
2940 for (int i = 0; i <= 15; i++) {
2941 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
2942 }
2943 } else {
2944 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
2945 StatusFlags nonzero = SVEFirstFlag;
2946
2947 // POW2
2948 ASSERT_EQUAL_64(nonzero, x0);
2949 // VL*
2950 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
2951 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
2952 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
2953 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
2954 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
2955 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
2956 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
2957 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
2958 // #uimm5
2959 ASSERT_EQUAL_64(zero, x9);
2960 ASSERT_EQUAL_64(zero, x10);
2961 ASSERT_EQUAL_64(zero, x11);
2962 ASSERT_EQUAL_64(zero, x12);
2963 // MUL*
2964 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
2965 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
2966 // ALL
2967 ASSERT_EQUAL_64(nonzero, x15);
2968 }
2969 }
2970}
2971
Jacob Bramleye8289202019-07-31 11:25:23 +01002972TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
2973TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
2974TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
2975TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002976
Jacob Bramleye8289202019-07-31 11:25:23 +01002977TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
2978TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
2979TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
2980TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002981
Jacob Bramleye8289202019-07-31 11:25:23 +01002982TEST_SVE(sve_pfalse) {
2983 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002984 START();
2985
2986 // Initialise non-zero inputs.
2987 __ Ptrue(p0.VnB());
2988 __ Ptrue(p1.VnH());
2989 __ Ptrue(p2.VnS());
2990 __ Ptrue(p3.VnD());
2991
2992 // The instruction only supports B-sized lanes, but the lane size has no
2993 // logical effect, so the MacroAssembler accepts anything.
2994 __ Pfalse(p0.VnB());
2995 __ Pfalse(p1.VnH());
2996 __ Pfalse(p2.VnS());
2997 __ Pfalse(p3.VnD());
2998
2999 END();
3000
3001 if (CAN_RUN()) {
3002 RUN();
3003
3004 ASSERT_EQUAL_SVE(0, p0.VnB());
3005 ASSERT_EQUAL_SVE(0, p1.VnB());
3006 ASSERT_EQUAL_SVE(0, p2.VnB());
3007 ASSERT_EQUAL_SVE(0, p3.VnB());
3008 }
3009}
3010
Jacob Bramleye8289202019-07-31 11:25:23 +01003011TEST_SVE(sve_ptest) {
3012 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003013 START();
3014
3015 // Initialise NZCV to a known (impossible) value.
3016 StatusFlags nzcv_unmodified = NZCVFlag;
3017 __ Mov(x0, nzcv_unmodified);
3018 __ Msr(NZCV, x0);
3019
3020 // Construct some test inputs.
3021 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3022 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3023 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3024 __ Pfalse(p0.VnB());
3025 __ Ptrue(p1.VnB());
3026 Initialise(&masm, p2.VnB(), in2);
3027 Initialise(&masm, p3.VnB(), in3);
3028 Initialise(&masm, p4.VnB(), in4);
3029
3030 // All-inactive pg.
3031 __ Ptest(p0, p0.VnB());
3032 __ Mrs(x0, NZCV);
3033 __ Ptest(p0, p1.VnB());
3034 __ Mrs(x1, NZCV);
3035 __ Ptest(p0, p2.VnB());
3036 __ Mrs(x2, NZCV);
3037 __ Ptest(p0, p3.VnB());
3038 __ Mrs(x3, NZCV);
3039 __ Ptest(p0, p4.VnB());
3040 __ Mrs(x4, NZCV);
3041
3042 // All-active pg.
3043 __ Ptest(p1, p0.VnB());
3044 __ Mrs(x5, NZCV);
3045 __ Ptest(p1, p1.VnB());
3046 __ Mrs(x6, NZCV);
3047 __ Ptest(p1, p2.VnB());
3048 __ Mrs(x7, NZCV);
3049 __ Ptest(p1, p3.VnB());
3050 __ Mrs(x8, NZCV);
3051 __ Ptest(p1, p4.VnB());
3052 __ Mrs(x9, NZCV);
3053
3054 // Combinations of other inputs.
3055 __ Ptest(p2, p2.VnB());
3056 __ Mrs(x20, NZCV);
3057 __ Ptest(p2, p3.VnB());
3058 __ Mrs(x21, NZCV);
3059 __ Ptest(p2, p4.VnB());
3060 __ Mrs(x22, NZCV);
3061 __ Ptest(p3, p2.VnB());
3062 __ Mrs(x23, NZCV);
3063 __ Ptest(p3, p3.VnB());
3064 __ Mrs(x24, NZCV);
3065 __ Ptest(p3, p4.VnB());
3066 __ Mrs(x25, NZCV);
3067 __ Ptest(p4, p2.VnB());
3068 __ Mrs(x26, NZCV);
3069 __ Ptest(p4, p3.VnB());
3070 __ Mrs(x27, NZCV);
3071 __ Ptest(p4, p4.VnB());
3072 __ Mrs(x28, NZCV);
3073
3074 END();
3075
3076 if (CAN_RUN()) {
3077 RUN();
3078
3079 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
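  // For SVE flag-setting operations: N (SVEFirstFlag) means the first active
  // lane of pn is true, Z (SVENoneFlag) means no active lane is true, and
  // C (SVENotLastFlag) means the last active lane is not true.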
3080
3081 // If pg is all inactive, the value of pn is irrelevant.
3082 ASSERT_EQUAL_64(zero, x0);
3083 ASSERT_EQUAL_64(zero, x1);
3084 ASSERT_EQUAL_64(zero, x2);
3085 ASSERT_EQUAL_64(zero, x3);
3086 ASSERT_EQUAL_64(zero, x4);
3087
3088 // All-active pg.
3089 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3090 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3091 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3092 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3093 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3094 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3095
3096 // Other inputs.
3097 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3098 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3099 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3100 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3101 x23); // pg: in3, pn: in2
3102 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3103 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3104 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3105 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3106 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3107 }
3108}
3109
Jacob Bramleye8289202019-07-31 11:25:23 +01003110TEST_SVE(sve_cntp) {
3111 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01003112 START();
3113
3114 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3115 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
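  // Predicates hold one bit per byte; for H, S and D lanes only the bit for
  // the lowest byte of each element is significant, which is why the counts
  // above shrink to 5, 2 and 1.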
3116 Initialise(&masm, p0.VnB(), p0_inputs);
3117
3118 // With an all-true predicate, these instructions measure the vector length.
3119 __ Ptrue(p10.VnB());
3120 __ Ptrue(p11.VnH());
3121 __ Ptrue(p12.VnS());
3122 __ Ptrue(p13.VnD());
3123
3124 // `ptrue p10.b` provides an all-active pg.
3125 __ Cntp(x10, p10, p10.VnB());
3126 __ Cntp(x11, p10, p11.VnH());
3127 __ Cntp(x12, p10, p12.VnS());
3128 __ Cntp(x13, p10, p13.VnD());
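  // For example, with a 384-bit VL these return 48, 24, 12 and 6 respectively.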
3129
3130 // Check that the predicate mask is applied properly.
3131 __ Cntp(x14, p10, p10.VnB());
3132 __ Cntp(x15, p11, p10.VnB());
3133 __ Cntp(x16, p12, p10.VnB());
3134 __ Cntp(x17, p13, p10.VnB());
3135
3136 // Check other patterns (including some ignored bits).
3137 __ Cntp(x0, p10, p0.VnB());
3138 __ Cntp(x1, p10, p0.VnH());
3139 __ Cntp(x2, p10, p0.VnS());
3140 __ Cntp(x3, p10, p0.VnD());
3141 __ Cntp(x4, p0, p10.VnB());
3142 __ Cntp(x5, p0, p10.VnH());
3143 __ Cntp(x6, p0, p10.VnS());
3144 __ Cntp(x7, p0, p10.VnD());
3145
3146 END();
3147
3148 if (CAN_RUN()) {
3149 RUN();
3150
3151 int vl_b = core.GetSVELaneCount(kBRegSize);
3152 int vl_h = core.GetSVELaneCount(kHRegSize);
3153 int vl_s = core.GetSVELaneCount(kSRegSize);
3154 int vl_d = core.GetSVELaneCount(kDRegSize);
3155
3156 // Check all-active predicates in various combinations.
3157 ASSERT_EQUAL_64(vl_b, x10);
3158 ASSERT_EQUAL_64(vl_h, x11);
3159 ASSERT_EQUAL_64(vl_s, x12);
3160 ASSERT_EQUAL_64(vl_d, x13);
3161
3162 ASSERT_EQUAL_64(vl_b, x14);
3163 ASSERT_EQUAL_64(vl_h, x15);
3164 ASSERT_EQUAL_64(vl_s, x16);
3165 ASSERT_EQUAL_64(vl_d, x17);
3166
3167 // Check that irrelevant bits are properly ignored.
3168 ASSERT_EQUAL_64(7, x0);
3169 ASSERT_EQUAL_64(5, x1);
3170 ASSERT_EQUAL_64(2, x2);
3171 ASSERT_EQUAL_64(1, x3);
3172
3173 ASSERT_EQUAL_64(7, x4);
3174 ASSERT_EQUAL_64(5, x5);
3175 ASSERT_EQUAL_64(2, x6);
3176 ASSERT_EQUAL_64(1, x7);
3177 }
3178}
3179
Martyn Capewell74f84f62019-10-30 15:30:44 +00003180typedef void (MacroAssembler::*CntFn)(const Register& dst,
3181 int pattern,
3182 int multiplier);
3183
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003184template <typename T>
3185void GenerateCntSequence(MacroAssembler* masm,
3186 CntFn cnt,
3187 T acc_value,
3188 int multiplier) {
3189 // Initialise accumulators.
3190 masm->Mov(x0, acc_value);
3191 masm->Mov(x1, acc_value);
3192 masm->Mov(x2, acc_value);
3193 masm->Mov(x3, acc_value);
3194 masm->Mov(x4, acc_value);
3195 masm->Mov(x5, acc_value);
3196 masm->Mov(x6, acc_value);
3197 masm->Mov(x7, acc_value);
3198 masm->Mov(x8, acc_value);
3199 masm->Mov(x9, acc_value);
3200 masm->Mov(x10, acc_value);
3201 masm->Mov(x11, acc_value);
3202 masm->Mov(x12, acc_value);
3203 masm->Mov(x13, acc_value);
3204 masm->Mov(x14, acc_value);
3205 masm->Mov(x15, acc_value);
3206 masm->Mov(x18, acc_value);
3207 masm->Mov(x19, acc_value);
3208 masm->Mov(x20, acc_value);
3209 masm->Mov(x21, acc_value);
3210
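  // `Register(code, size)` selects wN or xN depending on whether T is 32 or 64
  // bits wide, so the same sequence exercises both destination sizes. Note that
  // x16 and x17 are skipped, presumably because they are reserved as scratch
  // registers by the MacroAssembler.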
3211 (masm->*cnt)(Register(0, sizeof(T) * kBitsPerByte), SVE_POW2, multiplier);
3212 (masm->*cnt)(Register(1, sizeof(T) * kBitsPerByte), SVE_VL1, multiplier);
3213 (masm->*cnt)(Register(2, sizeof(T) * kBitsPerByte), SVE_VL2, multiplier);
3214 (masm->*cnt)(Register(3, sizeof(T) * kBitsPerByte), SVE_VL3, multiplier);
3215 (masm->*cnt)(Register(4, sizeof(T) * kBitsPerByte), SVE_VL4, multiplier);
3216 (masm->*cnt)(Register(5, sizeof(T) * kBitsPerByte), SVE_VL5, multiplier);
3217 (masm->*cnt)(Register(6, sizeof(T) * kBitsPerByte), SVE_VL6, multiplier);
3218 (masm->*cnt)(Register(7, sizeof(T) * kBitsPerByte), SVE_VL7, multiplier);
3219 (masm->*cnt)(Register(8, sizeof(T) * kBitsPerByte), SVE_VL8, multiplier);
3220 (masm->*cnt)(Register(9, sizeof(T) * kBitsPerByte), SVE_VL16, multiplier);
3221 (masm->*cnt)(Register(10, sizeof(T) * kBitsPerByte), SVE_VL32, multiplier);
3222 (masm->*cnt)(Register(11, sizeof(T) * kBitsPerByte), SVE_VL64, multiplier);
3223 (masm->*cnt)(Register(12, sizeof(T) * kBitsPerByte), SVE_VL128, multiplier);
3224 (masm->*cnt)(Register(13, sizeof(T) * kBitsPerByte), SVE_VL256, multiplier);
3225 (masm->*cnt)(Register(14, sizeof(T) * kBitsPerByte), 16, multiplier);
3226 (masm->*cnt)(Register(15, sizeof(T) * kBitsPerByte), 23, multiplier);
3227 (masm->*cnt)(Register(18, sizeof(T) * kBitsPerByte), 28, multiplier);
3228 (masm->*cnt)(Register(19, sizeof(T) * kBitsPerByte), SVE_MUL4, multiplier);
3229 (masm->*cnt)(Register(20, sizeof(T) * kBitsPerByte), SVE_MUL3, multiplier);
3230 (masm->*cnt)(Register(21, sizeof(T) * kBitsPerByte), SVE_ALL, multiplier);
3231}
3232
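// Return `fixed` if the vector has at least that many lanes, and 0 otherwise.
// For example, FixedVL(8, 12) == 8 but FixedVL(16, 12) == 0, matching the
// behaviour of the fixed SVE_VL<n> patterns.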
3233int FixedVL(int fixed, int length) {
3234 VIXL_ASSERT(((fixed >= 1) && (fixed <= 8)) || (fixed == 16) ||
3235 (fixed == 32) || (fixed == 64) || (fixed == 128) ||
3236 (fixed == 256));
3237 return (length >= fixed) ? fixed : 0;
3238}
3239
Martyn Capewell74f84f62019-10-30 15:30:44 +00003240static void CntHelper(Test* config,
3241 CntFn cnt,
3242 int multiplier,
Martyn Capewell579c92d2019-10-30 17:48:52 +00003243 int lane_size_in_bits,
3244 int64_t acc_value = 0,
3245 bool is_increment = true) {
Martyn Capewell74f84f62019-10-30 15:30:44 +00003246 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3247 START();
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003248 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003249 END();
3250
3251 if (CAN_RUN()) {
3252 RUN();
3253
3254 int all = core.GetSVELaneCount(lane_size_in_bits);
3255 int pow2 = 1 << HighestSetBitPosition(all);
3256 int mul4 = all - (all % 4);
3257 int mul3 = all - (all % 3);
3258
Martyn Capewell579c92d2019-10-30 17:48:52 +00003259 multiplier = is_increment ? multiplier : -multiplier;
3260
3261 ASSERT_EQUAL_64(acc_value + (multiplier * pow2), x0);
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003262 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(1, all)), x1);
3263 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(2, all)), x2);
3264 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(3, all)), x3);
3265 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(4, all)), x4);
3266 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(5, all)), x5);
3267 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(6, all)), x6);
3268 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(7, all)), x7);
3269 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(8, all)), x8);
3270 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(16, all)), x9);
3271 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(32, all)), x10);
3272 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(64, all)), x11);
3273 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(128, all)), x12);
3274 ASSERT_EQUAL_64(acc_value + (multiplier * FixedVL(256, all)), x13);
Martyn Capewell579c92d2019-10-30 17:48:52 +00003275 ASSERT_EQUAL_64(acc_value, x14);
3276 ASSERT_EQUAL_64(acc_value, x15);
3277 ASSERT_EQUAL_64(acc_value, x18);
3278 ASSERT_EQUAL_64(acc_value + (multiplier * mul4), x19);
3279 ASSERT_EQUAL_64(acc_value + (multiplier * mul3), x20);
3280 ASSERT_EQUAL_64(acc_value + (multiplier * all), x21);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003281 }
3282}
3283
Martyn Capewell579c92d2019-10-30 17:48:52 +00003284static void IncHelper(Test* config,
3285 CntFn cnt,
3286 int multiplier,
3287 int lane_size_in_bits,
3288 int64_t acc_value) {
3289 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3290}
3291
3292static void DecHelper(Test* config,
3293 CntFn cnt,
3294 int multiplier,
3295 int lane_size_in_bits,
3296 int64_t acc_value) {
3297 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3298}
3299
Martyn Capewell74f84f62019-10-30 15:30:44 +00003300TEST_SVE(sve_cntb) {
3301 CntHelper(config, &MacroAssembler::Cntb, 1, kBRegSize);
3302 CntHelper(config, &MacroAssembler::Cntb, 2, kBRegSize);
3303 CntHelper(config, &MacroAssembler::Cntb, 15, kBRegSize);
3304 CntHelper(config, &MacroAssembler::Cntb, 16, kBRegSize);
3305}
3306
3307TEST_SVE(sve_cnth) {
3308 CntHelper(config, &MacroAssembler::Cnth, 1, kHRegSize);
3309 CntHelper(config, &MacroAssembler::Cnth, 2, kHRegSize);
3310 CntHelper(config, &MacroAssembler::Cnth, 15, kHRegSize);
3311 CntHelper(config, &MacroAssembler::Cnth, 16, kHRegSize);
3312}
3313
3314TEST_SVE(sve_cntw) {
3315 CntHelper(config, &MacroAssembler::Cntw, 1, kWRegSize);
3316 CntHelper(config, &MacroAssembler::Cntw, 2, kWRegSize);
3317 CntHelper(config, &MacroAssembler::Cntw, 15, kWRegSize);
3318 CntHelper(config, &MacroAssembler::Cntw, 16, kWRegSize);
3319}
3320
3321TEST_SVE(sve_cntd) {
3322 CntHelper(config, &MacroAssembler::Cntd, 1, kDRegSize);
3323 CntHelper(config, &MacroAssembler::Cntd, 2, kDRegSize);
3324 CntHelper(config, &MacroAssembler::Cntd, 15, kDRegSize);
3325 CntHelper(config, &MacroAssembler::Cntd, 16, kDRegSize);
3326}
3327
Martyn Capewell579c92d2019-10-30 17:48:52 +00003328TEST_SVE(sve_decb) {
3329 DecHelper(config, &MacroAssembler::Decb, 1, kBRegSize, 42);
3330 DecHelper(config, &MacroAssembler::Decb, 2, kBRegSize, -1);
3331 DecHelper(config, &MacroAssembler::Decb, 15, kBRegSize, INT64_MIN);
3332 DecHelper(config, &MacroAssembler::Decb, 16, kBRegSize, -42);
3333}
3334
3335TEST_SVE(sve_dech) {
3336 DecHelper(config, &MacroAssembler::Dech, 1, kHRegSize, 42);
3337 DecHelper(config, &MacroAssembler::Dech, 2, kHRegSize, -1);
3338 DecHelper(config, &MacroAssembler::Dech, 15, kHRegSize, INT64_MIN);
3339 DecHelper(config, &MacroAssembler::Dech, 16, kHRegSize, -42);
3340}
3341
3342TEST_SVE(sve_decw) {
3343 DecHelper(config, &MacroAssembler::Decw, 1, kWRegSize, 42);
3344 DecHelper(config, &MacroAssembler::Decw, 2, kWRegSize, -1);
3345 DecHelper(config, &MacroAssembler::Decw, 15, kWRegSize, INT64_MIN);
3346 DecHelper(config, &MacroAssembler::Decw, 16, kWRegSize, -42);
3347}
3348
3349TEST_SVE(sve_decd) {
3350 DecHelper(config, &MacroAssembler::Decd, 1, kDRegSize, 42);
3351 DecHelper(config, &MacroAssembler::Decd, 2, kDRegSize, -1);
3352 DecHelper(config, &MacroAssembler::Decd, 15, kDRegSize, INT64_MIN);
3353 DecHelper(config, &MacroAssembler::Decd, 16, kDRegSize, -42);
3354}
3355
3356TEST_SVE(sve_incb) {
3357 IncHelper(config, &MacroAssembler::Incb, 1, kBRegSize, 42);
3358 IncHelper(config, &MacroAssembler::Incb, 2, kBRegSize, -1);
3359 IncHelper(config, &MacroAssembler::Incb, 15, kBRegSize, INT64_MAX);
3360 IncHelper(config, &MacroAssembler::Incb, 16, kBRegSize, -42);
3361}
3362
3363TEST_SVE(sve_inch) {
3364 IncHelper(config, &MacroAssembler::Inch, 1, kHRegSize, 42);
3365 IncHelper(config, &MacroAssembler::Inch, 2, kHRegSize, -1);
3366 IncHelper(config, &MacroAssembler::Inch, 15, kHRegSize, INT64_MAX);
3367 IncHelper(config, &MacroAssembler::Inch, 16, kHRegSize, -42);
3368}
3369
3370TEST_SVE(sve_incw) {
3371 IncHelper(config, &MacroAssembler::Incw, 1, kWRegSize, 42);
3372 IncHelper(config, &MacroAssembler::Incw, 2, kWRegSize, -1);
3373 IncHelper(config, &MacroAssembler::Incw, 15, kWRegSize, INT64_MAX);
3374 IncHelper(config, &MacroAssembler::Incw, 16, kWRegSize, -42);
3375}
3376
3377TEST_SVE(sve_incd) {
3378 IncHelper(config, &MacroAssembler::Incd, 1, kDRegSize, 42);
3379 IncHelper(config, &MacroAssembler::Incd, 2, kDRegSize, -1);
3380 IncHelper(config, &MacroAssembler::Incd, 15, kDRegSize, INT64_MAX);
3381 IncHelper(config, &MacroAssembler::Incd, 16, kDRegSize, -42);
3382}
3383
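// Saturating scalar addition, used to model the expected results of the
// saturating increment and decrement instructions. Illustrative values:
// QAdd<uint8_t>(250, 10) == 255, QAdd<int8_t>(120, 10) == 127 and
// QAdd<int8_t>(-120, -10) == -128.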
Martyn Capewell91d5ba32019-11-01 18:11:23 +00003384template <typename T>
3385static T QAdd(T x, int y) {
3386 VIXL_ASSERT(y > INT_MIN);
3387 T result;
3388 T min = std::numeric_limits<T>::min();
3389 T max = std::numeric_limits<T>::max();
3390 if ((x >= 0) && (y >= 0)) {
3391 // For positive x and y, saturate at max.
3392 result = (max - x) < static_cast<T>(y) ? max : x + y;
3393 } else if ((y < 0) && ((x < 0) || (min == 0))) {
3394 // For negative y, where either x is negative or T is unsigned, saturate at min.
3395 result = (x - min) < static_cast<T>(-y) ? min : x + y;
3396 } else {
3397 result = x + y;
3398 }
3399 return result;
3400}
3401
3402template <typename T>
3403static void QIncDecHelper(Test* config,
3404 CntFn cnt,
3405 int multiplier,
3406 int lane_size_in_bits,
3407 T acc_value,
3408 bool is_increment) {
3409 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3410 START();
3411 GenerateCntSequence(&masm, cnt, acc_value, multiplier);
3412 END();
3413
3414 if (CAN_RUN()) {
3415 RUN();
3416
3417 int all = core.GetSVELaneCount(lane_size_in_bits);
3418 int pow2 = 1 << HighestSetBitPosition(all);
3419 int mul4 = all - (all % 4);
3420 int mul3 = all - (all % 3);
3421
3422 multiplier = is_increment ? multiplier : -multiplier;
3423
3424 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
3425 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
3426 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
3427 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
3428 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
3429 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
3430 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
3431 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
3432 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
3433 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
3434 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
3435 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
3436 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
3437 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
3438 ASSERT_EQUAL_64(acc_value, x14);
3439 ASSERT_EQUAL_64(acc_value, x15);
3440 ASSERT_EQUAL_64(acc_value, x18);
3441 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
3442 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
3443 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
3444 }
3445}
3446
3447template <typename T>
3448static void QIncHelper(Test* config,
3449 CntFn cnt,
3450 int multiplier,
3451 int lane_size_in_bits,
3452 T acc_value) {
3453 QIncDecHelper<T>(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3454}
3455
3456template <typename T>
3457static void QDecHelper(Test* config,
3458 CntFn cnt,
3459 int multiplier,
3460 int lane_size_in_bits,
3461 T acc_value) {
3462 QIncDecHelper<T>(config,
3463 cnt,
3464 multiplier,
3465 lane_size_in_bits,
3466 acc_value,
3467 false);
3468}
3469
3470TEST_SVE(sve_sqdecb) {
3471 int64_t bigneg = INT64_MIN + 42;
3472 int64_t bigpos = INT64_MAX - 42;
3473 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
3474 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 2, kBRegSize, bigneg);
3475 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
3476 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecb, 16, kBRegSize, bigpos);
3477}
3478
3479TEST_SVE(sve_sqdech) {
3480 int64_t bigneg = INT64_MIN + 42;
3481 int64_t bigpos = INT64_MAX - 42;
3482 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
3483 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 2, kHRegSize, bigneg);
3484 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
3485 QDecHelper<int64_t>(config, &MacroAssembler::Sqdech, 16, kHRegSize, bigpos);
3486}
3487
3488TEST_SVE(sve_sqdecw) {
3489 int64_t bigneg = INT64_MIN + 42;
3490 int64_t bigpos = INT64_MAX - 42;
3491 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
3492 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 2, kWRegSize, bigneg);
3493 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
3494 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecw, 16, kWRegSize, bigpos);
3495}
3496
3497TEST_SVE(sve_sqdecd) {
3498 int64_t bigneg = INT64_MIN + 42;
3499 int64_t bigpos = INT64_MAX - 42;
3500 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
3501 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 2, kDRegSize, bigneg);
3502 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
3503 QDecHelper<int64_t>(config, &MacroAssembler::Sqdecd, 16, kDRegSize, bigpos);
3504}
3505
3506TEST_SVE(sve_sqincb) {
3507 int64_t bigneg = INT64_MIN + 42;
3508 int64_t bigpos = INT64_MAX - 42;
3509 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
3510 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 2, kBRegSize, bigneg);
3511 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
3512 QIncHelper<int64_t>(config, &MacroAssembler::Sqincb, 16, kBRegSize, bigpos);
3513}
3514
3515TEST_SVE(sve_sqinch) {
3516 int64_t bigneg = INT64_MIN + 42;
3517 int64_t bigpos = INT64_MAX - 42;
3518 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
3519 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 2, kHRegSize, bigneg);
3520 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
3521 QIncHelper<int64_t>(config, &MacroAssembler::Sqinch, 16, kHRegSize, bigpos);
3522}
3523
3524TEST_SVE(sve_sqincw) {
3525 int64_t bigneg = INT64_MIN + 42;
3526 int64_t bigpos = INT64_MAX - 42;
3527 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
3528 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 2, kWRegSize, bigneg);
3529 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
3530 QIncHelper<int64_t>(config, &MacroAssembler::Sqincw, 16, kWRegSize, bigpos);
3531}
3532
3533TEST_SVE(sve_sqincd) {
3534 int64_t bigneg = INT64_MIN + 42;
3535 int64_t bigpos = INT64_MAX - 42;
3536 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
3537 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 2, kDRegSize, bigneg);
3538 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
3539 QIncHelper<int64_t>(config, &MacroAssembler::Sqincd, 16, kDRegSize, bigpos);
3540}
3541
3542TEST_SVE(sve_uqdecb) {
3543 uint32_t big32 = UINT32_MAX - 42;
3544 uint64_t big64 = UINT64_MAX - 42;
3545 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
3546 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
3547 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
3548 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big32);
3549 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 1, kBRegSize, 1);
3550 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 2, kBRegSize, 42);
3551 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 15, kBRegSize, 999);
3552 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecb, 16, kBRegSize, big64);
3553}
3554
3555TEST_SVE(sve_uqdech) {
3556 uint32_t big32 = UINT32_MAX - 42;
3557 uint64_t big64 = UINT64_MAX - 42;
3558 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
3559 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
3560 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
3561 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big32);
3562 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 1, kHRegSize, 1);
3563 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 2, kHRegSize, 42);
3564 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 15, kHRegSize, 999);
3565 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdech, 16, kHRegSize, big64);
3566}
3567
3568TEST_SVE(sve_uqdecw) {
3569 uint32_t big32 = UINT32_MAX - 42;
3570 uint64_t big64 = UINT64_MAX - 42;
3571 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
3572 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
3573 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
3574 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big32);
3575 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 1, kWRegSize, 1);
3576 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 2, kWRegSize, 42);
3577 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 15, kWRegSize, 999);
3578 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecw, 16, kWRegSize, big64);
3579}
3580
3581TEST_SVE(sve_uqdecd) {
3582 uint32_t big32 = UINT32_MAX - 42;
3583 uint64_t big64 = UINT64_MAX - 42;
3584 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
3585 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
3586 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
3587 QDecHelper<uint32_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big32);
3588 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 1, kDRegSize, 1);
3589 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 2, kDRegSize, 42);
3590 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 15, kDRegSize, 999);
3591 QDecHelper<uint64_t>(config, &MacroAssembler::Uqdecd, 16, kDRegSize, big64);
3592}
3593
3594TEST_SVE(sve_uqincb) {
3595 uint32_t big32 = UINT32_MAX - 42;
3596 uint64_t big64 = UINT64_MAX - 42;
3597 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
3598 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
3599 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
3600 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big32);
3601 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 1, kBRegSize, 1);
3602 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 2, kBRegSize, 42);
3603 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 15, kBRegSize, 999);
3604 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincb, 16, kBRegSize, big64);
3605}
3606
3607TEST_SVE(sve_uqinch) {
3608 uint32_t big32 = UINT32_MAX - 42;
3609 uint64_t big64 = UINT64_MAX - 42;
3610 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
3611 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
3612 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
3613 QIncHelper<uint32_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big32);
3614 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 1, kHRegSize, 1);
3615 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 2, kHRegSize, 42);
3616 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 15, kHRegSize, 999);
3617 QIncHelper<uint64_t>(config, &MacroAssembler::Uqinch, 16, kHRegSize, big64);
3618}
3619
3620TEST_SVE(sve_uqincw) {
3621 uint32_t big32 = UINT32_MAX - 42;
3622 uint64_t big64 = UINT64_MAX - 42;
3623 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
3624 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
3625 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
3626 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big32);
3627 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 1, kWRegSize, 1);
3628 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 2, kWRegSize, 42);
3629 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 15, kWRegSize, 999);
3630 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincw, 16, kWRegSize, big64);
3631}
3632
3633TEST_SVE(sve_uqincd) {
3634 uint32_t big32 = UINT32_MAX - 42;
3635 uint64_t big64 = UINT64_MAX - 42;
3636 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
3637 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
3638 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
3639 QIncHelper<uint32_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big32);
3640 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 1, kDRegSize, 1);
3641 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 2, kDRegSize, 42);
3642 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 15, kDRegSize, 999);
3643 QIncHelper<uint64_t>(config, &MacroAssembler::Uqincd, 16, kDRegSize, big64);
3644}
3645
3646typedef void (MacroAssembler::*QIncDecXWFn)(const Register& dst,
3647 const Register& src,
3648 int pattern,
3649 int multiplier);
3650
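// These `<Xdn>, <Wdn>` forms operate on a 32-bit accumulator: the scaled
// element count is added to or subtracted from the value in Wdn, saturated to
// the signed 32-bit range, and the result is sign-extended into Xdn.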
3651static void QIncDecXWHelper(Test* config,
3652 QIncDecXWFn cnt,
3653 int multiplier,
3654 int lane_size_in_bits,
3655 int32_t acc_value,
3656 bool is_increment) {
3657 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3658 START();
3659
3660 // Initialise accumulators.
3661 __ Mov(x0, acc_value);
3662 __ Mov(x1, acc_value);
3663 __ Mov(x2, acc_value);
3664 __ Mov(x3, acc_value);
3665 __ Mov(x4, acc_value);
3666 __ Mov(x5, acc_value);
3667 __ Mov(x6, acc_value);
3668 __ Mov(x7, acc_value);
3669 __ Mov(x8, acc_value);
3670 __ Mov(x9, acc_value);
3671 __ Mov(x10, acc_value);
3672 __ Mov(x11, acc_value);
3673 __ Mov(x12, acc_value);
3674 __ Mov(x13, acc_value);
3675 __ Mov(x14, acc_value);
3676 __ Mov(x15, acc_value);
3677 __ Mov(x18, acc_value);
3678 __ Mov(x19, acc_value);
3679 __ Mov(x20, acc_value);
3680 __ Mov(x21, acc_value);
3681
3682 (masm.*cnt)(x0, w0, SVE_POW2, multiplier);
3683 (masm.*cnt)(x1, w1, SVE_VL1, multiplier);
3684 (masm.*cnt)(x2, w2, SVE_VL2, multiplier);
3685 (masm.*cnt)(x3, w3, SVE_VL3, multiplier);
3686 (masm.*cnt)(x4, w4, SVE_VL4, multiplier);
3687 (masm.*cnt)(x5, w5, SVE_VL5, multiplier);
3688 (masm.*cnt)(x6, w6, SVE_VL6, multiplier);
3689 (masm.*cnt)(x7, w7, SVE_VL7, multiplier);
3690 (masm.*cnt)(x8, w8, SVE_VL8, multiplier);
3691 (masm.*cnt)(x9, w9, SVE_VL16, multiplier);
3692 (masm.*cnt)(x10, w10, SVE_VL32, multiplier);
3693 (masm.*cnt)(x11, w11, SVE_VL64, multiplier);
3694 (masm.*cnt)(x12, w12, SVE_VL128, multiplier);
3695 (masm.*cnt)(x13, w13, SVE_VL256, multiplier);
3696 (masm.*cnt)(x14, w14, 16, multiplier);
3697 (masm.*cnt)(x15, w15, 23, multiplier);
3698 (masm.*cnt)(x18, w18, 28, multiplier);
3699 (masm.*cnt)(x19, w19, SVE_MUL4, multiplier);
3700 (masm.*cnt)(x20, w20, SVE_MUL3, multiplier);
3701 (masm.*cnt)(x21, w21, SVE_ALL, multiplier);
3702
3703 END();
3704
3705 if (CAN_RUN()) {
3706 RUN();
3707
3708 int all = core.GetSVELaneCount(lane_size_in_bits);
3709 int pow2 = 1 << HighestSetBitPosition(all);
3710 int mul4 = all - (all % 4);
3711 int mul3 = all - (all % 3);
3712
3713 multiplier = is_increment ? multiplier : -multiplier;
3714
3715 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * pow2), x0);
3716 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(1, all)), x1);
3717 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(2, all)), x2);
3718 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(3, all)), x3);
3719 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(4, all)), x4);
3720 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(5, all)), x5);
3721 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(6, all)), x6);
3722 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(7, all)), x7);
3723 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(8, all)), x8);
3724 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(16, all)), x9);
3725 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(32, all)), x10);
3726 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(64, all)), x11);
3727 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(128, all)), x12);
3728 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * FixedVL(256, all)), x13);
3729 ASSERT_EQUAL_64(acc_value, x14);
3730 ASSERT_EQUAL_64(acc_value, x15);
3731 ASSERT_EQUAL_64(acc_value, x18);
3732 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul4), x19);
3733 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * mul3), x20);
3734 ASSERT_EQUAL_64(QAdd(acc_value, multiplier * all), x21);
3735 }
3736}
3737
3738static void QIncXWHelper(Test* config,
3739 QIncDecXWFn cnt,
3740 int multiplier,
3741 int lane_size_in_bits,
3742 int32_t acc_value) {
3743 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3744}
3745
3746static void QDecXWHelper(Test* config,
3747 QIncDecXWFn cnt,
3748 int multiplier,
3749 int lane_size_in_bits,
3750 int32_t acc_value) {
3751 QIncDecXWHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3752}
3753
3754TEST_SVE(sve_sqdecb_xw) {
3755 QDecXWHelper(config, &MacroAssembler::Sqdecb, 1, kBRegSize, 1);
3756 QDecXWHelper(config, &MacroAssembler::Sqdecb, 2, kBRegSize, INT32_MIN + 42);
3757 QDecXWHelper(config, &MacroAssembler::Sqdecb, 15, kBRegSize, 999);
3758 QDecXWHelper(config, &MacroAssembler::Sqdecb, 16, kBRegSize, INT32_MAX - 42);
3759}
3760
3761TEST_SVE(sve_sqdech_xw) {
3762 QDecXWHelper(config, &MacroAssembler::Sqdech, 1, kHRegSize, 1);
3763 QDecXWHelper(config, &MacroAssembler::Sqdech, 2, kHRegSize, INT32_MIN + 42);
3764 QDecXWHelper(config, &MacroAssembler::Sqdech, 15, kHRegSize, 999);
3765 QDecXWHelper(config, &MacroAssembler::Sqdech, 16, kHRegSize, INT32_MAX - 42);
3766}
3767
3768TEST_SVE(sve_sqdecw_xw) {
3769 QDecXWHelper(config, &MacroAssembler::Sqdecw, 1, kWRegSize, 1);
3770 QDecXWHelper(config, &MacroAssembler::Sqdecw, 2, kWRegSize, INT32_MIN + 42);
3771 QDecXWHelper(config, &MacroAssembler::Sqdecw, 15, kWRegSize, 999);
3772 QDecXWHelper(config, &MacroAssembler::Sqdecw, 16, kWRegSize, INT32_MAX - 42);
3773}
3774
3775TEST_SVE(sve_sqdecd_xw) {
3776 QDecXWHelper(config, &MacroAssembler::Sqdecd, 1, kDRegSize, 1);
3777 QDecXWHelper(config, &MacroAssembler::Sqdecd, 2, kDRegSize, INT32_MIN + 42);
3778 QDecXWHelper(config, &MacroAssembler::Sqdecd, 15, kDRegSize, 999);
3779 QDecXWHelper(config, &MacroAssembler::Sqdecd, 16, kDRegSize, INT32_MAX - 42);
3780}
3781
3782TEST_SVE(sve_sqincb_xw) {
3783 QIncXWHelper(config, &MacroAssembler::Sqincb, 1, kBRegSize, 1);
3784 QIncXWHelper(config, &MacroAssembler::Sqincb, 2, kBRegSize, INT32_MIN + 42);
3785 QIncXWHelper(config, &MacroAssembler::Sqincb, 15, kBRegSize, 999);
3786 QIncXWHelper(config, &MacroAssembler::Sqincb, 16, kBRegSize, INT32_MAX - 42);
3787}
3788
3789TEST_SVE(sve_sqinch_xw) {
3790 QIncXWHelper(config, &MacroAssembler::Sqinch, 1, kHRegSize, 1);
3791 QIncXWHelper(config, &MacroAssembler::Sqinch, 2, kHRegSize, INT32_MIN + 42);
3792 QIncXWHelper(config, &MacroAssembler::Sqinch, 15, kHRegSize, 999);
3793 QIncXWHelper(config, &MacroAssembler::Sqinch, 16, kHRegSize, INT32_MAX - 42);
3794}
3795
3796TEST_SVE(sve_sqincw_xw) {
3797 QIncXWHelper(config, &MacroAssembler::Sqincw, 1, kWRegSize, 1);
3798 QIncXWHelper(config, &MacroAssembler::Sqincw, 2, kWRegSize, INT32_MIN + 42);
3799 QIncXWHelper(config, &MacroAssembler::Sqincw, 15, kWRegSize, 999);
3800 QIncXWHelper(config, &MacroAssembler::Sqincw, 16, kWRegSize, INT32_MAX - 42);
3801}
3802
3803TEST_SVE(sve_sqincd_xw) {
3804 QIncXWHelper(config, &MacroAssembler::Sqincd, 1, kDRegSize, 1);
3805 QIncXWHelper(config, &MacroAssembler::Sqincd, 2, kDRegSize, INT32_MIN + 42);
3806 QIncXWHelper(config, &MacroAssembler::Sqincd, 15, kDRegSize, 999);
3807 QIncXWHelper(config, &MacroAssembler::Sqincd, 16, kDRegSize, INT32_MAX - 42);
3808}
3809
Martyn Capewell8188ddf2019-11-21 17:09:34 +00003810typedef void (MacroAssembler::*IncDecZFn)(const ZRegister& dst,
3811 int pattern,
3812 int multiplier);
3813typedef void (MacroAssembler::*AddSubFn)(const ZRegister& dst,
3814 const ZRegister& src1,
3815 const ZRegister& src2);
3816
3817static void IncDecZHelper(Test* config,
3818 IncDecZFn fn,
3819 CntFn cnt,
3820 AddSubFn addsub,
3821 int multiplier,
3822 int lane_size_in_bits) {
3823 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3824 START();
3825
3826 uint64_t acc_inputs[] = {0x7766554433221100,
3827 0xffffffffffffffff,
3828 0x0000000000000000,
3829 0xffffffff0000ffff,
3830 0x7fffffffffffffff,
3831 0x8000000000000000,
3832 0x7fffffff7fff7fff,
3833 0x8000000080008000};
3834
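  // Fill every Z register with the same repeating pattern. InsrHelper shifts
  // in eight doubleword values per call, so four calls cover the maximum
  // 2048-bit vector length.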
3835 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
3836 for (int j = 0; j < 4; j++) {
3837 InsrHelper(&masm, ZRegister(i, kDRegSize), acc_inputs);
3838 }
3839 }
3840 for (unsigned i = 0; i < 15; i++) {
3841 __ Mov(XRegister(i), 0);
3842 }
3843
3844 (masm.*fn)(z16.WithLaneSize(lane_size_in_bits), SVE_POW2, multiplier);
3845 (masm.*fn)(z17.WithLaneSize(lane_size_in_bits), SVE_VL1, multiplier);
3846 (masm.*fn)(z18.WithLaneSize(lane_size_in_bits), SVE_VL2, multiplier);
3847 (masm.*fn)(z19.WithLaneSize(lane_size_in_bits), SVE_VL3, multiplier);
3848 (masm.*fn)(z20.WithLaneSize(lane_size_in_bits), SVE_VL4, multiplier);
3849 (masm.*fn)(z21.WithLaneSize(lane_size_in_bits), SVE_VL7, multiplier);
3850 (masm.*fn)(z22.WithLaneSize(lane_size_in_bits), SVE_VL8, multiplier);
3851 (masm.*fn)(z23.WithLaneSize(lane_size_in_bits), SVE_VL16, multiplier);
3852 (masm.*fn)(z24.WithLaneSize(lane_size_in_bits), SVE_VL64, multiplier);
3853 (masm.*fn)(z25.WithLaneSize(lane_size_in_bits), SVE_VL256, multiplier);
3854 (masm.*fn)(z26.WithLaneSize(lane_size_in_bits), 16, multiplier);
3855 (masm.*fn)(z27.WithLaneSize(lane_size_in_bits), 28, multiplier);
3856 (masm.*fn)(z28.WithLaneSize(lane_size_in_bits), SVE_MUL3, multiplier);
3857 (masm.*fn)(z29.WithLaneSize(lane_size_in_bits), SVE_MUL4, multiplier);
3858 (masm.*fn)(z30.WithLaneSize(lane_size_in_bits), SVE_ALL, multiplier);
3859
3860 // Perform computation using alternative instructions.
3861 (masm.*cnt)(x0, SVE_POW2, multiplier);
3862 (masm.*cnt)(x1, SVE_VL1, multiplier);
3863 (masm.*cnt)(x2, SVE_VL2, multiplier);
3864 (masm.*cnt)(x3, SVE_VL3, multiplier);
3865 (masm.*cnt)(x4, SVE_VL4, multiplier);
3866 (masm.*cnt)(x5, SVE_VL7, multiplier);
3867 (masm.*cnt)(x6, SVE_VL8, multiplier);
3868 (masm.*cnt)(x7, SVE_VL16, multiplier);
3869 (masm.*cnt)(x8, SVE_VL64, multiplier);
3870 (masm.*cnt)(x9, SVE_VL256, multiplier);
3871 (masm.*cnt)(x10, 16, multiplier);
3872 (masm.*cnt)(x11, 28, multiplier);
3873 (masm.*cnt)(x12, SVE_MUL3, multiplier);
3874 (masm.*cnt)(x13, SVE_MUL4, multiplier);
3875 (masm.*cnt)(x14, SVE_ALL, multiplier);
3876
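  // Each xN now holds the element count for the corresponding pattern;
  // broadcast it and apply the matching (possibly saturating) add or subtract
  // so that z0-z14 become reference values for the z16-z30 results.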
3877 ZRegister zscratch = z15.WithLaneSize(lane_size_in_bits);
3878 for (unsigned i = 0; i < 15; i++) {
3879 ZRegister zsrcdst = ZRegister(i, lane_size_in_bits);
3880 Register x = Register(i, kXRegSize);
3881 __ Dup(zscratch, x);
3882 (masm.*addsub)(zsrcdst, zsrcdst, zscratch);
3883 }
3884
3885 END();
3886
3887 if (CAN_RUN()) {
3888 RUN();
3889
3890 ASSERT_EQUAL_SVE(z0, z16);
3891 ASSERT_EQUAL_SVE(z1, z17);
3892 ASSERT_EQUAL_SVE(z2, z18);
3893 ASSERT_EQUAL_SVE(z3, z19);
3894 ASSERT_EQUAL_SVE(z4, z20);
3895 ASSERT_EQUAL_SVE(z5, z21);
3896 ASSERT_EQUAL_SVE(z6, z22);
3897 ASSERT_EQUAL_SVE(z7, z23);
3898 ASSERT_EQUAL_SVE(z8, z24);
3899 ASSERT_EQUAL_SVE(z9, z25);
3900 ASSERT_EQUAL_SVE(z10, z26);
3901 ASSERT_EQUAL_SVE(z11, z27);
3902 ASSERT_EQUAL_SVE(z12, z28);
3903 ASSERT_EQUAL_SVE(z13, z29);
3904 ASSERT_EQUAL_SVE(z14, z30);
3905 }
3906}
3907
3908TEST_SVE(sve_inc_dec_vec) {
3909 CntFn cnth = &MacroAssembler::Cnth;
3910 CntFn cntw = &MacroAssembler::Cntw;
3911 CntFn cntd = &MacroAssembler::Cntd;
3912 AddSubFn sub = &MacroAssembler::Sub;
3913 AddSubFn add = &MacroAssembler::Add;
3914 for (int mult = 1; mult <= 16; mult += 5) {
3915 IncDecZHelper(config, &MacroAssembler::Dech, cnth, sub, mult, kHRegSize);
3916 IncDecZHelper(config, &MacroAssembler::Decw, cntw, sub, mult, kSRegSize);
3917 IncDecZHelper(config, &MacroAssembler::Decd, cntd, sub, mult, kDRegSize);
3918 IncDecZHelper(config, &MacroAssembler::Inch, cnth, add, mult, kHRegSize);
3919 IncDecZHelper(config, &MacroAssembler::Incw, cntw, add, mult, kSRegSize);
3920 IncDecZHelper(config, &MacroAssembler::Incd, cntd, add, mult, kDRegSize);
3921 }
3922}
3923
3924TEST_SVE(sve_unsigned_sat_inc_dec_vec) {
3925 CntFn cnth = &MacroAssembler::Cnth;
3926 CntFn cntw = &MacroAssembler::Cntw;
3927 CntFn cntd = &MacroAssembler::Cntd;
3928 AddSubFn sub = &MacroAssembler::Uqsub;
3929 AddSubFn add = &MacroAssembler::Uqadd;
3930 for (int mult = 1; mult <= 16; mult += 5) {
3931 IncDecZHelper(config, &MacroAssembler::Uqdech, cnth, sub, mult, kHRegSize);
3932 IncDecZHelper(config, &MacroAssembler::Uqdecw, cntw, sub, mult, kSRegSize);
3933 IncDecZHelper(config, &MacroAssembler::Uqdecd, cntd, sub, mult, kDRegSize);
3934 IncDecZHelper(config, &MacroAssembler::Uqinch, cnth, add, mult, kHRegSize);
3935 IncDecZHelper(config, &MacroAssembler::Uqincw, cntw, add, mult, kSRegSize);
3936 IncDecZHelper(config, &MacroAssembler::Uqincd, cntd, add, mult, kDRegSize);
3937 }
3938}
3939
3940TEST_SVE(sve_signed_sat_inc_dec_vec) {
3941 CntFn cnth = &MacroAssembler::Cnth;
3942 CntFn cntw = &MacroAssembler::Cntw;
3943 CntFn cntd = &MacroAssembler::Cntd;
3944 AddSubFn sub = &MacroAssembler::Sqsub;
3945 AddSubFn add = &MacroAssembler::Sqadd;
3946 for (int mult = 1; mult <= 16; mult += 5) {
3947 IncDecZHelper(config, &MacroAssembler::Sqdech, cnth, sub, mult, kHRegSize);
3948 IncDecZHelper(config, &MacroAssembler::Sqdecw, cntw, sub, mult, kSRegSize);
3949 IncDecZHelper(config, &MacroAssembler::Sqdecd, cntd, sub, mult, kDRegSize);
3950 IncDecZHelper(config, &MacroAssembler::Sqinch, cnth, add, mult, kHRegSize);
3951 IncDecZHelper(config, &MacroAssembler::Sqincw, cntw, add, mult, kSRegSize);
3952 IncDecZHelper(config, &MacroAssembler::Sqincd, cntd, add, mult, kDRegSize);
3953 }
3954}
3955
TatWai Chong7a0d3672019-10-23 17:35:18 -07003956typedef void (MacroAssembler::*ArithPredicatedFn)(const ZRegister& zd,
3957 const PRegisterM& pg,
3958 const ZRegister& zn,
3959 const ZRegister& zm);
TatWai Chong13634762019-07-16 16:20:45 -07003960
3961template <typename Td, typename Tg, typename Tn>
3962static void IntBinArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07003963 ArithPredicatedFn macro,
TatWai Chong13634762019-07-16 16:20:45 -07003964 unsigned lane_size_in_bits,
3965 const Tg& pg_inputs,
3966 const Tn& zn_inputs,
3967 const Tn& zm_inputs,
3968 const Td& zd_expected) {
3969 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3970 START();
3971
3972 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
3973 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
3974 InsrHelper(&masm, src_a, zn_inputs);
3975 InsrHelper(&masm, src_b, zm_inputs);
3976
3977 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
3978
3979 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
3980 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
3981 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
3982
3983 // `instr` zd(dst), zd(src_a), zn(src_b)
3984 __ Mov(zd_1, src_a);
3985 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
3986
3987 // `instr` zd(dst), zm(src_a), zd(src_b)
3988 // If zd and zm are aliased, the MacroAssembler (`Instr`) swaps the operand
3989 // order when the operation is commutative; otherwise it falls back to the
3990 // reversed form of the instruction, such as subr or divr.
3991 __ Mov(zd_2, src_b);
3992 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
3993
3994 // `instr` zd(dst), zm(src_a), zn(src_b)
3995 // The MacroAssembler (`Instr`) automatically selects between `instr` and
3996 // movprfx + `instr`, based on whether zd and zn registers are aliased.
3997 // A generated movprfx instruction is predicated, using the same governing
3998 // predicate register. To keep the inactive lanes predictable, initialise
3999 // the destination register first.
4000 __ Mov(zd_3, src_a);
4001 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
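  // For illustration, with three distinct registers the macro may emit a
  // predicated prefix, e.g. `Add(z2, p0.Merging(), z0, z1)` becoming
  // `movprfx z2.s, p0/m, z0.s` followed by `add z2.s, p0/m, z2.s, z1.s`.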
4002
4003 END();
4004
4005 if (CAN_RUN()) {
4006 RUN();
4007 ASSERT_EQUAL_SVE(zd_expected, zd_1);
4008
4009 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
4010 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
4011 if (!core.HasSVELane(zd_1, lane)) break;
TatWai Chongd316c5e2019-10-16 12:22:10 -07004012 if ((pg_inputs[i] & 1) != 0) {
4013 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_2, lane);
4014 } else {
4015 ASSERT_EQUAL_SVE_LANE(zm_inputs[i], zd_2, lane);
4016 }
4017 }
4018
4019 ASSERT_EQUAL_SVE(zd_expected, zd_3);
4020 }
4021}
4022
4023TEST_SVE(sve_binary_arithmetic_predicated_add) {
4024 // clang-format off
4025 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
4026
4027 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
4028
4029 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
4030
4031 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
4032
4033 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
4034 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
4035
4036 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
4037 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
4038
4039 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
4040 0x1010101010101010, 0x8181818181818181,
4041 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
4042 0x0101010101010101, 0x7f7f7f7fffffffff};
4043
4044 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
4045 0x1010101010101010, 0x0000000000000000,
4046 0x8181818181818181, 0x8080808080808080,
4047 0xffffffffffffffff, 0xffffffffffffffff};
4048
4049 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4050 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4051 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4052 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4053
4054 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
4055
4056 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
4057 0x8180, 0x8f8f, 0x0101, 0x7f7e};
4058
4059 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
4060 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
4061
4062 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
4063 0x2020202020202020, 0x8181818181818181,
4064 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
4065 0x0101010101010100, 0x7f7f7f7ffffffffe};
4066
TatWai Chong7a0d3672019-10-23 17:35:18 -07004067 ArithPredicatedFn fn = &MacroAssembler::Add;
TatWai Chong13634762019-07-16 16:20:45 -07004068 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
4069 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
4070 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
4071 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
4072
4073 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
4074
4075 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
4076 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
4077
4078 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
4079 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
4080
4081 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
4082 0x0000000000000000, 0x8181818181818181,
4083 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
4084 0x0101010101010102, 0x7f7f7f8000000000};
4085
4086 fn = &MacroAssembler::Sub;
4087 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
4088 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
4089 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
4090 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
4091 // clang-format on
4092}
4093
4094TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
4095 // clang-format off
4096 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
4097
4098 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
4099
4100 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
4101 0xff00, 0xba98, 0x5555, 0x4567};
4102
4103 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
4104 0xfe00, 0xabab, 0xcdcd, 0x5678};
4105
4106 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
4107 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
4108
4109 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
4110 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
4111
4112 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
4113 0x5555555555555555, 0x0000000001234567};
4114
4115 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
4116 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4117
4118 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4119 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4120 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4121 int pg_d[] = {1, 0, 1, 1};
4122
4123 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
4124
4125 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
4126 0xff00, 0xba98, 0x5555, 0x5678};
4127
4128 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
4129 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
4130
4131 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4132 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
4133
TatWai Chong7a0d3672019-10-23 17:35:18 -07004134 ArithPredicatedFn fn = &MacroAssembler::Umax;
TatWai Chong13634762019-07-16 16:20:45 -07004135 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
4136 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
4137 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
4138 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
4139
4140 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
4141
4142 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
4143 0xfe00, 0xabab, 0x5555, 0x4567};
4144
4145 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
4146 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
4147
4148 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
4149 0x5555555555555555, 0x0000000001234567};
4150 fn = &MacroAssembler::Umin;
4151 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
4152 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
4153 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
4154 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
4155
4156 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
4157
4158 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
4159 0x0100, 0x0eed, 0x5555, 0x1111};
4160
4161 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
4162 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
4163
4164 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
4165 0x7878787878787878, 0x0000000011111111};
4166
4167 fn = &MacroAssembler::Uabd;
4168 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
4169 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
4170 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
4171 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
4172 // clang-format on
4173}
4174
4175TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
4176 // clang-format off
4177 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
4178
4179 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
4180
4181 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
4182 INT16_MIN, INT16_MAX, INT16_MAX, 1};
4183
4184 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
4185 INT16_MAX, INT16_MAX - 1, -1, 0};
4186
4187 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
4188 INT32_MIN, INT32_MAX, INT32_MAX, 1};
4189
4190 int zm_s[] = {-1, 0, -1, -INT32_MAX,
4191 INT32_MAX, INT32_MAX - 1, -1, 0};
4192
4193 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4194 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4195
4196 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
4197 INT64_MAX, INT64_MAX - 1, -1, 0};
4198
4199 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
4200 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
4201 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
4202 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
4203
4204 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
4205
4206 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
4207 INT16_MAX, INT16_MAX, INT16_MAX, 1};
4208
4209 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
4210 INT32_MAX, INT32_MAX, INT32_MAX, 1};
4211
4212 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
4213 INT64_MIN, INT64_MAX, INT64_MAX, 1};
4214
TatWai Chong7a0d3672019-10-23 17:35:18 -07004215 ArithPredicatedFn fn = &MacroAssembler::Smax;
TatWai Chong13634762019-07-16 16:20:45 -07004216 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
4217 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
4218 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
4219 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
4220
4221 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
4222
4223 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
4224 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
4225
4226 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
4227 INT32_MIN, INT32_MAX, -1, 0};
4228
4229 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
4230 INT64_MIN, INT64_MAX - 1, -1, 0};
4231
4232 fn = &MacroAssembler::Smin;
4233 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
4234 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
4235 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
4236 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
4237
4238 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
4239
4240 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
4241
4242 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
4243 0xffffffff, 0x7fffffff, 0x80000000, 1};
4244
4245 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
4246 0x8000000000000000, 1, 0x8000000000000000, 1};
4247
4248 fn = &MacroAssembler::Sabd;
4249 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
4250 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
4251 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
4252 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
4253 // clang-format on
4254}
4255
4256TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
4257 // clang-format off
4258 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4259
4260 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4261
4262 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
4263 0x8000, 0xff00, 0x5555, 0xaaaa};
4264
4265 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
4266 0x5555, 0xaaaa, 0x0001, 0x1234};
4267
4268 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4269 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
4270
4271 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
4272 0x12345678, 0x22223333, 0x55556666, 0x77778888};
4273
4274 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
4275 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
4276
4277 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
4278 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
4279
4280 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4281 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4282 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4283 int pg_d[] = {1, 1, 0, 1};
4284
4285 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
4286
4287 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
4288 0x8000, 0xff00, 0x5555, 0x9e88};
4289
4290 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
4291 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
4292
4293 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
4294 0xffffffffffffffff, 0x38e38e38e38e38e4};
4295
TatWai Chong7a0d3672019-10-23 17:35:18 -07004296 ArithPredicatedFn fn = &MacroAssembler::Mul;
TatWai Chong13634762019-07-16 16:20:45 -07004297 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
4298 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
4299 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
4300 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
4301
4302 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
4303
4304 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
4305 0x2aaa, 0xff00, 0x0000, 0x0c22};
4306
4307 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
4308 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
4309
4310 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
4311 0xffffffffffffffff, 0x71c71c71c71c71c6};
4312
4313 fn = &MacroAssembler::Umulh;
4314 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
4315 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
4316 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
4317 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
4318 // clang-format on
4319}
4320
4321TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
4322 // clang-format off
4323 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
4324
4325 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
4326
4327 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
4328
4329 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
4330
4331 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
4332
4333 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
4334
4335 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
4336
4337 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
4338
4339 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4340 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4341 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
4342 int pg_d[] = {1, 1, 0, 1};
4343
4344 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
4345
4346 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
4347
4348 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
4349
4350 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
4351
TatWai Chong7a0d3672019-10-23 17:35:18 -07004352 ArithPredicatedFn fn = &MacroAssembler::Smulh;
TatWai Chong13634762019-07-16 16:20:45 -07004353 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
4354 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
4355 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4356 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4357 // clang-format on
4358}
4359
4360TEST_SVE(sve_binary_arithmetic_predicated_logical) {
4361 // clang-format off
4362 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
4363 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
4364
4365 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
4366 0x8000, 0xffff, 0x5555, 0xaaaa};
4367 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
4368 0x5555, 0xaaaa, 0x0000, 0x0800};
4369
4370 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
4371 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
4372
4373 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
4374 0x0001200880ff55aa, 0x0022446688aaccee};
4375 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
4376 0x7fcd80ff55aa0008, 0x1133557799bbddff};
4377
4378 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
4379 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
4380 int pg_s[] = {1, 1, 1, 0};
4381 int pg_d[] = {1, 1, 0, 1};
4382
4383 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
4384
4385 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
4386 0x0000, 0xffff, 0x0000, 0x0800};
4387
4388 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
4389
4390 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
4391 0x0001200880ff55aa, 0x0022446688aaccee};
4392
TatWai Chong7a0d3672019-10-23 17:35:18 -07004393 ArithPredicatedFn fn = &MacroAssembler::And;
TatWai Chong13634762019-07-16 16:20:45 -07004394 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
4395 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
4396 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
4397 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
4398
4399 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
4400
4401 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
4402 0x8000, 0xffff, 0x5555, 0xa2aa};
4403
4404 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
4405
4406 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
4407 0x0001200880ff55aa, 0x0000000000000000};
4408
4409 fn = &MacroAssembler::Bic;
4410 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
4411 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
4412 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
4413 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
4414
4415 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
4416
4417 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
4418 0xd555, 0xffff, 0x5555, 0xa2aa};
4419
4420 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
4421
4422 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
4423 0x0001200880ff55aa, 0x1111111111111111};
4424
4425 fn = &MacroAssembler::Eor;
4426 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
4427 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
4428 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
4429 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
4430
4431 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
4432
4433 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
4434 0xd555, 0xffff, 0x5555, 0xaaaa};
4435
4436 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
4437
4438 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
4439 0x0001200880ff55aa, 0x1133557799bbddff};
4440
4441 fn = &MacroAssembler::Orr;
4442 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
4443 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
4444 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
4445 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
4446 // clang-format on
4447}
4448
4449TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
4450 // clang-format off
4451 int zn_s[] = {0, 1, -1, 2468,
4452 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
4453 -11111111, 87654321, 0, 0};
4454
4455 int zm_s[] = {1, -1, 1, 1234,
4456 -1, INT32_MIN, 1, -1,
4457 22222222, 80000000, -1, 0};
4458
4459 int64_t zn_d[] = {0, 1, -1, 2468,
4460 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
4461 -11111111, 87654321, 0, 0};
4462
4463 int64_t zm_d[] = {1, -1, 1, 1234,
4464 -1, INT64_MIN, 1, -1,
4465 22222222, 80000000, -1, 0};
4466
4467 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
4468 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
4469
4470 int exp_s[] = {0, 1, -1, 2,
4471 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
4472 0, 1, 0, 0};
4473
4474 int64_t exp_d[] = {0, -1, -1, 2,
4475 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
4476 0, 1, 0, 0};
4477
TatWai Chong7a0d3672019-10-23 17:35:18 -07004478 ArithPredicatedFn fn = &MacroAssembler::Sdiv;
TatWai Chong13634762019-07-16 16:20:45 -07004479 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4480 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4481 // clang-format on
4482}
4483
4484TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
4485 // clang-format off
4486 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
4487 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
4488
4489 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
4490 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
4491
4492 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
4493 0xffffffffffffffff, 0x8000000000000000,
4494 0xffffffffffffffff, 0x8000000000000000,
4495 0xffffffffffffffff, 0xf0000000f0000000};
4496
4497 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
4498 0x8000000000000000, 0x0000000000000002,
4499 0x8888888888888888, 0x0000000000000001,
4500 0x0000000080000000, 0x00000000f0000000};
4501
4502 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
4503 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
4504
4505 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
4506 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
4507
4508 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
4509 0x0000000000000001, 0x4000000000000000,
4510 0x0000000000000001, 0x8000000000000000,
4511 0xffffffffffffffff, 0x0000000100000001};
4512
TatWai Chong7a0d3672019-10-23 17:35:18 -07004513 ArithPredicatedFn fn = &MacroAssembler::Udiv;
TatWai Chong13634762019-07-16 16:20:45 -07004514 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
4515 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
4516 // clang-format on
4517}
4518
TatWai Chong7a0d3672019-10-23 17:35:18 -07004519typedef void (MacroAssembler::*ArithFn)(const ZRegister& zd,
4520 const ZRegister& zn,
4521 const ZRegister& zm);
TatWai Chong845246b2019-08-08 00:01:58 -07004522
4523template <typename T>
4524static void IntArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07004525 ArithFn macro,
TatWai Chong845246b2019-08-08 00:01:58 -07004526 unsigned lane_size_in_bits,
4527 const T& zn_inputs,
4528 const T& zm_inputs,
4529 const T& zd_expected) {
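  // A minimal harness for unpredicated binary arithmetic: load `zn_inputs` and
  // `zm_inputs` into Z registers, apply `macro` (e.g. Add, Sqadd, Sub), and
  // check every lane of the destination against `zd_expected`.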
4530 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4531 START();
4532
4533 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
4534 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
4535 InsrHelper(&masm, zn, zn_inputs);
4536 InsrHelper(&masm, zm, zm_inputs);
4537
4538 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
4539 (masm.*macro)(zd, zn, zm);
4540
4541 END();
4542
4543 if (CAN_RUN()) {
4544 RUN();
4545 ASSERT_EQUAL_SVE(zd_expected, zd);
4546 }
4547}
4548
4549TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
4550 // clang-format off
TatWai Chong6995bfd2019-09-26 10:48:05 +01004551 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
4552 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
4553 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
4554 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
TatWai Chong845246b2019-08-08 00:01:58 -07004555 0x1000000010001010, 0xf0000000f000f0f0};
4556
TatWai Chong7a0d3672019-10-23 17:35:18 -07004557 ArithFn fn = &MacroAssembler::Add;
TatWai Chong845246b2019-08-08 00:01:58 -07004558
4559 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
4560 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
4561 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
4562 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
4563 0x2000000020002020, 0xe0000001e001e1e0};
4564
TatWai Chong6995bfd2019-09-26 10:48:05 +01004565 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
4566 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
4567 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
4568 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004569
4570 fn = &MacroAssembler::Sqadd;
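  // Unlike `Add`, `Sqadd` saturates at the signed limits; for example, in the
  // B lanes 0x81 + 0x81 (-127 + -127) produces 0x80 (-128) instead of
  // wrapping to 0x02.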
4571
4572 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
4573 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
4574 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
4575 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
4576 0x2000000020002020, 0xe0000001e001e1e0};
4577
TatWai Chong6995bfd2019-09-26 10:48:05 +01004578 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
4579 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
4580 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
4581 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004582
4583 fn = &MacroAssembler::Uqadd;
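  // `Uqadd` saturates at the unsigned maximum, so 0x81 + 0x81 produces 0xff.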
4584
4585 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
4586 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
4587 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
4588 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
4589 0x2000000020002020, 0xffffffffffffffff};
4590
TatWai Chong6995bfd2019-09-26 10:48:05 +01004591 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
4592 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
4593 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
4594 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004595 // clang-format on
4596}
4597
4598TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
4599 // clang-format off
4600
4601 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
4602 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
4603
4604 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
4605 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
4606
4607 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
4608 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
4609
4610 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
4611 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
4612 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
4613 0xf0000000f000f0f0, 0x5555555555555555};
4614
TatWai Chong7a0d3672019-10-23 17:35:18 -07004615 ArithFn fn = &MacroAssembler::Sub;
TatWai Chong845246b2019-08-08 00:01:58 -07004616
4617 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
4618 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
4619 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
4620 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
4621 0x8eeeeeed8eed8d8e, 0x5555555555555555};
4622
4623 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
4624 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
4625 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
4626 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
4627
4628 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
4629 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
4630 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
4631 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
4632 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
4633
4634 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
4635 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
4636 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
4637 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
4638
4639 fn = &MacroAssembler::Sqsub;
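  // `Sqsub` saturates on signed overflow; for example, 0x81 - 0x10
  // (-127 - 16) produces 0x80 (-128).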
4640
4641 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
4642 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
4643 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
4644 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
4645 0x7fffffffffffffff, 0x8000000000000000};
4646
4647 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
4648 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
4649 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
4650 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
4651
4652 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
4653 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
4654 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
4655 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
4656 0x8000000000000000, 0x7fffffffffffffff};
4657
4658 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
4659 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
4660 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
4661 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
4662
4663 fn = &MacroAssembler::Uqsub;
4664
4665 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
4666 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
4667 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
4668 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
4669 0x0000000000000000, 0x5555555555555555};
4670
4671 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
4672 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
4673 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
4674 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
4675
4676 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
4677 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
4678 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
4679 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
4680 0x7111111271127272, 0x0000000000000000};
4681
4682 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
4683 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
4684 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
4685 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
4686 // clang-format on
4687}
4688
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004689TEST_SVE(sve_rdvl) {
4690 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4691 START();
4692
4693 // Encodable multipliers.
4694 __ Rdvl(x0, 0);
4695 __ Rdvl(x1, 1);
4696 __ Rdvl(x2, 2);
4697 __ Rdvl(x3, 31);
4698 __ Rdvl(x4, -1);
4699 __ Rdvl(x5, -2);
4700 __ Rdvl(x6, -32);
4701
4702 // For unencodable multipliers, the MacroAssembler uses a sequence of
4703 // instructions.
4704 __ Rdvl(x10, 32);
4705 __ Rdvl(x11, -33);
4706 __ Rdvl(x12, 42);
4707 __ Rdvl(x13, -42);
4708
4709 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4710 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4711 // occurs in the macro.
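  // For example, 0x007fffffffffffff * 256 = 0x7fffffffffffff00, which still
  // fits in a signed 64-bit result.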
4712 __ Rdvl(x14, 0x007fffffffffffff);
4713 __ Rdvl(x15, -0x0080000000000000);
4714
4715 END();
4716
4717 if (CAN_RUN()) {
4718 RUN();
4719
4720 uint64_t vl = config->sve_vl_in_bytes();
4721
4722 ASSERT_EQUAL_64(vl * 0, x0);
4723 ASSERT_EQUAL_64(vl * 1, x1);
4724 ASSERT_EQUAL_64(vl * 2, x2);
4725 ASSERT_EQUAL_64(vl * 31, x3);
4726 ASSERT_EQUAL_64(vl * -1, x4);
4727 ASSERT_EQUAL_64(vl * -2, x5);
4728 ASSERT_EQUAL_64(vl * -32, x6);
4729
4730 ASSERT_EQUAL_64(vl * 32, x10);
4731 ASSERT_EQUAL_64(vl * -33, x11);
4732 ASSERT_EQUAL_64(vl * 42, x12);
4733 ASSERT_EQUAL_64(vl * -42, x13);
4734
4735 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
4736 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
4737 }
4738}
4739
4740TEST_SVE(sve_rdpl) {
4741 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4742 START();
4743
4744 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
4745 // Addpl(xd, xzr, ...).
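  // For example, `Rdpl(x1, 8)` behaves like `Addpl(x1, xzr, 8)`, producing
  // 8 * PL, where PL = VL / 8 (in bytes).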
4746
4747 // Encodable multipliers (as `addvl`).
4748 __ Rdpl(x0, 0);
4749 __ Rdpl(x1, 8);
4750 __ Rdpl(x2, 248);
4751 __ Rdpl(x3, -8);
4752 __ Rdpl(x4, -256);
4753
4754 // Encodable multipliers (as `movz` + `addpl`).
4755 __ Rdpl(x7, 31);
Jacob Bramley889984c2019-10-28 17:28:48 +00004756 __ Rdpl(x8, -31);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004757
4758 // For unencodable multipliers, the MacroAssembler uses a sequence of
4759 // instructions.
4760 __ Rdpl(x10, 42);
4761 __ Rdpl(x11, -42);
4762
4763 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4764 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4765 // occurs in the macro.
4766 __ Rdpl(x12, 0x007fffffffffffff);
4767 __ Rdpl(x13, -0x0080000000000000);
4768
4769 END();
4770
4771 if (CAN_RUN()) {
4772 RUN();
4773
4774 uint64_t vl = config->sve_vl_in_bytes();
4775 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4776 uint64_t pl = vl / kZRegBitsPerPRegBit;
4777
4778 ASSERT_EQUAL_64(pl * 0, x0);
4779 ASSERT_EQUAL_64(pl * 8, x1);
4780 ASSERT_EQUAL_64(pl * 248, x2);
4781 ASSERT_EQUAL_64(pl * -8, x3);
4782 ASSERT_EQUAL_64(pl * -256, x4);
4783
4784 ASSERT_EQUAL_64(pl * 31, x7);
Jacob Bramley889984c2019-10-28 17:28:48 +00004785 ASSERT_EQUAL_64(pl * -31, x8);
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004786
4787 ASSERT_EQUAL_64(pl * 42, x10);
4788 ASSERT_EQUAL_64(pl * -42, x11);
4789
4790 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
4791 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
4792 }
4793}
4794
4795TEST_SVE(sve_addvl) {
4796 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4797 START();
4798
4799 uint64_t base = 0x1234567800000000;
4800 __ Mov(x30, base);
4801
4802 // Encodable multipliers.
4803 __ Addvl(x0, x30, 0);
4804 __ Addvl(x1, x30, 1);
4805 __ Addvl(x2, x30, 31);
4806 __ Addvl(x3, x30, -1);
4807 __ Addvl(x4, x30, -32);
4808
4809 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
4810 __ Addvl(x5, x30, 32);
4811 __ Addvl(x6, x30, -33);
4812
4813 // Test the limits of the multiplier supported by the `Rdvl` macro.
4814 __ Addvl(x7, x30, 0x007fffffffffffff);
4815 __ Addvl(x8, x30, -0x0080000000000000);
4816
4817 // Check that xzr behaves correctly.
4818 __ Addvl(x9, xzr, 8);
4819 __ Addvl(x10, xzr, 42);
4820
4821 // Check that sp behaves correctly with encodable and unencodable multipliers.
4822 __ Addvl(sp, sp, -5);
4823 __ Addvl(sp, sp, -37);
4824 __ Addvl(x11, sp, -2);
4825 __ Addvl(sp, x11, 2);
4826 __ Addvl(x12, sp, -42);
4827
4828 // Restore the value of sp.
4829 __ Addvl(sp, x11, 39);
4830 __ Addvl(sp, sp, 5);
4831
4832 // Adjust x11 and x12 to make the test sp-agnostic.
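  // At this point x11 = sp - (44 * VL) and x12 = sp - (84 * VL), so the
  // subtractions below leave 44 * VL and 84 * VL respectively.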
4833 __ Sub(x11, sp, x11);
4834 __ Sub(x12, sp, x12);
4835
4836 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4837 __ Mov(x20, x30);
4838 __ Mov(x21, x30);
4839 __ Mov(x22, x30);
4840 __ Addvl(x20, x20, 4);
4841 __ Addvl(x21, x21, 42);
4842 __ Addvl(x22, x22, -0x0080000000000000);
4843
4844 END();
4845
4846 if (CAN_RUN()) {
4847 RUN();
4848
4849 uint64_t vl = config->sve_vl_in_bytes();
4850
4851 ASSERT_EQUAL_64(base + (vl * 0), x0);
4852 ASSERT_EQUAL_64(base + (vl * 1), x1);
4853 ASSERT_EQUAL_64(base + (vl * 31), x2);
4854 ASSERT_EQUAL_64(base + (vl * -1), x3);
4855 ASSERT_EQUAL_64(base + (vl * -32), x4);
4856
4857 ASSERT_EQUAL_64(base + (vl * 32), x5);
4858 ASSERT_EQUAL_64(base + (vl * -33), x6);
4859
4860 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
4861 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
4862
4863 ASSERT_EQUAL_64(vl * 8, x9);
4864 ASSERT_EQUAL_64(vl * 42, x10);
4865
4866 ASSERT_EQUAL_64(vl * 44, x11);
4867 ASSERT_EQUAL_64(vl * 84, x12);
4868
4869 ASSERT_EQUAL_64(base + (vl * 4), x20);
4870 ASSERT_EQUAL_64(base + (vl * 42), x21);
4871 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
4872
4873 ASSERT_EQUAL_64(base, x30);
4874 }
4875}
4876
4877TEST_SVE(sve_addpl) {
4878 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4879 START();
4880
4881 uint64_t base = 0x1234567800000000;
4882 __ Mov(x30, base);
4883
4884 // Encodable multipliers.
4885 __ Addpl(x0, x30, 0);
4886 __ Addpl(x1, x30, 1);
4887 __ Addpl(x2, x30, 31);
4888 __ Addpl(x3, x30, -1);
4889 __ Addpl(x4, x30, -32);
4890
4891 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
4892 // it falls back to `Rdvl` and `Add`.
4893 __ Addpl(x5, x30, 32);
4894 __ Addpl(x6, x30, -33);
4895
4896 // Test the limits of the multiplier supported by the `Rdvl` macro.
4897 __ Addpl(x7, x30, 0x007fffffffffffff);
4898 __ Addpl(x8, x30, -0x0080000000000000);
4899
4900 // Check that xzr behaves correctly.
4901 __ Addpl(x9, xzr, 8);
4902 __ Addpl(x10, xzr, 42);
4903
4904 // Check that sp behaves correctly with encodable and unencodable multipliers.
4905 __ Addpl(sp, sp, -5);
4906 __ Addpl(sp, sp, -37);
4907 __ Addpl(x11, sp, -2);
4908 __ Addpl(sp, x11, 2);
4909 __ Addpl(x12, sp, -42);
4910
4911 // Restore the value of sp.
4912 __ Addpl(sp, x11, 39);
4913 __ Addpl(sp, sp, 5);
4914
4915 // Adjust x11 and x12 to make the test sp-agnostic.
4916 __ Sub(x11, sp, x11);
4917 __ Sub(x12, sp, x12);
4918
4919 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4920 __ Mov(x20, x30);
4921 __ Mov(x21, x30);
4922 __ Mov(x22, x30);
4923 __ Addpl(x20, x20, 4);
4924 __ Addpl(x21, x21, 42);
4925 __ Addpl(x22, x22, -0x0080000000000000);
4926
4927 END();
4928
4929 if (CAN_RUN()) {
4930 RUN();
4931
4932 uint64_t vl = config->sve_vl_in_bytes();
4933 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4934 uint64_t pl = vl / kZRegBitsPerPRegBit;
4935
4936 ASSERT_EQUAL_64(base + (pl * 0), x0);
4937 ASSERT_EQUAL_64(base + (pl * 1), x1);
4938 ASSERT_EQUAL_64(base + (pl * 31), x2);
4939 ASSERT_EQUAL_64(base + (pl * -1), x3);
4940 ASSERT_EQUAL_64(base + (pl * -32), x4);
4941
4942 ASSERT_EQUAL_64(base + (pl * 32), x5);
4943 ASSERT_EQUAL_64(base + (pl * -33), x6);
4944
4945 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
4946 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
4947
4948 ASSERT_EQUAL_64(pl * 8, x9);
4949 ASSERT_EQUAL_64(pl * 42, x10);
4950
4951 ASSERT_EQUAL_64(pl * 44, x11);
4952 ASSERT_EQUAL_64(pl * 84, x12);
4953
4954 ASSERT_EQUAL_64(base + (pl * 4), x20);
4955 ASSERT_EQUAL_64(base + (pl * 42), x21);
4956 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
4957
4958 ASSERT_EQUAL_64(base, x30);
4959 }
4960}
4961
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004962TEST_SVE(sve_calculate_sve_address) {
4963 // Shadow the `MacroAssembler` type so that the test macros work without
4964 // modification.
4965 typedef CalculateSVEAddressMacroAssembler MacroAssembler;
4966
Jacob Bramley1314c462019-08-08 10:54:16 +01004967 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004968 START(); // NOLINT(clang-diagnostic-local-type-template-args)
Jacob Bramley1314c462019-08-08 10:54:16 +01004969
4970 uint64_t base = 0x1234567800000000;
4971 __ Mov(x28, base);
4972 __ Mov(x29, 48);
4973 __ Mov(x30, -48);
4974
4975 // Simple scalar (or equivalent) cases.
4976
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004977 __ CalculateSVEAddress(x0, SVEMemOperand(x28));
4978 __ CalculateSVEAddress(x1, SVEMemOperand(x28, 0));
4979 __ CalculateSVEAddress(x2, SVEMemOperand(x28, 0, SVE_MUL_VL));
4980 __ CalculateSVEAddress(x3, SVEMemOperand(x28, 0, SVE_MUL_VL), 3);
4981 __ CalculateSVEAddress(x4, SVEMemOperand(x28, xzr));
4982 __ CalculateSVEAddress(x5, SVEMemOperand(x28, xzr, LSL, 42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004983
4984 // scalar-plus-immediate
4985
4986 // Unscaled immediates, handled with `Add`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004987 __ CalculateSVEAddress(x6, SVEMemOperand(x28, 42));
4988 __ CalculateSVEAddress(x7, SVEMemOperand(x28, -42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004989 // Scaled immediates, handled with `Addvl` or `Addpl`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004990 __ CalculateSVEAddress(x8, SVEMemOperand(x28, 31, SVE_MUL_VL), 0);
4991 __ CalculateSVEAddress(x9, SVEMemOperand(x28, -32, SVE_MUL_VL), 0);
Jacob Bramley1314c462019-08-08 10:54:16 +01004992 // Out of `addvl` or `addpl` range.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004993 __ CalculateSVEAddress(x10, SVEMemOperand(x28, 42, SVE_MUL_VL), 0);
4994 __ CalculateSVEAddress(x11, SVEMemOperand(x28, -42, SVE_MUL_VL), 0);
4995 // As above, for VL-based accesses smaller than a Z register.
4996 VIXL_STATIC_ASSERT(kZRegBitsPerPRegBitLog2 == 3);
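  // The trailing argument divides the VL scaling by 8, 4 or 2 (2^3, 2^2, 2^1),
  // so each pair below resolves to the same addresses as the x9 and x11 cases
  // above: base - (32 * VL) and base - (42 * VL).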
4997 __ CalculateSVEAddress(x12, SVEMemOperand(x28, -32 * 8, SVE_MUL_VL), 3);
4998 __ CalculateSVEAddress(x13, SVEMemOperand(x28, -42 * 8, SVE_MUL_VL), 3);
4999 __ CalculateSVEAddress(x14, SVEMemOperand(x28, -32 * 4, SVE_MUL_VL), 2);
5000 __ CalculateSVEAddress(x15, SVEMemOperand(x28, -42 * 4, SVE_MUL_VL), 2);
5001 __ CalculateSVEAddress(x18, SVEMemOperand(x28, -32 * 2, SVE_MUL_VL), 1);
5002 __ CalculateSVEAddress(x19, SVEMemOperand(x28, -42 * 2, SVE_MUL_VL), 1);
Jacob Bramley1314c462019-08-08 10:54:16 +01005003
5004 // scalar-plus-scalar
5005
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005006 __ CalculateSVEAddress(x20, SVEMemOperand(x28, x29));
5007 __ CalculateSVEAddress(x21, SVEMemOperand(x28, x30));
5008 __ CalculateSVEAddress(x22, SVEMemOperand(x28, x29, LSL, 8));
5009 __ CalculateSVEAddress(x23, SVEMemOperand(x28, x30, LSL, 8));
Jacob Bramley1314c462019-08-08 10:54:16 +01005010
5011 // In-place updates, to stress scratch register allocation.
5012
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005013 __ Mov(x24, 0xabcd000000000000);
5014 __ Mov(x25, 0xabcd101100000000);
5015 __ Mov(x26, 0xabcd202200000000);
5016 __ Mov(x27, 0xabcd303300000000);
5017 __ Mov(x28, 0xabcd404400000000);
5018 __ Mov(x29, 0xabcd505500000000);
Jacob Bramley1314c462019-08-08 10:54:16 +01005019
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005020 __ CalculateSVEAddress(x24, SVEMemOperand(x24));
5021 __ CalculateSVEAddress(x25, SVEMemOperand(x25, 0x42));
5022 __ CalculateSVEAddress(x26, SVEMemOperand(x26, 3, SVE_MUL_VL), 0);
5023 __ CalculateSVEAddress(x27, SVEMemOperand(x27, 0x42, SVE_MUL_VL), 3);
5024 __ CalculateSVEAddress(x28, SVEMemOperand(x28, x30));
5025 __ CalculateSVEAddress(x29, SVEMemOperand(x29, x30, LSL, 4));
Jacob Bramley1314c462019-08-08 10:54:16 +01005026
5027 END();
5028
5029 if (CAN_RUN()) {
5030 RUN();
5031
5032 uint64_t vl = config->sve_vl_in_bytes();
5033 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5034 uint64_t pl = vl / kZRegBitsPerPRegBit;
5035
5036 // Simple scalar (or equivalent) cases.
5037 ASSERT_EQUAL_64(base, x0);
5038 ASSERT_EQUAL_64(base, x1);
5039 ASSERT_EQUAL_64(base, x2);
5040 ASSERT_EQUAL_64(base, x3);
5041 ASSERT_EQUAL_64(base, x4);
5042 ASSERT_EQUAL_64(base, x5);
5043
5044 // scalar-plus-immediate
5045 ASSERT_EQUAL_64(base + 42, x6);
5046 ASSERT_EQUAL_64(base - 42, x7);
5047 ASSERT_EQUAL_64(base + (31 * vl), x8);
5048 ASSERT_EQUAL_64(base - (32 * vl), x9);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005049 ASSERT_EQUAL_64(base + (42 * vl), x10);
5050 ASSERT_EQUAL_64(base - (42 * vl), x11);
5051 ASSERT_EQUAL_64(base - (32 * vl), x12);
Jacob Bramley1314c462019-08-08 10:54:16 +01005052 ASSERT_EQUAL_64(base - (42 * vl), x13);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005053 ASSERT_EQUAL_64(base - (32 * vl), x14);
5054 ASSERT_EQUAL_64(base - (42 * vl), x15);
5055 ASSERT_EQUAL_64(base - (32 * vl), x18);
5056 ASSERT_EQUAL_64(base - (42 * vl), x19);
Jacob Bramley1314c462019-08-08 10:54:16 +01005057
5058 // scalar-plus-scalar
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005059 ASSERT_EQUAL_64(base + 48, x20);
5060 ASSERT_EQUAL_64(base - 48, x21);
5061 ASSERT_EQUAL_64(base + (48 << 8), x22);
5062 ASSERT_EQUAL_64(base - (48 << 8), x23);
Jacob Bramley1314c462019-08-08 10:54:16 +01005063
5064 // In-place updates.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005065 ASSERT_EQUAL_64(0xabcd000000000000, x24);
5066 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x25);
5067 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x26);
5068 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x27);
5069 ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28);
5070 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29);
Jacob Bramley1314c462019-08-08 10:54:16 +01005071 }
5072}
5073
TatWai Chong4f28df72019-08-14 17:50:30 -07005074TEST_SVE(sve_permute_vector_unpredicated) {
5075 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5076 START();
5077
Jacob Bramleye4983d42019-10-08 10:56:15 +01005078 // Initialise registers with known values first.
5079 __ Dup(z1.VnB(), 0x11);
5080 __ Dup(z2.VnB(), 0x22);
5081 __ Dup(z3.VnB(), 0x33);
5082 __ Dup(z4.VnB(), 0x44);
5083
TatWai Chong4f28df72019-08-14 17:50:30 -07005084 __ Mov(x0, 0x0123456789abcdef);
5085 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
5086 __ Insr(z1.VnS(), w0);
5087 __ Insr(z2.VnD(), x0);
5088 __ Insr(z3.VnH(), h0);
5089 __ Insr(z4.VnD(), d0);
5090
5091 uint64_t inputs[] = {0xfedcba9876543210,
5092 0x0123456789abcdef,
5093 0x8f8e8d8c8b8a8988,
5094 0x8786858483828180};
5095
 5096 // Initialise a distinguishable value throughout the register first.
5097 __ Dup(z9.VnB(), 0xff);
5098 InsrHelper(&masm, z9.VnD(), inputs);
5099
5100 __ Rev(z5.VnB(), z9.VnB());
5101 __ Rev(z6.VnH(), z9.VnH());
5102 __ Rev(z7.VnS(), z9.VnS());
5103 __ Rev(z8.VnD(), z9.VnD());
5104
5105 int index[7] = {22, 7, 7, 3, 1, 1, 63};
 5106 // Broadcast an element from within the input array.
5107 __ Dup(z10.VnB(), z9.VnB(), index[0]);
5108 __ Dup(z11.VnH(), z9.VnH(), index[1]);
5109 __ Dup(z12.VnS(), z9.VnS(), index[2]);
5110 __ Dup(z13.VnD(), z9.VnD(), index[3]);
5111 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
5112 // Test dst == src
5113 __ Mov(z15, z9);
5114 __ Dup(z15.VnS(), z15.VnS(), index[5]);
 5115 // Select an element beyond the inserted input values.
5116 __ Dup(z16.VnB(), z9.VnB(), index[6]);
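  // If the requested index lies beyond the current vector length, `dup` zeroes
  // the destination; otherwise z16 picks up the 0xff background value (see the
  // expected values below).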
5117
5118 END();
5119
5120 if (CAN_RUN()) {
5121 RUN();
5122
5123 // Insr
Jacob Bramleye4983d42019-10-08 10:56:15 +01005124 uint64_t z1_expected[] = {0x1111111111111111, 0x1111111189abcdef};
5125 uint64_t z2_expected[] = {0x2222222222222222, 0x0123456789abcdef};
5126 uint64_t z3_expected[] = {0x3333333333333333, 0x3333333333333456};
5127 uint64_t z4_expected[] = {0x4444444444444444, 0x7ffaaaaa22223456};
TatWai Chong4f28df72019-08-14 17:50:30 -07005128 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
5129 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
5130 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
5131 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
5132
5133 // Rev
5134 int lane_count = core.GetSVELaneCount(kBRegSize);
5135 for (int i = 0; i < lane_count; i++) {
5136 uint64_t expected =
5137 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
5138 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
5139 ASSERT_EQUAL_64(expected, input);
5140 }
5141
5142 lane_count = core.GetSVELaneCount(kHRegSize);
5143 for (int i = 0; i < lane_count; i++) {
5144 uint64_t expected =
5145 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
5146 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
5147 ASSERT_EQUAL_64(expected, input);
5148 }
5149
5150 lane_count = core.GetSVELaneCount(kSRegSize);
5151 for (int i = 0; i < lane_count; i++) {
5152 uint64_t expected =
5153 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
5154 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
5155 ASSERT_EQUAL_64(expected, input);
5156 }
5157
5158 lane_count = core.GetSVELaneCount(kDRegSize);
5159 for (int i = 0; i < lane_count; i++) {
5160 uint64_t expected =
5161 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
5162 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
5163 ASSERT_EQUAL_64(expected, input);
5164 }
5165
5166 // Dup
5167 unsigned vl = config->sve_vl_in_bits();
5168 lane_count = core.GetSVELaneCount(kBRegSize);
5169 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
5170 for (int i = 0; i < lane_count; i++) {
5171 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
5172 }
5173
5174 lane_count = core.GetSVELaneCount(kHRegSize);
5175 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
5176 for (int i = 0; i < lane_count; i++) {
5177 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
5178 }
5179
5180 lane_count = core.GetSVELaneCount(kSRegSize);
5181 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
5182 for (int i = 0; i < lane_count; i++) {
5183 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
5184 }
5185
5186 lane_count = core.GetSVELaneCount(kDRegSize);
5187 uint64_t expected_z13 =
5188 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
5189 for (int i = 0; i < lane_count; i++) {
5190 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
5191 }
5192
5193 lane_count = core.GetSVELaneCount(kDRegSize);
5194 uint64_t expected_z14_lo = 0;
5195 uint64_t expected_z14_hi = 0;
5196 if (vl > (index[4] * kQRegSize)) {
5197 expected_z14_lo = 0x0123456789abcdef;
5198 expected_z14_hi = 0xfedcba9876543210;
5199 }
5200 for (int i = 0; i < lane_count; i += 2) {
5201 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
5202 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
5203 }
5204
5205 lane_count = core.GetSVELaneCount(kSRegSize);
5206 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
5207 for (int i = 0; i < lane_count; i++) {
5208 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
5209 }
5210
5211 lane_count = core.GetSVELaneCount(kBRegSize);
5212 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
5213 for (int i = 0; i < lane_count; i++) {
5214 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
5215 }
5216 }
5217}
5218
Martyn Capewell2e954292020-01-14 14:56:42 +00005219TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
TatWai Chong4f28df72019-08-14 17:50:30 -07005220 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5221 START();
5222
5223 uint64_t z9_inputs[] = {0xfedcba9876543210,
5224 0x0123456789abcdef,
5225 0x8f8e8d8c8b8a8988,
5226 0x8786858483828180};
5227 InsrHelper(&masm, z9.VnD(), z9_inputs);
5228
5229 __ Sunpkhi(z10.VnH(), z9.VnB());
5230 __ Sunpkhi(z11.VnS(), z9.VnH());
5231 __ Sunpkhi(z12.VnD(), z9.VnS());
5232
5233 __ Sunpklo(z13.VnH(), z9.VnB());
5234 __ Sunpklo(z14.VnS(), z9.VnH());
5235 __ Sunpklo(z15.VnD(), z9.VnS());
5236
5237 __ Uunpkhi(z16.VnH(), z9.VnB());
5238 __ Uunpkhi(z17.VnS(), z9.VnH());
5239 __ Uunpkhi(z18.VnD(), z9.VnS());
5240
5241 __ Uunpklo(z19.VnH(), z9.VnB());
5242 __ Uunpklo(z20.VnS(), z9.VnH());
5243 __ Uunpklo(z21.VnD(), z9.VnS());
5244
Martyn Capewell2e954292020-01-14 14:56:42 +00005245 // Test unpacking with same source and destination.
5246 __ Mov(z22, z9);
5247 __ Sunpklo(z22.VnH(), z22.VnB());
5248 __ Mov(z23, z9);
5249 __ Uunpklo(z23.VnH(), z23.VnB());
5250
TatWai Chong4f28df72019-08-14 17:50:30 -07005251 END();
5252
5253 if (CAN_RUN()) {
5254 RUN();
5255
 5256 // Sunpkhi
5257 int lane_count = core.GetSVELaneCount(kHRegSize);
5258 for (int i = lane_count - 1; i >= 0; i--) {
5259 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
5260 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5261 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5262 ASSERT_EQUAL_64(expected, input);
5263 }
5264
5265 lane_count = core.GetSVELaneCount(kSRegSize);
5266 for (int i = lane_count - 1; i >= 0; i--) {
5267 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
5268 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5269 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5270 ASSERT_EQUAL_64(expected, input);
5271 }
5272
5273 lane_count = core.GetSVELaneCount(kDRegSize);
5274 for (int i = lane_count - 1; i >= 0; i--) {
5275 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
5276 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5277 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5278 ASSERT_EQUAL_64(expected, input);
5279 }
5280
 5281 // Sunpklo
5282 lane_count = core.GetSVELaneCount(kHRegSize);
5283 for (int i = lane_count - 1; i >= 0; i--) {
5284 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
5285 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5286 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
5287 ASSERT_EQUAL_64(expected, input);
5288 }
5289
5290 lane_count = core.GetSVELaneCount(kSRegSize);
5291 for (int i = lane_count - 1; i >= 0; i--) {
5292 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
5293 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5294 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
5295 ASSERT_EQUAL_64(expected, input);
5296 }
5297
5298 lane_count = core.GetSVELaneCount(kDRegSize);
5299 for (int i = lane_count - 1; i >= 0; i--) {
5300 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
5301 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5302 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
5303 ASSERT_EQUAL_64(expected, input);
5304 }
5305
5306 // Uuunpkhi
5307 lane_count = core.GetSVELaneCount(kHRegSize);
5308 for (int i = lane_count - 1; i >= 0; i--) {
5309 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
5310 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
5311 ASSERT_EQUAL_64(expected, input);
5312 }
5313
5314 lane_count = core.GetSVELaneCount(kSRegSize);
5315 for (int i = lane_count - 1; i >= 0; i--) {
5316 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
5317 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
5318 ASSERT_EQUAL_64(expected, input);
5319 }
5320
5321 lane_count = core.GetSVELaneCount(kDRegSize);
5322 for (int i = lane_count - 1; i >= 0; i--) {
5323 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
5324 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
5325 ASSERT_EQUAL_64(expected, input);
5326 }
5327
 5328 // Uunpklo
5329 lane_count = core.GetSVELaneCount(kHRegSize);
5330 for (int i = lane_count - 1; i >= 0; i--) {
5331 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
5332 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
5333 ASSERT_EQUAL_64(expected, input);
5334 }
5335
5336 lane_count = core.GetSVELaneCount(kSRegSize);
5337 for (int i = lane_count - 1; i >= 0; i--) {
5338 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
5339 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
5340 ASSERT_EQUAL_64(expected, input);
5341 }
5342
5343 lane_count = core.GetSVELaneCount(kDRegSize);
5344 for (int i = lane_count - 1; i >= 0; i--) {
5345 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
5346 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
5347 ASSERT_EQUAL_64(expected, input);
5348 }
Martyn Capewell2e954292020-01-14 14:56:42 +00005349
5350 ASSERT_EQUAL_SVE(z13, z22);
5351 ASSERT_EQUAL_SVE(z19, z23);
TatWai Chong4f28df72019-08-14 17:50:30 -07005352 }
5353}
5354
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005355TEST_SVE(sve_cnot_not) {
5356 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5357 START();
5358
5359 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
5360
5361 // For simplicity, we re-use the same pg for various lane sizes.
5362 // For D lanes: 1, 1, 0
5363 // For S lanes: 1, 1, 1, 0, 0
5364 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
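  // (For lanes wider than a byte, only the predicate bit of the lane's
  // lowest-numbered byte is significant, which is how the per-size patterns
  // above are derived from pg_in.)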
5365 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5366 Initialise(&masm, p0.VnB(), pg_in);
5367 PRegisterM pg = p0.Merging();
5368
5369 // These are merging operations, so we have to initialise the result register.
5370 // We use a mixture of constructive and destructive operations.
5371
5372 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005373 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005374 __ Mov(z30, z31);
5375
5376 // For constructive operations, use a different initial result value.
5377 __ Index(z29.VnB(), 0, -1);
5378
5379 __ Mov(z0, z31);
5380 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
5381 __ Mov(z1, z29);
5382 __ Cnot(z1.VnH(), pg, z31.VnH());
5383 __ Mov(z2, z31);
5384 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
5385 __ Mov(z3, z29);
5386 __ Cnot(z3.VnD(), pg, z31.VnD());
5387
5388 __ Mov(z4, z29);
5389 __ Not(z4.VnB(), pg, z31.VnB());
5390 __ Mov(z5, z31);
5391 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
5392 __ Mov(z6, z29);
5393 __ Not(z6.VnS(), pg, z31.VnS());
5394 __ Mov(z7, z31);
5395 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
5396
5397 END();
5398
5399 if (CAN_RUN()) {
5400 RUN();
5401
5402 // Check that constructive operations preserve their inputs.
5403 ASSERT_EQUAL_SVE(z30, z31);
5404
5405 // clang-format off
5406
5407 // Cnot (B) destructive
5408 uint64_t expected_z0[] =
5409 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5410 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
5411 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5412
5413 // Cnot (H)
5414 uint64_t expected_z1[] =
5415 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5416 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
5417 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5418
5419 // Cnot (S) destructive
5420 uint64_t expected_z2[] =
5421 // pg: 0 1 1 1 0 0
5422 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
5423 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5424
5425 // Cnot (D)
5426 uint64_t expected_z3[] =
5427 // pg: 1 1 0
5428 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
5429 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5430
5431 // Not (B)
5432 uint64_t expected_z4[] =
5433 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5434 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
5435 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5436
5437 // Not (H) destructive
5438 uint64_t expected_z5[] =
5439 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5440 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
5441 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5442
5443 // Not (S)
5444 uint64_t expected_z6[] =
5445 // pg: 0 1 1 1 0 0
5446 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
5447 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
5448
5449 // Not (D) destructive
5450 uint64_t expected_z7[] =
5451 // pg: 1 1 0
5452 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
5453 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
5454
5455 // clang-format on
5456 }
5457}
5458
5459TEST_SVE(sve_fabs_fneg) {
5460 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5461 START();
5462
5463 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
5464 // NaNs, but fabs and fneg do not.
5465 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
5466 0xfff00000ff80fc01, // Signalling NaNs.
5467 0x123456789abcdef0};
5468
5469 // For simplicity, we re-use the same pg for various lane sizes.
5470 // For D lanes: 1, 1, 0
5471 // For S lanes: 1, 1, 1, 0, 0
5472 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5473 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5474 Initialise(&masm, p0.VnB(), pg_in);
5475 PRegisterM pg = p0.Merging();
5476
5477 // These are merging operations, so we have to initialise the result register.
5478 // We use a mixture of constructive and destructive operations.
5479
5480 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005481 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005482 __ Mov(z30, z31);
5483
5484 // For constructive operations, use a different initial result value.
5485 __ Index(z29.VnB(), 0, -1);
5486
5487 __ Mov(z0, z29);
5488 __ Fabs(z0.VnH(), pg, z31.VnH());
5489 __ Mov(z1, z31);
5490 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
5491 __ Mov(z2, z29);
5492 __ Fabs(z2.VnD(), pg, z31.VnD());
5493
5494 __ Mov(z3, z31);
5495 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
5496 __ Mov(z4, z29);
5497 __ Fneg(z4.VnS(), pg, z31.VnS());
5498 __ Mov(z5, z31);
5499 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
5500
5501 END();
5502
5503 if (CAN_RUN()) {
5504 RUN();
5505
5506 // Check that constructive operations preserve their inputs.
5507 ASSERT_EQUAL_SVE(z30, z31);
5508
5509 // clang-format off
5510
5511 // Fabs (H)
5512 uint64_t expected_z0[] =
5513 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5514 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
5515 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5516
5517 // Fabs (S) destructive
5518 uint64_t expected_z1[] =
5519 // pg: 0 1 1 1 0 0
5520 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
5521 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5522
5523 // Fabs (D)
5524 uint64_t expected_z2[] =
5525 // pg: 1 1 0
5526 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
5527 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5528
5529 // Fneg (H) destructive
5530 uint64_t expected_z3[] =
5531 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5532 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
5533 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5534
5535 // Fneg (S)
5536 uint64_t expected_z4[] =
5537 // pg: 0 1 1 1 0 0
5538 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
5539 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5540
5541 // Fneg (D) destructive
5542 uint64_t expected_z5[] =
5543 // pg: 1 1 0
5544 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
5545 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5546
5547 // clang-format on
5548 }
5549}
5550
5551TEST_SVE(sve_cls_clz_cnt) {
5552 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5553 START();
5554
5555 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5556
5557 // For simplicity, we re-use the same pg for various lane sizes.
5558 // For D lanes: 1, 1, 0
5559 // For S lanes: 1, 1, 1, 0, 0
5560 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5561 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5562 Initialise(&masm, p0.VnB(), pg_in);
5563 PRegisterM pg = p0.Merging();
5564
5565 // These are merging operations, so we have to initialise the result register.
5566 // We use a mixture of constructive and destructive operations.
5567
5568 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005569 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005570 __ Mov(z30, z31);
5571
5572 // For constructive operations, use a different initial result value.
5573 __ Index(z29.VnB(), 0, -1);
5574
5575 __ Mov(z0, z29);
5576 __ Cls(z0.VnB(), pg, z31.VnB());
5577 __ Mov(z1, z31);
5578 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
5579 __ Mov(z2, z29);
5580 __ Cnt(z2.VnS(), pg, z31.VnS());
5581 __ Mov(z3, z31);
5582 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
5583
5584 END();
5585
5586 if (CAN_RUN()) {
5587 RUN();
 5588 // Check that constructive operations preserve their inputs.
5589 ASSERT_EQUAL_SVE(z30, z31);
5590
5591 // clang-format off
5592
5593 // cls (B)
5594 uint8_t expected_z0[] =
5595 // pg: 0 0 0 0 1 0 1 1
5596 // pg: 1 0 0 1 0 1 1 1
5597 // pg: 0 0 1 0 1 1 1 0
5598 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
5599 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
5600 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
5601 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
5602
5603 // clz (H) destructive
5604 uint16_t expected_z1[] =
5605 // pg: 0 0 0 1
5606 // pg: 0 1 1 1
5607 // pg: 0 0 1 0
5608 {0x0000, 0x0000, 0x0000, 16,
5609 0xfefc, 0, 0, 0,
5610 0x1234, 0x5678, 0, 0xdef0};
5611 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
5612
5613 // cnt (S)
5614 uint32_t expected_z2[] =
5615 // pg: 0 1
5616 // pg: 1 1
5617 // pg: 0 0
5618 {0xe9eaebec, 0,
5619 22, 16,
5620 0xf9fafbfc, 0xfdfeff00};
5621 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
5622
5623 // cnt (D) destructive
5624 uint64_t expected_z3[] =
5625 // pg: 1 1 0
5626 { 0, 38, 0x123456789abcdef0};
5627 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5628
5629 // clang-format on
5630 }
5631}
5632
5633TEST_SVE(sve_sxt) {
5634 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5635 START();
5636
5637 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5638
5639 // For simplicity, we re-use the same pg for various lane sizes.
5640 // For D lanes: 1, 1, 0
5641 // For S lanes: 1, 1, 1, 0, 0
5642 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5643 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5644 Initialise(&masm, p0.VnB(), pg_in);
5645 PRegisterM pg = p0.Merging();
5646
5647 // These are merging operations, so we have to initialise the result register.
5648 // We use a mixture of constructive and destructive operations.
5649
5650 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005651 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005652 __ Mov(z30, z31);
5653
5654 // For constructive operations, use a different initial result value.
5655 __ Index(z29.VnB(), 0, -1);
5656
5657 __ Mov(z0, z31);
5658 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
5659 __ Mov(z1, z29);
5660 __ Sxtb(z1.VnS(), pg, z31.VnS());
5661 __ Mov(z2, z31);
5662 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
5663 __ Mov(z3, z29);
5664 __ Sxth(z3.VnS(), pg, z31.VnS());
5665 __ Mov(z4, z31);
5666 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
5667 __ Mov(z5, z29);
5668 __ Sxtw(z5.VnD(), pg, z31.VnD());
5669
5670 END();
5671
5672 if (CAN_RUN()) {
5673 RUN();
5674 // Check that constructive operations preserve their inputs.
5675 ASSERT_EQUAL_SVE(z30, z31);
5676
5677 // clang-format off
5678
5679 // Sxtb (H) destructive
5680 uint64_t expected_z0[] =
5681 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5682 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
5683 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5684
5685 // Sxtb (S)
5686 uint64_t expected_z1[] =
5687 // pg: 0 1 1 1 0 0
5688 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
5689 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5690
5691 // Sxtb (D) destructive
5692 uint64_t expected_z2[] =
5693 // pg: 1 1 0
5694 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
5695 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5696
5697 // Sxth (S)
5698 uint64_t expected_z3[] =
5699 // pg: 0 1 1 1 0 0
5700 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
5701 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5702
5703 // Sxth (D) destructive
5704 uint64_t expected_z4[] =
5705 // pg: 1 1 0
5706 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
5707 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5708
5709 // Sxtw (D)
5710 uint64_t expected_z5[] =
5711 // pg: 1 1 0
5712 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
5713 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5714
5715 // clang-format on
5716 }
5717}
5718
5719TEST_SVE(sve_uxt) {
5720 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5721 START();
5722
5723 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5724
5725 // For simplicity, we re-use the same pg for various lane sizes.
5726 // For D lanes: 1, 1, 0
5727 // For S lanes: 1, 1, 1, 0, 0
5728 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5729 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5730 Initialise(&masm, p0.VnB(), pg_in);
5731 PRegisterM pg = p0.Merging();
5732
5733 // These are merging operations, so we have to initialise the result register.
5734 // We use a mixture of constructive and destructive operations.
5735
5736 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005737 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005738 __ Mov(z30, z31);
5739
5740 // For constructive operations, use a different initial result value.
5741 __ Index(z29.VnB(), 0, -1);
5742
5743 __ Mov(z0, z29);
5744 __ Uxtb(z0.VnH(), pg, z31.VnH());
5745 __ Mov(z1, z31);
5746 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
5747 __ Mov(z2, z29);
5748 __ Uxtb(z2.VnD(), pg, z31.VnD());
5749 __ Mov(z3, z31);
5750 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
5751 __ Mov(z4, z29);
5752 __ Uxth(z4.VnD(), pg, z31.VnD());
5753 __ Mov(z5, z31);
5754 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
5755
5756 END();
5757
5758 if (CAN_RUN()) {
5759 RUN();
5760 // clang-format off
5761
5762 // Uxtb (H)
5763 uint64_t expected_z0[] =
5764 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5765 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
5766 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5767
5768 // Uxtb (S) destructive
5769 uint64_t expected_z1[] =
5770 // pg: 0 1 1 1 0 0
5771 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
5772 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5773
5774 // Uxtb (D)
5775 uint64_t expected_z2[] =
5776 // pg: 1 1 0
5777 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
5778 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5779
5780 // Uxth (S) destructive
5781 uint64_t expected_z3[] =
5782 // pg: 0 1 1 1 0 0
5783 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
5784 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5785
5786 // Uxth (D)
5787 uint64_t expected_z4[] =
5788 // pg: 1 1 0
5789 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
5790 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5791
5792 // Uxtw (D) destructive
5793 uint64_t expected_z5[] =
5794 // pg: 1 1 0
5795 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
5796 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5797
5798 // clang-format on
5799 }
5800}
5801
5802TEST_SVE(sve_abs_neg) {
5803 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5804 START();
5805
5806 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5807
5808 // For simplicity, we re-use the same pg for various lane sizes.
5809 // For D lanes: 1, 1, 0
5810 // For S lanes: 1, 1, 1, 0, 0
5811 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5812 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5813 Initialise(&masm, p0.VnB(), pg_in);
5814 PRegisterM pg = p0.Merging();
5815
5818 // These are merging operations, so we have to initialise the result register.
5819 // We use a mixture of constructive and destructive operations.
5820
5821 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005822 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005823 __ Mov(z30, z31);
5824
5825 // For constructive operations, use a different initial result value.
5826 __ Index(z29.VnB(), 0, -1);
5827
5828 __ Mov(z0, z31);
5829 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
5830 __ Mov(z1, z29);
5831 __ Abs(z1.VnB(), pg, z31.VnB());
5832
5833 __ Mov(z2, z31);
5834 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
5835 __ Mov(z3, z29);
5836 __ Neg(z3.VnS(), pg, z31.VnS());
5837
Jacob Bramleyc0066272019-09-30 16:30:47 +01005838 // The unpredicated form of `Neg` is implemented using `subr`.
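  // (`subr` is a reversed subtract, so something like `subr zd.b, zd.b, #0`
  // computes 0 - zd in each lane, i.e. the negation. The exact expansion the
  // MacroAssembler picks may differ; this is just the idea.)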
5839 __ Mov(z4, z31);
5840 __ Neg(z4.VnB(), z4.VnB()); // destructive
5841 __ Mov(z5, z29);
5842 __ Neg(z5.VnD(), z31.VnD());
5843
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005844 END();
5845
5846 if (CAN_RUN()) {
5847 RUN();
Jacob Bramleyc0066272019-09-30 16:30:47 +01005848
5849 ASSERT_EQUAL_SVE(z30, z31);
5850
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005851 // clang-format off
5852
5853 // Abs (D) destructive
5854 uint64_t expected_z0[] =
5855 // pg: 1 1 0
5856 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
5857 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5858
5859 // Abs (B)
5860 uint64_t expected_z1[] =
5861 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5862 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
5863 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5864
5865 // Neg (H) destructive
5866 uint64_t expected_z2[] =
5867 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5868 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
5869 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5870
5871 // Neg (S)
5872 uint64_t expected_z3[] =
5873 // pg: 0 1 1 1 0 0
5874 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
5875 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5876
Jacob Bramleyc0066272019-09-30 16:30:47 +01005877 // Neg (B) destructive, unpredicated
5878 uint64_t expected_z4[] =
5879 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
5880 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5881
5882 // Neg (D) unpredicated
5883 uint64_t expected_z5[] =
5884 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
5885 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5886
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005887 // clang-format on
5888 }
5889}
5890
Jacob Bramley0093bb92019-10-04 15:54:10 +01005891TEST_SVE(sve_cpy) {
5892 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5893 START();
5894
5895 // For simplicity, we re-use the same pg for various lane sizes.
5896 // For D lanes: 0, 1, 1
5897 // For S lanes: 0, 1, 1, 0, 1
5898 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5899 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5900
5901 PRegisterM pg = p7.Merging();
5902 Initialise(&masm, pg.VnB(), pg_in);
5903
5904 // These are merging operations, so we have to initialise the result registers
5905 // for each operation.
5906 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5907 __ Index(ZRegister(i, kBRegSize), 0, -1);
5908 }
5909
5910 // Recognisable values to copy.
5911 __ Mov(x0, 0xdeadbeefdeadbe42);
5912 __ Mov(x1, 0xdeadbeefdead8421);
5913 __ Mov(x2, 0xdeadbeef80042001);
5914 __ Mov(x3, 0x8000000420000001);
5915
5916 // Use NEON moves, to avoid testing SVE `cpy` against itself.
5917 __ Dup(v28.V2D(), x0);
5918 __ Dup(v29.V2D(), x1);
5919 __ Dup(v30.V2D(), x2);
5920 __ Dup(v31.V2D(), x3);
5921
5922 // Register forms (CPY_z_p_r)
5923 __ Cpy(z0.VnB(), pg, w0);
5924 __ Cpy(z1.VnH(), pg, x1); // X registers are accepted for small lanes.
5925 __ Cpy(z2.VnS(), pg, w2);
5926 __ Cpy(z3.VnD(), pg, x3);
5927
5928 // VRegister forms (CPY_z_p_v)
5929 __ Cpy(z4.VnB(), pg, b28);
5930 __ Cpy(z5.VnH(), pg, h29);
5931 __ Cpy(z6.VnS(), pg, s30);
5932 __ Cpy(z7.VnD(), pg, d31);
5933
5934 // Check that we can copy the stack pointer.
5935 __ Mov(x10, sp);
5936 __ Mov(sp, 0xabcabcabcabcabca); // Set sp to a known value.
5937 __ Cpy(z16.VnB(), pg, sp);
5938 __ Cpy(z17.VnH(), pg, wsp);
5939 __ Cpy(z18.VnS(), pg, wsp);
5940 __ Cpy(z19.VnD(), pg, sp);
5941 __ Mov(sp, x10); // Restore sp.
5942
5943 END();
5944
5945 if (CAN_RUN()) {
5946 RUN();
5947 // clang-format off
5948
5949 uint64_t expected_b[] =
5950 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5951 {0xe9eaebec424242f0, 0x42f2f34242f64242, 0xf942fbfcfdfeff42};
5952 ASSERT_EQUAL_SVE(expected_b, z0.VnD());
5953 ASSERT_EQUAL_SVE(expected_b, z4.VnD());
5954
5955 uint64_t expected_h[] =
5956 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5957 {0xe9eaebec8421eff0, 0xf1f28421f5f68421, 0x8421fbfcfdfe8421};
5958 ASSERT_EQUAL_SVE(expected_h, z1.VnD());
5959 ASSERT_EQUAL_SVE(expected_h, z5.VnD());
5960
5961 uint64_t expected_s[] =
5962 // pg: 0 0 1 1 0 1
5963 {0xe9eaebecedeeeff0, 0x8004200180042001, 0xf9fafbfc80042001};
5964 ASSERT_EQUAL_SVE(expected_s, z2.VnD());
5965 ASSERT_EQUAL_SVE(expected_s, z6.VnD());
5966
5967 uint64_t expected_d[] =
5968 // pg: 0 1 1
5969 {0xe9eaebecedeeeff0, 0x8000000420000001, 0x8000000420000001};
5970 ASSERT_EQUAL_SVE(expected_d, z3.VnD());
5971 ASSERT_EQUAL_SVE(expected_d, z7.VnD());
5972
5973
5974 uint64_t expected_b_sp[] =
5975 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5976 {0xe9eaebeccacacaf0, 0xcaf2f3cacaf6caca, 0xf9cafbfcfdfeffca};
5977 ASSERT_EQUAL_SVE(expected_b_sp, z16.VnD());
5978
5979 uint64_t expected_h_sp[] =
5980 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5981 {0xe9eaebecabcaeff0, 0xf1f2abcaf5f6abca, 0xabcafbfcfdfeabca};
5982 ASSERT_EQUAL_SVE(expected_h_sp, z17.VnD());
5983
5984 uint64_t expected_s_sp[] =
5985 // pg: 0 0 1 1 0 1
5986 {0xe9eaebecedeeeff0, 0xcabcabcacabcabca, 0xf9fafbfccabcabca};
5987 ASSERT_EQUAL_SVE(expected_s_sp, z18.VnD());
5988
5989 uint64_t expected_d_sp[] =
5990 // pg: 0 1 1
5991 {0xe9eaebecedeeeff0, 0xabcabcabcabcabca, 0xabcabcabcabcabca};
5992 ASSERT_EQUAL_SVE(expected_d_sp, z19.VnD());
5993
5994 // clang-format on
5995 }
5996}
5997
Jacob Bramley0f62eab2019-10-23 17:07:47 +01005998TEST_SVE(sve_cpy_imm) {
5999 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6000 START();
6001
6002 // For simplicity, we re-use the same pg for various lane sizes.
6003 // For D lanes: 0, 1, 1
6004 // For S lanes: 0, 1, 1, 0, 1
6005 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6006 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6007
6008 PRegister pg = p7;
6009 Initialise(&masm, pg.VnB(), pg_in);
6010
6011 // These are (mostly) merging operations, so we have to initialise the result
6012 // registers for each operation.
6013 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6014 __ Index(ZRegister(i, kBRegSize), 0, -1);
6015 }
6016
6017 // Encodable integer forms (CPY_z_p_i)
6018 __ Cpy(z0.VnB(), pg.Merging(), 0);
6019 __ Cpy(z1.VnB(), pg.Zeroing(), 42);
6020 __ Cpy(z2.VnB(), pg.Merging(), -42);
6021 __ Cpy(z3.VnB(), pg.Zeroing(), 0xff);
6022 __ Cpy(z4.VnH(), pg.Merging(), 127);
6023 __ Cpy(z5.VnS(), pg.Zeroing(), -128);
6024 __ Cpy(z6.VnD(), pg.Merging(), -1);
6025
6026 // Forms encodable using fcpy.
6027 __ Cpy(z7.VnH(), pg.Merging(), Float16ToRawbits(Float16(-31.0)));
6028 __ Cpy(z8.VnS(), pg.Zeroing(), FloatToRawbits(2.0f));
6029 __ Cpy(z9.VnD(), pg.Merging(), DoubleToRawbits(-4.0));
6030
6031 // Other forms use a scratch register.
6032 __ Cpy(z10.VnH(), pg.Merging(), 0xff);
6033 __ Cpy(z11.VnD(), pg.Zeroing(), 0x0123456789abcdef);
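  // Cpy's immediate form only encodes a signed 8-bit value, optionally shifted
  // left by 8, so 0xff (255 in H lanes) and 0x0123456789abcdef cannot be
  // encoded directly; the MacroAssembler is expected to move them into a
  // general-purpose scratch register and use the register form.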
6034
6035 END();
6036
6037 if (CAN_RUN()) {
6038 RUN();
6039 // clang-format off
6040
6041 uint64_t expected_z0[] =
6042 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6043 {0xe9eaebec000000f0, 0x00f2f30000f60000, 0xf900fbfcfdfeff00};
6044 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6045
6046 uint64_t expected_z1[] =
6047 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6048 {0x000000002a2a2a00, 0x2a00002a2a002a2a, 0x002a00000000002a};
6049 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6050
6051 uint64_t expected_z2[] =
6052 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6053 {0xe9eaebecd6d6d6f0, 0xd6f2f3d6d6f6d6d6, 0xf9d6fbfcfdfeffd6};
6054 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6055
6056 uint64_t expected_z3[] =
6057 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
6058 {0x00000000ffffff00, 0xff0000ffff00ffff, 0x00ff0000000000ff};
6059 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6060
6061 uint64_t expected_z4[] =
6062 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6063 {0xe9eaebec007feff0, 0xf1f2007ff5f6007f, 0x007ffbfcfdfe007f};
6064 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6065
6066 uint64_t expected_z5[] =
6067 // pg: 0 0 1 1 0 1
6068 {0x0000000000000000, 0xffffff80ffffff80, 0x00000000ffffff80};
6069 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6070
6071 uint64_t expected_z6[] =
6072 // pg: 0 1 1
6073 {0xe9eaebecedeeeff0, 0xffffffffffffffff, 0xffffffffffffffff};
6074 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6075
6076 uint64_t expected_z7[] =
6077 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6078 {0xe9eaebeccfc0eff0, 0xf1f2cfc0f5f6cfc0, 0xcfc0fbfcfdfecfc0};
6079 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6080
6081 uint64_t expected_z8[] =
6082 // pg: 0 0 1 1 0 1
6083 {0x0000000000000000, 0x4000000040000000, 0x0000000040000000};
6084 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6085
6086 uint64_t expected_z9[] =
6087 // pg: 0 1 1
6088 {0xe9eaebecedeeeff0, 0xc010000000000000, 0xc010000000000000};
6089 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6090
6091 uint64_t expected_z10[] =
6092 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6093 {0xe9eaebec00ffeff0, 0xf1f200fff5f600ff, 0x00fffbfcfdfe00ff};
6094 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6095
6096 uint64_t expected_z11[] =
6097 // pg: 0 1 1
6098 {0x0000000000000000, 0x0123456789abcdef, 0x0123456789abcdef};
6099 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6100
6101 // clang-format on
6102 }
6103}
6104
6105TEST_SVE(sve_fcpy_imm) {
6106 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6107 START();
6108
6109 // For simplicity, we re-use the same pg for various lane sizes.
6110 // For D lanes: 0, 1, 1
6111 // For S lanes: 0, 1, 1, 0, 1
6112 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
6113 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
6114
6115 PRegister pg = p7;
6116 Initialise(&masm, pg.VnB(), pg_in);
6117
6118 // These are (mostly) merging operations, so we have to initialise the result
6119 // registers for each operation.
6120 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
6121 __ Index(ZRegister(i, kBRegSize), 0, -1);
6122 }
6123
6124 // Encodable floating-point forms (FCPY_z_p_i)
6125 __ Fcpy(z1.VnH(), pg.Merging(), Float16(1.0));
6126 __ Fcpy(z2.VnH(), pg.Merging(), -2.0f);
6127 __ Fcpy(z3.VnH(), pg.Merging(), 3.0);
6128 __ Fcpy(z4.VnS(), pg.Merging(), Float16(-4.0));
6129 __ Fcpy(z5.VnS(), pg.Merging(), 5.0f);
6130 __ Fcpy(z6.VnS(), pg.Merging(), 6.0);
6131 __ Fcpy(z7.VnD(), pg.Merging(), Float16(7.0));
6132 __ Fcpy(z8.VnD(), pg.Merging(), 8.0f);
6133 __ Fcpy(z9.VnD(), pg.Merging(), -9.0);
6134
6135 // Unencodable immediates.
6136 __ Fcpy(z10.VnS(), pg.Merging(), 0.0);
6137 __ Fcpy(z11.VnH(), pg.Merging(), Float16(42.0));
6138 __ Fcpy(z12.VnD(), pg.Merging(), RawbitsToDouble(0x7ff0000012340000)); // NaN
6139 __ Fcpy(z13.VnH(), pg.Merging(), kFP64NegativeInfinity);
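  // Fcpy's immediate uses the usual 8-bit floating-point encoding (roughly
  // +/-n/16 * 2^r, with magnitudes from 0.125 to 31.0), so 0.0, 42.0, NaNs and
  // infinities all fall outside it and need a different expansion.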
6140
6141 END();
6142
6143 if (CAN_RUN()) {
6144 RUN();
6145 // clang-format off
6146
6147 // 1.0 as FP16: 0x3c00
6148 uint64_t expected_z1[] =
6149 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6150 {0xe9eaebec3c00eff0, 0xf1f23c00f5f63c00, 0x3c00fbfcfdfe3c00};
6151 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
6152
6153 // -2.0 as FP16: 0xc000
6154 uint64_t expected_z2[] =
6155 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6156 {0xe9eaebecc000eff0, 0xf1f2c000f5f6c000, 0xc000fbfcfdfec000};
6157 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
6158
6159 // 3.0 as FP16: 0x4200
6160 uint64_t expected_z3[] =
6161 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6162 {0xe9eaebec4200eff0, 0xf1f24200f5f64200, 0x4200fbfcfdfe4200};
6163 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
6164
6165 // -4.0 as FP32: 0xc0800000
6166 uint64_t expected_z4[] =
6167 // pg: 0 0 1 1 0 1
6168 {0xe9eaebecedeeeff0, 0xc0800000c0800000, 0xf9fafbfcc0800000};
6169 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6170
6171 // 5.0 as FP32: 0x40a00000
6172 uint64_t expected_z5[] =
6173 // pg: 0 0 1 1 0 1
6174 {0xe9eaebecedeeeff0, 0x40a0000040a00000, 0xf9fafbfc40a00000};
6175 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
6176
6177 // 6.0 as FP32: 0x40c00000
6178 uint64_t expected_z6[] =
6179 // pg: 0 0 1 1 0 1
6180 {0xe9eaebecedeeeff0, 0x40c0000040c00000, 0xf9fafbfc40c00000};
6181 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
6182
6183 // 7.0 as FP64: 0x401c000000000000
6184 uint64_t expected_z7[] =
6185 // pg: 0 1 1
6186 {0xe9eaebecedeeeff0, 0x401c000000000000, 0x401c000000000000};
6187 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6188
6189 // 8.0 as FP64: 0x4020000000000000
6190 uint64_t expected_z8[] =
6191 // pg: 0 1 1
6192 {0xe9eaebecedeeeff0, 0x4020000000000000, 0x4020000000000000};
6193 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
6194
6195 // -9.0 as FP64: 0xc022000000000000
6196 uint64_t expected_z9[] =
6197 // pg: 0 1 1
6198 {0xe9eaebecedeeeff0, 0xc022000000000000, 0xc022000000000000};
6199 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
6200
6201 // 0.0 as FP32: 0x00000000
6202 uint64_t expected_z10[] =
6203 // pg: 0 0 1 1 0 1
6204 {0xe9eaebecedeeeff0, 0x0000000000000000, 0xf9fafbfc00000000};
6205 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
6206
6207 // 42.0 as FP16: 0x5140
6208 uint64_t expected_z11[] =
6209 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6210 {0xe9eaebec5140eff0, 0xf1f25140f5f65140, 0x5140fbfcfdfe5140};
6211 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
6212
6213 // Signalling NaN (with payload): 0x7ff0000012340000
6214 uint64_t expected_z12[] =
6215 // pg: 0 1 1
6216 {0xe9eaebecedeeeff0, 0x7ff0000012340000, 0x7ff0000012340000};
6217 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
6218
6219 // -infinity as FP16: 0xfc00
6220 uint64_t expected_z13[] =
6221 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
6222 {0xe9eaebecfc00eff0, 0xf1f2fc00f5f6fc00, 0xfc00fbfcfdfefc00};
6223 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
6224
6225 // clang-format on
6226 }
6227}
6228
TatWai Chong4f28df72019-08-14 17:50:30 -07006229TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
6230 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6231 START();
6232
6233 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
6234
6235 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
6236
6237 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
6238
6239 int index_s[] = {1, 3, 2, 31, -1};
6240
6241 int index_d[] = {31, 1};
6242
 6243 // Initialise the register with a value that doesn't exist in the table.
6244 __ Dup(z9.VnB(), 0x1f);
6245 InsrHelper(&masm, z9.VnD(), table_inputs);
6246
6247 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
6248 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
6249 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
6250 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
6251
6252 InsrHelper(&masm, ind_b, index_b);
6253 InsrHelper(&masm, ind_h, index_h);
6254 InsrHelper(&masm, ind_s, index_s);
6255 InsrHelper(&masm, ind_d, index_d);
6256
6257 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
6258
6259 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
6260
6261 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
6262
6263 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
6264
6265 END();
6266
6267 if (CAN_RUN()) {
6268 RUN();
6269
6270 // clang-format off
6271 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
6272 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
6273
6274 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
6275 0x5544, 0x7766, 0xddcc, 0x9988};
6276
6277 unsigned z28_expected[] =
6278 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
6279
6280 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
6281 // clang-format on
6282
6283 unsigned vl = config->sve_vl_in_bits();
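    // Tbl zeroes a destination lane when its index is outside the vector, so
    // each reference value only applies if the indexed lane exists for this
    // VL; `index * lane_size_in_bits` is the bit offset of that lane. In-range
    // indices beyond the 16 inserted table bytes read the 0x1f filler.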
6284 for (size_t i = 0; i < ArrayLength(index_b); i++) {
6285 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
6286 if (!core.HasSVELane(z26.VnB(), lane)) break;
6287 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
6288 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
6289 }
6290
6291 for (size_t i = 0; i < ArrayLength(index_h); i++) {
6292 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
6293 if (!core.HasSVELane(z27.VnH(), lane)) break;
6294 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
6295 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
6296 }
6297
6298 for (size_t i = 0; i < ArrayLength(index_s); i++) {
6299 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
6300 if (!core.HasSVELane(z28.VnS(), lane)) break;
6301 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
6302 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
6303 }
6304
6305 for (size_t i = 0; i < ArrayLength(index_d); i++) {
6306 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
6307 if (!core.HasSVELane(z29.VnD(), lane)) break;
6308 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
6309 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
6310 }
6311 }
6312}
6313
Jacob Bramley199339d2019-08-05 18:49:13 +01006314TEST_SVE(ldr_str_z_bi) {
6315 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6316 START();
6317
6318 int vl = config->sve_vl_in_bytes();
6319
6320 // The immediate can address [-256, 255] times the VL, so allocate enough
6321 // space to exceed that in both directions.
6322 int data_size = vl * 1024;
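  // With the base placed half-way through the buffer, accesses of up to about
  // +/-512 * VL stay in bounds; the largest offsets used below are +/-314 * VL
  // (plus one vector of data).
  VIXL_ASSERT((data_size / 2) >= (315 * vl));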
6323
6324 uint8_t* data = new uint8_t[data_size];
6325 memset(data, 0, data_size);
6326
6327 // Set the base half-way through the buffer so we can use negative indices.
6328 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6329
6330 __ Index(z1.VnB(), 1, 3);
6331 __ Index(z2.VnB(), 2, 5);
6332 __ Index(z3.VnB(), 3, 7);
6333 __ Index(z4.VnB(), 4, 11);
6334 __ Index(z5.VnB(), 5, 13);
6335 __ Index(z6.VnB(), 6, 2);
6336 __ Index(z7.VnB(), 7, 3);
6337 __ Index(z8.VnB(), 8, 5);
6338 __ Index(z9.VnB(), 9, 7);
6339
6340 // Encodable cases.
6341 __ Str(z1, SVEMemOperand(x0));
6342 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
6343 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
6344 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
6345 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
6346
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006347 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01006348 __ Str(z6, SVEMemOperand(x0, 6 * vl));
6349 __ Str(z7, SVEMemOperand(x0, -7 * vl));
6350 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
6351 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
6352
6353 // Corresponding loads.
6354 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
6355 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
6356 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
6357 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
6358 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
6359
6360 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
6361 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
6362 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
6363 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
6364
6365 END();
6366
6367 if (CAN_RUN()) {
6368 RUN();
6369
6370 uint8_t* expected = new uint8_t[data_size];
6371 memset(expected, 0, data_size);
6372 uint8_t* middle = &expected[data_size / 2];
6373
6374 for (int i = 0; i < vl; i++) {
6375 middle[i] = (1 + (3 * i)) & 0xff; // z1
6376 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
6377 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
6378 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
6379 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
6380 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
6381 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
6382 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
6383 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
6384 }
6385
Jacob Bramley33c99f92019-10-08 15:24:12 +01006386 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01006387
6388 ASSERT_EQUAL_SVE(z1, z11);
6389 ASSERT_EQUAL_SVE(z2, z12);
6390 ASSERT_EQUAL_SVE(z3, z13);
6391 ASSERT_EQUAL_SVE(z4, z14);
6392 ASSERT_EQUAL_SVE(z5, z15);
6393 ASSERT_EQUAL_SVE(z6, z16);
6394 ASSERT_EQUAL_SVE(z7, z17);
6395 ASSERT_EQUAL_SVE(z8, z18);
6396 ASSERT_EQUAL_SVE(z9, z19);
6397
6398 delete[] expected;
6399 }
6400 delete[] data;
6401}
6402
6403TEST_SVE(ldr_str_p_bi) {
6404 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6405 START();
6406
6407 int vl = config->sve_vl_in_bytes();
6408 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
6409 int pl = vl / kZRegBitsPerPRegBit;
6410
6411 // The immediate can address [-256, 255] times the PL, so allocate enough
6412 // space to exceed that in both directions.
6413 int data_size = pl * 1024;
6414
6415 uint8_t* data = new uint8_t[data_size];
6416 memset(data, 0, data_size);
6417
6418 // Set the base half-way through the buffer so we can use negative indices.
6419 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6420
6421 uint64_t pattern[4] = {0x1010101011101111,
6422 0x0010111011000101,
6423 0x1001101110010110,
6424 0x1010110101100011};
6425 for (int i = 8; i <= 15; i++) {
6426 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
6427 Initialise(&masm,
6428 PRegister(i),
6429 pattern[3] * i,
6430 pattern[2] * i,
6431 pattern[1] * i,
6432 pattern[0] * i);
6433 }
6434
6435 // Encodable cases.
6436 __ Str(p8, SVEMemOperand(x0));
6437 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
6438 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
6439 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
6440
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006441 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01006442 __ Str(p12, SVEMemOperand(x0, 6 * pl));
6443 __ Str(p13, SVEMemOperand(x0, -7 * pl));
6444 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
6445 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
6446
6447 // Corresponding loads.
6448 __ Ldr(p0, SVEMemOperand(x0));
6449 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
6450 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
6451 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
6452
6453 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
6454 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
6455 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
6456 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
6457
6458 END();
6459
6460 if (CAN_RUN()) {
6461 RUN();
6462
6463 uint8_t* expected = new uint8_t[data_size];
6464 memset(expected, 0, data_size);
6465 uint8_t* middle = &expected[data_size / 2];
6466
6467 for (int i = 0; i < pl; i++) {
6468 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
6469 size_t index = i / sizeof(pattern[0]);
6470 VIXL_ASSERT(index < ArrayLength(pattern));
6471 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
6472 // Each byte of `pattern` can be multiplied by 15 without carry.
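      // (The largest byte in `pattern` is 0x11, and 0x11 * 15 = 0xff, so
      // scaling by the register numbers 8-15 below never carries between
      // bytes.)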
6473 VIXL_ASSERT((byte * 15) <= 0xff);
6474
6475 middle[i] = byte * 8; // p8
6476 middle[(2 * pl) + i] = byte * 9; // p9
6477 middle[(-3 * pl) + i] = byte * 10; // p10
6478 middle[(255 * pl) + i] = byte * 11; // p11
6479 middle[(6 * pl) + i] = byte * 12; // p12
6480 middle[(-7 * pl) + i] = byte * 13; // p13
6481 middle[(314 * pl) + i] = byte * 14; // p14
6482 middle[(-314 * pl) + i] = byte * 15; // p15
6483 }
6484
Jacob Bramley33c99f92019-10-08 15:24:12 +01006485 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01006486
6487 ASSERT_EQUAL_SVE(p0, p8);
6488 ASSERT_EQUAL_SVE(p1, p9);
6489 ASSERT_EQUAL_SVE(p2, p10);
6490 ASSERT_EQUAL_SVE(p3, p11);
6491 ASSERT_EQUAL_SVE(p4, p12);
6492 ASSERT_EQUAL_SVE(p5, p13);
6493 ASSERT_EQUAL_SVE(p6, p14);
6494 ASSERT_EQUAL_SVE(p7, p15);
6495
6496 delete[] expected;
6497 }
6498 delete[] data;
6499}
6500
Jacob Bramleye668b202019-08-14 17:57:34 +01006501template <typename T>
6502static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
6503 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
6504}
6505
6506TEST_SVE(sve_ld1_st1_contiguous) {
6507 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6508 START();
6509
6510 int vl = config->sve_vl_in_bytes();
6511
6512 // The immediate can address [-8, 7] times the VL, so allocate enough space to
6513 // exceed that in both directions.
6514 int data_size = vl * 128;
6515
6516 uint8_t* data = new uint8_t[data_size];
6517 memset(data, 0, data_size);
6518
 6519 // Set the base half-way through the buffer so we can use negative indices.
6520 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6521
Jacob Bramleye668b202019-08-14 17:57:34 +01006522 // Encodable scalar-plus-immediate cases.
6523 __ Index(z1.VnB(), 1, -3);
6524 __ Ptrue(p1.VnB());
6525 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
6526
6527 __ Index(z2.VnH(), -2, 5);
6528 __ Ptrue(p2.VnH(), SVE_MUL3);
6529 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
6530
6531 __ Index(z3.VnS(), 3, -7);
6532 __ Ptrue(p3.VnS(), SVE_POW2);
6533 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
6534
6535 // Encodable scalar-plus-scalar cases.
6536 __ Index(z4.VnD(), -4, 11);
6537 __ Ptrue(p4.VnD(), SVE_VL3);
6538 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
6539 __ Mov(x2, 17);
6540 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
6541
6542 __ Index(z5.VnD(), 6, -2);
6543 __ Ptrue(p5.VnD(), SVE_VL16);
TatWai Chong6205eb42019-09-24 10:07:20 +01006544 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
6545 __ Mov(x4, 6);
6546 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
Jacob Bramleye668b202019-08-14 17:57:34 +01006547
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006548 // Unencodable cases fall back on `CalculateSVEAddress`.
Jacob Bramleye668b202019-08-14 17:57:34 +01006549 __ Index(z6.VnS(), -7, 3);
6550 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
6551 // predicate bits when handling larger lanes.
6552 __ Ptrue(p6.VnB(), SVE_ALL);
6553 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
6554
TatWai Chong6205eb42019-09-24 10:07:20 +01006555 __ Index(z7.VnD(), 32, -11);
6556 __ Ptrue(p7.VnD(), SVE_MUL4);
6557 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
Jacob Bramleye668b202019-08-14 17:57:34 +01006558
TatWai Chong6205eb42019-09-24 10:07:20 +01006559 // Corresponding loads.
6560 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
6561 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
6562 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
6563 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
6564 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
6565 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
6566
6567 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
6568 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
6569 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
6570 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
6571
6572 // We can test ld1 by comparing the value loaded with the value stored. In
6573 // most cases, there are two complications:
6574 // - Loads have zeroing predication, so we have to clear the inactive
6575 // elements on our reference.
6576 // - We have to replicate any sign- or zero-extension.
6577
6578 // Ld1b(z8.VnB(), ...)
6579 __ Dup(z18.VnB(), 0);
6580 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
6581
6582 // Ld1b(z9.VnH(), ...)
6583 __ Dup(z19.VnH(), 0);
6584 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
6585
6586 // Ld1h(z10.VnS(), ...)
6587 __ Dup(z20.VnS(), 0);
6588 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
6589
6590 // Ld1b(z11.VnD(), ...)
6591 __ Dup(z21.VnD(), 0);
6592 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
6593
6594 // Ld1d(z12.VnD(), ...)
6595 __ Dup(z22.VnD(), 0);
6596 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
6597
6598 // Ld1w(z13.VnS(), ...)
6599 __ Dup(z23.VnS(), 0);
6600 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
6601
6602 // Ld1sb(z14.VnH(), ...)
6603 __ Dup(z24.VnH(), 0);
6604 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
6605
6606 // Ld1sh(z15.VnS(), ...)
6607 __ Dup(z25.VnS(), 0);
6608 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
6609
6610 // Ld1sb(z16.VnD(), ...)
6611 __ Dup(z26.VnD(), 0);
6612 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
6613
6614 // Ld1sw(z17.VnD(), ...)
6615 __ Dup(z27.VnD(), 0);
6616 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
Jacob Bramleye668b202019-08-14 17:57:34 +01006617
6618 END();
6619
6620 if (CAN_RUN()) {
6621 RUN();
6622
6623 uint8_t* expected = new uint8_t[data_size];
6624 memset(expected, 0, data_size);
6625 uint8_t* middle = &expected[data_size / 2];
6626
6627 int vl_b = vl / kBRegSizeInBytes;
6628 int vl_h = vl / kHRegSizeInBytes;
6629 int vl_s = vl / kSRegSizeInBytes;
6630 int vl_d = vl / kDRegSizeInBytes;
6631
6632 // Encodable cases.
6633
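    // As a reminder, the Ptrue patterns used above select: SVE_ALL - every
    // lane, SVE_POW2 - the largest power-of-two number of lanes, SVE_MUL3 and
    // SVE_MUL4 - the largest multiple of three or four, and SVE_VLn - exactly
    // n lanes (or none if fewer than n fit). The loop bounds below mirror
    // this.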
6634 // st1b { z1.b }, SVE_ALL
6635 for (int i = 0; i < vl_b; i++) {
6636 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
6637 }
6638
6639 // st1b { z2.h }, SVE_MUL3
6640 int vl_h_mul3 = vl_h - (vl_h % 3);
6641 for (int i = 0; i < vl_h_mul3; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006642 int64_t offset = 7 * static_cast<int>(vl / (kHRegSize / kBRegSize));
6643 MemoryWrite(middle, offset, i, static_cast<uint8_t>(-2 + (5 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01006644 }
6645
6646 // st1h { z3.s }, SVE_POW2
6647 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
6648 for (int i = 0; i < vl_s_pow2; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006649 int64_t offset = -8 * static_cast<int>(vl / (kSRegSize / kHRegSize));
6650 MemoryWrite(middle, offset, i, static_cast<uint16_t>(3 - (7 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01006651 }
6652
6653 // st1b { z4.d }, SVE_VL3
6654 if (vl_d >= 3) {
6655 for (int i = 0; i < 3; i++) {
6656 MemoryWrite(middle,
6657 (8 * vl) + 17,
6658 i,
6659 static_cast<uint8_t>(-4 + (11 * i)));
6660 }
6661 }
6662
6663 // st1d { z5.d }, SVE_VL16
6664 if (vl_d >= 16) {
6665 for (int i = 0; i < 16; i++) {
6666 MemoryWrite(middle,
6667 (10 * vl) + (6 * kDRegSizeInBytes),
6668 i,
6669 static_cast<uint64_t>(6 - (2 * i)));
6670 }
6671 }
6672
6673 // Unencodable cases.
6674
6675 // st1w { z6.s }, SVE_ALL
6676 for (int i = 0; i < vl_s; i++) {
6677 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
6678 }
6679
TatWai Chong6205eb42019-09-24 10:07:20 +01006680 // st1w { z7.d }, SVE_MUL4
6681 int vl_d_mul4 = vl_d - (vl_d % 4);
6682 for (int i = 0; i < vl_d_mul4; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006683 int64_t offset = 22 * static_cast<int>(vl / (kDRegSize / kWRegSize));
6684 MemoryWrite(middle, offset, i, static_cast<uint32_t>(32 + (-11 * i)));
TatWai Chong6205eb42019-09-24 10:07:20 +01006685 }
6686
Jacob Bramley33c99f92019-10-08 15:24:12 +01006687 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramleye668b202019-08-14 17:57:34 +01006688
TatWai Chong6205eb42019-09-24 10:07:20 +01006689 // Check that we loaded back the expected values.
6690
6691 ASSERT_EQUAL_SVE(z18, z8);
6692 ASSERT_EQUAL_SVE(z19, z9);
6693 ASSERT_EQUAL_SVE(z20, z10);
6694 ASSERT_EQUAL_SVE(z21, z11);
6695 ASSERT_EQUAL_SVE(z22, z12);
6696 ASSERT_EQUAL_SVE(z23, z13);
6697 ASSERT_EQUAL_SVE(z24, z14);
6698 ASSERT_EQUAL_SVE(z25, z15);
6699 ASSERT_EQUAL_SVE(z26, z16);
6700 ASSERT_EQUAL_SVE(z27, z17);
6701
Jacob Bramleye668b202019-08-14 17:57:34 +01006702 delete[] expected;
6703 }
6704 delete[] data;
6705}
6706
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006707TEST_SVE(sve_ld2_st2_scalar_plus_imm) {
6708 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6709 START();
6710
6711 int vl = config->sve_vl_in_bytes();
6712
6713 // The immediate can address [-16, 14] times the VL, so allocate enough space
6714 // to exceed that in both directions.
6715 int data_size = vl * 128;
6716
6717 uint8_t* data = new uint8_t[data_size];
6718 memset(data, 0, data_size);
6719
 6720 // Set the base half-way through the buffer so we can use negative indices.
6721 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6722
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006723 __ Index(z14.VnB(), 1, -3);
6724 __ Index(z15.VnB(), 2, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006725 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006726 __ St2b(z14.VnB(), z15.VnB(), p0, SVEMemOperand(x0));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006727
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006728 __ Index(z16.VnH(), -2, 5);
6729 __ Index(z17.VnH(), -3, 5);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006730 __ Ptrue(p1.VnH(), SVE_MUL3);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006731 __ St2h(z16.VnH(), z17.VnH(), p1, SVEMemOperand(x0, 8, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006732
6733 // Wrap around from z31 to z0.
6734 __ Index(z31.VnS(), 3, -7);
6735 __ Index(z0.VnS(), 4, -7);
6736 __ Ptrue(p2.VnS(), SVE_POW2);
6737 __ St2w(z31.VnS(), z0.VnS(), p2, SVEMemOperand(x0, -12, SVE_MUL_VL));
6738
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006739 __ Index(z18.VnD(), -7, 3);
6740 __ Index(z19.VnD(), -8, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006741 // Sparse predication, including some irrelevant bits (0xe). To make the
6742 // results easy to check, activate each lane <n> where n is a multiple of 5.
6743 Initialise(&masm,
6744 p3,
6745 0xeee10000000001ee,
6746 0xeeeeeee100000000,
6747 0x01eeeeeeeee10000,
6748 0x000001eeeeeeeee1);
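  // For D lanes, each element owns eight predicate bits but only the lowest
  // one is significant, so the 0xe filler nibbles above are ignored; the low
  // `1` nibbles fall on every fifth D lane, which is what the expected-value
  // loop below relies on when it checks `(i % 5) == 0`.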
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006749 __ St2d(z18.VnD(), z19.VnD(), p3, SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006750
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006751 // We can test ld2 by comparing the values loaded with the values stored.
6752 // There are two complications:
6753 // - Loads have zeroing predication, so we have to clear the inactive
6754 // elements on our reference.
6755 // - We want to test both loads and stores that span { z31, z0 }, so we have
6756 // to move some values around.
6757 //
6758 // Registers z4-z11 will hold as-stored values (with inactive elements
6759 // cleared). Registers z20-z27 will hold the values that were loaded.
6760
6761 // Ld2b(z14.VnB(), z15.VnB(), ...)
6762 __ Dup(z4.VnB(), 0);
6763 __ Dup(z5.VnB(), 0);
6764 __ Mov(z4.VnB(), p0.Merging(), z14.VnB());
6765 __ Mov(z5.VnB(), p0.Merging(), z15.VnB());
6766
6767 // Ld2h(z16.VnH(), z17.VnH(), ...)
6768 __ Dup(z6.VnH(), 0);
6769 __ Dup(z7.VnH(), 0);
6770 __ Mov(z6.VnH(), p1.Merging(), z16.VnH());
6771 __ Mov(z7.VnH(), p1.Merging(), z17.VnH());
6772
6773 // Ld2w(z31.VnS(), z0.VnS(), ...)
6774 __ Dup(z8.VnS(), 0);
6775 __ Dup(z9.VnS(), 0);
6776 __ Mov(z8.VnS(), p2.Merging(), z31.VnS());
6777 __ Mov(z9.VnS(), p2.Merging(), z0.VnS());
6778
6779 // Ld2d(z18.VnD(), z19.VnD(), ...)
6780 __ Dup(z10.VnD(), 0);
6781 __ Dup(z11.VnD(), 0);
6782 __ Mov(z10.VnD(), p3.Merging(), z18.VnD());
6783 __ Mov(z11.VnD(), p3.Merging(), z19.VnD());
6784
6785 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
6786 __ Ld2b(z31.VnB(), z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));
6787 __ Mov(z20, z31);
6788 __ Mov(z21, z0);
6789
6790 __ Ld2h(z22.VnH(), z23.VnH(), p1.Zeroing(), SVEMemOperand(x0, 8, SVE_MUL_VL));
6791 __ Ld2w(z24.VnS(),
6792 z25.VnS(),
6793 p2.Zeroing(),
6794 SVEMemOperand(x0, -12, SVE_MUL_VL));
6795 __ Ld2d(z26.VnD(),
6796 z27.VnD(),
6797 p3.Zeroing(),
6798 SVEMemOperand(x0, 14, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006799
6800 END();
6801
6802 if (CAN_RUN()) {
6803 RUN();
6804
6805 uint8_t* expected = new uint8_t[data_size];
6806 memset(expected, 0, data_size);
6807 uint8_t* middle = &expected[data_size / 2];
6808
6809 int vl_b = vl / kBRegSizeInBytes;
6810 int vl_h = vl / kHRegSizeInBytes;
6811 int vl_s = vl / kSRegSizeInBytes;
6812 int vl_d = vl / kDRegSizeInBytes;
6813
6814 int reg_count = 2;
6815
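    // St2 interleaves its operands in memory: active element i of the first
    // register lands at element index (2 * i) and the second at (2 * i) + 1,
    // which is what the `(i * reg_count) + n` indices below express. The
    // st3/ld3 tests later use the same layout with three registers.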
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006816 // st2b { z14.b, z15.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006817 for (int i = 0; i < vl_b; i++) {
6818 uint8_t lane0 = 1 - (3 * i);
6819 uint8_t lane1 = 2 - (3 * i);
6820 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
6821 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
6822 }
6823
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006824 // st2h { z16.h, z17.h }, SVE_MUL3
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006825 int vl_h_mul3 = vl_h - (vl_h % 3);
6826 for (int i = 0; i < vl_h_mul3; i++) {
6827 int64_t offset = 8 * vl;
6828 uint16_t lane0 = -2 + (5 * i);
6829 uint16_t lane1 = -3 + (5 * i);
6830 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6831 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6832 }
6833
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006834 // st2w { z31.s, z0.s }, SVE_POW2
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006835 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
6836 for (int i = 0; i < vl_s_pow2; i++) {
6837 int64_t offset = -12 * vl;
6838 uint32_t lane0 = 3 - (7 * i);
6839 uint32_t lane1 = 4 - (7 * i);
6840 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6841 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6842 }
6843
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006844 // st2d { z18.d, z19.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006845 for (int i = 0; i < vl_d; i++) {
6846 if ((i % 5) == 0) {
6847 int64_t offset = 14 * vl;
6848 uint64_t lane0 = -7 + (3 * i);
6849 uint64_t lane1 = -8 + (3 * i);
6850 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
6851 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
6852 }
6853 }
6854
6855 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
6856
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00006857 // Check that we loaded back the expected values.
6858
6859 // st2b/ld2b
6860 ASSERT_EQUAL_SVE(z4, z20);
6861 ASSERT_EQUAL_SVE(z5, z21);
6862
6863 // st2h/ld2h
6864 ASSERT_EQUAL_SVE(z6, z22);
6865 ASSERT_EQUAL_SVE(z7, z23);
6866
6867 // st2w/ld2w
6868 ASSERT_EQUAL_SVE(z8, z24);
6869 ASSERT_EQUAL_SVE(z9, z25);
6870
6871 // st2d/ld2d
6872 ASSERT_EQUAL_SVE(z10, z26);
6873 ASSERT_EQUAL_SVE(z11, z27);
6874
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00006875 delete[] expected;
6876 }
6877 delete[] data;
6878}
6879
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006880TEST_SVE(sve_ld2_st2_scalar_plus_scalar) {
6881 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6882 START();
6883
6884 int vl = config->sve_vl_in_bytes();
6885
6886 // Allocate plenty of space to enable indexing in both directions.
6887 int data_size = vl * 128;
6888
6889 uint8_t* data = new uint8_t[data_size];
6890 memset(data, 0, data_size);
6891
 6892 // Set the base half-way through the buffer so we can use negative indices.
6893 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
6894
Jacob Bramleye483ce52019-11-05 16:52:29 +00006895 __ Index(z10.VnB(), -4, 11);
6896 __ Index(z11.VnB(), -5, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006897 __ Ptrue(p7.VnB(), SVE_MUL4);
6898 __ Mov(x1, 0);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006899 __ St2b(z10.VnB(), z11.VnB(), p7, SVEMemOperand(x0, x1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006900
Jacob Bramleye483ce52019-11-05 16:52:29 +00006901 __ Index(z12.VnH(), 6, -2);
6902 __ Index(z13.VnH(), 7, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006903 __ Ptrue(p6.VnH(), SVE_VL16);
6904 __ Rdvl(x2, 3); // Make offsets VL-dependent so we can avoid overlap.
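  // Rdvl(x2, 3) sets x2 to 3 * VL in bytes; with the `LSL 1` below, the
  // effective byte offset is 6 * VL, matching the expected values later.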
Jacob Bramleye483ce52019-11-05 16:52:29 +00006905 __ St2h(z12.VnH(), z13.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006906
Jacob Bramleye483ce52019-11-05 16:52:29 +00006907 __ Index(z14.VnS(), -7, 3);
6908 __ Index(z15.VnS(), -8, 3);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006909 // Sparse predication, including some irrelevant bits (0xe). To make the
6910 // results easy to check, activate each lane <n> where n is a multiple of 5.
6911 Initialise(&masm,
6912 p5,
6913 0xeee1000010000100,
6914 0x001eeee100001000,
6915 0x0100001eeee10000,
6916 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006917 __ Rdvl(x3, -3);
6918 __ St2w(z14.VnS(), z15.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006919
6920 // Wrap around from z31 to z0.
6921 __ Index(z31.VnD(), 32, -11);
6922 __ Index(z0.VnD(), 33, -11);
6923 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00006924 __ Rdvl(x4, 1);
6925 __ St2d(z31.VnD(), z0.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006926
Jacob Bramleye483ce52019-11-05 16:52:29 +00006927 // We can test ld2 by comparing the values loaded with the values stored.
6928 // There are two complications:
6929 // - Loads have zeroing predication, so we have to clear the inactive
6930 // elements on our reference.
6931 // - We want to test both loads and stores that span { z31, z0 }, so we have
6932 // to move some values around.
6933 //
6934 // Registers z4-z11 will hold as-stored values (with inactive elements
6935 // cleared). Registers z20-z27 will hold the values that were loaded.
6936
6937 // Ld2b(z20.VnB(), z21.VnB(), ...)
6938 __ Dup(z4.VnB(), 0);
6939 __ Dup(z5.VnB(), 0);
6940 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
6941 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
6942
6943 // Ld2h(z22.VnH(), z23.VnH(), ...)
6944 __ Dup(z6.VnH(), 0);
6945 __ Dup(z7.VnH(), 0);
6946 __ Mov(z6.VnH(), p6.Merging(), z12.VnH());
6947 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
6948
6949 // Ld2w(z24.VnS(), z25.VnS(), ...)
6950 __ Dup(z8.VnS(), 0);
6951 __ Dup(z9.VnS(), 0);
6952 __ Mov(z8.VnS(), p5.Merging(), z14.VnS());
6953 __ Mov(z9.VnS(), p5.Merging(), z15.VnS());
6954
6955 // Ld2d(z31.VnD(), z0.VnD(), ...)
6956 __ Dup(z10.VnD(), 0);
6957 __ Dup(z11.VnD(), 0);
6958 __ Mov(z10.VnD(), p4.Merging(), z31.VnD());
6959 __ Mov(z11.VnD(), p4.Merging(), z0.VnD());
6960
6961 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
6962 __ Ld2b(z31.VnB(), z0.VnB(), p7.Zeroing(), SVEMemOperand(x0, x1));
6963 __ Mov(z20, z31);
6964 __ Mov(z21, z0);
6965
6966 __ Ld2h(z22.VnH(), z23.VnH(), p6.Zeroing(), SVEMemOperand(x0, x2, LSL, 1));
6967 __ Ld2w(z24.VnS(), z25.VnS(), p5.Zeroing(), SVEMemOperand(x0, x3, LSL, 2));
6968 __ Ld2d(z26.VnD(), z27.VnD(), p4.Zeroing(), SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006969
6970 END();
6971
6972 if (CAN_RUN()) {
6973 RUN();
6974
6975 uint8_t* expected = new uint8_t[data_size];
6976 memset(expected, 0, data_size);
6977 uint8_t* middle = &expected[data_size / 2];
6978
6979 int vl_b = vl / kBRegSizeInBytes;
6980 int vl_h = vl / kHRegSizeInBytes;
6981 int vl_s = vl / kSRegSizeInBytes;
6982 int vl_d = vl / kDRegSizeInBytes;
6983
6984 int reg_count = 2;
6985
Jacob Bramleye483ce52019-11-05 16:52:29 +00006986 // st2b { z10.b, z11.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006987 int vl_b_mul4 = vl_b - (vl_b % 4);
6988 for (int i = 0; i < vl_b_mul4; i++) {
6989 uint8_t lane0 = -4 + (11 * i);
6990 uint8_t lane1 = -5 + (11 * i);
6991 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
6992 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
6993 }
6994
Jacob Bramleye483ce52019-11-05 16:52:29 +00006995 // st2h { z12.h, z13.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006996 if (vl_h >= 16) {
6997 for (int i = 0; i < 16; i++) {
6998 int64_t offset = (3 << kHRegSizeInBytesLog2) * vl;
6999 uint16_t lane0 = 6 - (2 * i);
7000 uint16_t lane1 = 7 - (2 * i);
7001 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7002 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7003 }
7004 }
7005
Jacob Bramleye483ce52019-11-05 16:52:29 +00007006 // st2w { z14.s, z15.s }, ((i % 5) == 0)
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007007 for (int i = 0; i < vl_s; i++) {
7008 if ((i % 5) == 0) {
7009 int64_t offset = -(3 << kSRegSizeInBytesLog2) * vl;
7010 uint32_t lane0 = -7 + (3 * i);
7011 uint32_t lane1 = -8 + (3 * i);
7012 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7013 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7014 }
7015 }
7016
 7017 // st2d { z31.d, z0.d }, SVE_MUL3
7018 int vl_d_mul3 = vl_d - (vl_d % 3);
7019 for (int i = 0; i < vl_d_mul3; i++) {
7020 int64_t offset = (1 << kDRegSizeInBytesLog2) * vl;
7021 uint64_t lane0 = 32 - (11 * i);
7022 uint64_t lane1 = 33 - (11 * i);
7023 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7024 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7025 }
7026
7027 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7028
Jacob Bramleye483ce52019-11-05 16:52:29 +00007029 // Check that we loaded back the expected values.
7030
7031 // st2b/ld2b
7032 ASSERT_EQUAL_SVE(z4, z20);
7033 ASSERT_EQUAL_SVE(z5, z21);
7034
7035 // st2h/ld2h
7036 ASSERT_EQUAL_SVE(z6, z22);
7037 ASSERT_EQUAL_SVE(z7, z23);
7038
7039 // st2w/ld2w
7040 ASSERT_EQUAL_SVE(z8, z24);
7041 ASSERT_EQUAL_SVE(z9, z25);
7042
7043 // st2d/ld2d
7044 ASSERT_EQUAL_SVE(z10, z26);
7045 ASSERT_EQUAL_SVE(z11, z27);
7046
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007047 delete[] expected;
7048 }
7049 delete[] data;
7050}
7051
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007052TEST_SVE(sve_ld3_st3_scalar_plus_imm) {
7053 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7054 START();
7055
7056 int vl = config->sve_vl_in_bytes();
7057
7058 // The immediate can address [-24, 21] times the VL, so allocate enough space
7059 // to exceed that in both directions.
7060 int data_size = vl * 128;
7061
7062 uint8_t* data = new uint8_t[data_size];
7063 memset(data, 0, data_size);
7064
 7065 // Set the base half-way through the buffer so we can use negative indices.
7066 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7067
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007068 // We can test ld3 by comparing the values loaded with the values stored.
7069 // There are two complications:
7070 // - Loads have zeroing predication, so we have to clear the inactive
7071 // elements on our reference.
7072 // - We want to test both loads and stores that span { z31, z0 }, so we have
7073 // to move some values around.
7074 //
7075 // Registers z4-z15 will hold as-stored values (with inactive elements
7076 // cleared). Registers z16-z27 will hold the values that were loaded.
7077
7078 __ Index(z10.VnB(), 1, -3);
7079 __ Index(z11.VnB(), 2, -3);
7080 __ Index(z12.VnB(), 3, -3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007081 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007082 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p0, SVEMemOperand(x0));
7083 // Save the stored values for ld3 tests.
7084 __ Dup(z4.VnB(), 0);
7085 __ Dup(z5.VnB(), 0);
7086 __ Dup(z6.VnB(), 0);
7087 __ Mov(z4.VnB(), p0.Merging(), z10.VnB());
7088 __ Mov(z5.VnB(), p0.Merging(), z11.VnB());
7089 __ Mov(z6.VnB(), p0.Merging(), z12.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007090
7091 // Wrap around from z31 to z0.
7092 __ Index(z31.VnH(), -2, 5);
7093 __ Index(z0.VnH(), -3, 5);
7094 __ Index(z1.VnH(), -4, 5);
7095 __ Ptrue(p1.VnH(), SVE_MUL3);
7096 __ St3h(z31.VnH(), z0.VnH(), z1.VnH(), p1, SVEMemOperand(x0, 9, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007097 // Save the stored values for ld3 tests.
7098 __ Dup(z7.VnH(), 0);
7099 __ Dup(z8.VnH(), 0);
7100 __ Dup(z9.VnH(), 0);
7101 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
7102 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
7103 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007104
7105 __ Index(z30.VnS(), 3, -7);
7106 __ Index(z31.VnS(), 4, -7);
7107 __ Index(z0.VnS(), 5, -7);
7108 __ Ptrue(p2.VnS(), SVE_POW2);
7109 __ St3w(z30.VnS(),
7110 z31.VnS(),
7111 z0.VnS(),
7112 p2,
7113 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007114 // Save the stored values for ld3 tests.
7115 __ Dup(z10.VnS(), 0);
7116 __ Dup(z11.VnS(), 0);
7117 __ Dup(z12.VnS(), 0);
7118 __ Mov(z10.VnS(), p2.Merging(), z30.VnS());
7119 __ Mov(z11.VnS(), p2.Merging(), z31.VnS());
7120 __ Mov(z12.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007121
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007122 __ Index(z0.VnD(), -7, 3);
7123 __ Index(z1.VnD(), -8, 3);
7124 __ Index(z2.VnD(), -9, 3);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007125 // Sparse predication, including some irrelevant bits (0xee). To make the
7126 // results easy to check, activate each lane <n> where n is a multiple of 5.
7127 Initialise(&masm,
7128 p3,
7129 0xeee10000000001ee,
7130 0xeeeeeee100000000,
7131 0x01eeeeeeeee10000,
7132 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007133 __ St3d(z0.VnD(), z1.VnD(), z2.VnD(), p3, SVEMemOperand(x0, 15, SVE_MUL_VL));
7134 // Save the stored values for ld3 tests.
7135 __ Dup(z13.VnD(), 0);
7136 __ Dup(z14.VnD(), 0);
7137 __ Dup(z15.VnD(), 0);
7138 __ Mov(z13.VnD(), p3.Merging(), z0.VnD());
7139 __ Mov(z14.VnD(), p3.Merging(), z1.VnD());
7140 __ Mov(z15.VnD(), p3.Merging(), z2.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007141
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007142 // Corresponding loads.
7143 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7144 __ Ld3b(z31.VnB(), z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(x0));
7145 __ Mov(z16, z31);
7146 __ Mov(z17, z0);
7147 __ Mov(z18, z1);
7148 __ Ld3h(z30.VnH(),
7149 z31.VnH(),
7150 z0.VnH(),
7151 p1.Zeroing(),
7152 SVEMemOperand(x0, 9, SVE_MUL_VL));
7153 __ Mov(z19, z30);
7154 __ Mov(z20, z31);
7155 __ Mov(z21, z0);
7156 __ Ld3w(z22.VnS(),
7157 z23.VnS(),
7158 z24.VnS(),
7159 p2.Zeroing(),
7160 SVEMemOperand(x0, -12, SVE_MUL_VL));
7161 __ Ld3d(z25.VnD(),
7162 z26.VnD(),
7163 z27.VnD(),
7164 p3.Zeroing(),
7165 SVEMemOperand(x0, 15, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007166
7167 END();
7168
7169 if (CAN_RUN()) {
7170 RUN();
7171
7172 uint8_t* expected = new uint8_t[data_size];
7173 memset(expected, 0, data_size);
7174 uint8_t* middle = &expected[data_size / 2];
7175
7176 int vl_b = vl / kBRegSizeInBytes;
7177 int vl_h = vl / kHRegSizeInBytes;
7178 int vl_s = vl / kSRegSizeInBytes;
7179 int vl_d = vl / kDRegSizeInBytes;
7180
7181 int reg_count = 3;
7182
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007183 // st3b { z10.b, z11.b, z12.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007184 for (int i = 0; i < vl_b; i++) {
7185 uint8_t lane0 = 1 - (3 * i);
7186 uint8_t lane1 = 2 - (3 * i);
7187 uint8_t lane2 = 3 - (3 * i);
7188 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7189 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7190 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
7191 }
7192
7193 // st3h { z31.h, z0.h, z1.h }, SVE_MUL3
7194 int vl_h_mul3 = vl_h - (vl_h % 3);
7195 for (int i = 0; i < vl_h_mul3; i++) {
7196 int64_t offset = 9 * vl;
7197 uint16_t lane0 = -2 + (5 * i);
7198 uint16_t lane1 = -3 + (5 * i);
7199 uint16_t lane2 = -4 + (5 * i);
7200 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7201 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7202 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7203 }
7204
7205 // st3w { z30.s, z31.s, z0.s }, SVE_POW2
7206 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7207 for (int i = 0; i < vl_s_pow2; i++) {
7208 int64_t offset = -12 * vl;
7209 uint32_t lane0 = 3 - (7 * i);
7210 uint32_t lane1 = 4 - (7 * i);
7211 uint32_t lane2 = 5 - (7 * i);
7212 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7213 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7214 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7215 }
7216
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007217 // st3d { z0.d, z1.d, z2.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007218 for (int i = 0; i < vl_d; i++) {
7219 if ((i % 5) == 0) {
7220 int64_t offset = 15 * vl;
7221 uint64_t lane0 = -7 + (3 * i);
7222 uint64_t lane1 = -8 + (3 * i);
7223 uint64_t lane2 = -9 + (3 * i);
7224 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7225 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7226 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7227 }
7228 }
7229
7230 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7231
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007232 // Check that we loaded back the expected values.
7233
7234 // st3b/ld3b
7235 ASSERT_EQUAL_SVE(z4, z16);
7236 ASSERT_EQUAL_SVE(z5, z17);
7237 ASSERT_EQUAL_SVE(z6, z18);
7238
7239 // st3h/ld3h
7240 ASSERT_EQUAL_SVE(z7, z19);
7241 ASSERT_EQUAL_SVE(z8, z20);
7242 ASSERT_EQUAL_SVE(z9, z21);
7243
7244 // st3w/ld3w
7245 ASSERT_EQUAL_SVE(z10, z22);
7246 ASSERT_EQUAL_SVE(z11, z23);
7247 ASSERT_EQUAL_SVE(z12, z24);
7248
7249 // st3d/ld3d
7250 ASSERT_EQUAL_SVE(z13, z25);
7251 ASSERT_EQUAL_SVE(z14, z26);
7252 ASSERT_EQUAL_SVE(z15, z27);
7253
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007254 delete[] expected;
7255 }
7256 delete[] data;
7257}
7258
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007259TEST_SVE(sve_ld3_st3_scalar_plus_scalar) {
7260 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7261 START();
7262
7263 int vl = config->sve_vl_in_bytes();
7264
7265 // Allocate plenty of space to enable indexing in both directions.
7266 int data_size = vl * 128;
7267
7268 uint8_t* data = new uint8_t[data_size];
7269 memset(data, 0, data_size);
7270
 7271 // Set the base half-way through the buffer so we can use negative indices.
7272 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7273
Jacob Bramleye483ce52019-11-05 16:52:29 +00007274 // We can test ld3 by comparing the values loaded with the values stored.
7275 // There are two complications:
7276 // - Loads have zeroing predication, so we have to clear the inactive
7277 // elements on our reference.
7278 // - We want to test both loads and stores that span { z31, z0 }, so we have
7279 // to move some values around.
7280 //
7281 // Registers z4-z15 will hold as-stored values (with inactive elements
7282 // cleared). Registers z16-z27 will hold the values that were loaded.
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007283
Jacob Bramleye483ce52019-11-05 16:52:29 +00007284 __ Index(z10.VnB(), -4, 11);
7285 __ Index(z11.VnB(), -5, 11);
7286 __ Index(z12.VnB(), -6, 11);
7287 __ Ptrue(p7.VnB(), SVE_MUL4);
7288 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
7289 __ St3b(z10.VnB(), z11.VnB(), z12.VnB(), p7, SVEMemOperand(x0, x1, LSL, 0));
7290 // Save the stored values for ld3 tests.
7291 __ Dup(z4.VnB(), 0);
7292 __ Dup(z5.VnB(), 0);
7293 __ Dup(z6.VnB(), 0);
7294 __ Mov(z4.VnB(), p7.Merging(), z10.VnB());
7295 __ Mov(z5.VnB(), p7.Merging(), z11.VnB());
7296 __ Mov(z6.VnB(), p7.Merging(), z12.VnB());
7297
7298 __ Index(z13.VnH(), 6, -2);
7299 __ Index(z14.VnH(), 7, -2);
7300 __ Index(z15.VnH(), 8, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007301 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007302 __ Rdvl(x2, 5); // (5 * vl) << 1 = 10 * vl
7303 __ St3h(z13.VnH(), z14.VnH(), z15.VnH(), p6, SVEMemOperand(x0, x2, LSL, 1));
7304 // Save the stored values for ld3 tests.
7305 __ Dup(z7.VnH(), 0);
7306 __ Dup(z8.VnH(), 0);
7307 __ Dup(z9.VnH(), 0);
7308 __ Mov(z7.VnH(), p6.Merging(), z13.VnH());
7309 __ Mov(z8.VnH(), p6.Merging(), z14.VnH());
7310 __ Mov(z9.VnH(), p6.Merging(), z15.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007311
7312 // Wrap around from z31 to z0.
7313 __ Index(z30.VnS(), -7, 3);
7314 __ Index(z31.VnS(), -8, 3);
7315 __ Index(z0.VnS(), -9, 3);
7316 // Sparse predication, including some irrelevant bits (0xe). To make the
7317 // results easy to check, activate each lane <n> where n is a multiple of 5.
7318 Initialise(&masm,
7319 p5,
7320 0xeee1000010000100,
7321 0x001eeee100001000,
7322 0x0100001eeee10000,
7323 0x10000100001eeee1);
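  // Predicate registers hold one bit per byte of the vector, so each .S element
  // is governed by the lowest bit of a four-bit group; the other three bits are
  // the "irrelevant" ones mentioned above. A '1' nibble every 20 bits therefore
  // activates lanes 0, 5, 10, ...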
Jacob Bramleye483ce52019-11-05 16:52:29 +00007324 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
7325 __ St3w(z30.VnS(), z31.VnS(), z0.VnS(), p5, SVEMemOperand(x0, x3, LSL, 2));
7326 // Save the stored values for ld3 tests.
7327 __ Dup(z10.VnS(), 0);
7328 __ Dup(z11.VnS(), 0);
7329 __ Dup(z12.VnS(), 0);
7330 __ Mov(z10.VnS(), p5.Merging(), z30.VnS());
7331 __ Mov(z11.VnS(), p5.Merging(), z31.VnS());
7332 __ Mov(z12.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007333
7334 __ Index(z31.VnD(), 32, -11);
7335 __ Index(z0.VnD(), 33, -11);
7336 __ Index(z1.VnD(), 34, -11);
7337 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007338 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
7339 __ St3d(z31.VnD(), z0.VnD(), z1.VnD(), p4, SVEMemOperand(x0, x4, LSL, 3));
7340 // Save the stored values for ld3 tests.
7341 __ Dup(z13.VnD(), 0);
7342 __ Dup(z14.VnD(), 0);
7343 __ Dup(z15.VnD(), 0);
7344 __ Mov(z13.VnD(), p4.Merging(), z31.VnD());
7345 __ Mov(z14.VnD(), p4.Merging(), z0.VnD());
7346 __ Mov(z15.VnD(), p4.Merging(), z1.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007347
Jacob Bramleye483ce52019-11-05 16:52:29 +00007348 // Corresponding loads.
7349 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7350 __ Ld3b(z31.VnB(),
7351 z0.VnB(),
7352 z1.VnB(),
7353 p7.Zeroing(),
7354 SVEMemOperand(x0, x1, LSL, 0));
7355 __ Mov(z16, z31);
7356 __ Mov(z17, z0);
7357 __ Mov(z18, z1);
7358 __ Ld3h(z30.VnH(),
7359 z31.VnH(),
7360 z0.VnH(),
7361 p6.Zeroing(),
7362 SVEMemOperand(x0, x2, LSL, 1));
7363 __ Mov(z19, z30);
7364 __ Mov(z20, z31);
7365 __ Mov(z21, z0);
7366 __ Ld3w(z22.VnS(),
7367 z23.VnS(),
7368 z24.VnS(),
7369 p5.Zeroing(),
7370 SVEMemOperand(x0, x3, LSL, 2));
7371 __ Ld3d(z25.VnD(),
7372 z26.VnD(),
7373 z27.VnD(),
7374 p4.Zeroing(),
7375 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007376
7377 END();
7378
7379 if (CAN_RUN()) {
7380 RUN();
7381
7382 uint8_t* expected = new uint8_t[data_size];
7383 memset(expected, 0, data_size);
7384 uint8_t* middle = &expected[data_size / 2];
7385
7386 int vl_b = vl / kBRegSizeInBytes;
7387 int vl_h = vl / kHRegSizeInBytes;
7388 int vl_s = vl / kSRegSizeInBytes;
7389 int vl_d = vl / kDRegSizeInBytes;
7390
7391 int reg_count = 3;
7392
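  // Build the reference image one lane at a time. MemoryWrite (defined earlier
  // in this file) is assumed to store `value` at
  // `middle + offset + (index * sizeof(value))`, which matches the interleaved
  // st3 layout: element i of register r lands at index (i * reg_count) + r.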
Jacob Bramleye483ce52019-11-05 16:52:29 +00007393 // st3b { z10.b, z11.b, z12.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007394 int vl_b_mul4 = vl_b - (vl_b % 4);
7395 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007396 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007397 uint8_t lane0 = -4 + (11 * i);
7398 uint8_t lane1 = -5 + (11 * i);
7399 uint8_t lane2 = -6 + (11 * i);
7400 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7401 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7402 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7403 }
7404
Jacob Bramleye483ce52019-11-05 16:52:29 +00007405 // st3h { z13.h, z14.h, z15.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007406 if (vl_h >= 16) {
7407 for (int i = 0; i < 16; i++) {
7408 int64_t offset = (5 << kHRegSizeInBytesLog2) * vl;
7409 uint16_t lane0 = 6 - (2 * i);
7410 uint16_t lane1 = 7 - (2 * i);
7411 uint16_t lane2 = 8 - (2 * i);
7412 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7413 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7414 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7415 }
7416 }
7417
7418 // st3w { z30.s, z31.s, z0.s }, ((i % 5) == 0)
7419 for (int i = 0; i < vl_s; i++) {
7420 if ((i % 5) == 0) {
7421 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
7422 uint32_t lane0 = -7 + (3 * i);
7423 uint32_t lane1 = -8 + (3 * i);
7424 uint32_t lane2 = -9 + (3 * i);
7425 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7426 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7427 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7428 }
7429 }
7430
7431 // st3d { z31.d, z0.d, z1.d }, SVE_MUL3
7432 int vl_d_mul3 = vl_d - (vl_d % 3);
7433 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007434 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007435 uint64_t lane0 = 32 - (11 * i);
7436 uint64_t lane1 = 33 - (11 * i);
7437 uint64_t lane2 = 34 - (11 * i);
7438 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7439 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7440 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7441 }
7442
7443 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7444
Jacob Bramleye483ce52019-11-05 16:52:29 +00007445 // Check that we loaded back the expected values.
7446
7447 // st3b/ld3b
7448 ASSERT_EQUAL_SVE(z4, z16);
7449 ASSERT_EQUAL_SVE(z5, z17);
7450 ASSERT_EQUAL_SVE(z6, z18);
7451
7452 // st3h/ld3h
7453 ASSERT_EQUAL_SVE(z7, z19);
7454 ASSERT_EQUAL_SVE(z8, z20);
7455 ASSERT_EQUAL_SVE(z9, z21);
7456
7457 // st3w/ld3w
7458 ASSERT_EQUAL_SVE(z10, z22);
7459 ASSERT_EQUAL_SVE(z11, z23);
7460 ASSERT_EQUAL_SVE(z12, z24);
7461
7462 // st3d/ld3d
7463 ASSERT_EQUAL_SVE(z13, z25);
7464 ASSERT_EQUAL_SVE(z14, z26);
7465 ASSERT_EQUAL_SVE(z15, z27);
7466
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007467 delete[] expected;
7468 }
7469 delete[] data;
7470}
7471
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007472TEST_SVE(sve_ld4_st4_scalar_plus_imm) {
7473 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7474 START();
7475
7476 int vl = config->sve_vl_in_bytes();
7477
7478 // The immediate can address [-32, 28] times the VL, so allocate enough space
7479 // to exceed that in both directions.
7480 int data_size = vl * 128;
7481
7482 uint8_t* data = new uint8_t[data_size];
7483 memset(data, 0, data_size);
7484
7485 // Set the base half-way through the buffer so we can use negative indices.
7486 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7487
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007488 // We can test ld4 by comparing the values loaded with the values stored.
7489 // There are two complications:
7490 // - Loads have zeroing predication, so we have to clear the inactive
7491 // elements on our reference.
7492 // - We want to test both loads and stores that span { z31, z0 }, so we have
7493 // to move some values around.
7494 //
7495 // Registers z3-z18 will hold as-stored values (with inactive elements
7496 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
7497 // loaded.
7498
7499 __ Index(z10.VnB(), 1, -7);
7500 __ Index(z11.VnB(), 2, -7);
7501 __ Index(z12.VnB(), 3, -7);
7502 __ Index(z13.VnB(), 4, -7);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007503 __ Ptrue(p0.VnB());
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007504 __ St4b(z10.VnB(), z11.VnB(), z12.VnB(), z13.VnB(), p0, SVEMemOperand(x0));
7505 // Save the stored values for ld4 tests.
7506 __ Dup(z3.VnB(), 0);
7507 __ Dup(z4.VnB(), 0);
7508 __ Dup(z5.VnB(), 0);
7509 __ Dup(z6.VnB(), 0);
7510 __ Mov(z3.VnB(), p0.Merging(), z10.VnB());
7511 __ Mov(z4.VnB(), p0.Merging(), z11.VnB());
7512 __ Mov(z5.VnB(), p0.Merging(), z12.VnB());
7513 __ Mov(z6.VnB(), p0.Merging(), z13.VnB());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007514
7515 // Wrap around from z31 to z0.
7516 __ Index(z31.VnH(), -2, 5);
7517 __ Index(z0.VnH(), -3, 5);
7518 __ Index(z1.VnH(), -4, 5);
7519 __ Index(z2.VnH(), -5, 5);
7520 __ Ptrue(p1.VnH(), SVE_MUL3);
7521 __ St4h(z31.VnH(),
7522 z0.VnH(),
7523 z1.VnH(),
7524 z2.VnH(),
7525 p1,
7526 SVEMemOperand(x0, 4, SVE_MUL_VL));
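  // With SVE_MUL_VL the effective address is x0 + (imm * VL). For the
  // four-register forms the immediate must be a multiple of four in [-32, 28],
  // so the offsets 4, -12 and 16 used in this test are all directly encodable.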
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007527 // Save the stored values for ld4 tests.
7528 __ Dup(z7.VnH(), 0);
7529 __ Dup(z8.VnH(), 0);
7530 __ Dup(z9.VnH(), 0);
7531 __ Dup(z10.VnH(), 0);
7532 __ Mov(z7.VnH(), p1.Merging(), z31.VnH());
7533 __ Mov(z8.VnH(), p1.Merging(), z0.VnH());
7534 __ Mov(z9.VnH(), p1.Merging(), z1.VnH());
7535 __ Mov(z10.VnH(), p1.Merging(), z2.VnH());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007536
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007537 // Wrap around from z31 to z0.
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007538 __ Index(z29.VnS(), 2, -7);
7539 __ Index(z30.VnS(), 3, -7);
7540 __ Index(z31.VnS(), 4, -7);
7541 __ Index(z0.VnS(), 5, -7);
7542 __ Ptrue(p2.VnS(), SVE_POW2);
7543 __ St4w(z29.VnS(),
7544 z30.VnS(),
7545 z31.VnS(),
7546 z0.VnS(),
7547 p2,
7548 SVEMemOperand(x0, -12, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007549 // Save the stored values for ld4 tests.
7550 __ Dup(z11.VnS(), 0);
7551 __ Dup(z12.VnS(), 0);
7552 __ Dup(z13.VnS(), 0);
7553 __ Dup(z14.VnS(), 0);
7554 __ Mov(z11.VnS(), p2.Merging(), z29.VnS());
7555 __ Mov(z12.VnS(), p2.Merging(), z30.VnS());
7556 __ Mov(z13.VnS(), p2.Merging(), z31.VnS());
7557 __ Mov(z14.VnS(), p2.Merging(), z0.VnS());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007558
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007559 __ Index(z20.VnD(), -7, 8);
7560 __ Index(z21.VnD(), -8, 8);
7561 __ Index(z22.VnD(), -9, 8);
7562 __ Index(z23.VnD(), -10, 8);
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007563 // Sparse predication, including some irrelevant bits (0xee). To make the
7564 // results easy to check, activate each lane <n> where n is a multiple of 5.
7565 Initialise(&masm,
7566 p3,
7567 0xeee10000000001ee,
7568 0xeeeeeee100000000,
7569 0x01eeeeeeeee10000,
7570 0x000001eeeeeeeee1);
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007571 __ St4d(z20.VnD(),
7572 z21.VnD(),
7573 z22.VnD(),
7574 z23.VnD(),
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007575 p3,
7576 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007577 // Save the stored values for ld4 tests.
7578 __ Dup(z15.VnD(), 0);
7579 __ Dup(z16.VnD(), 0);
7580 __ Dup(z17.VnD(), 0);
7581 __ Dup(z18.VnD(), 0);
7582 __ Mov(z15.VnD(), p3.Merging(), z20.VnD());
7583 __ Mov(z16.VnD(), p3.Merging(), z21.VnD());
7584 __ Mov(z17.VnD(), p3.Merging(), z22.VnD());
7585 __ Mov(z18.VnD(), p3.Merging(), z23.VnD());
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007586
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007587 // Corresponding loads.
7588 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7589 __ Ld4b(z31.VnB(),
7590 z0.VnB(),
7591 z1.VnB(),
7592 z2.VnB(),
7593 p0.Zeroing(),
7594 SVEMemOperand(x0));
7595 __ Mov(z19, z31);
7596 __ Mov(z20, z0);
7597 __ Mov(z21, z1);
7598 __ Mov(z22, z2);
7599 __ Ld4h(z23.VnH(),
7600 z24.VnH(),
7601 z25.VnH(),
7602 z26.VnH(),
7603 p1.Zeroing(),
7604 SVEMemOperand(x0, 4, SVE_MUL_VL));
7605 __ Ld4w(z27.VnS(),
7606 z28.VnS(),
7607 z29.VnS(),
7608 z30.VnS(),
7609 p2.Zeroing(),
7610 SVEMemOperand(x0, -12, SVE_MUL_VL));
7611 // Wrap around from z31 to z0.
7612 __ Ld4d(z31.VnD(),
7613 z0.VnD(),
7614 z1.VnD(),
7615 z2.VnD(),
7616 p3.Zeroing(),
7617 SVEMemOperand(x0, 16, SVE_MUL_VL));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007618
7619 END();
7620
7621 if (CAN_RUN()) {
7622 RUN();
7623
7624 uint8_t* expected = new uint8_t[data_size];
7625 memset(expected, 0, data_size);
7626 uint8_t* middle = &expected[data_size / 2];
7627
7628 int vl_b = vl / kBRegSizeInBytes;
7629 int vl_h = vl / kHRegSizeInBytes;
7630 int vl_s = vl / kSRegSizeInBytes;
7631 int vl_d = vl / kDRegSizeInBytes;
7632
7633 int reg_count = 4;
7634
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007635 // st4b { z10.b, z11.b, z12.b, z13.b }, SVE_ALL
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007636 for (int i = 0; i < vl_b; i++) {
7637 uint8_t lane0 = 1 - (7 * i);
7638 uint8_t lane1 = 2 - (7 * i);
7639 uint8_t lane2 = 3 - (7 * i);
7640 uint8_t lane3 = 4 - (7 * i);
7641 MemoryWrite(middle, 0, (i * reg_count) + 0, lane0);
7642 MemoryWrite(middle, 0, (i * reg_count) + 1, lane1);
7643 MemoryWrite(middle, 0, (i * reg_count) + 2, lane2);
7644 MemoryWrite(middle, 0, (i * reg_count) + 3, lane3);
7645 }
7646
7647 // st4h { z31.h, z0.h, z1.h, z2.h }, SVE_MUL3
7648 int vl_h_mul3 = vl_h - (vl_h % 3);
7649 for (int i = 0; i < vl_h_mul3; i++) {
7650 int64_t offset = 4 * vl;
7651 uint16_t lane0 = -2 + (5 * i);
7652 uint16_t lane1 = -3 + (5 * i);
7653 uint16_t lane2 = -4 + (5 * i);
7654 uint16_t lane3 = -5 + (5 * i);
7655 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7656 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7657 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7658 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7659 }
7660
7661 // st4w { z29.s, z30.s, z31.s, z0.s }, SVE_POW2
7662 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
7663 for (int i = 0; i < vl_s_pow2; i++) {
7664 int64_t offset = -12 * vl;
7665 uint32_t lane0 = 2 - (7 * i);
7666 uint32_t lane1 = 3 - (7 * i);
7667 uint32_t lane2 = 4 - (7 * i);
7668 uint32_t lane3 = 5 - (7 * i);
7669 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7670 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7671 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7672 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7673 }
7674
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007675 // st4d { z20.d, z21.d, z22.d, z23.d }, ((i % 5) == 0)
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007676 for (int i = 0; i < vl_d; i++) {
7677 if ((i % 5) == 0) {
7678 int64_t offset = 16 * vl;
7679 uint64_t lane0 = -7 + (8 * i);
7680 uint64_t lane1 = -8 + (8 * i);
7681 uint64_t lane2 = -9 + (8 * i);
7682 uint64_t lane3 = -10 + (8 * i);
7683 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7684 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7685 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7686 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7687 }
7688 }
7689
7690 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7691
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00007692 // Check that we loaded back the expected values.
7693
7694 // st4b/ld4b
7695 ASSERT_EQUAL_SVE(z3, z19);
7696 ASSERT_EQUAL_SVE(z4, z20);
7697 ASSERT_EQUAL_SVE(z5, z21);
7698 ASSERT_EQUAL_SVE(z6, z22);
7699
7700 // st4h/ld4h
7701 ASSERT_EQUAL_SVE(z7, z23);
7702 ASSERT_EQUAL_SVE(z8, z24);
7703 ASSERT_EQUAL_SVE(z9, z25);
7704 ASSERT_EQUAL_SVE(z10, z26);
7705
7706 // st4w/ld4w
7707 ASSERT_EQUAL_SVE(z11, z27);
7708 ASSERT_EQUAL_SVE(z12, z28);
7709 ASSERT_EQUAL_SVE(z13, z29);
7710 ASSERT_EQUAL_SVE(z14, z30);
7711
7712 // st4d/ld4d
7713 ASSERT_EQUAL_SVE(z15, z31);
7714 ASSERT_EQUAL_SVE(z16, z0);
7715 ASSERT_EQUAL_SVE(z17, z1);
7716 ASSERT_EQUAL_SVE(z18, z2);
7717
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00007718 delete[] expected;
7719 }
7720 delete[] data;
7721}
7722
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007723TEST_SVE(sve_ld4_st4_scalar_plus_scalar) {
7724 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7725 START();
7726
7727 int vl = config->sve_vl_in_bytes();
7728
7729 // Allocate plenty of space to enable indexing in both directions.
7730 int data_size = vl * 128;
7731
7732 uint8_t* data = new uint8_t[data_size];
7733 memset(data, 0, data_size);
7734
7735 // Set the base half-way through the buffer so we can use negative indices.
7736 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
7737
Jacob Bramleye483ce52019-11-05 16:52:29 +00007738 // We can test ld4 by comparing the values loaded with the values stored.
7739 // There are two complications:
7740 // - Loads have zeroing predication, so we have to clear the inactive
7741 // elements on our reference.
7742 // - We want to test both loads and stores that span { z31, z0 }, so we have
7743 // to move some values around.
7744 //
7745 // Registers z3-z18 will hold as-stored values (with inactive elements
7746 // cleared). Registers z19-z31 and z0-z2 will hold the values that were
7747 // loaded.
7748
7749 __ Index(z19.VnB(), -4, 11);
7750 __ Index(z20.VnB(), -5, 11);
7751 __ Index(z21.VnB(), -6, 11);
7752 __ Index(z22.VnB(), -7, 11);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007753 __ Ptrue(p7.VnB(), SVE_MUL4);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007754 __ Rdvl(x1, -1); // Make offsets VL-dependent so we can avoid overlap.
7755 __ St4b(z19.VnB(),
7756 z20.VnB(),
7757 z21.VnB(),
7758 z22.VnB(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007759 p7,
7760 SVEMemOperand(x0, x1, LSL, 0));
Jacob Bramleye483ce52019-11-05 16:52:29 +00007761 // Save the stored values for ld4 tests.
7762 __ Dup(z3.VnB(), 0);
7763 __ Dup(z4.VnB(), 0);
7764 __ Dup(z5.VnB(), 0);
7765 __ Dup(z6.VnB(), 0);
7766 __ Mov(z3.VnB(), p7.Merging(), z19.VnB());
7767 __ Mov(z4.VnB(), p7.Merging(), z20.VnB());
7768 __ Mov(z5.VnB(), p7.Merging(), z21.VnB());
7769 __ Mov(z6.VnB(), p7.Merging(), z22.VnB());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007770
Jacob Bramleye483ce52019-11-05 16:52:29 +00007771 __ Index(z23.VnH(), 6, -2);
7772 __ Index(z24.VnH(), 7, -2);
7773 __ Index(z25.VnH(), 8, -2);
7774 __ Index(z26.VnH(), 9, -2);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007775 __ Ptrue(p6.VnH(), SVE_VL16);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007776 __ Rdvl(x2, 7); // (7 * vl) << 1 = 14 * vl
7777 __ St4h(z23.VnH(),
7778 z24.VnH(),
7779 z25.VnH(),
7780 z26.VnH(),
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007781 p6,
7782 SVEMemOperand(x0, x2, LSL, 1));
Jacob Bramleye483ce52019-11-05 16:52:29 +00007783 // Save the stored values for ld4 tests.
7784 __ Dup(z7.VnH(), 0);
7785 __ Dup(z8.VnH(), 0);
7786 __ Dup(z9.VnH(), 0);
7787 __ Dup(z10.VnH(), 0);
7788 __ Mov(z7.VnH(), p6.Merging(), z23.VnH());
7789 __ Mov(z8.VnH(), p6.Merging(), z24.VnH());
7790 __ Mov(z9.VnH(), p6.Merging(), z25.VnH());
7791 __ Mov(z10.VnH(), p6.Merging(), z26.VnH());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007792
7793 // Wrap around from z31 to z0.
7794 __ Index(z29.VnS(), -6, 7);
7795 __ Index(z30.VnS(), -7, 7);
7796 __ Index(z31.VnS(), -8, 7);
7797 __ Index(z0.VnS(), -9, 7);
7798 // Sparse predication, including some irrelevant bits (0xe). To make the
7799 // results easy to check, activate each lane <n> where n is a multiple of 5.
7800 Initialise(&masm,
7801 p5,
7802 0xeee1000010000100,
7803 0x001eeee100001000,
7804 0x0100001eeee10000,
7805 0x10000100001eeee1);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007806 __ Rdvl(x3, -5); // -(5 * vl) << 2 = -20 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007807 __ St4w(z29.VnS(),
7808 z30.VnS(),
7809 z31.VnS(),
7810 z0.VnS(),
7811 p5,
Jacob Bramleye483ce52019-11-05 16:52:29 +00007812 SVEMemOperand(x0, x3, LSL, 2));
7813 // Save the stored values for ld4 tests.
7814 __ Dup(z11.VnS(), 0);
7815 __ Dup(z12.VnS(), 0);
7816 __ Dup(z13.VnS(), 0);
7817 __ Dup(z14.VnS(), 0);
7818 __ Mov(z11.VnS(), p5.Merging(), z29.VnS());
7819 __ Mov(z12.VnS(), p5.Merging(), z30.VnS());
7820 __ Mov(z13.VnS(), p5.Merging(), z31.VnS());
7821 __ Mov(z14.VnS(), p5.Merging(), z0.VnS());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007822
7823 __ Index(z31.VnD(), 32, -11);
7824 __ Index(z0.VnD(), 33, -11);
7825 __ Index(z1.VnD(), 34, -11);
7826 __ Index(z2.VnD(), 35, -11);
7827 __ Ptrue(p4.VnD(), SVE_MUL3);
Jacob Bramleye483ce52019-11-05 16:52:29 +00007828 __ Rdvl(x4, -1); // -(1 * vl) << 3 = -8 * vl
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007829 __ St4d(z31.VnD(),
7830 z0.VnD(),
7831 z1.VnD(),
7832 z2.VnD(),
7833 p4,
Jacob Bramleye483ce52019-11-05 16:52:29 +00007834 SVEMemOperand(x0, x4, LSL, 3));
7835 // Save the stored values for ld4 tests.
7836 __ Dup(z15.VnD(), 0);
7837 __ Dup(z16.VnD(), 0);
7838 __ Dup(z17.VnD(), 0);
7839 __ Dup(z18.VnD(), 0);
7840 __ Mov(z15.VnD(), p4.Merging(), z31.VnD());
7841 __ Mov(z16.VnD(), p4.Merging(), z0.VnD());
7842 __ Mov(z17.VnD(), p4.Merging(), z1.VnD());
7843 __ Mov(z18.VnD(), p4.Merging(), z2.VnD());
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007844
Jacob Bramleye483ce52019-11-05 16:52:29 +00007845 // Corresponding loads.
7846 // Wrap around from z31 to z0, moving the results elsewhere to avoid overlap.
7847 __ Ld4b(z31.VnB(),
7848 z0.VnB(),
7849 z1.VnB(),
7850 z2.VnB(),
7851 p7.Zeroing(),
7852 SVEMemOperand(x0, x1, LSL, 0));
7853 __ Mov(z19, z31);
7854 __ Mov(z20, z0);
7855 __ Mov(z21, z1);
7856 __ Mov(z22, z2);
7857 __ Ld4h(z23.VnH(),
7858 z24.VnH(),
7859 z25.VnH(),
7860 z26.VnH(),
7861 p6.Zeroing(),
7862 SVEMemOperand(x0, x2, LSL, 1));
7863 __ Ld4w(z27.VnS(),
7864 z28.VnS(),
7865 z29.VnS(),
7866 z30.VnS(),
7867 p5.Zeroing(),
7868 SVEMemOperand(x0, x3, LSL, 2));
7869 // Wrap around from z31 to z0.
7870 __ Ld4d(z31.VnD(),
7871 z0.VnD(),
7872 z1.VnD(),
7873 z2.VnD(),
7874 p4.Zeroing(),
7875 SVEMemOperand(x0, x4, LSL, 3));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007876
7877 END();
7878
7879 if (CAN_RUN()) {
7880 RUN();
7881
7882 uint8_t* expected = new uint8_t[data_size];
7883 memset(expected, 0, data_size);
7884 uint8_t* middle = &expected[data_size / 2];
7885
7886 int vl_b = vl / kBRegSizeInBytes;
7887 int vl_h = vl / kHRegSizeInBytes;
7888 int vl_s = vl / kSRegSizeInBytes;
7889 int vl_d = vl / kDRegSizeInBytes;
7890
7891 int reg_count = 4;
7892
Jacob Bramleye483ce52019-11-05 16:52:29 +00007893 // st4b { z19.b, z20.b, z21.b, z22.b }, SVE_MUL4
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007894 int vl_b_mul4 = vl_b - (vl_b % 4);
7895 for (int i = 0; i < vl_b_mul4; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007896 int64_t offset = -(1 << kBRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007897 uint8_t lane0 = -4 + (11 * i);
7898 uint8_t lane1 = -5 + (11 * i);
7899 uint8_t lane2 = -6 + (11 * i);
7900 uint8_t lane3 = -7 + (11 * i);
7901 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7902 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7903 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7904 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7905 }
7906
Jacob Bramleye483ce52019-11-05 16:52:29 +00007907 // st4h { z23.h, z24.h, z25.h, z26.h }, SVE_VL16
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007908 if (vl_h >= 16) {
7909 for (int i = 0; i < 16; i++) {
7910 int64_t offset = (7 << kHRegSizeInBytesLog2) * vl;
7911 uint16_t lane0 = 6 - (2 * i);
7912 uint16_t lane1 = 7 - (2 * i);
7913 uint16_t lane2 = 8 - (2 * i);
7914 uint16_t lane3 = 9 - (2 * i);
7915 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7916 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7917 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7918 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7919 }
7920 }
7921
7922 // st4w { z29.s, z30.s, z31.s, z0.s }, ((i % 5) == 0)
7923 for (int i = 0; i < vl_s; i++) {
7924 if ((i % 5) == 0) {
7925 int64_t offset = -(5 << kSRegSizeInBytesLog2) * vl;
7926 uint32_t lane0 = -6 + (7 * i);
7927 uint32_t lane1 = -7 + (7 * i);
7928 uint32_t lane2 = -8 + (7 * i);
7929 uint32_t lane3 = -9 + (7 * i);
7930 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7931 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7932 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7933 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7934 }
7935 }
7936
7937 // st4d { z31.d, z0.d, z1.d, z2.d }, SVE_MUL3
7938 int vl_d_mul3 = vl_d - (vl_d % 3);
7939 for (int i = 0; i < vl_d_mul3; i++) {
Jacob Bramleye483ce52019-11-05 16:52:29 +00007940 int64_t offset = -(1 << kDRegSizeInBytesLog2) * vl;
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007941 uint64_t lane0 = 32 - (11 * i);
7942 uint64_t lane1 = 33 - (11 * i);
7943 uint64_t lane2 = 34 - (11 * i);
7944 uint64_t lane3 = 35 - (11 * i);
7945 MemoryWrite(middle, offset, (i * reg_count) + 0, lane0);
7946 MemoryWrite(middle, offset, (i * reg_count) + 1, lane1);
7947 MemoryWrite(middle, offset, (i * reg_count) + 2, lane2);
7948 MemoryWrite(middle, offset, (i * reg_count) + 3, lane3);
7949 }
7950
7951 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
7952
Jacob Bramleye483ce52019-11-05 16:52:29 +00007953 // Check that we loaded back the expected values.
7954
7955 // st4b/ld4b
7956 ASSERT_EQUAL_SVE(z3, z19);
7957 ASSERT_EQUAL_SVE(z4, z20);
7958 ASSERT_EQUAL_SVE(z5, z21);
7959 ASSERT_EQUAL_SVE(z6, z22);
7960
7961 // st4h/ld4h
7962 ASSERT_EQUAL_SVE(z7, z23);
7963 ASSERT_EQUAL_SVE(z8, z24);
7964 ASSERT_EQUAL_SVE(z9, z25);
7965 ASSERT_EQUAL_SVE(z10, z26);
7966
7967 // st4w/ld4w
7968 ASSERT_EQUAL_SVE(z11, z27);
7969 ASSERT_EQUAL_SVE(z12, z28);
7970 ASSERT_EQUAL_SVE(z13, z29);
7971 ASSERT_EQUAL_SVE(z14, z30);
7972
7973 // st4d/ld4d
7974 ASSERT_EQUAL_SVE(z15, z31);
7975 ASSERT_EQUAL_SVE(z16, z0);
7976 ASSERT_EQUAL_SVE(z17, z1);
7977 ASSERT_EQUAL_SVE(z18, z2);
7978
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007979 delete[] expected;
7980 }
7981 delete[] data;
7982}
7983
7984TEST_SVE(sve_ld234_st234_scalar_plus_scalar_sp) {
7985 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7986 START();
7987
7988 // Check that the simulator correctly interprets rn == 31 as sp.
7989 // The indexing logic is the same regardless so we just check one load and
7990 // store of each type.
7991
7992 // There are no pre- or post-indexing modes, so reserve space first.
7993 __ ClaimVL(2 + 3 + 4);
7994
7995 __ Index(z0.VnB(), 42, 2);
7996 __ Index(z1.VnB(), 43, 2);
7997 __ Ptrue(p0.VnB(), SVE_VL7);
7998 __ Rdvl(x0, 0);
7999 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, x0));
8000
8001 __ Index(z4.VnH(), 42, 3);
8002 __ Index(z5.VnH(), 43, 3);
8003 __ Index(z6.VnH(), 44, 3);
8004 __ Ptrue(p1.VnH(), SVE_POW2);
8005 __ Rdvl(x1, 2);
8006 __ Lsr(x1, x1, 1);
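  // x1 = (2 * VL) / 2, so the operand below addresses sp + (x1 << 1) = sp + 2 * VL,
  // just past the two VLs already reserved for the st2b data.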
8007 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, x1, LSL, 1));
8008
8009 __ Index(z8.VnS(), 42, 4);
8010 __ Index(z9.VnS(), 43, 4);
8011 __ Index(z10.VnS(), 44, 4);
8012 __ Index(z11.VnS(), 45, 4);
8013 __ Ptrue(p2.VnS());
8014 __ Rdvl(x2, 2 + 3);
8015 __ Lsr(x2, x2, 2);
8016 __ St4w(z8.VnS(),
8017 z9.VnS(),
8018 z10.VnS(),
8019 z11.VnS(),
8020 p2,
8021 SVEMemOperand(sp, x2, LSL, 2));
8022
Jacob Bramleye483ce52019-11-05 16:52:29 +00008023 // Corresponding loads.
8024 // We have to explicitly zero inactive lanes in the reference values because
8025 // loads have zeroing predication.
8026 __ Dup(z12.VnB(), 0);
8027 __ Dup(z13.VnB(), 0);
8028 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8029 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8030 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, x0));
8031
8032 __ Dup(z16.VnH(), 0);
8033 __ Dup(z17.VnH(), 0);
8034 __ Dup(z18.VnH(), 0);
8035 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8036 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8037 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8038 __ Ld3h(z4.VnH(),
8039 z5.VnH(),
8040 z6.VnH(),
8041 p1.Zeroing(),
8042 SVEMemOperand(sp, x1, LSL, 1));
8043
8044 __ Dup(z20.VnS(), 0);
8045 __ Dup(z21.VnS(), 0);
8046 __ Dup(z22.VnS(), 0);
8047 __ Dup(z23.VnS(), 0);
8048 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8049 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8050 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8051 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8052 __ Ld4w(z8.VnS(),
8053 z9.VnS(),
8054 z10.VnS(),
8055 z11.VnS(),
8056 p2.Zeroing(),
8057 SVEMemOperand(sp, x2, LSL, 2));
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008058
8059 __ DropVL(2 + 3 + 4);
8060
8061 END();
8062
8063 if (CAN_RUN()) {
8064 RUN();
8065
8066 // The most likely failure mode is that the simulator reads sp as xzr and
8067 // crashes on execution. We already test the address calculations separately
8068 // and sp doesn't change this, so just test that we load the values we
8069 // stored.
Jacob Bramleye483ce52019-11-05 16:52:29 +00008070
8071 // st2b/ld2b
8072 ASSERT_EQUAL_SVE(z0, z12);
8073 ASSERT_EQUAL_SVE(z1, z13);
8074
8075 // st3h/ld3h
8076 ASSERT_EQUAL_SVE(z4, z16);
8077 ASSERT_EQUAL_SVE(z5, z17);
8078 ASSERT_EQUAL_SVE(z6, z18);
8079
8080 // st4w/ld4w
8081 ASSERT_EQUAL_SVE(z8, z20);
8082 ASSERT_EQUAL_SVE(z9, z21);
8083 ASSERT_EQUAL_SVE(z10, z22);
8084 ASSERT_EQUAL_SVE(z11, z23);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00008085 }
8086}
8087
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008088TEST_SVE(sve_ld234_st234_scalar_plus_imm_sp) {
8089 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8090 START();
8091
8092 // Check that the simulator correctly interprets rn == 31 as sp.
8093 // The indexing logic is the same regardless so we just check one load and
8094 // store of each type.
8095
8096 // There are no pre- or post-indexing modes, so reserve space first.
8097 // Note that the stores fill in an order that allows each immediate to be a
8098 // multiple of the number of registers.
8099 __ ClaimVL(4 + 2 + 3);
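  // Resulting layout: st4w at sp + 0 (4 VLs), st2b at sp + 4 * VL (2 VLs), and
  // st3h at sp + 6 * VL (3 VLs). Each starting offset, expressed as a multiple
  // of VL, is a multiple of the number of registers stored, as required by the
  // scalar-plus-immediate form.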
8100
8101 __ Index(z0.VnB(), 42, 2);
8102 __ Index(z1.VnB(), 43, 2);
8103 __ Ptrue(p0.VnB(), SVE_POW2);
8104 __ St2b(z0.VnB(), z1.VnB(), p0, SVEMemOperand(sp, 4, SVE_MUL_VL));
8105
8106 __ Index(z4.VnH(), 42, 3);
8107 __ Index(z5.VnH(), 43, 3);
8108 __ Index(z6.VnH(), 44, 3);
8109 __ Ptrue(p1.VnH(), SVE_VL7);
8110 __ St3h(z4.VnH(), z5.VnH(), z6.VnH(), p1, SVEMemOperand(sp, 6, SVE_MUL_VL));
8111
8112 __ Index(z8.VnS(), 42, 4);
8113 __ Index(z9.VnS(), 43, 4);
8114 __ Index(z10.VnS(), 44, 4);
8115 __ Index(z11.VnS(), 45, 4);
8116 __ Ptrue(p2.VnS());
8117 __ St4w(z8.VnS(), z9.VnS(), z10.VnS(), z11.VnS(), p2, SVEMemOperand(sp));
8118
Jacob Bramleye5ab0fe2019-11-05 16:52:29 +00008119 // Corresponding loads.
8120 // We have to explicitly zero inactive lanes in the reference values because
8121 // loads have zeroing predication.
8122 __ Dup(z12.VnB(), 0);
8123 __ Dup(z13.VnB(), 0);
8124 __ Mov(z12.VnB(), p0.Merging(), z0.VnB());
8125 __ Mov(z13.VnB(), p0.Merging(), z1.VnB());
8126 __ Ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(sp, 4, SVE_MUL_VL));
8127
8128 __ Dup(z16.VnH(), 0);
8129 __ Dup(z17.VnH(), 0);
8130 __ Dup(z18.VnH(), 0);
8131 __ Mov(z16.VnH(), p1.Merging(), z4.VnH());
8132 __ Mov(z17.VnH(), p1.Merging(), z5.VnH());
8133 __ Mov(z18.VnH(), p1.Merging(), z6.VnH());
8134 __ Ld3h(z4.VnH(),
8135 z5.VnH(),
8136 z6.VnH(),
8137 p1.Zeroing(),
8138 SVEMemOperand(sp, 6, SVE_MUL_VL));
8139
8140 __ Dup(z20.VnS(), 0);
8141 __ Dup(z21.VnS(), 0);
8142 __ Dup(z22.VnS(), 0);
8143 __ Dup(z23.VnS(), 0);
8144 __ Mov(z20.VnS(), p2.Merging(), z8.VnS());
8145 __ Mov(z21.VnS(), p2.Merging(), z9.VnS());
8146 __ Mov(z22.VnS(), p2.Merging(), z10.VnS());
8147 __ Mov(z23.VnS(), p2.Merging(), z11.VnS());
8148 __ Ld4w(z8.VnS(),
8149 z9.VnS(),
8150 z10.VnS(),
8151 z11.VnS(),
8152 p2.Zeroing(),
8153 SVEMemOperand(sp));
Jacob Bramleyd4dd9c22019-11-04 16:44:01 +00008154
8155 __ DropVL(4 + 2 + 3);
8156
8157 END();
8158
8159 if (CAN_RUN()) {
8160 RUN();
8161
8162 // The most likely failure mode is that the simulator reads sp as xzr and
8163 // crashes on execution. We already test the address calculations separately
8164 // and sp doesn't change this, so just test that we load the values we
8165 // stored.
8166 // TODO: Actually do this, once loads are implemented.
8167 }
8168}
8169
Jacob Bramley85a9c102019-12-09 17:48:29 +00008170TEST_SVE(sve_ldff1_scalar_plus_scalar) {
8171 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8172 START();
8173
8174 int vl = config->sve_vl_in_bytes();
8175 size_t page_size = sysconf(_SC_PAGE_SIZE);
8176 VIXL_ASSERT(page_size > static_cast<size_t>(vl));
8177
8178 // Allocate two pages, then mprotect the second one to make it inaccessible.
8179 uintptr_t data = reinterpret_cast<uintptr_t>(mmap(NULL,
8180 page_size * 2,
8181 PROT_READ | PROT_WRITE,
8182 MAP_PRIVATE | MAP_ANONYMOUS,
8183 -1,
8184 0));
8185 mprotect(reinterpret_cast<void*>(data + page_size), page_size, PROT_NONE);
8186
8187 // Fill the accessible page with arbitrary data.
8188 for (size_t i = 0; i < page_size; i++) {
8189 // Reverse bits so we get a mixture of positive and negative values.
8190 uint8_t byte = ReverseBits(static_cast<uint8_t>(i));
8191 memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
8192 }
8193
8194 __ Mov(x20, data);
8195
8196 PRegister all = p7;
8197 __ Ptrue(all.VnB());
8198
8199 size_t offset_modifier = 0;
8200
8201 // The highest address at which a load stopped. Every FF load should fault at
8202 // `data + page_size`, so this value should never exceed that address. However,
8203 // the architecture allows fault-tolerant loads to fault arbitrarily, so the
8204 // real value may be lower.
8205 //
8206 // This is used to check that the `mprotect` above really does make the second
8207 // page inaccessible, and that the resulting FFR from each load reflects that.
8208 Register limit = x22;
8209 __ Mov(limit, 0);
8210
8211 // If the FFR grows unexpectedly, we increment this register by the
8212 // difference. FFR should never grow, except when explicitly set.
8213 Register ffr_grow_count = x23;
8214 __ Mov(ffr_grow_count, 0);
8215
8216#define VIXL_EMIT_LDFF1_TEST(LDFF1, M_SIZE, Zt, E_SIZE, LD1, ZtRef) \
8217 do { \
8218 /* Set the offset so that the load is guaranteed to start in the */ \
8219 /* accessible page, but end in the inaccessible one. */ \
8220 VIXL_ASSERT((page_size % k##M_SIZE##RegSizeInBytes) == 0); \
8221 VIXL_ASSERT((vl % k##M_SIZE##RegSizeInBytes) == 0); \
8222 size_t elements_per_page = page_size / k##M_SIZE##RegSizeInBytes; \
8223 size_t elements_per_access = vl / k##E_SIZE##RegSizeInBytes; \
8224 size_t min_offset = (elements_per_page - elements_per_access) + 1; \
8225 size_t max_offset = elements_per_page - 1; \
8226 size_t offset = \
8227 min_offset + (offset_modifier % (max_offset - min_offset + 1)); \
8228 offset_modifier++; \
8229 __ Mov(x21, offset); \
8230 __ Setffr(); \
8231 __ LDFF1(Zt.Vn##E_SIZE(), \
8232 all.Zeroing(), \
8233 SVEMemOperand(x20, x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8234 __ Rdffrs(p0.VnB(), all.Zeroing()); \
8235 /* Execute another LDFF1 with no offset, so that every element could be */ \
8236 /* read. It should respect FFR, and load no more than we loaded the */ \
8237 /* first time. */ \
8238 __ LDFF1(ZtRef.Vn##E_SIZE(), all.Zeroing(), SVEMemOperand(x20)); \
8239 __ Rdffrs(p1.VnB(), all.Zeroing()); \
8240 __ Cntp(x0, all, p1.VnB()); \
8241 __ Uqdecp(x0, p0.VnB()); \
8242 __ Add(ffr_grow_count, ffr_grow_count, x0); \
8243 /* Use the FFR to predicate the normal load. If it wasn't properly set, */ \
8244 /* the normal load will abort. */ \
8245 __ LD1(ZtRef.Vn##E_SIZE(), \
8246 p0.Zeroing(), \
8247 SVEMemOperand(x20, x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8248 /* Work out the address after the one that was just accessed. */ \
8249 __ Incp(x21, p0.Vn##E_SIZE()); \
8250 __ Add(x0, x20, Operand(x21, LSL, k##M_SIZE##RegSizeInBytesLog2)); \
8251 __ Cmp(limit, x0); \
8252 __ Csel(limit, limit, x0, hs); \
8253 /* Clear lanes inactive in FFR. These have an undefined result. */ \
8254 /* TODO: Use the 'Not' and 'Mov' aliases once they are implemented. */ \
8255 __ Eor(p0.Vn##E_SIZE(), all.Zeroing(), p0.Vn##E_SIZE(), all.Vn##E_SIZE()); \
8256 __ Cpy(Zt.Vn##E_SIZE(), p0.Merging(), 0); \
8257 } while (0)
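  // A worked example, assuming a 4096-byte page and a 256-bit (32-byte) VL: for
  // Ldff1b with B-sized elements, elements_per_page = 4096 and
  // elements_per_access = 32, so `offset` is drawn from [4065, 4095]. The first
  // (fault-tolerant) access then starts in the accessible page but always runs
  // into the protected one, so FFR should be cleared for the trailing elements.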
8258
8259 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z0, B, Ld1b, z16);
8260 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z1, H, Ld1b, z17);
8261 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z2, S, Ld1b, z18);
8262 VIXL_EMIT_LDFF1_TEST(Ldff1b, B, z3, D, Ld1b, z19);
8263
8264 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z4, H, Ld1h, z20);
8265 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z5, S, Ld1h, z21);
8266 VIXL_EMIT_LDFF1_TEST(Ldff1h, H, z6, D, Ld1h, z22);
8267
8268 VIXL_EMIT_LDFF1_TEST(Ldff1w, S, z7, S, Ld1w, z23);
8269 VIXL_EMIT_LDFF1_TEST(Ldff1w, S, z8, D, Ld1w, z24);
8270
8271 VIXL_EMIT_LDFF1_TEST(Ldff1d, D, z9, D, Ld1d, z25);
8272
8273 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z10, H, Ld1sb, z26);
8274 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z11, S, Ld1sb, z27);
8275 VIXL_EMIT_LDFF1_TEST(Ldff1sb, B, z12, D, Ld1sb, z28);
8276
8277 VIXL_EMIT_LDFF1_TEST(Ldff1sh, H, z13, S, Ld1sh, z29);
8278 VIXL_EMIT_LDFF1_TEST(Ldff1sh, H, z14, D, Ld1sh, z30);
8279
8280 VIXL_EMIT_LDFF1_TEST(Ldff1sw, S, z15, D, Ld1sw, z31);
8281
8282#undef VIXL_EMIT_LDFF1_TEST
8283
8284 END();
8285
8286 if (CAN_RUN()) {
8287 RUN();
8288
8289 uintptr_t expected_limit = data + page_size;
8290 uintptr_t measured_limit = core.xreg(limit.GetCode());
8291 VIXL_CHECK(measured_limit <= expected_limit);
8292 if (measured_limit < expected_limit) {
8293 // We can't fail the test for this case, but a warning is helpful for
8294 // manually-run tests.
8295 printf(
8296 "WARNING: All fault-tolerant loads detected faults before the\n"
8297 "expected limit. This is architecturally possible, but improbable,\n"
8298 "and could be a symptom of another problem.\n");
8299 }
8300
8301 ASSERT_EQUAL_64(0, ffr_grow_count);
8302
8303 // Ldff1b
8304 ASSERT_EQUAL_SVE(z0.VnB(), z16.VnB());
8305 ASSERT_EQUAL_SVE(z1.VnH(), z17.VnH());
8306 ASSERT_EQUAL_SVE(z2.VnS(), z18.VnS());
8307 ASSERT_EQUAL_SVE(z3.VnD(), z19.VnD());
8308
8309 // Ldff1h
8310 ASSERT_EQUAL_SVE(z4.VnH(), z20.VnH());
8311 ASSERT_EQUAL_SVE(z5.VnS(), z21.VnS());
8312 ASSERT_EQUAL_SVE(z6.VnD(), z22.VnD());
8313
8314 // Ldff1w
8315 ASSERT_EQUAL_SVE(z7.VnS(), z23.VnS());
8316 ASSERT_EQUAL_SVE(z8.VnD(), z24.VnD());
8317
8318 // Ldff1d
8319 ASSERT_EQUAL_SVE(z9.VnD(), z25.VnD());
8320
8321 // Ldff1sb
8322 ASSERT_EQUAL_SVE(z10.VnH(), z26.VnH());
8323 ASSERT_EQUAL_SVE(z11.VnS(), z27.VnS());
8324 ASSERT_EQUAL_SVE(z12.VnD(), z28.VnD());
8325
8326 // Ldff1sh
8327 ASSERT_EQUAL_SVE(z13.VnS(), z29.VnS());
8328 ASSERT_EQUAL_SVE(z14.VnD(), z30.VnD());
8329
8330 // Ldff1sw
8331 ASSERT_EQUAL_SVE(z15.VnD(), z31.VnD());
8332 }
8333
8334 munmap(reinterpret_cast<void*>(data), page_size * 2);
8335}
8336
Jacob Bramleydcdbd752020-01-20 11:47:36 +00008337// Test gather loads by comparing them with the result of a set of equivalent
8338// scalar loads.
8339template <typename F>
8340static void GatherLoadHelper(Test* config,
8341 unsigned msize_in_bits,
8342 unsigned esize_in_bits,
8343 F sve_ld1,
8344 bool is_signed) {
8345 // SVE supports 32- and 64-bit addressing for gather loads.
8346 VIXL_ASSERT((esize_in_bits == kSRegSize) || (esize_in_bits == kDRegSize));
8347 static const unsigned kMaxLaneCount = kZRegMaxSize / kSRegSize;
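  // The worst case is the smallest element size used for addressing (S): a
  // maximal 2048-bit Z register holds 64 S-sized lanes.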
8348
8349 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8350 START();
8351
8352 unsigned msize_in_bytes = msize_in_bits / kBitsPerByte;
8353 unsigned esize_in_bytes = esize_in_bits / kBitsPerByte;
8354 int vl = config->sve_vl_in_bytes();
8355
8356 // Use a fixed seed for nrand48() so that test runs are reproducible.
8357 unsigned short seed[3] = {1, 2, 3}; // NOLINT(runtime/int)
8358
8359 // Fill a buffer with arbitrary data.
8360 size_t buffer_size = vl * 64;
8361 uint64_t data = reinterpret_cast<uintptr_t>(malloc(buffer_size));
8362 for (size_t i = 0; i < buffer_size; i++) {
8363 uint8_t byte = nrand48(seed) & 0xff;
8364 memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
8365 }
8366
8367 // Vectors of random addresses and offsets into the buffer.
8368 uint64_t addresses[kMaxLaneCount];
8369 uint64_t offsets[kMaxLaneCount];
8370 uint64_t max_address = 0;
8371 for (unsigned i = 0; i < kMaxLaneCount; i++) {
8372 uint64_t rnd = nrand48(seed);
8373 // Limit the range to the set of completely-accessible elements in memory.
8374 offsets[i] = rnd % (buffer_size - msize_in_bytes);
8375 addresses[i] = data + offsets[i];
8376 max_address = std::max(max_address, addresses[i]);
8377 }
8378
8379 // Maximised offsets, to ensure that the address calculation is modulo-2^64,
8380 // and that the vector addresses are not sign-extended.
8381 uint64_t uint_e_max = (esize_in_bits == kDRegSize) ? UINT64_MAX : UINT32_MAX;
8382 uint64_t maxed_offsets[kMaxLaneCount];
8383 uint64_t maxed_offsets_imm = max_address - uint_e_max;
8384 for (unsigned i = 0; i < kMaxLaneCount; i++) {
8385 maxed_offsets[i] = addresses[i] - maxed_offsets_imm;
8386 }
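  // Worked example for 64-bit lanes: maxed_offsets_imm = max_address - UINT64_MAX,
  // which is congruent to max_address + 1 modulo 2^64, and each maxed_offsets[i]
  // wraps to addresses[i] - (max_address + 1). Adding the two back together in the
  // load's address calculation recovers addresses[i] exactly, but only if the
  // lanes are treated as unsigned and the sum is taken modulo 2^64.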
8387
8388 ZRegister zn = z0.WithLaneSize(esize_in_bits);
8389 ZRegister zt_addresses = z1.WithLaneSize(esize_in_bits);
8390 ZRegister zt_offsets = z2.WithLaneSize(esize_in_bits);
8391 ZRegister zt_maxed = z3.WithLaneSize(esize_in_bits);
8392 ZRegister zt_ref = z4.WithLaneSize(esize_in_bits);
8393
8394 PRegisterZ pg = p0.Zeroing();
8395 Initialise(&masm,
8396 pg,
8397 0x9abcdef012345678,
8398 0xabcdef0123456789,
8399 0xf4f3f1f0fefdfcfa,
8400 0xf9f8f6f5f3f2f0ff);
8401
8402 // Execute each load.
8403
8404 if (esize_in_bits == kDRegSize) {
8405 // Only test `addresses` if we can use 64-bit pointers. InsrHelper will fail
8406 // if any value won't fit in a lane of zn.
8407 InsrHelper(&masm, zn, addresses);
8408 (masm.*sve_ld1)(zt_addresses, pg, SVEMemOperand(zn));
8409 }
8410
8411 InsrHelper(&masm, zn, offsets);
8412 (masm.*sve_ld1)(zt_offsets, pg, SVEMemOperand(zn, data));
8413
8414 InsrHelper(&masm, zn, maxed_offsets);
8415 (masm.*sve_ld1)(zt_maxed, pg, SVEMemOperand(zn, maxed_offsets_imm));
8416
8417 // TODO: Also test scalar-plus-vector SVEMemOperands.
8418 // TODO: Also test first-fault loads.
8419
8420 // Generate a reference result using scalar loads.
8421
8422 ZRegister lane_numbers = z10.WithLaneSize(esize_in_bits);
8423 __ Index(lane_numbers, 0, 1);
8424 __ Dup(zt_ref, 0);
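  // Note the reversed indexing below: InsrHelper is assumed to insert array
  // elements in order, and each `insr` shifts the existing lanes up before
  // writing lane 0, so the last element inserted ends up in lane 0. Lane i of
  // zn therefore corresponds to addresses[ArrayLength(addresses) - i - 1].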
8425 for (unsigned i = 0; i < (vl / esize_in_bytes); i++) {
8426 __ Mov(x0, addresses[ArrayLength(addresses) - i - 1]);
8427 Register rt(0, esize_in_bits);
8428 if (is_signed) {
8429 switch (msize_in_bits) {
8430 case kBRegSize:
8431 __ Ldrsb(rt, MemOperand(x0));
8432 break;
8433 case kHRegSize:
8434 __ Ldrsh(rt, MemOperand(x0));
8435 break;
8436 case kWRegSize:
8437 __ Ldrsw(rt, MemOperand(x0));
8438 break;
8439 }
8440 } else {
8441 switch (msize_in_bits) {
8442 case kBRegSize:
8443 __ Ldrb(rt, MemOperand(x0));
8444 break;
8445 case kHRegSize:
8446 __ Ldrh(rt, MemOperand(x0));
8447 break;
8448 case kWRegSize:
8449 __ Ldr(rt.W(), MemOperand(x0));
8450 break;
8451 case kXRegSize:
8452 __ Ldr(rt, MemOperand(x0));
8453 break;
8454 }
8455 }
8456
8457 // Emulate predication.
8458 __ Cmpeq(p7.WithLaneSize(esize_in_bits), pg, lane_numbers, i);
8459 __ Cpy(zt_ref, p7.Merging(), rt);
8460 }
8461
8462 END();
8463
8464 if (CAN_RUN()) {
8465 RUN();
8466
8467 if (esize_in_bits == kDRegSize) {
8468 ASSERT_EQUAL_SVE(zt_ref, zt_addresses);
8469 }
8470 ASSERT_EQUAL_SVE(zt_ref, zt_offsets);
8471 ASSERT_EQUAL_SVE(zt_ref, zt_maxed);
8472 }
8473
8474 free(reinterpret_cast<void*>(data));
8475}
8476
8477TEST_SVE(sve_ld1b_64bit_vector_plus_immediate) {
8478 GatherLoadHelper(config, kBRegSize, kDRegSize, &MacroAssembler::Ld1b, false);
8479}
8480
8481TEST_SVE(sve_ld1h_64bit_vector_plus_immediate) {
8482 GatherLoadHelper(config, kHRegSize, kDRegSize, &MacroAssembler::Ld1h, false);
8483}
8484
8485TEST_SVE(sve_ld1w_64bit_vector_plus_immediate) {
8486 GatherLoadHelper(config, kSRegSize, kDRegSize, &MacroAssembler::Ld1w, false);
8487}
8488
8489TEST_SVE(sve_ld1d_64bit_vector_plus_immediate) {
8490 GatherLoadHelper(config, kDRegSize, kDRegSize, &MacroAssembler::Ld1d, false);
8491}
8492
8493TEST_SVE(sve_ld1sb_64bit_vector_plus_immediate) {
8494 GatherLoadHelper(config, kBRegSize, kDRegSize, &MacroAssembler::Ld1sb, true);
8495}
8496
8497TEST_SVE(sve_ld1sh_64bit_vector_plus_immediate) {
8498 GatherLoadHelper(config, kHRegSize, kDRegSize, &MacroAssembler::Ld1sh, true);
8499}
8500
8501TEST_SVE(sve_ld1sw_64bit_vector_plus_immediate) {
8502 GatherLoadHelper(config, kSRegSize, kDRegSize, &MacroAssembler::Ld1sw, true);
8503}
8504
8505TEST_SVE(sve_ld1b_32bit_vector_plus_immediate) {
8506 GatherLoadHelper(config, kBRegSize, kSRegSize, &MacroAssembler::Ld1b, false);
8507}
8508
8509TEST_SVE(sve_ld1h_32bit_vector_plus_immediate) {
8510 GatherLoadHelper(config, kHRegSize, kSRegSize, &MacroAssembler::Ld1h, false);
8511}
8512
8513TEST_SVE(sve_ld1w_32bit_vector_plus_immediate) {
8514 GatherLoadHelper(config, kSRegSize, kSRegSize, &MacroAssembler::Ld1w, false);
8515}
8516
8517TEST_SVE(sve_ld1sb_32bit_vector_plus_immediate) {
8518 GatherLoadHelper(config, kBRegSize, kSRegSize, &MacroAssembler::Ld1sb, true);
8519}
8520
8521TEST_SVE(sve_ld1sh_32bit_vector_plus_immediate) {
8522 GatherLoadHelper(config, kHRegSize, kSRegSize, &MacroAssembler::Ld1sh, true);
8523}
8524
TatWai Chong6995bfd2019-09-26 10:48:05 +01008525typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
8526 const ZRegister& zn,
8527 const IntegerOperand imm);
8528
8529template <typename F, typename Td, typename Tn>
8530static void IntWideImmHelper(Test* config,
8531 F macro,
8532 unsigned lane_size_in_bits,
8533 const Tn& zn_inputs,
8534 IntegerOperand imm,
8535 const Td& zd_expected) {
8536 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8537 START();
8538
8539 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
8540 InsrHelper(&masm, zd1, zn_inputs);
8541
8542 // Also test with a different zn, to test the movprfx case.
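  // The underlying instructions are destructive (the destination must also be
  // the first source), so when zd2 != zn the MacroAssembler is expected to emit
  // a movprfx to initialise the destination before the operation.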
8543 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
8544 InsrHelper(&masm, zn, zn_inputs);
8545 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
8546 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
8547
8548 // Make a copy so we can check that constructive operations preserve zn.
8549 __ Mov(zn_copy, zn);
8550
8551 {
8552 UseScratchRegisterScope temps(&masm);
8553 // The MacroAssembler needs a P scratch register for some of these macros,
8554 // and it doesn't have one by default.
8555 temps.Include(p3);
8556
8557 (masm.*macro)(zd1, zd1, imm);
8558 (masm.*macro)(zd2, zn, imm);
8559 }
8560
8561 END();
8562
8563 if (CAN_RUN()) {
8564 RUN();
8565
8566 ASSERT_EQUAL_SVE(zd_expected, zd1);
8567
8568 // Check that the result computed via the movprfx path (zd2, from a distinct
8569 // source register) matches the destructive-form result above.
8570 ASSERT_EQUAL_SVE(zd_expected, zd2);
8571
8572 ASSERT_EQUAL_SVE(zn_copy, zn);
8573 }
8574}
8575
8576TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
8577 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
8578 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
8579 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
8580 int64_t in_d[] = {1, 10, 10000, 1000000};
8581
8582 IntWideImmFn fn = &MacroAssembler::Smax;
8583
8584 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
8585 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
8586 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
8587 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
8588
8589 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
8590 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
8591 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
8592 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8593
8594 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
8595 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
8596 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
8597
8598 // The immediate is in the range [-128, 127], but the macro is able to
8599 // synthesise unencodable immediates.
8600 // B-sized lanes cannot take an immediate out of the range [-128, 127].
8601 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8602 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8603 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8604}
8605
8606TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
8607 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
8608 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
8609 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
8610 int64_t in_d[] = {1, 10, 10000, 1000000};
8611
8612 IntWideImmFn fn = &MacroAssembler::Smin;
8613
8614 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
8615 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
8616 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
8617 int64_t exp_d_1[] = {1, 10, 99, 99};
8618
8619 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
8620 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
8621 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
8622 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8623
8624 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
8625 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
8626 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
8627
8628 // The immediate is in the range [-128, 127], but the macro is able to
8629 // synthesise unencodable immediates.
8630 // B-sized lanes cannot take an immediate out of the range [-128, 127].
8631 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8632 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8633 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8634}
8635
8636TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
8637 int in_b[] = {0, 255, 127, 0x80, 1, 55};
8638 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
8639 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
8640 int64_t in_d[] = {1, 10, 10000, 1000000};
8641
8642 IntWideImmFn fn = &MacroAssembler::Umax;
8643
8644 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
8645 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
8646 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
8647 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
8648
8649 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
8650 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
8651 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
8652 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8653
8654 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
8655 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
8656 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
8657
8658 // The immediate is in the range [0, 255], but the macro is able to
8659 // synthesise unencodable immediates.
8660 // B-sized lanes cannot take an immediate out of the range [0, 255].
8661 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
8662 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8663 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8664}
8665
8666TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
8667 int in_b[] = {0, 255, 127, 0x80, 1, 55};
8668 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
8669 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
8670 int64_t in_d[] = {1, 10, 10000, 1000000};
8671
8672 IntWideImmFn fn = &MacroAssembler::Umin;
8673
8674 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
8675 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
8676 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
8677 int64_t exp_d_1[] = {1, 10, 99, 99};
8678
8679 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
8680 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
8681 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
8682 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
8683
8684 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
8685 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
8686 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
8687
8688 // The immediate is in the range [0, 255], but the macro is able to
8689 // synthesise unencodable immediates.
8690 // B-sized lanes cannot take an immediate out of the range [0, 255].
8691 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
8692 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8693 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
8694}
8695
8696TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
8697 int in_b[] = {11, -1, 7, -3};
8698 int in_h[] = {111, -1, 17, -123};
8699 int in_s[] = {11111, -1, 117, -12345};
8700 int64_t in_d[] = {0x7fffffff, 0x80000000};
8701
8702 IntWideImmFn fn = &MacroAssembler::Mul;
8703
8704 int exp_b_1[] = {66, -6, 42, -18};
8705 int exp_h_1[] = {-14208, 128, -2176, 15744};
8706 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
8707 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
8708
8709 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
8710 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
8711 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8712 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
8713
8714 int exp_h_2[] = {-28305, 255, -4335, 31365};
8715 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
8716 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
8717
8718 // The immediate is in the range [-128, 127], but the macro is able to
8719 // synthesise unencodable immediates.
8720 // B-sized lanes cannot take an immediate out of the range [-128, 127].
8721 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
8722 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
8723 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
8724
8725 // Integer overflow on multiplication.
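  // For example, 11 * 0x7f = 1397 (0x575), which truncates to 0x75 in a B-sized
  // lane, and -3 * 0x7f = -381, which wraps to 0x83.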
8726 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
8727
8728 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
8729}
8730
8731TEST_SVE(sve_int_wide_imm_unpredicated_add) {
8732 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8733 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8734 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8735 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8736
8737 IntWideImmFn fn = &MacroAssembler::Add;
8738
8739 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
8740 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8741 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8742 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8743
8744 // Encodable with `add` (shift 0).
8745 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8746 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8747 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8748 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
8749
8750 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
8751 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8752 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8753
8754 // Encodable with `add` (shift 8).
8755 // B-sized lanes cannot take a shift of 8.
8756 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8757 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8758 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
8759
8760 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
8761
8762 // The macro is able to synthesise unencodable immediates.
8763 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01008764
8765 unsigned exp_b_4[] = {0x61, 0x5f, 0xf0, 0xdf};
8766 unsigned exp_h_4[] = {0x6181, 0x5f7f, 0xf010, 0x8aaa};
8767 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
8768 uint64_t exp_d_4[] = {0x8000000180018180, 0x7fffffff7fff7f7e};
8769
8770 // Negative immediates use `sub`.
8771 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
8772 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
8773 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
8774 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008775}
8776
8777TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
8778 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8779 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8780 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8781 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8782
8783 IntWideImmFn fn = &MacroAssembler::Sqadd;
8784
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008785 unsigned exp_b_1[] = {0x02, 0x7f, 0x7f, 0x7f};
TatWai Chong6995bfd2019-09-26 10:48:05 +01008786 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8787 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8788 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8789
8790 // Encodable with `sqadd` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008791 // Note that encodable immediates are unsigned, even for signed saturation.
8792 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008793 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8794 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008795 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008796
8797 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
8798 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8799 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8800
8801 // Encodable with `sqadd` (shift 8).
8802 // B-sized lanes cannot take a shift of 8.
8803 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8804 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8805 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008806}
8807
8808TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
8809 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8810 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8811 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8812 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8813
8814 IntWideImmFn fn = &MacroAssembler::Uqadd;
8815
8816 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
8817 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
8818 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
8819 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
8820
8821 // Encodable with `uqadd` (shift 0).
8822 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8823 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8824 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8825 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
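  // For example, on B lanes 0x81 + 0x81 = 0x102 saturates to 0xff, while
  // 0x10 + 0x81 = 0x91 is representable and is left unchanged (see exp_b_1).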
8826
8827 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
8828 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
8829 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
8830
8831 // Encodable with `uqadd` (shift 8).
8832 // B-sized lanes cannot take a shift of 8.
8833 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8834 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8835 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008836}
8837
8838TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
8839 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8840 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8841 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8842 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8843
8844 IntWideImmFn fn = &MacroAssembler::Sub;
8845
8846 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
8847 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8848 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8849 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8850
8851 // Encodable with `sub` (shift 0).
8852 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8853 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8854 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8855 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
8856
8857 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
8858 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8859 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8860
8861 // Encodable with `sub` (shift 8).
8862 // B-sized lanes cannot take a shift of 8.
8863 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8864 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8865 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
8866
8867 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
8868
8869 // The macro is able to synthesise unencodable immediates.
8870 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01008871
8872 unsigned exp_b_4[] = {0xa1, 0x9f, 0x30, 0x1f};
8873 unsigned exp_h_4[] = {0xa181, 0x9f7f, 0x3010, 0xcaaa};
8874 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
8875 uint64_t exp_d_4[] = {0x8000000180018182, 0x7fffffff7fff7f80};
8876
8877 // Negative immediates use `add`.
8878 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
8879 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
8880 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
8881 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008882}
8883
8884TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
8885 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8886 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8887 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8888 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8889
8890 IntWideImmFn fn = &MacroAssembler::Sqsub;
8891
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008892 unsigned exp_b_1[] = {0x80, 0xfe, 0x8f, 0x80};
TatWai Chong6995bfd2019-09-26 10:48:05 +01008893 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8894 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8895 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8896
8897 // Encodable with `sqsub` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008898 // Note that encodable immediates are unsigned, even for signed saturation.
8899 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008900 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8901 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01008902 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008903
8904 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
8905 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8906 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8907
8908 // Encodable with `sqsub` (shift 8).
8909 // B-sized lanes cannot take a shift of 8.
8910 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8911 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8912 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008913}
8914
8915TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
8916 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
8917 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
8918 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
8919 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
8920
8921 IntWideImmFn fn = &MacroAssembler::Uqsub;
8922
8923 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
8924 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
8925 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
8926 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
8927
8928 // Encodable with `uqsub` (shift 0).
8929 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
8930 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
8931 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
8932 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
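  // For example, on B lanes 0x10 - 0x81 underflows and saturates to 0x00,
  // while 0xff - 0x81 = 0x7e stays in range (see exp_b_1).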
8933
8934 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
8935 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
8936 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
8937
8938 // Encodable with `uqsub` (shift 8).
8939 // B-sized lanes cannot take a shift of 8.
8940 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
8941 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
8942 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01008943}
8944
8945TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
8946 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
8947 START();
8948
8949 // Encodable with `subr` (shift 0).
8950 __ Index(z0.VnD(), 1, 1);
8951 __ Sub(z0.VnD(), 100, z0.VnD());
8952 __ Index(z1.VnS(), 0x7f, 1);
8953 __ Sub(z1.VnS(), 0xf7, z1.VnS());
8954 __ Index(z2.VnH(), 0xaaaa, 0x2222);
8955 __ Sub(z2.VnH(), 0x80, z2.VnH());
8956 __ Index(z3.VnB(), 133, 1);
8957 __ Sub(z3.VnB(), 255, z3.VnB());
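  // With an immediate first operand, Sub computes (imm - zn) for each lane, so
  // z0 lane i holds 100 - (i + 1); this is the descending sequence checked in
  // expected_z0 below.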
8958
8959 // Encodable with `subr` (shift 8).
8960 __ Index(z4.VnD(), 256, -1);
8961 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
8962 __ Index(z5.VnS(), 0x7878, 1);
8963 __ Sub(z5.VnS(), 0x8000, z5.VnS());
8964 __ Index(z6.VnH(), 0x30f0, -1);
8965 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
8966 // B-sized lanes cannot take a shift of 8.
8967
8968 // Select with movprfx.
8969 __ Index(z31.VnD(), 256, 4001);
8970 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
8971
8972 // Out of immediate encodable range of `sub`.
8973 __ Index(z30.VnS(), 0x11223344, 1);
8974 __ Sub(z8.VnS(), 0x88776655, z30.VnS());
8975
8976 END();
8977
8978 if (CAN_RUN()) {
8979 RUN();
8980
8981 int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
8982 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
8983
8984 int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
8985 ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
8986
8987 int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
8988 ASSERT_EQUAL_SVE(expected_z2, z2.VnH());
8989
8990 int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
8991 ASSERT_EQUAL_SVE(expected_z3, z3.VnB());
8992
8993 int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
8994 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
8995
8996 int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
8997 ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
8998
8999 int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
9000 ASSERT_EQUAL_SVE(expected_z6, z6.VnH());
9001
9002 int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
9003 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
9004
9005 int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
9006 ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
9007 }
9008}
9009
9010TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
9011 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9012 START();
9013
9014 // Immediates which can be encoded in the instructions.
9015 __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
9016 __ Fdup(z1.VnS(), Float16(2.0));
9017 __ Fdup(z2.VnD(), Float16(3.875));
9018 __ Fdup(z3.VnH(), 8.0f);
9019 __ Fdup(z4.VnS(), -4.75f);
9020 __ Fdup(z5.VnD(), 0.5f);
9021 __ Fdup(z6.VnH(), 1.0);
9022 __ Fdup(z7.VnS(), 2.125);
9023 __ Fdup(z8.VnD(), -13.0);
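  // The encodable constants follow the usual 8-bit FP immediate format,
  // (-1)^s * ((16 + m) / 16) * 2^e with m in [0, 15] and e in [-3, 4], which
  // covers values such as 2.0, 3.875, -4.75 and -13.0 but not 0.0 or 255.0.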
9024
9025 // Immediates which cannot be encoded in the instructions.
9026 __ Fdup(z10.VnH(), Float16(0.0));
9027 __ Fdup(z11.VnH(), kFP16PositiveInfinity);
9028 __ Fdup(z12.VnS(), 255.0f);
9029 __ Fdup(z13.VnS(), kFP32NegativeInfinity);
9030 __ Fdup(z14.VnD(), 12.3456);
9031 __ Fdup(z15.VnD(), kFP64PositiveInfinity);
9032
9033 END();
9034
9035 if (CAN_RUN()) {
9036 RUN();
9037
9038 ASSERT_EQUAL_SVE(0xc500, z0.VnH());
9039 ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
9040 ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
9041 ASSERT_EQUAL_SVE(0x4800, z3.VnH());
9042 ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
9043 ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
9044 ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
9045 ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
9046 ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());
9047
9048 ASSERT_EQUAL_SVE(0x0000, z10.VnH());
9049 ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
9050 ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
9051 ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
9052 ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
9053 ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
9054 }
9055}
9056
TatWai Chong6f111bc2019-10-07 09:20:37 +01009057TEST_SVE(sve_andv_eorv_orv) {
9058 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9059 START();
9060
9061 uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
9062 InsrHelper(&masm, z31.VnD(), in);
9063
9064 // For simplicity, we re-use the same pg for various lane sizes.
9065 // For D lanes: 1, 1, 0
9066 // For S lanes: 1, 1, 1, 0, 0
9067 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9068 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9069 Initialise(&masm, p0.VnB(), pg_in);
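  // Only the predicate bit for the lowest-numbered byte of each H, S or D lane
  // determines whether that lane is active, which is how the byte-granular
  // pg_in above yields the three per-size patterns listed in the comments.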
9070
9071 // Make a copy so we can check that constructive operations preserve zn.
9072 __ Mov(z0, z31);
9073 __ Andv(b0, p0, z0.VnB()); // destructive
9074 __ Andv(h1, p0, z31.VnH());
9075 __ Mov(z2, z31);
9076 __ Andv(s2, p0, z2.VnS()); // destructive
9077 __ Andv(d3, p0, z31.VnD());
9078
9079 __ Eorv(b4, p0, z31.VnB());
9080 __ Mov(z5, z31);
9081 __ Eorv(h5, p0, z5.VnH()); // destructive
9082 __ Eorv(s6, p0, z31.VnS());
9083 __ Mov(z7, z31);
9084 __ Eorv(d7, p0, z7.VnD()); // destructive
9085
9086 __ Mov(z8, z31);
9087 __ Orv(b8, p0, z8.VnB()); // destructive
9088 __ Orv(h9, p0, z31.VnH());
9089 __ Mov(z10, z31);
9090 __ Orv(s10, p0, z10.VnS()); // destructive
9091 __ Orv(d11, p0, z31.VnD());
9092
9093 END();
9094
9095 if (CAN_RUN()) {
9096 RUN();
9097
9098 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9099 ASSERT_EQUAL_64(0x10, d0);
9100 ASSERT_EQUAL_64(0x1010, d1);
9101 ASSERT_EQUAL_64(0x33331111, d2);
9102 ASSERT_EQUAL_64(0x7777555533331111, d3);
9103 ASSERT_EQUAL_64(0xbf, d4);
9104 ASSERT_EQUAL_64(0xedcb, d5);
9105 ASSERT_EQUAL_64(0x44444444, d6);
9106 ASSERT_EQUAL_64(0x7777555533331111, d7);
9107 ASSERT_EQUAL_64(0xff, d8);
9108 ASSERT_EQUAL_64(0xffff, d9);
9109 ASSERT_EQUAL_64(0x77775555, d10);
9110 ASSERT_EQUAL_64(0x7777555533331111, d11);
9111 } else {
9112 ASSERT_EQUAL_64(0, d0);
9113 ASSERT_EQUAL_64(0x0010, d1);
9114 ASSERT_EQUAL_64(0x00110011, d2);
9115 ASSERT_EQUAL_64(0x0011001100110011, d3);
9116 ASSERT_EQUAL_64(0x62, d4);
9117 ASSERT_EQUAL_64(0x0334, d5);
9118 ASSERT_EQUAL_64(0x8899aabb, d6);
9119 ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
9120 ASSERT_EQUAL_64(0xff, d8);
9121 ASSERT_EQUAL_64(0xffff, d9);
9122 ASSERT_EQUAL_64(0xffffffff, d10);
9123 ASSERT_EQUAL_64(0xffffffffffffffff, d11);
9124 }
9125
9126 // Check the upper lanes above the top of the V register are all clear.
9127 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9128 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9129 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9130 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9131 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9132 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9133 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9134 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9135 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9136 ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
9137 ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
9138 ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
9139 ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
9140 }
9141 }
9142}
9143
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07009144
9145TEST_SVE(sve_saddv_uaddv) {
9146 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9147 START();
9148
9149 uint64_t in[] = {0x8899aabbccddeeff, 0x8182838485868788, 0x0807060504030201};
9150 InsrHelper(&masm, z31.VnD(), in);
9151
9152 // For simplicity, we re-use the same pg for various lane sizes.
9153 // For D lanes: 1, 1, 0
9154 // For S lanes: 1, 1, 1, 0, 0
9155 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9156 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9157 Initialise(&masm, p0.VnB(), pg_in);
9158
9159 // Make a copy so we can check that constructive operations preserve zn.
9160 __ Mov(z0, z31);
9161 __ Saddv(b0, p0, z0.VnB()); // destructive
9162 __ Saddv(h1, p0, z31.VnH());
9163 __ Mov(z2, z31);
9164 __ Saddv(s2, p0, z2.VnS()); // destructive
9165
9166 __ Uaddv(b4, p0, z31.VnB());
9167 __ Mov(z5, z31);
9168 __ Uaddv(h5, p0, z5.VnH()); // destructive
9169 __ Uaddv(s6, p0, z31.VnS());
9170 __ Mov(z7, z31);
9171 __ Uaddv(d7, p0, z7.VnD()); // destructive
9172
9173 END();
9174
9175 if (CAN_RUN()) {
9176 RUN();
9177
9178 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9179 // Saddv
9180 ASSERT_EQUAL_64(0xfffffffffffffda9, d0);
9181 ASSERT_EQUAL_64(0xfffffffffffe9495, d1);
9182 ASSERT_EQUAL_64(0xffffffff07090b0c, d2);
9183 // Uaddv
9184 ASSERT_EQUAL_64(0x00000000000002a9, d4);
9185 ASSERT_EQUAL_64(0x0000000000019495, d5);
9186 ASSERT_EQUAL_64(0x0000000107090b0c, d6);
9187 ASSERT_EQUAL_64(0x8182838485868788, d7);
9188 } else {
9189 // Saddv
9190 ASSERT_EQUAL_64(0xfffffffffffffd62, d0);
9191 ASSERT_EQUAL_64(0xfffffffffffe8394, d1);
9192 ASSERT_EQUAL_64(0xfffffffed3e6fa0b, d2);
9193 // Uaddv
9194 ASSERT_EQUAL_64(0x0000000000000562, d4);
9195 ASSERT_EQUAL_64(0x0000000000028394, d5);
9196 ASSERT_EQUAL_64(0x00000001d3e6fa0b, d6);
9197 ASSERT_EQUAL_64(0x0a1c2e4052647687, d7);
9198 }
9199
9200 // Check the upper lanes above the top of the V register are all clear.
9201 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9202 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9203 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9204 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9205 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9206 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9207 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9208 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9209 }
9210 }
9211}
9212
9213
9214TEST_SVE(sve_sminv_uminv) {
9215 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9216 START();
9217
9218 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
9219 InsrHelper(&masm, z31.VnD(), in);
9220
9221 // For simplicity, we re-use the same pg for various lane sizes.
9222 // For D lanes: 1, 0, 1
9223 // For S lanes: 1, 1, 0, 0, 1
9224 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
9225 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
9226 Initialise(&masm, p0.VnB(), pg_in);
9227
9228 // Make a copy so we can check that constructive operations preserve zn.
9229 __ Mov(z0, z31);
9230 __ Sminv(b0, p0, z0.VnB()); // destructive
9231 __ Sminv(h1, p0, z31.VnH());
9232 __ Mov(z2, z31);
9233 __ Sminv(s2, p0, z2.VnS()); // destructive
9234 __ Sminv(d3, p0, z31.VnD());
9235
9236 __ Uminv(b4, p0, z31.VnB());
9237 __ Mov(z5, z31);
9238 __ Uminv(h5, p0, z5.VnH()); // destructive
9239 __ Uminv(s6, p0, z31.VnS());
9240 __ Mov(z7, z31);
9241 __ Uminv(d7, p0, z7.VnD()); // destructive
9242
9243 END();
9244
9245 if (CAN_RUN()) {
9246 RUN();
9247
9248 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9249 // Sminv
9250 ASSERT_EQUAL_64(0xaa, d0);
9251 ASSERT_EQUAL_64(0xaabb, d1);
9252 ASSERT_EQUAL_64(0xaabbfc00, d2);
 9253 ASSERT_EQUAL_64(0x00112233aabbfc00, d3); // The lane holding the smaller value is inactive.
9254 // Uminv
9255 ASSERT_EQUAL_64(0, d4);
9256 ASSERT_EQUAL_64(0x2233, d5);
9257 ASSERT_EQUAL_64(0x112233, d6);
 9258 ASSERT_EQUAL_64(0x00112233aabbfc00, d7); // The lane holding the smaller value is inactive.
9259 } else {
9260 // Sminv
9261 ASSERT_EQUAL_64(0xaa, d0);
9262 ASSERT_EQUAL_64(0xaaaa, d1);
9263 ASSERT_EQUAL_64(0xaaaaaaaa, d2);
9264 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d3);
9265 // Uminv
9266 ASSERT_EQUAL_64(0, d4);
9267 ASSERT_EQUAL_64(0x2233, d5);
9268 ASSERT_EQUAL_64(0x112233, d6);
9269 ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
9270 }
9271
9272 // Check the upper lanes above the top of the V register are all clear.
9273 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9274 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9275 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9276 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9277 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9278 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9279 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9280 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9281 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9282 }
9283 }
9284}
9285
9286TEST_SVE(sve_smaxv_umaxv) {
9287 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9288 START();
9289
9290 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
9291 InsrHelper(&masm, z31.VnD(), in);
9292
9293 // For simplicity, we re-use the same pg for various lane sizes.
9294 // For D lanes: 1, 0, 1
9295 // For S lanes: 1, 1, 0, 0, 1
9296 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
9297 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
9298 Initialise(&masm, p0.VnB(), pg_in);
9299
9300 // Make a copy so we can check that constructive operations preserve zn.
9301 __ Mov(z0, z31);
9302 __ Smaxv(b0, p0, z0.VnB()); // destructive
9303 __ Smaxv(h1, p0, z31.VnH());
9304 __ Mov(z2, z31);
9305 __ Smaxv(s2, p0, z2.VnS()); // destructive
9306 __ Smaxv(d3, p0, z31.VnD());
9307
9308 __ Umaxv(b4, p0, z31.VnB());
9309 __ Mov(z5, z31);
9310 __ Umaxv(h5, p0, z5.VnH()); // destructive
9311 __ Umaxv(s6, p0, z31.VnS());
9312 __ Mov(z7, z31);
9313 __ Umaxv(d7, p0, z7.VnD()); // destructive
9314
9315 END();
9316
9317 if (CAN_RUN()) {
9318 RUN();
9319
9320 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
9321 // Smaxv
9322 ASSERT_EQUAL_64(0x33, d0);
9323 ASSERT_EQUAL_64(0x44aa, d1);
9324 ASSERT_EQUAL_64(0x112233, d2);
9325 ASSERT_EQUAL_64(0x112233aabbfc00, d3);
9326 // Umaxv
9327 ASSERT_EQUAL_64(0xfe, d4);
9328 ASSERT_EQUAL_64(0xfc00, d5);
9329 ASSERT_EQUAL_64(0xaabbfc00, d6);
9330 ASSERT_EQUAL_64(0x112233aabbfc00, d7);
9331 } else {
9332 // Smaxv
9333 ASSERT_EQUAL_64(0x33, d0);
9334 ASSERT_EQUAL_64(0x44aa, d1);
9335 ASSERT_EQUAL_64(0x112233, d2);
9336 ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
9337 // Umaxv
9338 ASSERT_EQUAL_64(0xfe, d4);
9339 ASSERT_EQUAL_64(0xfc00, d5);
9340 ASSERT_EQUAL_64(0xaabbfc00, d6);
9341 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d7);
9342 }
9343
9344 // Check the upper lanes above the top of the V register are all clear.
9345 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
9346 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
9347 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
9348 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
9349 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
9350 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
9351 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
9352 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
9353 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
9354 }
9355 }
9356}
9357
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009358typedef void (MacroAssembler::*SdotUdotFn)(const ZRegister& zd,
9359 const ZRegister& za,
9360 const ZRegister& zn,
9361 const ZRegister& zm);
9362
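// For each destination lane, the dot-product instructions accumulate the sum
// of four products of quarter-width elements:
//   zd[i] = za[i] + (zn[4i+0] * zm[4i+0]) + ... + (zn[4i+3] * zm[4i+3]),
// with the sources sign-extended (sdot) or zero-extended (udot).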
9363template <typename Td, typename Ts, typename Te>
9364static void SdotUdotHelper(Test* config,
9365 SdotUdotFn macro,
9366 unsigned lane_size_in_bits,
9367 const Td& zd_inputs,
9368 const Td& za_inputs,
9369 const Ts& zn_inputs,
9370 const Ts& zm_inputs,
9371 const Te& zd_expected,
9372 const Te& zdnm_expected) {
9373 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9374 START();
9375
9376 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
9377 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
9378 ZRegister zn = z2.WithLaneSize(lane_size_in_bits / 4);
9379 ZRegister zm = z3.WithLaneSize(lane_size_in_bits / 4);
9380
9381 InsrHelper(&masm, zd, zd_inputs);
9382 InsrHelper(&masm, za, za_inputs);
9383 InsrHelper(&masm, zn, zn_inputs);
9384 InsrHelper(&masm, zm, zm_inputs);
9385
9386 // The Dot macro handles arbitrarily-aliased registers in the argument list.
9387 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
9388 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
9389 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
9390 ZRegister dnm_result = z13.WithLaneSize(lane_size_in_bits);
9391 ZRegister d_result = z14.WithLaneSize(lane_size_in_bits);
9392
9393 __ Mov(da_result, za);
9394 // zda = zda + (zn . zm)
9395 (masm.*macro)(da_result, da_result, zn, zm);
9396
9397 __ Mov(dn_result, zn);
9398 // zdn = za + (zdn . zm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009399 (masm.*macro)(dn_result, za, dn_result.WithSameLaneSizeAs(zn), zm);
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009400
9401 __ Mov(dm_result, zm);
9402 // zdm = za + (zn . zdm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009403 (masm.*macro)(dm_result, za, zn, dm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009404
9405 __ Mov(d_result, zd);
9406 // zd = za + (zn . zm)
9407 (masm.*macro)(d_result, za, zn, zm);
9408
9409 __ Mov(dnm_result, zn);
9410 // zdnm = za + (zdnm . zdnm)
Jacob Bramley378fc892019-10-30 11:26:09 +00009411 (masm.*macro)(dnm_result,
9412 za,
9413 dnm_result.WithSameLaneSizeAs(zn),
9414 dnm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07009415
9416 END();
9417
9418 if (CAN_RUN()) {
9419 RUN();
9420
9421 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
9422 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits / 4));
9423 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits / 4));
9424
9425 ASSERT_EQUAL_SVE(zd_expected, da_result);
9426 ASSERT_EQUAL_SVE(zd_expected, dn_result);
9427 ASSERT_EQUAL_SVE(zd_expected, dm_result);
9428 ASSERT_EQUAL_SVE(zd_expected, d_result);
9429
9430 ASSERT_EQUAL_SVE(zdnm_expected, dnm_result);
9431 }
9432}
9433
9434TEST_SVE(sve_sdot) {
9435 int zd_inputs[] = {0x33, 0xee, 0xff};
9436 int za_inputs[] = {INT32_MAX, -3, 2};
9437 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
9438 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
9439
9440 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
9441 int32_t zd_expected_s[] = {-2147418113, -183, 133}; // 0x8000ffff
9442 int64_t zd_expected_d[] = {2147549183, -183, 133}; // 0x8000ffff
9443
9444 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
9445 int32_t zdnm_expected_s[] = {-2147418113, 980, 572};
9446 int64_t zdnm_expected_d[] = {2147549183, 980, 572};
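  // For the highest S lane, zn and zm each hold four copies of -128, so the
  // dot product adds 4 * 16384 = 65536 to INT32_MAX; the 32-bit accumulator
  // wraps to -2147418113 (0x8000ffff) while the 64-bit one holds 2147549183.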
9447
9448 SdotUdotHelper(config,
9449 &MacroAssembler::Sdot,
9450 kSRegSize,
9451 zd_inputs,
9452 za_inputs,
9453 zn_inputs,
9454 zm_inputs,
9455 zd_expected_s,
9456 zdnm_expected_s);
9457 SdotUdotHelper(config,
9458 &MacroAssembler::Sdot,
9459 kDRegSize,
9460 zd_inputs,
9461 za_inputs,
9462 zn_inputs,
9463 zm_inputs,
9464 zd_expected_d,
9465 zdnm_expected_d);
9466}
9467
9468TEST_SVE(sve_udot) {
9469 int zd_inputs[] = {0x33, 0xee, 0xff};
9470 int za_inputs[] = {INT32_MAX, -3, 2};
9471 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
9472 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
9473
9474 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
9475 uint32_t zd_expected_s[] = {0x8000ffff, 0x00001749, 0x0000f085};
9476 uint64_t zd_expected_d[] = {0x000000047c00ffff,
9477 0x000000000017ff49,
9478 0x00000000fff00085};
9479
9480 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
9481 uint32_t zdnm_expected_s[] = {0x8000ffff, 0x000101d4, 0x0001d03c};
9482 uint64_t zdnm_expected_d[] = {0x000000047c00ffff,
9483 0x00000000fffe03d4,
9484 0x00000001ffce023c};
9485
9486 SdotUdotHelper(config,
9487 &MacroAssembler::Udot,
9488 kSRegSize,
9489 zd_inputs,
9490 za_inputs,
9491 zn_inputs,
9492 zm_inputs,
9493 zd_expected_s,
9494 zdnm_expected_s);
9495 SdotUdotHelper(config,
9496 &MacroAssembler::Udot,
9497 kDRegSize,
9498 zd_inputs,
9499 za_inputs,
9500 zn_inputs,
9501 zm_inputs,
9502 zd_expected_d,
9503 zdnm_expected_d);
9504}
9505
TatWai Chong7a0d3672019-10-23 17:35:18 -07009506template <typename T, size_t N>
9507static void FPToRawbitsWithSize(const T (&inputs)[N],
9508 uint64_t* outputs,
9509 unsigned size_in_bits) {
TatWai Chongfe536042019-10-23 16:34:11 -07009510 for (size_t i = 0; i < N; i++) {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009511 outputs[i] = vixl::FPToRawbitsWithSize(size_in_bits, inputs[i]);
TatWai Chongfe536042019-10-23 16:34:11 -07009512 }
9513}
9514
TatWai Chong7a0d3672019-10-23 17:35:18 -07009515template <typename Ti, typename Te, size_t N>
9516static void FPBinArithHelper(Test* config,
9517 ArithFn macro,
9518 int lane_size_in_bits,
9519 const Ti (&zn_inputs)[N],
9520 const Ti (&zm_inputs)[N],
9521 const Te (&zd_expected)[N]) {
TatWai Chongfe536042019-10-23 16:34:11 -07009522 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong7a0d3672019-10-23 17:35:18 -07009523
TatWai Chongfe536042019-10-23 16:34:11 -07009524 START();
9525
9526 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
9527 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
9528 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
9529
9530 uint64_t zn_rawbits[N];
9531 uint64_t zm_rawbits[N];
9532
TatWai Chong7a0d3672019-10-23 17:35:18 -07009533 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
9534 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
TatWai Chongfe536042019-10-23 16:34:11 -07009535
9536 InsrHelper(&masm, zn, zn_rawbits);
9537 InsrHelper(&masm, zm, zm_rawbits);
9538
9539 (masm.*macro)(zd, zn, zm);
9540
9541 END();
9542
9543 if (CAN_RUN()) {
9544 RUN();
9545
9546 ASSERT_EQUAL_SVE(zd_expected, zd);
9547 }
9548}
9549
9550TEST_SVE(sve_fp_arithmetic_unpredicated_fadd) {
9551 double zn_inputs[] = {24.0,
9552 5.5,
9553 0.0,
9554 3.875,
9555 2.125,
9556 kFP64PositiveInfinity,
9557 kFP64NegativeInfinity};
9558
9559 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
9560
TatWai Chong7a0d3672019-10-23 17:35:18 -07009561 ArithFn fn = &MacroAssembler::Fadd;
TatWai Chongfe536042019-10-23 16:34:11 -07009562
9563 uint16_t expected_h[] = {Float16ToRawbits(Float16(1048.0)),
9564 Float16ToRawbits(Float16(2053.5)),
9565 Float16ToRawbits(Float16(0.1)),
9566 Float16ToRawbits(Float16(-0.875)),
9567 Float16ToRawbits(Float16(14.465)),
9568 Float16ToRawbits(kFP16PositiveInfinity),
9569 Float16ToRawbits(kFP16NegativeInfinity)};
9570
TatWai Chong7a0d3672019-10-23 17:35:18 -07009571 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009572
9573 uint32_t expected_s[] = {FloatToRawbits(1048.0f),
9574 FloatToRawbits(2053.5f),
9575 FloatToRawbits(0.1f),
9576 FloatToRawbits(-0.875f),
9577 FloatToRawbits(14.465f),
9578 FloatToRawbits(kFP32PositiveInfinity),
9579 FloatToRawbits(kFP32NegativeInfinity)};
9580
TatWai Chong7a0d3672019-10-23 17:35:18 -07009581 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009582
9583 uint64_t expected_d[] = {DoubleToRawbits(1048.0),
9584 DoubleToRawbits(2053.5),
9585 DoubleToRawbits(0.1),
9586 DoubleToRawbits(-0.875),
9587 DoubleToRawbits(14.465),
9588 DoubleToRawbits(kFP64PositiveInfinity),
9589 DoubleToRawbits(kFP64NegativeInfinity)};
9590
TatWai Chong7a0d3672019-10-23 17:35:18 -07009591 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009592}
9593
9594TEST_SVE(sve_fp_arithmetic_unpredicated_fsub) {
9595 double zn_inputs[] = {24.0,
9596 5.5,
9597 0.0,
9598 3.875,
9599 2.125,
9600 kFP64PositiveInfinity,
9601 kFP64NegativeInfinity};
9602
9603 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
9604
TatWai Chong7a0d3672019-10-23 17:35:18 -07009605 ArithFn fn = &MacroAssembler::Fsub;
TatWai Chongfe536042019-10-23 16:34:11 -07009606
9607 uint16_t expected_h[] = {Float16ToRawbits(Float16(-1000.0)),
9608 Float16ToRawbits(Float16(-2042.5)),
9609 Float16ToRawbits(Float16(-0.1)),
9610 Float16ToRawbits(Float16(8.625)),
9611 Float16ToRawbits(Float16(-10.215)),
9612 Float16ToRawbits(kFP16PositiveInfinity),
9613 Float16ToRawbits(kFP16NegativeInfinity)};
9614
TatWai Chong7a0d3672019-10-23 17:35:18 -07009615 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009616
9617 uint32_t expected_s[] = {FloatToRawbits(-1000.0),
9618 FloatToRawbits(-2042.5),
9619 FloatToRawbits(-0.1),
9620 FloatToRawbits(8.625),
9621 FloatToRawbits(-10.215),
9622 FloatToRawbits(kFP32PositiveInfinity),
9623 FloatToRawbits(kFP32NegativeInfinity)};
9624
TatWai Chong7a0d3672019-10-23 17:35:18 -07009625 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009626
9627 uint64_t expected_d[] = {DoubleToRawbits(-1000.0),
9628 DoubleToRawbits(-2042.5),
9629 DoubleToRawbits(-0.1),
9630 DoubleToRawbits(8.625),
9631 DoubleToRawbits(-10.215),
9632 DoubleToRawbits(kFP64PositiveInfinity),
9633 DoubleToRawbits(kFP64NegativeInfinity)};
9634
TatWai Chong7a0d3672019-10-23 17:35:18 -07009635 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009636}
9637
9638TEST_SVE(sve_fp_arithmetic_unpredicated_fmul) {
9639 double zn_inputs[] = {24.0,
9640 5.5,
9641 0.0,
9642 3.875,
9643 2.125,
9644 kFP64PositiveInfinity,
9645 kFP64NegativeInfinity};
9646
9647 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
9648
TatWai Chong7a0d3672019-10-23 17:35:18 -07009649 ArithFn fn = &MacroAssembler::Fmul;
TatWai Chongfe536042019-10-23 16:34:11 -07009650
9651 uint16_t expected_h[] = {Float16ToRawbits(Float16(24576.0)),
9652 Float16ToRawbits(Float16(11264.0)),
9653 Float16ToRawbits(Float16(0.0)),
9654 Float16ToRawbits(Float16(-18.4)),
9655 Float16ToRawbits(Float16(26.23)),
9656 Float16ToRawbits(kFP16PositiveInfinity),
9657 Float16ToRawbits(kFP16PositiveInfinity)};
9658
TatWai Chong7a0d3672019-10-23 17:35:18 -07009659 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07009660
9661 uint32_t expected_s[] = {FloatToRawbits(24576.0),
9662 FloatToRawbits(11264.0),
9663 FloatToRawbits(0.0),
9664 FloatToRawbits(-18.40625),
9665 FloatToRawbits(26.2225),
9666 FloatToRawbits(kFP32PositiveInfinity),
9667 FloatToRawbits(kFP32PositiveInfinity)};
9668
TatWai Chong7a0d3672019-10-23 17:35:18 -07009669 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07009670
9671 uint64_t expected_d[] = {DoubleToRawbits(24576.0),
9672 DoubleToRawbits(11264.0),
9673 DoubleToRawbits(0.0),
9674 DoubleToRawbits(-18.40625),
9675 DoubleToRawbits(26.2225),
9676 DoubleToRawbits(kFP64PositiveInfinity),
9677 DoubleToRawbits(kFP64PositiveInfinity)};
9678
TatWai Chong7a0d3672019-10-23 17:35:18 -07009679 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07009680}
9681
TatWai Chong7a0d3672019-10-23 17:35:18 -07009682typedef void (MacroAssembler::*FPArithPredicatedFn)(
9683 const ZRegister& zd,
9684 const PRegisterM& pg,
9685 const ZRegister& zn,
9686 const ZRegister& zm,
9687 FPMacroNaNPropagationOption nan_option);
9688
Martyn Capewell37f28182020-01-14 10:15:10 +00009689typedef void (MacroAssembler::*FPArithPredicatedNoNaNOptFn)(
9690 const ZRegister& zd,
9691 const PRegisterM& pg,
9692 const ZRegister& zn,
9693 const ZRegister& zm);
9694
TatWai Chong7a0d3672019-10-23 17:35:18 -07009695template <typename Ti, typename Te, size_t N>
9696static void FPBinArithHelper(
9697 Test* config,
9698 FPArithPredicatedFn macro,
Martyn Capewell37f28182020-01-14 10:15:10 +00009699 FPArithPredicatedNoNaNOptFn macro_nonan,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009700 unsigned lane_size_in_bits,
9701 const Ti (&zd_inputs)[N],
9702 const int (&pg_inputs)[N],
9703 const Ti (&zn_inputs)[N],
9704 const Ti (&zm_inputs)[N],
9705 const Te (&zd_expected)[N],
9706 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
Martyn Capewell37f28182020-01-14 10:15:10 +00009707 VIXL_ASSERT((macro == NULL) ^ (macro_nonan == NULL));
TatWai Chongd316c5e2019-10-16 12:22:10 -07009708 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9709 START();
9710
TatWai Chong7a0d3672019-10-23 17:35:18 -07009711 // Avoid choosing default scratch registers.
9712 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
9713 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
9714 ZRegister zm = z28.WithLaneSize(lane_size_in_bits);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009715
TatWai Chong7a0d3672019-10-23 17:35:18 -07009716 uint64_t zn_inputs_rawbits[N];
9717 uint64_t zm_inputs_rawbits[N];
9718 uint64_t zd_inputs_rawbits[N];
TatWai Chongd316c5e2019-10-16 12:22:10 -07009719
TatWai Chong7a0d3672019-10-23 17:35:18 -07009720 FPToRawbitsWithSize(zn_inputs, zn_inputs_rawbits, lane_size_in_bits);
9721 FPToRawbitsWithSize(zm_inputs, zm_inputs_rawbits, lane_size_in_bits);
9722 FPToRawbitsWithSize(zd_inputs, zd_inputs_rawbits, lane_size_in_bits);
9723
9724 InsrHelper(&masm, zn, zn_inputs_rawbits);
9725 InsrHelper(&masm, zm, zm_inputs_rawbits);
9726 InsrHelper(&masm, zd, zd_inputs_rawbits);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009727
9728 PRegisterWithLaneSize pg = p0.WithLaneSize(lane_size_in_bits);
9729 Initialise(&masm, pg, pg_inputs);
9730
9731 // `instr` zdn, pg, zdn, zm
9732 ZRegister dn_result = z0.WithLaneSize(lane_size_in_bits);
9733 __ Mov(dn_result, zn);
Martyn Capewell37f28182020-01-14 10:15:10 +00009734 if (macro_nonan == NULL) {
9735 (masm.*macro)(dn_result, pg.Merging(), dn_result, zm, nan_option);
9736 } else {
9737 (masm.*macro_nonan)(dn_result, pg.Merging(), dn_result, zm);
9738 }
TatWai Chongd316c5e2019-10-16 12:22:10 -07009739
9740 // Based on whether the zd and zm registers are aliased, the instruction
9741 // macro (`Instr`) swaps the order of operands if the operation is
9742 // commutative; otherwise it falls back to the reversed form, such as fdivr.
9743 // `instr` zdm, pg, zn, zdm
9744 ZRegister dm_result = z1.WithLaneSize(lane_size_in_bits);
9745 __ Mov(dm_result, zm);
Martyn Capewell37f28182020-01-14 10:15:10 +00009746 if (macro_nonan == NULL) {
9747 (masm.*macro)(dm_result, pg.Merging(), zn, dm_result, nan_option);
9748 } else {
9749 (masm.*macro_nonan)(dm_result, pg.Merging(), zn, dm_result);
9750 }
TatWai Chongd316c5e2019-10-16 12:22:10 -07009751
9752 // The macro of instructions (`Instr`) automatically selects between `instr`
9753 // and movprfx + `instr` based on whether zd and zn registers are aliased.
9754 // A generated movprfx instruction is predicated, using the same governing
9755 // predicate register, so to keep the inactive lanes of the result
9756 // deterministic, initialize the destination register first.
9757 // `instr` zd, pg, zn, zm
9758 ZRegister d_result = z2.WithLaneSize(lane_size_in_bits);
9759 __ Mov(d_result, zd);
Martyn Capewell37f28182020-01-14 10:15:10 +00009760 if (macro_nonan == NULL) {
9761 (masm.*macro)(d_result, pg.Merging(), zn, zm, nan_option);
9762 } else {
9763 (masm.*macro_nonan)(d_result, pg.Merging(), zn, zm);
9764 }
TatWai Chongd316c5e2019-10-16 12:22:10 -07009765
9766 END();
9767
9768 if (CAN_RUN()) {
9769 RUN();
9770
9771 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
9772 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
9773 if (!core.HasSVELane(dn_result, lane)) break;
9774 if ((pg_inputs[i] & 1) != 0) {
9775 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dn_result, lane);
9776 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009777 ASSERT_EQUAL_SVE_LANE(zn_inputs_rawbits[i], dn_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009778 }
9779 }
9780
9781 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
9782 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
9783 if (!core.HasSVELane(dm_result, lane)) break;
9784 if ((pg_inputs[i] & 1) != 0) {
9785 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dm_result, lane);
9786 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009787 ASSERT_EQUAL_SVE_LANE(zm_inputs_rawbits[i], dm_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009788 }
9789 }
9790
9791 ASSERT_EQUAL_SVE(zd_expected, d_result);
9792 }
9793}
9794
9795TEST_SVE(sve_binary_arithmetic_predicated_fdiv) {
TatWai Chong7a0d3672019-10-23 17:35:18 -07009796 // The inputs are shared with different precision tests.
TatWai Chongd316c5e2019-10-16 12:22:10 -07009797 double zd_in[] = {0.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
9798
9799 double zn_in[] = {24.0,
9800 24.0,
9801 -2.0,
9802 -2.0,
9803 5.5,
9804 5.5,
9805 kFP64PositiveInfinity,
9806 kFP64PositiveInfinity,
9807 kFP64NegativeInfinity,
9808 kFP64NegativeInfinity};
9809
9810 double zm_in[] = {-2.0, -2.0, 24.0, 24.0, 0.5, 0.5, 0.65, 0.65, 24.0, 24.0};
9811
TatWai Chongd316c5e2019-10-16 12:22:10 -07009812 int pg_in[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
9813
TatWai Chong7a0d3672019-10-23 17:35:18 -07009814 uint16_t exp_h[] = {Float16ToRawbits(Float16(0.1)),
TatWai Chongd316c5e2019-10-16 12:22:10 -07009815 Float16ToRawbits(Float16(-12.0)),
9816 Float16ToRawbits(Float16(2.2)),
9817 Float16ToRawbits(Float16(-0.0833)),
9818 Float16ToRawbits(Float16(4.4)),
9819 Float16ToRawbits(Float16(11.0)),
9820 Float16ToRawbits(Float16(6.6)),
9821 Float16ToRawbits(kFP16PositiveInfinity),
9822 Float16ToRawbits(Float16(8.8)),
9823 Float16ToRawbits(kFP16NegativeInfinity)};
9824
TatWai Chong7a0d3672019-10-23 17:35:18 -07009825 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +00009826 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009827 &MacroAssembler::Fdiv,
9828 kHRegSize,
9829 zd_in,
9830 pg_in,
9831 zn_in,
9832 zm_in,
9833 exp_h);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009834
9835 uint32_t exp_s[] = {FloatToRawbits(0.1),
9836 FloatToRawbits(-12.0),
9837 FloatToRawbits(2.2),
9838 0xbdaaaaab,
9839 FloatToRawbits(4.4),
9840 FloatToRawbits(11.0),
9841 FloatToRawbits(6.6),
9842 FloatToRawbits(kFP32PositiveInfinity),
9843 FloatToRawbits(8.8),
9844 FloatToRawbits(kFP32NegativeInfinity)};
9845
TatWai Chong7a0d3672019-10-23 17:35:18 -07009846 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +00009847 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009848 &MacroAssembler::Fdiv,
9849 kSRegSize,
9850 zd_in,
9851 pg_in,
9852 zn_in,
9853 zm_in,
9854 exp_s);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009855
9856 uint64_t exp_d[] = {DoubleToRawbits(0.1),
9857 DoubleToRawbits(-12.0),
9858 DoubleToRawbits(2.2),
9859 0xbfb5555555555555,
9860 DoubleToRawbits(4.4),
9861 DoubleToRawbits(11.0),
9862 DoubleToRawbits(6.6),
9863 DoubleToRawbits(kFP64PositiveInfinity),
9864 DoubleToRawbits(8.8),
9865 DoubleToRawbits(kFP64NegativeInfinity)};
9866
TatWai Chong7a0d3672019-10-23 17:35:18 -07009867 FPBinArithHelper(config,
Martyn Capewell37f28182020-01-14 10:15:10 +00009868 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009869 &MacroAssembler::Fdiv,
9870 kDRegSize,
9871 zd_in,
9872 pg_in,
9873 zn_in,
9874 zm_in,
9875 exp_d);
TatWai Chongd316c5e2019-10-16 12:22:10 -07009876}
9877
Martyn Capewell9cc3f142019-10-29 14:06:35 +00009878TEST_SVE(sve_select) {
9879 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
9880 START();
9881
9882 uint64_t in0[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
9883 uint64_t in1[] = {0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa};
9884
9885 // For simplicity, we re-use the same pg for various lane sizes.
9886 // For D lanes: 1, 1, 0
9887 // For S lanes: 1, 1, 1, 0, 0
9888 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
9889 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
9890 Initialise(&masm, p0.VnB(), pg_in);
9891 PRegisterM pg = p0.Merging();
9892
9893 InsrHelper(&masm, z30.VnD(), in0);
9894 InsrHelper(&masm, z31.VnD(), in1);
9895
9896 __ Sel(z0.VnB(), pg, z30.VnB(), z31.VnB());
9897 __ Sel(z1.VnH(), pg, z30.VnH(), z31.VnH());
9898 __ Sel(z2.VnS(), pg, z30.VnS(), z31.VnS());
9899 __ Sel(z3.VnD(), pg, z30.VnD(), z31.VnD());
9900
9901 END();
9902
9903 if (CAN_RUN()) {
9904 RUN();
9905
9906 uint64_t expected_z0[] = {0xaaaaaaaa05aa07f8,
9907 0xfeaaaaf0aac3870f,
9908 0xaaaa56aa9abcdeaa};
9909 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
9910
9911 uint64_t expected_z1[] = {0xaaaaaaaaaaaa07f8,
9912 0xaaaaf8f0e1c3870f,
9913 0xaaaaaaaa9abcaaaa};
9914 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
9915
9916 uint64_t expected_z2[] = {0xaaaaaaaa05f607f8,
9917 0xfefcf8f0e1c3870f,
9918 0xaaaaaaaaaaaaaaaa};
9919 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
9920
9921 uint64_t expected_z3[] = {0x01f203f405f607f8,
9922 0xfefcf8f0e1c3870f,
9923 0xaaaaaaaaaaaaaaaa};
9924 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
9925 }
9926}
TatWai Chongd316c5e2019-10-16 12:22:10 -07009927
TatWai Chong7a0d3672019-10-23 17:35:18 -07009928TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_h) {
9929 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
9930 double zn_inputs[] = {-2.1,
9931 8.5,
9932 225.5,
9933 0.0,
9934 8.8,
9935 -4.75,
9936 kFP64PositiveInfinity,
9937 kFP64NegativeInfinity};
9938 double zm_inputs[] = {-2.0,
9939 -13.0,
9940 24.0,
9941 0.01,
9942 0.5,
9943 300.75,
9944 kFP64NegativeInfinity,
9945 kFP64PositiveInfinity};
9946 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
9947
9948 uint16_t zd_expected_max[] = {Float16ToRawbits(Float16(-2.0)),
9949 Float16ToRawbits(Float16(8.5)),
9950 Float16ToRawbits(Float16(3.3)),
9951 Float16ToRawbits(Float16(0.01)),
9952 Float16ToRawbits(Float16(5.5)),
9953 Float16ToRawbits(Float16(300.75)),
9954 Float16ToRawbits(kFP16PositiveInfinity),
9955 Float16ToRawbits(kFP16PositiveInfinity)};
9956 FPBinArithHelper(config,
9957 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +00009958 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009959 kHRegSize,
9960 zd_inputs,
9961 pg_inputs,
9962 zn_inputs,
9963 zm_inputs,
9964 zd_expected_max);
9965
9966 uint16_t zd_expected_min[] = {Float16ToRawbits(Float16(-2.1)),
9967 Float16ToRawbits(Float16(-13.0)),
9968 Float16ToRawbits(Float16(3.3)),
9969 Float16ToRawbits(Float16(0.0)),
9970 Float16ToRawbits(Float16(5.5)),
9971 Float16ToRawbits(Float16(-4.75)),
9972 Float16ToRawbits(kFP16NegativeInfinity),
9973 Float16ToRawbits(kFP16NegativeInfinity)};
9974 FPBinArithHelper(config,
9975 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +00009976 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -07009977 kHRegSize,
9978 zd_inputs,
9979 pg_inputs,
9980 zn_inputs,
9981 zm_inputs,
9982 zd_expected_min);
9983}
9984
9985TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_s) {
9986 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
9987 double zn_inputs[] = {-2.1,
9988 8.5,
9989 225.5,
9990 0.0,
9991 8.8,
9992 -4.75,
9993 kFP64PositiveInfinity,
9994 kFP64NegativeInfinity};
9995 double zm_inputs[] = {-2.0,
9996 -13.0,
9997 24.0,
9998 0.01,
9999 0.5,
10000 300.75,
10001 kFP64NegativeInfinity,
10002 kFP64PositiveInfinity};
10003 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
10004
10005 uint32_t zd_expected_max[] = {FloatToRawbits(-2.0),
10006 FloatToRawbits(8.5),
10007 FloatToRawbits(3.3),
10008 FloatToRawbits(0.01),
10009 FloatToRawbits(5.5),
10010 FloatToRawbits(300.75),
10011 FloatToRawbits(kFP32PositiveInfinity),
10012 FloatToRawbits(kFP32PositiveInfinity)};
10013 FPBinArithHelper(config,
10014 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000010015 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010016 kSRegSize,
10017 zd_inputs,
10018 pg_inputs,
10019 zn_inputs,
10020 zm_inputs,
10021 zd_expected_max);
10022
10023 uint32_t zd_expected_min[] = {FloatToRawbits(-2.1),
10024 FloatToRawbits(-13.0),
10025 FloatToRawbits(3.3),
10026 FloatToRawbits(0.0),
10027 FloatToRawbits(5.5),
10028 FloatToRawbits(-4.75),
10029 FloatToRawbits(kFP32NegativeInfinity),
10030 FloatToRawbits(kFP32NegativeInfinity)};
10031 FPBinArithHelper(config,
10032 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000010033 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010034 kSRegSize,
10035 zd_inputs,
10036 pg_inputs,
10037 zn_inputs,
10038 zm_inputs,
10039 zd_expected_min);
10040}
10041
10042TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_d) {
10043 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
10044 double zn_inputs[] = {-2.1,
10045 8.5,
10046 225.5,
10047 0.0,
10048 8.8,
10049 -4.75,
10050 kFP64PositiveInfinity,
10051 kFP64NegativeInfinity};
10052 double zm_inputs[] = {-2.0,
10053 -13.0,
10054 24.0,
10055 0.01,
10056 0.5,
10057 300.75,
10058 kFP64NegativeInfinity,
10059 kFP64PositiveInfinity};
10060 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
10061
10062 uint64_t zd_expected_max[] = {DoubleToRawbits(-2.0),
10063 DoubleToRawbits(8.5),
10064 DoubleToRawbits(3.3),
10065 DoubleToRawbits(0.01),
10066 DoubleToRawbits(5.5),
10067 DoubleToRawbits(300.75),
10068 DoubleToRawbits(kFP64PositiveInfinity),
10069 DoubleToRawbits(kFP64PositiveInfinity)};
10070 FPBinArithHelper(config,
10071 &MacroAssembler::Fmax,
Martyn Capewell37f28182020-01-14 10:15:10 +000010072 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010073 kDRegSize,
10074 zd_inputs,
10075 pg_inputs,
10076 zn_inputs,
10077 zm_inputs,
10078 zd_expected_max);
10079
10080 uint64_t zd_expected_min[] = {DoubleToRawbits(-2.1),
10081 DoubleToRawbits(-13.0),
10082 DoubleToRawbits(3.3),
10083 DoubleToRawbits(0.0),
10084 DoubleToRawbits(5.5),
10085 DoubleToRawbits(-4.75),
10086 DoubleToRawbits(kFP64NegativeInfinity),
10087 DoubleToRawbits(kFP64NegativeInfinity)};
10088 FPBinArithHelper(config,
10089 &MacroAssembler::Fmin,
Martyn Capewell37f28182020-01-14 10:15:10 +000010090 NULL,
TatWai Chong7a0d3672019-10-23 17:35:18 -070010091 kDRegSize,
10092 zd_inputs,
10093 pg_inputs,
10094 zn_inputs,
10095 zm_inputs,
10096 zd_expected_min);
10097}
TatWai Chong29a0c432019-11-06 22:20:44 -080010098
10099template <typename T, size_t N>
10100static void BitwiseShiftImmHelper(Test* config,
10101 int lane_size_in_bits,
10102 const T (&zn_inputs)[N],
10103 int shift) {
10104 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10105 START();
10106
10107 ZRegister zd_asr = z25.WithLaneSize(lane_size_in_bits);
10108 ZRegister zd_lsr = z26.WithLaneSize(lane_size_in_bits);
10109 ZRegister zd_lsl = z27.WithLaneSize(lane_size_in_bits);
10110 ZRegister zn = z28.WithLaneSize(lane_size_in_bits);
10111
10112 InsrHelper(&masm, zn, zn_inputs);
10113
10114 __ Asr(zd_asr, zn, shift);
10115 __ Lsr(zd_lsr, zn, shift);
10116 __ Lsl(zd_lsl, zn, shift);
10117
10118 END();
10119
10120 if (CAN_RUN()) {
10121 RUN();
10122
10123 const uint64_t mask = GetUintMask(lane_size_in_bits);
10124 for (int i = 0; i < static_cast<int>(N); i++) {
10125 int lane = N - i - 1;
10126 if (!core.HasSVELane(zd_asr, lane)) break;
10127 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
10128 uint64_t result;
10129 if (shift >= lane_size_in_bits) {
10130 result = is_negative ? mask : 0;
10131 } else {
10132 result = zn_inputs[i] >> shift;
10133 if (is_negative) {
10134 result |= mask << (lane_size_in_bits - shift);
10135 result &= mask;
10136 }
10137 }
10138 ASSERT_EQUAL_SVE_LANE(result, zd_asr, lane);
10139 }
10140
10141 for (int i = 0; i < static_cast<int>(N); i++) {
10142 int lane = N - i - 1;
10143 if (!core.HasSVELane(zd_lsr, lane)) break;
10144 uint64_t result =
10145 (shift >= lane_size_in_bits) ? 0 : zn_inputs[i] >> shift;
10146 ASSERT_EQUAL_SVE_LANE(result, zd_lsr, lane);
10147 }
10148
10149 for (int i = 0; i < static_cast<int>(N); i++) {
10150 int lane = N - i - 1;
10151 if (!core.HasSVELane(zd_lsl, lane)) break;
10152 uint64_t result = (shift >= lane_size_in_bits) ? 0 : zn_inputs[i]
10153 << shift;
10154 ASSERT_EQUAL_SVE_LANE(result & mask, zd_lsl, lane);
10155 }
10156 }
10157}
10158
10159TEST_SVE(sve_bitwise_shift_imm_unpredicated) {
10160 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10161 int shift_b[] = {1, 3, 5, 8};
10162 for (size_t i = 0; i < ArrayLength(shift_b); i++) {
10163 BitwiseShiftImmHelper(config, kBRegSize, inputs_b, shift_b[i]);
10164 }
10165
10166 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233};
10167 int shift_h[] = {1, 8, 11, 16};
10168 for (size_t i = 0; i < ArrayLength(shift_h); i++) {
10169 BitwiseShiftImmHelper(config, kHRegSize, inputs_h, shift_h[i]);
10170 }
10171
10172 uint64_t inputs_s[] = {0xfedcba98, 0xfffa55aa, 0x00112233};
10173 int shift_s[] = {1, 9, 17, 32};
10174 for (size_t i = 0; i < ArrayLength(shift_s); i++) {
10175 BitwiseShiftImmHelper(config, kSRegSize, inputs_s, shift_s[i]);
10176 }
10177
10178 uint64_t inputs_d[] = {0xfedcba98fedcba98,
10179 0xfffa5555aaaaaaaa,
10180 0x0011223344aafe80};
10181 int shift_d[] = {1, 23, 45, 64};
10182 for (size_t i = 0; i < ArrayLength(shift_d); i++) {
10183 BitwiseShiftImmHelper(config, kDRegSize, inputs_d, shift_d[i]);
10184 }
10185}
10186
10187template <typename T, typename R, size_t N>
10188static void BitwiseShiftWideElementsHelper(Test* config,
10189 Shift shift_type,
10190 int lane_size_in_bits,
10191 const T (&zn_inputs)[N],
10192 const R& zm_inputs,
10193 const T (&zd_expected)[N]) {
10194 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10195 START();
10196
10197 ArithFn macro;
10198 // Since logical shift left and right by the current lane size width is equal
10199 // to 0, so initialize the array to 0 for convenience.
10200 uint64_t zd_expected_max_shift_amount[N] = {0};
10201 switch (shift_type) {
10202 case ASR: {
10203 macro = &MacroAssembler::Asr;
10204 uint64_t mask = GetUintMask(lane_size_in_bits);
10205 for (size_t i = 0; i < ArrayLength(zn_inputs); i++) {
10206 bool is_negative = (zn_inputs[i] & GetSignMask(lane_size_in_bits)) != 0;
10207 zd_expected_max_shift_amount[i] = is_negative ? mask : 0;
10208 }
10209 break;
10210 }
10211 case LSR:
10212 macro = &MacroAssembler::Lsr;
10213 break;
10214 case LSL:
10215 macro = &MacroAssembler::Lsl;
10216 break;
10217 default:
10218 VIXL_UNIMPLEMENTED();
10219 macro = NULL;
10220 break;
10221 }
10222
10223 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
10224 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
10225 ZRegister zm = z28.WithLaneSize(kDRegSize);
10226
10227 InsrHelper(&masm, zn, zn_inputs);
10228 InsrHelper(&masm, zm, zm_inputs);
10229
10230 (masm.*macro)(zd, zn, zm);
10231
10232 ZRegister zm_max_shift_amount = z25.WithLaneSize(kDRegSize);
10233 ZRegister zd_max_shift_amount = z24.WithLaneSize(lane_size_in_bits);
10234
10235 __ Dup(zm_max_shift_amount, lane_size_in_bits);
10236 (masm.*macro)(zd_max_shift_amount, zn, zm_max_shift_amount);
10237
10238 ZRegister zm_out_of_range = z23.WithLaneSize(kDRegSize);
10239 ZRegister zd_out_of_range = z22.WithLaneSize(lane_size_in_bits);
10240
10241 __ Dup(zm_out_of_range, GetUintMask(lane_size_in_bits));
10242 (masm.*macro)(zd_out_of_range, zn, zm_out_of_range);
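  // Shift amounts of the lane size or more all produce the same result:
  // sign-fill for ASR and 0 for LSR and LSL. The two extra runs above use
  // exactly the lane size and the largest (all-ones) amount, and are expected
  // to match each other.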
10243
10244 END();
10245
10246 if (CAN_RUN()) {
10247 RUN();
10248
10249 ASSERT_EQUAL_SVE(zd_expected, zd);
10250 ASSERT_EQUAL_SVE(zd_expected_max_shift_amount, zd_max_shift_amount);
10251 ASSERT_EQUAL_SVE(zd_max_shift_amount, zd_out_of_range);
10252 }
10253}
10254
10255TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_asr) {
10256 // clang-format off
10257 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10258 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10259 int shift_b[] = {1, 3};
10260 uint64_t expected_b[] = {0xff, 0xee, 0xdd, 0xcc, 0xff, 0x2a, 0xd5, 0xc0,
10261 0xff, 0xfb, 0xf7, 0xf3, 0xff, 0x0a, 0xf5, 0xf0};
10262 BitwiseShiftWideElementsHelper(config,
10263 ASR,
10264 kBRegSize,
10265 inputs_b,
10266 shift_b,
10267 expected_b);
10268
10269 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10270 0xfedc, 0xfa55, 0x0011, 0x2233,
10271 0xfedc, 0xfa55, 0x0011, 0x2233};
10272 int shift_h[] = {1, 8, 11};
10273 uint64_t expected_h[] = {0xff6e, 0xfd2a, 0x0008, 0x1119,
10274 0xfffe, 0xfffa, 0x0000, 0x0022,
10275 0xffff, 0xffff, 0x0000, 0x0004};
10276 BitwiseShiftWideElementsHelper(config,
10277 ASR,
10278 kHRegSize,
10279 inputs_h,
10280 shift_h,
10281 expected_h);
10282
10283 uint64_t inputs_s[] =
10284 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10285 int shift_s[] = {1, 9, 23};
10286 uint64_t expected_s[] =
10287 {0xff6e5d4c, 0xfffd2ad5, 0x00000891, 0x000091a2, 0xffffff55, 0xffffff11};
10288 BitwiseShiftWideElementsHelper(config,
10289 ASR,
10290 kSRegSize,
10291 inputs_s,
10292 shift_s,
10293 expected_s);
10294 // clang-format on
10295}
10296
10297TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsr) {
10298 // clang-format off
10299 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10300 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10301 int shift_b[] = {1, 3};
10302 uint64_t expected_b[] = {0x7f, 0x6e, 0x5d, 0x4c, 0x7f, 0x2a, 0x55, 0x40,
10303 0x1f, 0x1b, 0x17, 0x13, 0x1f, 0x0a, 0x15, 0x10};
10304
10305 BitwiseShiftWideElementsHelper(config,
10306 LSR,
10307 kBRegSize,
10308 inputs_b,
10309 shift_b,
10310 expected_b);
10311
10312 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10313 0xfedc, 0xfa55, 0x0011, 0x2233,
10314 0xfedc, 0xfa55, 0x0011, 0x2233};
10315 int shift_h[] = {1, 8, 11};
10316 uint64_t expected_h[] = {0x7f6e, 0x7d2a, 0x0008, 0x1119,
10317 0x00fe, 0x00fa, 0x0000, 0x0022,
10318 0x001f, 0x001f, 0x0000, 0x0004};
10319 BitwiseShiftWideElementsHelper(config,
10320 LSR,
10321 kHRegSize,
10322 inputs_h,
10323 shift_h,
10324 expected_h);
10325
10326 uint64_t inputs_s[] =
10327 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10328 int shift_s[] = {1, 9, 23};
10329 uint64_t expected_s[] =
10330 {0x7f6e5d4c, 0x7ffd2ad5, 0x00000891, 0x000091a2, 0x00000155, 0x00000111};
10331 BitwiseShiftWideElementsHelper(config,
10332 LSR,
10333 kSRegSize,
10334 inputs_s,
10335 shift_s,
10336 expected_s);
10337 // clang-format on
10338}
10339
10340TEST_SVE(sve_bitwise_shift_wide_elements_unpredicated_lsl) {
10341 // clang-format off
10342 uint64_t inputs_b[] = {0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80,
10343 0xfe, 0xdc, 0xba, 0x98, 0xff, 0x55, 0xaa, 0x80};
10344 int shift_b[] = {1, 5};
10345
10346 uint64_t expected_b[] = {0xfc, 0xb8, 0x74, 0x30, 0xfe, 0xaa, 0x54, 0x00,
10347 0xc0, 0x80, 0x40, 0x00, 0xe0, 0xa0, 0x40, 0x00};
10348
10349 BitwiseShiftWideElementsHelper(config,
10350 LSL,
10351 kBRegSize,
10352 inputs_b,
10353 shift_b,
10354 expected_b);
10355 uint64_t inputs_h[] = {0xfedc, 0xfa55, 0x0011, 0x2233,
10356 0xfedc, 0xfa55, 0x0011, 0x2233,
10357 0xfedc, 0xfa55, 0x0011, 0x2233};
10358 int shift_h[] = {1, 2, 14};
10359
10360 uint64_t expected_h[] = {0xfdb8, 0xf4aa, 0x0022, 0x4466,
10361 0xfb70, 0xe954, 0x0044, 0x88cc,
10362 0x0000, 0x4000, 0x4000, 0xc000};
10363 BitwiseShiftWideElementsHelper(config,
10364 LSL,
10365 kHRegSize,
10366 inputs_h,
10367 shift_h,
10368 expected_h);
10369 uint64_t inputs_s[] =
10370 {0xfedcba98, 0xfffa55aa, 0x00112233, 0x01234567, 0xaaaaaaaa, 0x88888888};
10371 int shift_s[] = {1, 19, 26};
10372 uint64_t expected_s[] =
10373 {0xfdb97530, 0xfff4ab54, 0x11980000, 0x2b380000, 0xa8000000, 0x20000000};
10374 BitwiseShiftWideElementsHelper(config,
10375 LSL,
10376 kSRegSize,
10377 inputs_s,
10378 shift_s,
10379 expected_s);
10380 // clang-format on
10381}
10382
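// Setffr sets every bit of the first-fault register (FFR) to true, and
// Rdffr copies the FFR into a predicate register, so p14 should match the
// all-true predicate held in p15.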
10383TEST_SVE(sve_setffr) {
10384 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10385 START();
10386
10387 __ Ptrue(p15.VnB());
10388 __ Setffr();
10389 __ Rdffr(p14.VnB());
10390
10391 END();
10392
10393 if (CAN_RUN()) {
10394 RUN();
10395
10396 ASSERT_EQUAL_SVE(p14.VnB(), p15.VnB());
10397 }
10398}
10399
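// Wrffr copies a predicate into the FFR, and the unpredicated Rdffr reads
// it back unchanged. The helper writes a prefix-style mask (the lowest
// `active_lanes` lanes set), the shape that first-fault loads naturally
// leave in the FFR.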
10400static void WrffrHelper(Test* config, unsigned active_lanes) {
10401 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10402 START();
10403
10404 int inputs[kPRegMaxSize] = {0};
10405 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
10406 for (unsigned i = 0; i < active_lanes; i++) {
10407 // The rightmost (highest-indexed) array element maps to the lowest-numbered
10408 // lane.
10409 inputs[kPRegMaxSize - i - 1] = 1;
10410 }
10411
10412 Initialise(&masm, p1.VnB(), inputs);
10413 __ Wrffr(p1.VnB());
10414 __ Rdffr(p2.VnB());
10415
10416 END();
10417
10418 if (CAN_RUN()) {
10419 RUN();
10420
10421 ASSERT_EQUAL_SVE(p1.VnB(), p2.VnB());
10422 }
10423}
10424
10425TEST_SVE(sve_wrffr) {
10426 int active_lanes_inputs[] = {0, 1, 7, 10, 32, 48, kPRegMaxSize};
10427 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
10428 WrffrHelper(config, active_lanes_inputs[i]);
10429 }
10430}
10431
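// This helper checks the predicated (zeroing) forms: Rdffr ANDs the FFR
// with the governing predicate, and Rdffrs additionally sets NZCV from the
// usual SVE predicate test of the result.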
10432template <size_t N>
10433static void RdffrHelper(Test* config,
10434 size_t active_lanes,
10435 const int (&pg_inputs)[N]) {
10436 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10437 START();
10438
10439 VIXL_ASSERT(active_lanes <= kPRegMaxSize);
10440
10441 // The rightmost (highest-indexed) array element maps to the lowest-numbered
10442 // lane.
10443 int pd[kPRegMaxSize] = {0};
10444 for (unsigned i = 0; i < active_lanes; i++) {
10445 pd[kPRegMaxSize - i - 1] = 1;
10446 }
10447
10448 int pg[kPRegMaxSize] = {0};
10449 for (unsigned i = 0; i < N; i++) {
10450 pg[kPRegMaxSize - i - 1] = pg_inputs[i];
10451 }
10452
10453 int pd_expected[kPRegMaxSize] = {0};
10454 for (unsigned i = 0; i < std::min(active_lanes, N); i++) {
10455 int lane = kPRegMaxSize - i - 1;
10456 pd_expected[lane] = pd[lane] & pg[lane];
10457 }
10458
10459 Initialise(&masm, p0.VnB(), pg);
10460 Initialise(&masm, p1.VnB(), pd);
10461
10462 // The unpredicated form of rdffr has been tested in `WrffrHelper`.
10463 __ Wrffr(p1.VnB());
10464 __ Rdffr(p14.VnB(), p0.Zeroing());
10465 __ Rdffrs(p13.VnB(), p0.Zeroing());
10466 __ Mrs(x8, NZCV);
10467
10468 END();
10469
10470 if (CAN_RUN()) {
10471 RUN();
10472
10473 ASSERT_EQUAL_SVE(pd_expected, p14.VnB());
10474 ASSERT_EQUAL_SVE(pd_expected, p13.VnB());
10475 StatusFlags nzcv_expected =
10476 GetPredTestFlags(pd_expected, pg, core.GetSVELaneCount(kBRegSize));
10477 ASSERT_EQUAL_64(nzcv_expected, x8);
10478 }
10479}
10480
10481TEST_SVE(sve_rdffr_rdffrs) {
10482 // clang-format off
10483 int active_lanes_inputs[] = {0, 1, 15, 26, 39, 47, kPRegMaxSize};
10484 int pg_inputs_0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10485 int pg_inputs_1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
10486 int pg_inputs_2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
10487 int pg_inputs_3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
10488 int pg_inputs_4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10489 // clang-format on
10490
10491 for (size_t i = 0; i < ArrayLength(active_lanes_inputs); i++) {
10492 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_0);
10493 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_1);
10494 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_2);
10495 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_3);
10496 RdffrHelper(config, active_lanes_inputs[i], pg_inputs_4);
10497 }
10498}
10499
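// Brkpa and Brkpb compute a "break" mask from a pair of predicates: if the
// last active element of pn is true, active result elements are true up to
// the first active true element of pm (inclusive for Brkpa, exclusive for
// Brkpb) and false afterwards; otherwise the result is all-false. The "s"
// forms also set NZCV from a predicate test of the result.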
10500typedef void (MacroAssembler::*BrkpFn)(const PRegisterWithLaneSize& pd,
10501 const PRegisterZ& pg,
10502 const PRegisterWithLaneSize& pn,
10503 const PRegisterWithLaneSize& pm);
10504
10505template <typename Tg, typename Tn, typename Td>
10506static void BrkpaBrkpbHelper(Test* config,
10507 BrkpFn macro,
10508 BrkpFn macro_set_flags,
10509 const Tg& pg_inputs,
10510 const Tn& pn_inputs,
10511 const Tn& pm_inputs,
10512 const Td& pd_expected) {
10513 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10514 START();
10515
10516 PRegister pg = p15;
10517 PRegister pn = p14;
10518 PRegister pm = p13;
10519 Initialise(&masm, pg.VnB(), pg_inputs);
10520 Initialise(&masm, pn.VnB(), pn_inputs);
10521 Initialise(&masm, pm.VnB(), pm_inputs);
10522
10523 // Initialise NZCV to an impossible value, to check that we actually write it.
10524 __ Mov(x10, NZCVFlag);
10525 __ Msr(NZCV, x10);
10526
10527 (masm.*macro_set_flags)(p0.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
10528 __ Mrs(x0, NZCV);
10529
10530 (masm.*macro)(p1.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
10531
10532 END();
10533
10534 if (CAN_RUN()) {
10535 RUN();
10536
10537 ASSERT_EQUAL_SVE(pd_expected, p0.VnB());
10538
10539 // Check that the flags were properly set.
10540 StatusFlags nzcv_expected =
10541 GetPredTestFlags(pd_expected,
10542 pg_inputs,
10543 core.GetSVELaneCount(kBRegSize));
10544 ASSERT_EQUAL_64(nzcv_expected, x0);
10545 ASSERT_EQUAL_SVE(p0.VnB(), p1.VnB());
10546 }
10547}
10548
10549template <typename Tg, typename Tn, typename Td>
10550static void BrkpaHelper(Test* config,
10551 const Tg& pg_inputs,
10552 const Tn& pn_inputs,
10553 const Tn& pm_inputs,
10554 const Td& pd_expected) {
10555 BrkpaBrkpbHelper(config,
10556 &MacroAssembler::Brkpa,
10557 &MacroAssembler::Brkpas,
10558 pg_inputs,
10559 pn_inputs,
10560 pm_inputs,
10561 pd_expected);
10562}
10563
10564template <typename Tg, typename Tn, typename Td>
10565static void BrkpbHelper(Test* config,
10566 const Tg& pg_inputs,
10567 const Tn& pn_inputs,
10568 const Tn& pm_inputs,
10569 const Td& pd_expected) {
10570 BrkpaBrkpbHelper(config,
10571 &MacroAssembler::Brkpb,
10572 &MacroAssembler::Brkpbs,
10573 pg_inputs,
10574 pn_inputs,
10575 pm_inputs,
10576 pd_expected);
10577}
10578
10579TEST_SVE(sve_brkpb) {
10580 // clang-format off
10581 // The last active element of each `pn` input is `true` in all vector length configurations.
10582 // | boundary of 128-bits VL.
10583 // v
10584 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
10585 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10586 int pg_3[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10587
10588 // | highest-numbered lane lowest-numbered lane |
10589 // v v
10590 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
10591 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
10592 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1};
10593
10594 int pm_1[] = {1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
10595 int pm_2[] = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10596 int pm_3[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
10597
10598 // | first active
10599 // v
10600 int exp_1_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
10601 // | first active
10602 // v
10603 int exp_1_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
10604 // | first active
10605 // v
10606 int exp_1_3_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
10607
10608 BrkpbHelper(config, pg_1, pn_1, pm_1, exp_1_1_1);
10609 BrkpbHelper(config, pg_1, pn_2, pm_2, exp_1_2_2);
10610 BrkpbHelper(config, pg_1, pn_3, pm_3, exp_1_3_3);
10611
10612 // | first active
10613 // v
10614 int exp_2_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10615 // | first active
10616 // v
10617 int exp_2_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10618 // | first active
10619 // v
10620 int exp_2_3_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
10621 BrkpbHelper(config, pg_2, pn_1, pm_2, exp_2_1_2);
10622 BrkpbHelper(config, pg_2, pn_2, pm_3, exp_2_2_3);
10623 BrkpbHelper(config, pg_2, pn_3, pm_1, exp_2_3_1);
10624
10625 // | first active
10626 // v
10627 int exp_3_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
10628 // | first active
10629 // v
10630 int exp_3_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
10631 // | first active
10632 // v
10633 int exp_3_3_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10634 BrkpbHelper(config, pg_3, pn_1, pm_3, exp_3_1_3);
10635 BrkpbHelper(config, pg_3, pn_2, pm_1, exp_3_2_1);
10636 BrkpbHelper(config, pg_3, pn_3, pm_2, exp_3_3_2);
10637
10638 // The last active element of each `pn` input is `false` in all vector length configurations.
10639 // | last active lane when VL > 128 bits.
10640 // v
10641 // | last active lane when VL == 128 bits.
10642 // v
10643 int pg_4[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10644 int exp_4_x_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10645 BrkpbHelper(config, pg_4, pn_1, pm_1, exp_4_x_x);
10646 BrkpbHelper(config, pg_4, pn_2, pm_2, exp_4_x_x);
10647 BrkpbHelper(config, pg_4, pn_3, pm_3, exp_4_x_x);
10648 // clang-format on
10649}
10650
10651TEST_SVE(sve_brkpa) {
10652 // clang-format off
10653 // The last active element of each `pn` input is `true` in all vector length configurations.
10654 // | boundary of 128-bits VL.
10655 // v
10656 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
10657 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10658 int pg_3[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10659
10660 // | highest-numbered lane lowest-numbered lane |
10661 // v v
10662 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
10663 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
10664 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1};
10665
10666 int pm_1[] = {1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
10667 int pm_2[] = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10668 int pm_3[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
10669
10670 // | first active
10671 // v
10672 int exp_1_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
10673 // | first active
10674 // v
10675 int exp_1_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
10676 // | first active
10677 // v
10678 int exp_1_3_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
10679
10680 BrkpaHelper(config, pg_1, pn_1, pm_1, exp_1_1_1);
10681 BrkpaHelper(config, pg_1, pn_2, pm_2, exp_1_2_2);
10682 BrkpaHelper(config, pg_1, pn_3, pm_3, exp_1_3_3);
10683
10684 // | first active
10685 // v
10686 int exp_2_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10687 // | first active
10688 // v
10689 int exp_2_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
10690 // | first active
10691 // v
10692 int exp_2_3_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1};
10693 BrkpaHelper(config, pg_2, pn_1, pm_2, exp_2_1_2);
10694 BrkpaHelper(config, pg_2, pn_2, pm_3, exp_2_2_3);
10695 BrkpaHelper(config, pg_2, pn_3, pm_1, exp_2_3_1);
10696
10697 // | first active
10698 // v
10699 int exp_3_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
10700 // | first active
10701 // v
10702 int exp_3_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
10703 // | first active
10704 // v
10705 int exp_3_3_2[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10706 BrkpaHelper(config, pg_3, pn_1, pm_3, exp_3_1_3);
10707 BrkpaHelper(config, pg_3, pn_2, pm_1, exp_3_2_1);
10708 BrkpaHelper(config, pg_3, pn_3, pm_2, exp_3_3_2);
10709
10710 // The last active element of each `pn` input is `false` in all vector length configurations.
10711 // | last active lane when VL > 128 bits.
10712 // v
10713 // | last active lane when VL == 128 bits.
10714 // v
10715 int pg_4[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
10716 int exp_4_x_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
10717 BrkpaHelper(config, pg_4, pn_1, pm_1, exp_4_x_x);
10718 BrkpaHelper(config, pg_4, pn_2, pm_2, exp_4_x_x);
10719 BrkpaHelper(config, pg_4, pn_3, pm_3, exp_4_x_x);
10720 // clang-format on
10721}
10722
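// Rbit reverses the bit order within each element. Applying it twice is
// the identity, which is why z0 is compared against the unmodified inputs;
// under merging predication, inactive lanes keep the 0x42 fill value.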
10723TEST_SVE(sve_rbit) {
10724 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10725 START();
10726
10727 uint64_t inputs[] = {0xaaaaaaaa55555555, 0xaaaa5555aa55aa55};
10728 InsrHelper(&masm, z0.VnD(), inputs);
10729
10730 __ Ptrue(p1.VnB());
10731 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
10732 Initialise(&masm, p2.VnB(), pred);
10733
10734 __ Rbit(z0.VnB(), p1.Merging(), z0.VnB());
10735 __ Rbit(z0.VnB(), p1.Merging(), z0.VnB());
10736
10737 __ Rbit(z1.VnB(), p1.Merging(), z0.VnB());
10738 __ Rbit(z2.VnH(), p1.Merging(), z0.VnH());
10739 __ Rbit(z3.VnS(), p1.Merging(), z0.VnS());
10740 __ Rbit(z4.VnD(), p1.Merging(), z0.VnD());
10741
10742 __ Dup(z5.VnB(), 0x42);
10743 __ Rbit(z5.VnB(), p2.Merging(), z0.VnB());
10744 __ Dup(z6.VnB(), 0x42);
10745 __ Rbit(z6.VnS(), p2.Merging(), z0.VnS());
10746
10747 END();
10748
10749 if (CAN_RUN()) {
10750 RUN();
10751
10752 ASSERT_EQUAL_SVE(inputs, z0.VnD());
10753
10754 uint64_t expected_z1[] = {0x55555555aaaaaaaa, 0x5555aaaa55aa55aa};
10755 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
10756 uint64_t expected_z2[] = {0x55555555aaaaaaaa, 0x5555aaaaaa55aa55};
10757 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
10758 uint64_t expected_z3[] = {0x55555555aaaaaaaa, 0xaaaa5555aa55aa55};
10759 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
10760 uint64_t expected_z4[] = {0xaaaaaaaa55555555, 0xaa55aa55aaaa5555};
10761 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
10762 uint64_t expected_z5[] = {0x4255425542aa42aa, 0x4255424242aa42aa};
10763 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
10764 uint64_t expected_z6[] = {0x55555555aaaaaaaa, 0x42424242aa55aa55};
10765 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
10766 }
10767}
10768
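// Revb, Revh and Revw reverse the order of the bytes, halfwords and words
// (respectively) within each element of the given size; inactive lanes
// keep the destination value under merging predication.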
10769TEST_SVE(sve_rev_bhw) {
10770 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10771 START();
10772
10773 uint64_t inputs[] = {0xaaaaaaaa55555555, 0xaaaa5555aa55aa55};
10774 InsrHelper(&masm, z0.VnD(), inputs);
10775
10776 __ Ptrue(p1.VnB());
10777 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
10778 Initialise(&masm, p2.VnB(), pred);
10779
10780 __ Revb(z1.VnH(), p1.Merging(), z0.VnH());
10781 __ Revb(z2.VnS(), p1.Merging(), z0.VnS());
10782 __ Revb(z3.VnD(), p1.Merging(), z0.VnD());
10783 __ Revh(z4.VnS(), p1.Merging(), z0.VnS());
10784 __ Revh(z5.VnD(), p1.Merging(), z0.VnD());
10785 __ Revw(z6.VnD(), p1.Merging(), z0.VnD());
10786
10787 __ Dup(z7.VnB(), 0x42);
10788 __ Revb(z7.VnH(), p2.Merging(), z0.VnH());
10789 __ Dup(z8.VnB(), 0x42);
10790 __ Revh(z8.VnS(), p2.Merging(), z0.VnS());
10791
10792 END();
10793
10794 if (CAN_RUN()) {
10795 RUN();
10796
10797 uint64_t expected_z1[] = {0xaaaaaaaa55555555, 0xaaaa555555aa55aa};
10798 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
10799 uint64_t expected_z2[] = {0xaaaaaaaa55555555, 0x5555aaaa55aa55aa};
10800 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
10801 uint64_t expected_z3[] = {0x55555555aaaaaaaa, 0x55aa55aa5555aaaa};
10802 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
10803 uint64_t expected_z4[] = {0xaaaaaaaa55555555, 0x5555aaaaaa55aa55};
10804 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
10805 uint64_t expected_z5[] = {0x55555555aaaaaaaa, 0xaa55aa555555aaaa};
10806 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
10807 uint64_t expected_z6[] = {0x55555555aaaaaaaa, 0xaa55aa55aaaa5555};
10808 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
10809 uint64_t expected_z7[] = {0xaaaaaaaa55555555, 0xaaaa424255aa55aa};
10810 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
10811 uint64_t expected_z8[] = {0xaaaaaaaa55555555, 0x42424242aa55aa55};
10812 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
10813 }
10814}
10815
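// Ftssel selects the starting value for a trigonometric series: the result
// is the first source element when bit 0 of the corresponding "quadrant"
// element is clear, or 1.0 when it is set, and the result is negated when
// bit 1 of the quadrant is set.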
10816TEST_SVE(sve_ftssel) {
10817 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10818 START();
10819
10820 uint64_t in[] = {0x1111777766665555, 0xaaaabbbbccccdddd};
10821 uint64_t q[] = {0x0001000300000002, 0x0001000200000003};
10822 InsrHelper(&masm, z0.VnD(), in);
10823 InsrHelper(&masm, z1.VnD(), q);
10824
10825 __ Ftssel(z2.VnH(), z0.VnH(), z1.VnH());
10826 __ Ftssel(z3.VnS(), z0.VnS(), z1.VnS());
10827 __ Ftssel(z4.VnD(), z0.VnD(), z1.VnD());
10828
10829 END();
10830
10831 if (CAN_RUN()) {
10832 RUN();
10833
10834 uint64_t expected_z2[] = {0x3c00bc006666d555, 0x3c003bbbccccbc00};
10835 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
10836 uint64_t expected_z3[] = {0xbf800000e6665555, 0x2aaabbbbbf800000};
10837 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
10838 uint64_t expected_z4[] = {0x9111777766665555, 0xbff0000000000000};
10839 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
10840 }
10841}
10842
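// Fexpa is the exponential accelerator: broadly, the low bits of each
// element index a table of fractional powers of two and the next bits
// supply the result's exponent, giving a coarse 2^x for software to refine.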
10843TEST_SVE(sve_fexpa) {
10844 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10845 START();
10846
10847 uint64_t in0[] = {0x3ff0000000000000, 0x3ff0000000011001};
10848 uint64_t in1[] = {0x3ff000000002200f, 0xbff000000003301f};
10849 uint64_t in2[] = {0xbff000000004403f, 0x3ff0000000055040};
10850 uint64_t in3[] = {0x3f800000bf800001, 0x3f80000f3f80001f};
10851 uint64_t in4[] = {0x3f80002f3f82203f, 0xbf8000403f833041};
10852 uint64_t in5[] = {0x3c003c01bc00bc07, 0x3c08bc0f3c1fbc20};
10853 InsrHelper(&masm, z0.VnD(), in0);
10854 InsrHelper(&masm, z1.VnD(), in1);
10855 InsrHelper(&masm, z2.VnD(), in2);
10856 InsrHelper(&masm, z3.VnD(), in3);
10857 InsrHelper(&masm, z4.VnD(), in4);
10858 InsrHelper(&masm, z5.VnD(), in5);
10859
10860 __ Fexpa(z6.VnD(), z0.VnD());
10861 __ Fexpa(z7.VnD(), z1.VnD());
10862 __ Fexpa(z8.VnD(), z2.VnD());
10863 __ Fexpa(z9.VnS(), z3.VnS());
10864 __ Fexpa(z10.VnS(), z4.VnS());
10865 __ Fexpa(z11.VnH(), z5.VnH());
10866
10867 END();
10868
10869 if (CAN_RUN()) {
10870 RUN();
10871 uint64_t expected_z6[] = {0x0000000000000000, 0x44002c9a3e778061};
10872 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
10873 uint64_t expected_z7[] = {0x0802d285a6e4030b, 0x4c06623882552225};
10874 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
10875 uint64_t expected_z8[] = {0x100fa7c1819e90d8, 0x5410000000000000};
10876 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
10877 uint64_t expected_z9[] = {0x00000000000164d2, 0x0016942d003311c4};
10878 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
10879 uint64_t expected_z10[] = {0x0054f35b407d3e0c, 0x00800000608164d2};
10880 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
10881 uint64_t expected_z11[] = {0x00000016000000a8, 0x00c2018903d40400};
10882 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
10883 }
10884}
10885
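// The predicate form of Rev reverses the order of the elements at the
// specified granularity, so the lowest lane of the result comes from the
// highest-numbered lane of the source.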
10886TEST_SVE(sve_rev_p) {
10887 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10888 START();
10889
10890 Initialise(&masm,
10891 p0.VnB(),
10892 0xabcdabcdabcdabcd,
10893 0xabcdabcdabcdabcd,
10894 0xabcdabcdabcdabcd,
10895 0xabcdabcdabcdabcd);
10896
10897 __ Rev(p1.VnB(), p0.VnB());
10898 __ Rev(p2.VnH(), p0.VnH());
10899 __ Rev(p3.VnS(), p0.VnS());
10900 __ Rev(p4.VnD(), p0.VnD());
10901
10902 END();
10903
10904 if (CAN_RUN()) {
10905 RUN();
10906
10907 int p1_expected[] = {1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1};
10908 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
10909 int p2_expected[] = {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0};
10910 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
10911 int p3_expected[] = {1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0};
10912 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
10913 int p4_expected[] = {1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1};
10914 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
10915 }
10916}
10917
10918TEST_SVE(sve_trn_p_bh) {
10919 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10920 START();
10921
10922 Initialise(&masm, p0.VnB(), 0xa5a55a5a);
10923 __ Pfalse(p1.VnB());
10924
10925 __ Trn1(p2.VnB(), p0.VnB(), p0.VnB());
10926 __ Trn2(p3.VnB(), p0.VnB(), p0.VnB());
10927 __ Trn1(p4.VnB(), p1.VnB(), p0.VnB());
10928 __ Trn2(p5.VnB(), p1.VnB(), p0.VnB());
10929 __ Trn1(p6.VnB(), p0.VnB(), p1.VnB());
10930 __ Trn2(p7.VnB(), p0.VnB(), p1.VnB());
10931
10932 __ Trn1(p8.VnH(), p0.VnH(), p0.VnH());
10933 __ Trn2(p9.VnH(), p0.VnH(), p0.VnH());
10934 __ Trn1(p10.VnH(), p1.VnH(), p0.VnH());
10935 __ Trn2(p11.VnH(), p1.VnH(), p0.VnH());
10936 __ Trn1(p12.VnH(), p0.VnH(), p1.VnH());
10937 __ Trn2(p13.VnH(), p0.VnH(), p1.VnH());
10938
10939 END();
10940
10941 if (CAN_RUN()) {
10942 RUN();
10943 int p2_expected[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
10944 int p3_expected[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
10945 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
10946 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
10947
10948 int p4_expected[] = {1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
10949 int p5_expected[] = {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0};
10950 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
10951 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
10952
10953 int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0};
10954 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1};
10955 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
10956 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
10957
10958 int p8_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
10959 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
10960 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
10961 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
10962
10963 int p10_expected[] = {0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0};
10964 int p11_expected[] = {0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0};
10965 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
10966 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
10967
10968 int p12_expected[] = {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0};
10969 int p13_expected[] = {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0};
10970 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
10971 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
10972 }
10973}
10974
10975TEST_SVE(sve_trn_p_sd) {
10976 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
10977 START();
10978
10979 Initialise(&masm, p0.VnB(), 0x55a55aaa);
10980 __ Pfalse(p1.VnB());
10981
10982 __ Trn1(p2.VnS(), p0.VnS(), p0.VnS());
10983 __ Trn2(p3.VnS(), p0.VnS(), p0.VnS());
10984 __ Trn1(p4.VnS(), p1.VnS(), p0.VnS());
10985 __ Trn2(p5.VnS(), p1.VnS(), p0.VnS());
10986 __ Trn1(p6.VnS(), p0.VnS(), p1.VnS());
10987 __ Trn2(p7.VnS(), p0.VnS(), p1.VnS());
10988
10989 __ Trn1(p8.VnD(), p0.VnD(), p0.VnD());
10990 __ Trn2(p9.VnD(), p0.VnD(), p0.VnD());
10991 __ Trn1(p10.VnD(), p1.VnD(), p0.VnD());
10992 __ Trn2(p11.VnD(), p1.VnD(), p0.VnD());
10993 __ Trn1(p12.VnD(), p0.VnD(), p1.VnD());
10994 __ Trn2(p13.VnD(), p0.VnD(), p1.VnD());
10995
10996 END();
10997
10998 if (CAN_RUN()) {
10999 RUN();
11000 int p2_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11001 int p3_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11002 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11003 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11004
11005 int p4_expected[] = {1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11006 int p5_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11007 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11008 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11009
11010 int p6_expected[] = {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0};
11011 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11012 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11013 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11014
11015 int p8_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11016 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11017 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11018 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11019
11020 int p10_expected[] = {1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11021 int p11_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11022 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11023 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11024
11025 int p12_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0};
11026 int p13_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11027 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11028 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11029 }
11030}
11031
11032TEST_SVE(sve_zip_p_bh) {
11033 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11034 START();
11035
11036 Initialise(&masm,
11037 p0.VnB(),
11038 0x5a5a5a5a5a5a5a5a,
11039 0x5a5a5a5a5a5a5a5a,
11040 0x5a5a5a5a5a5a5a5a,
11041 0x5a5a5a5a5a5a5a5a);
11042 __ Pfalse(p1.VnB());
11043
11044 __ Zip1(p2.VnB(), p0.VnB(), p0.VnB());
11045 __ Zip2(p3.VnB(), p0.VnB(), p0.VnB());
11046 __ Zip1(p4.VnB(), p1.VnB(), p0.VnB());
11047 __ Zip2(p5.VnB(), p1.VnB(), p0.VnB());
11048 __ Zip1(p6.VnB(), p0.VnB(), p1.VnB());
11049 __ Zip2(p7.VnB(), p0.VnB(), p1.VnB());
11050
11051 __ Zip1(p8.VnH(), p0.VnH(), p0.VnH());
11052 __ Zip2(p9.VnH(), p0.VnH(), p0.VnH());
11053 __ Zip1(p10.VnH(), p1.VnH(), p0.VnH());
11054 __ Zip2(p11.VnH(), p1.VnH(), p0.VnH());
11055 __ Zip1(p12.VnH(), p0.VnH(), p1.VnH());
11056 __ Zip2(p13.VnH(), p0.VnH(), p1.VnH());
11057
11058 END();
11059
11060 if (CAN_RUN()) {
11061 RUN();
11062 int p2_expected[] = {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0};
11063 int p3_expected[] = {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0};
11064 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11065 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11066
11067 int p4_expected[] = {0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11068 int p5_expected[] = {0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11069 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11070 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11071
11072 int p6_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0};
11073 int p7_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0};
11074 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11075 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11076
11077 int p8_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11078 int p9_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11079 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11080 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11081
11082 int p10_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11083 int p11_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
11084 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11085 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11086
11087 int p12_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0};
11088 int p13_expected[] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0};
11089 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11090 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11091 }
11092}
11093
11094TEST_SVE(sve_zip_p_sd) {
11095 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11096 START();
11097
11098 Initialise(&masm,
11099 p0.VnB(),
11100 0x5a5a5a5a5a5a5a5a,
11101 0x5a5a5a5a5a5a5a5a,
11102 0x5a5a5a5a5a5a5a5a,
11103 0x5a5a5a5a5a5a5a5a);
11104 __ Pfalse(p1.VnB());
11105
11106 __ Zip1(p2.VnS(), p0.VnS(), p0.VnS());
11107 __ Zip2(p3.VnS(), p0.VnS(), p0.VnS());
11108 __ Zip1(p4.VnS(), p1.VnS(), p0.VnS());
11109 __ Zip2(p5.VnS(), p1.VnS(), p0.VnS());
11110 __ Zip1(p6.VnS(), p0.VnS(), p1.VnS());
11111 __ Zip2(p7.VnS(), p0.VnS(), p1.VnS());
11112
11113 __ Zip1(p8.VnD(), p0.VnD(), p0.VnD());
11114 __ Zip2(p9.VnD(), p0.VnD(), p0.VnD());
11115 __ Zip1(p10.VnD(), p1.VnD(), p0.VnD());
11116 __ Zip2(p11.VnD(), p1.VnD(), p0.VnD());
11117 __ Zip1(p12.VnD(), p0.VnD(), p1.VnD());
11118 __ Zip2(p13.VnD(), p0.VnD(), p1.VnD());
11119
11120 END();
11121
11122 if (CAN_RUN()) {
11123 RUN();
11124 int p2_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11125 int p3_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
11126 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11127 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
11128
11129 int p4_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11130 int p5_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0};
11131 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
11132 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
11133
11134 int p6_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11135 int p7_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0};
11136 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
11137 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
11138
11139 int p8_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11140 int p9_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11141 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
11142 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
11143
11144 int p10_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11145 int p11_expected[] = {0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11146 ASSERT_EQUAL_SVE(p10_expected, p10.VnB());
11147 ASSERT_EQUAL_SVE(p11_expected, p11.VnB());
11148
11149 int p12_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11150 int p13_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11151 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
11152 ASSERT_EQUAL_SVE(p13_expected, p13.VnB());
11153 }
11154}
11155
11156TEST_SVE(sve_uzp_p) {
11157 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11158 START();
11159
11160 Initialise(&masm,
11161 p0.VnB(),
11162 0xf0f0ff00ffff0000,
11163 0x4242424242424242,
11164 0x5a5a5a5a5a5a5a5a,
11165 0x0123456789abcdef);
11166 __ Rev(p1.VnB(), p0.VnB());
11167
11168 __ Zip1(p2.VnB(), p0.VnB(), p1.VnB());
11169 __ Zip2(p3.VnB(), p0.VnB(), p1.VnB());
11170 __ Uzp1(p4.VnB(), p2.VnB(), p3.VnB());
11171 __ Uzp2(p5.VnB(), p2.VnB(), p3.VnB());
11172
11173 __ Zip1(p2.VnH(), p0.VnH(), p1.VnH());
11174 __ Zip2(p3.VnH(), p0.VnH(), p1.VnH());
11175 __ Uzp1(p6.VnH(), p2.VnH(), p3.VnH());
11176 __ Uzp2(p7.VnH(), p2.VnH(), p3.VnH());
11177
11178 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
11179 __ Zip2(p3.VnS(), p0.VnS(), p1.VnS());
11180 __ Uzp1(p8.VnS(), p2.VnS(), p3.VnS());
11181 __ Uzp2(p9.VnS(), p2.VnS(), p3.VnS());
11182
11183 __ Zip1(p2.VnD(), p0.VnD(), p1.VnD());
11184 __ Zip2(p3.VnD(), p0.VnD(), p1.VnD());
11185 __ Uzp1(p10.VnD(), p2.VnD(), p3.VnD());
11186 __ Uzp2(p11.VnD(), p2.VnD(), p3.VnD());
11187
11188 END();
11189
11190 if (CAN_RUN()) {
11191 RUN();
11192
11193 ASSERT_EQUAL_SVE(p0, p4);
11194 ASSERT_EQUAL_SVE(p1, p5);
11195 ASSERT_EQUAL_SVE(p0, p6);
11196 ASSERT_EQUAL_SVE(p1, p7);
11197 ASSERT_EQUAL_SVE(p0, p8);
11198 ASSERT_EQUAL_SVE(p1, p9);
11199 ASSERT_EQUAL_SVE(p0, p10);
11200 ASSERT_EQUAL_SVE(p1, p11);
11201 }
11202}
11203
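// Punpklo and Punpkhi unpack the low or high half of a byte-granularity
// predicate into halfword granularity, spreading each source bit into
// every other bit position of the destination.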
11204TEST_SVE(sve_punpk) {
11205 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11206 START();
11207
11208 Initialise(&masm,
11209 p0.VnB(),
11210 0xf0a0f0a0f0a0f0a0,
11211 0xf0a0f0a0f0a0f0a0,
11212 0xa0f0a0f0a0f0a0f0,
11213 0xa0f0a0f0a0f0a0f0);
11214 __ Punpklo(p1.VnH(), p0.VnB());
11215 __ Punpkhi(p2.VnH(), p0.VnB());
11216
11217 END();
11218
11219 if (CAN_RUN()) {
11220 RUN();
11221
11222 int p1_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
11223 int p2_expected[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11224 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
11225 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
11226 }
11227}
11228
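// Brka and Brkb are the single-source break instructions: active result
// elements are true up to the first active true element of pn (inclusive
// for Brka, exclusive for Brkb) and false afterwards. Zeroing clears the
// inactive lanes while merging preserves the destination there; the "s"
// forms also set NZCV.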
11229typedef void (MacroAssembler::*BrkFn)(const PRegisterWithLaneSize& pd,
11230 const PRegister& pg,
11231 const PRegisterWithLaneSize& pn);
11232
11233typedef void (MacroAssembler::*BrksFn)(const PRegisterWithLaneSize& pd,
11234 const PRegisterZ& pg,
11235 const PRegisterWithLaneSize& pn);
11236
11237template <typename T, size_t N>
11238static void BrkaBrkbHelper(Test* config,
11239 BrkFn macro,
11240 BrksFn macro_set_flags,
11241 const T (&pd_inputs)[N],
11242 const T (&pg_inputs)[N],
11243 const T (&pn_inputs)[N],
11244 const T (&pd_z_expected)[N]) {
11245 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11246 START();
11247
11248 PRegister pg = p10;
11249 PRegister pn = p9;
11250 PRegister pd_z = p0;
11251 PRegister pd_z_s = p1;
11252 PRegister pd_m = p2;
11253 Initialise(&masm, pg.VnB(), pg_inputs);
11254 Initialise(&masm, pn.VnB(), pn_inputs);
11255 Initialise(&masm, pd_m.VnB(), pd_inputs);
11256
11257 // Initialise NZCV to an impossible value, to check that we actually write it.
11258 __ Mov(x10, NZCVFlag);
11259 __ Msr(NZCV, x10);
11260
11261 (masm.*macro)(pd_z.VnB(), pg.Zeroing(), pn.VnB());
11262 (masm.*macro_set_flags)(pd_z_s.VnB(), pg.Zeroing(), pn.VnB());
11263 __ Mrs(x0, NZCV);
11264
11265 (masm.*macro)(pd_m.VnB(), pg.Merging(), pn.VnB());
11266
11267 END();
11268
11269 if (CAN_RUN()) {
11270 RUN();
11271
11272 ASSERT_EQUAL_SVE(pd_z_expected, pd_z.VnB());
11273
11274 // Check that the flags were properly set.
11275 StatusFlags nzcv_expected =
11276 GetPredTestFlags(pd_z_expected,
11277 pg_inputs,
11278 core.GetSVELaneCount(kBRegSize));
11279 ASSERT_EQUAL_64(nzcv_expected, x0);
11280 ASSERT_EQUAL_SVE(pd_z.VnB(), pd_z_s.VnB());
11281
11282 T pd_m_expected[N];
11283 // Set the expected `pd` result for merging predication.
11284 for (size_t i = 0; i < N; i++) {
11285 pd_m_expected[i] = pg_inputs[i] ? pd_z_expected[i] : pd_inputs[i];
11286 }
11287 ASSERT_EQUAL_SVE(pd_m_expected, pd_m.VnB());
11288 }
11289}
11290
11291template <typename T>
11292static void BrkaHelper(Test* config,
11293 const T& pd_inputs,
11294 const T& pg_inputs,
11295 const T& pn_inputs,
11296 const T& pd_expected) {
11297 BrkaBrkbHelper(config,
11298 &MacroAssembler::Brka,
11299 &MacroAssembler::Brkas,
11300 pd_inputs,
11301 pg_inputs,
11302 pn_inputs,
11303 pd_expected);
11304}
11305
11306TEST_SVE(sve_brka) {
11307 // clang-format off
11308 // | boundary of 128-bits VL.
11309 // v
11310 int pd[] = {1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11311
11312 // | highest-numbered lane lowest-numbered lane |
11313 // v v
11314 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11315 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11316
11317 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
11318 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11319 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1};
11320
11321 // | first break
11322 // v
11323 int exp_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0};
11324 // | first break
11325 // v
11326 int exp_1_2[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11327 // | first break
11328 // v
11329 int exp_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11330
11331 BrkaHelper(config, pd, pg_1, pn_1, exp_1_1);
11332 BrkaHelper(config, pd, pg_1, pn_2, exp_1_2);
11333 BrkaHelper(config, pd, pg_1, pn_3, exp_1_3);
11334
11335 // | first break
11336 // v
11337 int exp_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1};
11338 // | first break
11339 // v
11340 int exp_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11341 // | first break
11342 // v
11343 int exp_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
11344 BrkaHelper(config, pd, pg_2, pn_1, exp_2_1);
11345 BrkaHelper(config, pd, pg_2, pn_2, exp_2_2);
11346 BrkaHelper(config, pd, pg_2, pn_3, exp_2_3);
11347
11348 // An all-inactive zeroing predicate sets the destination predicate to all-false.
11349 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11350 int exp_3_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11351 BrkaHelper(config, pd, pg_3, pn_1, exp_3_x);
11352 BrkaHelper(config, pd, pg_3, pn_2, exp_3_x);
11353 BrkaHelper(config, pd, pg_3, pn_3, exp_3_x);
11354 // clang-format on
11355}
11356
11357template <typename T>
11358static void BrkbHelper(Test* config,
11359 const T& pd_inputs,
11360 const T& pg_inputs,
11361 const T& pn_inputs,
11362 const T& pd_expected) {
11363 BrkaBrkbHelper(config,
11364 &MacroAssembler::Brkb,
11365 &MacroAssembler::Brkbs,
11366 pd_inputs,
11367 pg_inputs,
11368 pn_inputs,
11369 pd_expected);
11370}
11371
11372TEST_SVE(sve_brkb) {
11373 // clang-format off
11374 // | boundary of 128-bits VL.
11375 // v
11376 int pd[] = {1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11377
11378 // | highest-numbered lane lowest-numbered lane |
11379 // v v
11380 int pg_1[] = {1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11381 int pg_2[] = {1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11382
11383 int pn_1[] = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0};
11384 int pn_2[] = {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11385 int pn_3[] = {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1};
11386
11387 // | first break
11388 // v
11389 int exp_1_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
11390 // | first break
11391 // v
11392 int exp_1_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0};
11393 // | first break
11394 // v
11395 int exp_1_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0};
11396
11397 BrkbHelper(config, pd, pg_1, pn_1, exp_1_1);
11398 BrkbHelper(config, pd, pg_1, pn_2, exp_1_2);
11399 BrkbHelper(config, pd, pg_1, pn_3, exp_1_3);
11400
11401 // | first break
11402 // v
11403 int exp_2_1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
11404 // | first break
11405 // v
11406 int exp_2_2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1};
11407 // | first break
11408 // v
11409 int exp_2_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11410 BrkbHelper(config, pd, pg_2, pn_1, exp_2_1);
11411 BrkbHelper(config, pd, pg_2, pn_2, exp_2_2);
11412 BrkbHelper(config, pd, pg_2, pn_3, exp_2_3);
11413
11414 // An all-inactive zeroing predicate sets the destination predicate to all-false.
11415 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11416 int exp_3_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
11417 BrkbHelper(config, pd, pg_3, pn_1, exp_3_x);
11418 BrkbHelper(config, pd, pg_3, pn_2, exp_3_x);
11419 BrkbHelper(config, pd, pg_3, pn_3, exp_3_x);
11420 // clang-format on
11421}
11422
11423typedef void (MacroAssembler::*BrknFn)(const PRegisterWithLaneSize& pd,
11424 const PRegisterZ& pg,
11425 const PRegisterWithLaneSize& pn,
11426 const PRegisterWithLaneSize& pm);
11427
11428typedef void (MacroAssembler::*BrknsFn)(const PRegisterWithLaneSize& pd,
11429 const PRegisterZ& pg,
11430 const PRegisterWithLaneSize& pn,
11431 const PRegisterWithLaneSize& pm);
11432
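// Brkn propagates a break to the next partition: if the last active
// element of pn is true the destination keeps the value of pm, otherwise
// it becomes all-false. The enum below names these two outcomes.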
11433enum BrknDstPredicateState { kAllFalse, kUnchanged };
11434
11435template <typename T, size_t N>
11436static void BrknHelper(Test* config,
11437 BrknFn macro,
11438 BrknsFn macro_set_flags,
11439 const T (&pd_inputs)[N],
11440 const T (&pg_inputs)[N],
11441 const T (&pn_inputs)[N],
11442 const T (&pm_inputs)[N],
11443 BrknDstPredicateState expected_pd_state) {
11444 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11445 START();
11446
11447 PRegister pg = p10;
11448 PRegister pn = p9;
11449 PRegister pm = p8;
11450 PRegister pdm = p0;
11451 PRegister pd = p1;
11452 PRegister pd_s = p2;
11453 Initialise(&masm, pg.VnB(), pg_inputs);
11454 Initialise(&masm, pn.VnB(), pn_inputs);
11455 Initialise(&masm, pm.VnB(), pm_inputs);
11456 Initialise(&masm, pdm.VnB(), pm_inputs);
11457 Initialise(&masm, pd.VnB(), pd_inputs);
11458 Initialise(&masm, pd_s.VnB(), pd_inputs);
11459
11460 // Initialise NZCV to an impossible value, to check that we actually write it.
11461 __ Mov(x10, NZCVFlag);
11462 __ Msr(NZCV, x10);
11463
11464 (masm.*macro)(pdm.VnB(), pg.Zeroing(), pn.VnB(), pdm.VnB());
11465 // The non-aliasing form, where `pd` is distinct from `pm`.
11466 (masm.*macro)(pd.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
11467 (masm.*macro_set_flags)(pd_s.VnB(), pg.Zeroing(), pn.VnB(), pm.VnB());
11468 __ Mrs(x0, NZCV);
11469
11470 END();
11471
11472 if (CAN_RUN()) {
11473 RUN();
11474
11475 T all_false[N] = {0};
11476 if (expected_pd_state == kAllFalse) {
11477 ASSERT_EQUAL_SVE(all_false, pd.VnB());
11478 } else {
11479 ASSERT_EQUAL_SVE(pm_inputs, pd.VnB());
11480 }
11481 ASSERT_EQUAL_SVE(pm_inputs, pm.VnB());
11482
11483 // Check that the flags were properly set.
11484 StatusFlags nzcv_expected =
11485 GetPredTestFlags((expected_pd_state == kAllFalse) ? all_false
11486 : pm_inputs,
11487 pg_inputs,
11488 core.GetSVELaneCount(kBRegSize));
11489 ASSERT_EQUAL_64(nzcv_expected, x0);
11490 ASSERT_EQUAL_SVE(pd.VnB(), pdm.VnB());
11491 ASSERT_EQUAL_SVE(pd.VnB(), pd_s.VnB());
11492 }
11493}
11494
11495TEST_SVE(sve_brkn) {
11496 // clang-format off
11497 int pd[] = {1, 0, 0, 1, 0, 1, 1, 0, 1, 0};
11498 int pm[] = {0, 1, 1, 1, 1, 0, 0, 1, 0, 1};
11499
11500 int pg_1[] = {1, 1, 0, 0, 1, 0, 1, 1, 0, 0};
11501 int pg_2[] = {0, 0, 0, 1, 1, 1, 0, 0, 1, 1};
11502 int pg_3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // all-false
11503
11504 int pn_1[] = {1, 0, 0, 0, 0, 1, 1, 0, 0, 0};
11505 int pn_2[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0};
11506 int pn_3[] = {0, 0, 0, 0, 1, 1, 0, 0, 1, 1};
11507
11508 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_1, pm, kUnchanged);
11509 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_2, pm, kAllFalse);
11510 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_1, pn_3, pm, kAllFalse);
11511
11512 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_1, pm, kAllFalse);
11513 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_2, pm, kUnchanged);
11514 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_2, pn_3, pm, kAllFalse);
11515
11516 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_1, pm, kAllFalse);
11517 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_2, pm, kAllFalse);
11518 BrknHelper(config, &MacroAssembler::Brkn, &MacroAssembler::Brkns, pd, pg_3, pn_3, pm, kAllFalse);
11519 // clang-format on
11520}
11521
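// Trn1 interleaves the even-numbered elements of the two sources and Trn2
// the odd-numbered elements, taking corresponding elements alternately
// from each operand.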
11522TEST_SVE(sve_trn) {
11523 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11524 START();
11525
11526 uint64_t in0[] = {0xffeeddccbbaa9988, 0x7766554433221100};
11527 uint64_t in1[] = {0xaa55aa55aa55aa55, 0x55aa55aa55aa55aa};
11528 InsrHelper(&masm, z0.VnD(), in0);
11529 InsrHelper(&masm, z1.VnD(), in1);
11530
11531 __ Trn1(z2.VnB(), z0.VnB(), z1.VnB());
11532 __ Trn2(z3.VnB(), z0.VnB(), z1.VnB());
11533 __ Trn1(z4.VnH(), z0.VnH(), z1.VnH());
11534 __ Trn2(z5.VnH(), z0.VnH(), z1.VnH());
11535 __ Trn1(z6.VnS(), z0.VnS(), z1.VnS());
11536 __ Trn2(z7.VnS(), z0.VnS(), z1.VnS());
11537 __ Trn1(z8.VnD(), z0.VnD(), z1.VnD());
11538 __ Trn2(z9.VnD(), z0.VnD(), z1.VnD());
11539
11540 END();
11541
11542 if (CAN_RUN()) {
11543 RUN();
11544 uint64_t expected_z2[] = {0x55ee55cc55aa5588, 0xaa66aa44aa22aa00};
11545 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11546 uint64_t expected_z3[] = {0xaaffaaddaabbaa99, 0x5577555555335511};
11547 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11548 uint64_t expected_z4[] = {0xaa55ddccaa559988, 0x55aa554455aa1100};
11549 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11550 uint64_t expected_z5[] = {0xaa55ffeeaa55bbaa, 0x55aa776655aa3322};
11551 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11552 uint64_t expected_z6[] = {0xaa55aa55bbaa9988, 0x55aa55aa33221100};
11553 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11554 uint64_t expected_z7[] = {0xaa55aa55ffeeddcc, 0x55aa55aa77665544};
11555 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11556 uint64_t expected_z8[] = {0x55aa55aa55aa55aa, 0x7766554433221100};
11557 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11558 uint64_t expected_z9[] = {0xaa55aa55aa55aa55, 0xffeeddccbbaa9988};
11559 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11560 }
11561}
11562
11563TEST_SVE(sve_zip_uzp) {
11564 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11565 START();
11566
11567 __ Dup(z0.VnD(), 0xffeeddccbbaa9988);
11568 __ Insr(z0.VnD(), 0x7766554433221100);
11569 __ Dup(z1.VnD(), 0xaa55aa55aa55aa55);
11570 __ Insr(z1.VnD(), 0x55aa55aa55aa55aa);
11571
11572 __ Zip1(z2.VnB(), z0.VnB(), z1.VnB());
11573 __ Zip2(z3.VnB(), z0.VnB(), z1.VnB());
11574 __ Zip1(z4.VnH(), z0.VnH(), z1.VnH());
11575 __ Zip2(z5.VnH(), z0.VnH(), z1.VnH());
11576 __ Zip1(z6.VnS(), z0.VnS(), z1.VnS());
11577 __ Zip2(z7.VnS(), z0.VnS(), z1.VnS());
11578 __ Zip1(z8.VnD(), z0.VnD(), z1.VnD());
11579 __ Zip2(z9.VnD(), z0.VnD(), z1.VnD());
11580
11581 __ Uzp1(z10.VnB(), z2.VnB(), z3.VnB());
11582 __ Uzp2(z11.VnB(), z2.VnB(), z3.VnB());
11583 __ Uzp1(z12.VnH(), z4.VnH(), z5.VnH());
11584 __ Uzp2(z13.VnH(), z4.VnH(), z5.VnH());
11585 __ Uzp1(z14.VnS(), z6.VnS(), z7.VnS());
11586 __ Uzp2(z15.VnS(), z6.VnS(), z7.VnS());
11587 __ Uzp1(z16.VnD(), z8.VnD(), z9.VnD());
11588 __ Uzp2(z17.VnD(), z8.VnD(), z9.VnD());
11589
11590 END();
11591
11592 if (CAN_RUN()) {
11593 RUN();
11594 uint64_t expected_z2[] = {0x5577aa665555aa44, 0x5533aa225511aa00};
11595 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11596 uint64_t expected_z3[] = {0xaaff55eeaadd55cc, 0xaabb55aaaa995588};
11597 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11598 uint64_t expected_z4[] = {0x55aa776655aa5544, 0x55aa332255aa1100};
11599 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11600 uint64_t expected_z5[] = {0xaa55ffeeaa55ddcc, 0xaa55bbaaaa559988};
11601 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11602 uint64_t expected_z6[] = {0x55aa55aa77665544, 0x55aa55aa33221100};
11603 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11604 uint64_t expected_z7[] = {0xaa55aa55ffeeddcc, 0xaa55aa55bbaa9988};
11605 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11606 uint64_t expected_z8[] = {0x55aa55aa55aa55aa, 0x7766554433221100};
11607 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11608 uint64_t expected_z9[] = {0xaa55aa55aa55aa55, 0xffeeddccbbaa9988};
11609 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11610
11611 // Check that Uzp1/Uzp2 undo the corresponding Zip1/Zip2.
11612 ASSERT_EQUAL_SVE(z0.VnD(), z10.VnD());
11613 ASSERT_EQUAL_SVE(z1.VnD(), z11.VnD());
11614 ASSERT_EQUAL_SVE(z0.VnD(), z12.VnD());
11615 ASSERT_EQUAL_SVE(z1.VnD(), z13.VnD());
11616 ASSERT_EQUAL_SVE(z0.VnD(), z14.VnD());
11617 ASSERT_EQUAL_SVE(z1.VnD(), z15.VnD());
11618 ASSERT_EQUAL_SVE(z0.VnD(), z16.VnD());
11619 ASSERT_EQUAL_SVE(z1.VnD(), z17.VnD());
11620 }
11621}
11622
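// Fmul (indexed) multiplies each element of the first source by an element
// of the second source selected by the immediate index; it is checked here
// against a reference computed with Dup (indexed) followed by an
// unpredicated Fmul.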
11623TEST_SVE(sve_fpmul_index) {
11624 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11625 START();
11626
11627 uint64_t in0[] = {0x3ff000003f803c00, 0xbff00000bf80bc00};
11628 uint64_t in1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76};
11629
11630 InsrHelper(&masm, z0.VnD(), in0);
11631 InsrHelper(&masm, z1.VnD(), in1);
11632
11633 __ Fmul(z2.VnH(), z1.VnH(), z0.VnH(), 0);
11634 __ Fmul(z3.VnH(), z1.VnH(), z0.VnH(), 1);
11635 __ Fmul(z4.VnH(), z1.VnH(), z0.VnH(), 4);
11636 __ Fmul(z5.VnH(), z1.VnH(), z0.VnH(), 7);
11637
11638 __ Fmul(z6.VnS(), z1.VnS(), z0.VnS(), 0);
11639 __ Fmul(z7.VnS(), z1.VnS(), z0.VnS(), 1);
11640 __ Fmul(z8.VnS(), z1.VnS(), z0.VnS(), 2);
11641 __ Fmul(z9.VnS(), z1.VnS(), z0.VnS(), 3);
11642
11643 __ Fmul(z10.VnD(), z1.VnD(), z0.VnD(), 0);
11644 __ Fmul(z11.VnD(), z1.VnD(), z0.VnD(), 1);
11645
11646 // Compute the results using other instructions.
11647 __ Dup(z12.VnH(), z0.VnH(), 0);
11648 __ Fmul(z12.VnH(), z1.VnH(), z12.VnH());
11649 __ Dup(z13.VnH(), z0.VnH(), 1);
11650 __ Fmul(z13.VnH(), z1.VnH(), z13.VnH());
11651 __ Dup(z14.VnH(), z0.VnH(), 4);
11652 __ Fmul(z14.VnH(), z1.VnH(), z14.VnH());
11653 __ Dup(z15.VnH(), z0.VnH(), 7);
11654 __ Fmul(z15.VnH(), z1.VnH(), z15.VnH());
11655
11656 __ Dup(z16.VnS(), z0.VnS(), 0);
11657 __ Fmul(z16.VnS(), z1.VnS(), z16.VnS());
11658 __ Dup(z17.VnS(), z0.VnS(), 1);
11659 __ Fmul(z17.VnS(), z1.VnS(), z17.VnS());
11660 __ Dup(z18.VnS(), z0.VnS(), 2);
11661 __ Fmul(z18.VnS(), z1.VnS(), z18.VnS());
11662 __ Dup(z19.VnS(), z0.VnS(), 3);
11663 __ Fmul(z19.VnS(), z1.VnS(), z19.VnS());
11664
11665 __ Dup(z20.VnD(), z0.VnD(), 0);
11666 __ Fmul(z20.VnD(), z1.VnD(), z20.VnD());
11667 __ Dup(z21.VnD(), z0.VnD(), 1);
11668 __ Fmul(z21.VnD(), z1.VnD(), z21.VnD());
11669
11670 END();
11671
11672 if (CAN_RUN()) {
11673 RUN();
11674 ASSERT_EQUAL_SVE(z12.VnH(), z2.VnH());
11675 ASSERT_EQUAL_SVE(z13.VnH(), z3.VnH());
11676 ASSERT_EQUAL_SVE(z14.VnH(), z4.VnH());
11677 ASSERT_EQUAL_SVE(z15.VnH(), z5.VnH());
11678 ASSERT_EQUAL_SVE(z16.VnS(), z6.VnS());
11679 ASSERT_EQUAL_SVE(z17.VnS(), z7.VnS());
11680 ASSERT_EQUAL_SVE(z18.VnS(), z8.VnS());
11681 ASSERT_EQUAL_SVE(z19.VnS(), z9.VnS());
11682 ASSERT_EQUAL_SVE(z20.VnD(), z10.VnD());
11683 ASSERT_EQUAL_SVE(z21.VnD(), z11.VnD());
11684 }
11685}
11686
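// Ftmad performs one step of a sine/cosine series evaluation: in essence a
// fused multiply-add against a hard-wired table of coefficients, selected
// by the immediate index and by the sign of the second operand.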
11687TEST_SVE(sve_ftmad) {
11688 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11689 START();
11690
11691 uint64_t in_h0[] = {0x7c027e01fc02fe01,
11692 0x3c003c00bc00bc00,
11693 0x3c003c00bc00bc00};
11694 uint64_t in_h1[] = {0xfe01fc027e017e01,
11695 0x3c00bc003c00bc00,
11696 0x3c00bc003c00bc00};
11697 uint64_t in_s0[] = {0x7f800002ffc00001,
11698 0x3f8000003f800000,
11699 0xbf800000bf800000};
11700 uint64_t in_s1[] = {0xffc00001ffc00001,
11701 0x3f800000bf800000,
11702 0x3f800000bf800000};
11703 uint64_t in_d0[] = {0x7ff8000000000001,
11704 0x3ff0000000000000,
11705 0xbff0000000000000};
11706 uint64_t in_d1[] = {0xfff0000000000002,
11707 0xbff0000000000000,
11708 0x3ff0000000000000};
11709 InsrHelper(&masm, z0.VnD(), in_h0);
11710 InsrHelper(&masm, z1.VnD(), in_h1);
11711 InsrHelper(&masm, z2.VnD(), in_s0);
11712 InsrHelper(&masm, z3.VnD(), in_s1);
11713 InsrHelper(&masm, z4.VnD(), in_d0);
11714 InsrHelper(&masm, z5.VnD(), in_d1);
11715
11716 __ Mov(z6, z0);
11717 __ Ftmad(z6.VnH(), z6.VnH(), z1.VnH(), 0);
11718 __ Mov(z7, z0);
11719 __ Ftmad(z7.VnH(), z7.VnH(), z1.VnH(), 1);
11720 __ Mov(z8, z0);
11721 __ Ftmad(z8.VnH(), z8.VnH(), z1.VnH(), 2);
11722
11723 __ Mov(z9, z2);
11724 __ Ftmad(z9.VnS(), z9.VnS(), z3.VnS(), 0);
11725 __ Mov(z10, z2);
11726 __ Ftmad(z10.VnS(), z10.VnS(), z3.VnS(), 3);
11727 __ Mov(z11, z2);
11728 __ Ftmad(z11.VnS(), z11.VnS(), z3.VnS(), 4);
11729
11730 __ Mov(z12, z4);
11731 __ Ftmad(z12.VnD(), z12.VnD(), z5.VnD(), 0);
11732 __ Mov(z13, z4);
11733 __ Ftmad(z13.VnD(), z13.VnD(), z5.VnD(), 5);
11734 __ Mov(z14, z4);
11735 __ Ftmad(z14.VnD(), z14.VnD(), z5.VnD(), 7);
11736
11737 END();
11738
11739 if (CAN_RUN()) {
11740 RUN();
11741 uint64_t expected_z6[] = {0x7e027e02fe02fe01,
11742 0x4000400000000000,
11743 0x4000400000000000};
11744 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11745 uint64_t expected_z7[] = {0x7e027e02fe02fe01,
11746 0x3aab3800bcabbe00,
11747 0x3aab3800bcabbe00};
11748 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11749 uint64_t expected_z8[] = {0x7e027e02fe02fe01,
11750 0x3c083c2abbefbbac,
11751 0x3c083c2abbefbbac};
11752 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
11753 uint64_t expected_z9[] = {0x7fc00002ffc00001,
11754 0x4000000040000000,
11755 0x0000000000000000};
11756 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11757 uint64_t expected_z10[] = {0x7fc00002ffc00001,
11758 0x3f7ff2ff3f7fa4fc,
11759 0xbf800680bf802d82};
11760 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11761 uint64_t expected_z11[] = {0x7fc00002ffc00001,
11762 0x3f8000173f8000cd,
11763 0xbf7fffd2bf7ffe66};
11764 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
11765 uint64_t expected_z12[] = {0x7ff8000000000002,
11766 0x4000000000000000,
11767 0x0000000000000000};
11768 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
11769 uint64_t expected_z13[] = {0x7ff8000000000002,
11770 0x3fefffff6c0d846c,
11771 0xbff0000006b978ae};
11772 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
11773 uint64_t expected_z14[] = {0x7ff8000000000002,
11774 0x3feffffffffe708a,
11775 0xbff0000000000000};
11776 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
11777 }
11778}
11779
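// This helper exercises the predicated FP arithmetic forms on a vector and
// its element-reversed copy. The inputs_fmulx values pair zeroes with
// infinities: unlike Fmul, Fmulx returns 2.0 (with the appropriate sign)
// for 0 * infinity. The inputs_nans values check that Fminnm and Fmaxnm
// prefer the numeric operand when the other operand is a quiet NaN.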
11780static void BasicFPArithHelper(MacroAssembler* masm,
11781 int lane_size_in_bits,
11782 const uint64_t (&inputs)[2],
11783 const uint64_t (&inputs_fmulx)[2],
11784 const uint64_t (&inputs_nans)[2]) {
11785 int ls = lane_size_in_bits;
11786
11787 for (int i = 0; i < 16; i++) {
11788 InsrHelper(masm, z0.VnD(), inputs);
11789 }
11790 ZRegister rvrs = z1.WithLaneSize(ls);
11791 masm->Rev(rvrs, z0.WithLaneSize(ls));
11792
11793 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
11794 Initialise(masm, p2.VnB(), pred);
11795 PRegisterM p2m = p2.Merging();
11796
11797 masm->Mov(z2, z0);
11798 masm->Fadd(z2.WithLaneSize(ls),
11799 p2m,
11800 z2.WithLaneSize(ls),
11801 rvrs,
11802 FastNaNPropagation);
11803 masm->Mov(z3, z0);
11804 masm->Fsub(z3.WithLaneSize(ls), p2m, z3.WithLaneSize(ls), rvrs);
11805 masm->Mov(z4, z0);
11806 masm->Fsub(z4.WithLaneSize(ls), p2m, rvrs, z4.WithLaneSize(ls));
11807 masm->Mov(z5, z0);
11808 masm->Fabd(z5.WithLaneSize(ls),
11809 p2m,
11810 z5.WithLaneSize(ls),
11811 rvrs,
11812 FastNaNPropagation);
11813 masm->Mov(z6, z0);
11814 masm->Fmul(z6.WithLaneSize(ls),
11815 p2m,
11816 z6.WithLaneSize(ls),
11817 rvrs,
11818 FastNaNPropagation);
11819
11820 for (int i = 0; i < 16; i++) {
11821 InsrHelper(masm, z7.VnD(), inputs_fmulx);
11822 }
11823 masm->Rev(z8.WithLaneSize(ls), z7.WithLaneSize(ls));
11824 masm->Fmulx(z7.WithLaneSize(ls),
11825 p2m,
11826 z7.WithLaneSize(ls),
11827 z8.WithLaneSize(ls),
11828 FastNaNPropagation);
11829
11830 InsrHelper(masm, z8.VnD(), inputs_nans);
11831 masm->Mov(z9, z8);
11832 masm->Fminnm(z9.WithLaneSize(ls),
11833 p2m,
11834 z9.WithLaneSize(ls),
11835 rvrs,
11836 FastNaNPropagation);
11837 masm->Mov(z10, z8);
11838 masm->Fmaxnm(z10.WithLaneSize(ls),
11839 p2m,
11840 z10.WithLaneSize(ls),
11841 rvrs,
11842 FastNaNPropagation);
11843}
11844
11845TEST_SVE(sve_fp_arith_pred_h) {
11846 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11847 START();
11848
11849 uint64_t inputs[] = {0x4800470046004500, 0x4400420040003c00};
11850 uint64_t inputs_fmulx[] = {0x7c00fc007c00fc00, 0x0000800000008000};
11851 uint64_t inputs_nans[] = {0x7fffffff7fffffff, 0x7bfffbff7fbbfbff};
11852
11853 BasicFPArithHelper(&masm, kHRegSize, inputs, inputs_fmulx, inputs_nans);
11854
11855 END();
11856
11857 if (CAN_RUN()) {
11858 RUN();
11859 uint64_t expected_z2[] = {0x4880488048804880, 0x4880420048804880};
11860 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11861 uint64_t expected_z3[] = {0x4700450042003c00, 0xbc004200c500c700};
11862 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11863 uint64_t expected_z4[] = {0xc700c500c200bc00, 0x3c00420045004700};
11864 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11865 uint64_t expected_z5[] = {0x4700450042003c00, 0x3c00420045004700};
11866 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11867 uint64_t expected_z6[] = {0x48004b004c804d00, 0x4d0042004b004800};
11868 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11869 uint64_t expected_z7[] = {0xc000c000c000c000, 0xc0008000c000c000};
11870 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11871 uint64_t expected_z9[] = {0x3c00400042004400, 0x4500fbff4700fbff};
11872 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11873 uint64_t expected_z10[] = {0x3c00400042004400, 0x7bfffbff47004800};
11874 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11875 }
11876}
11877
11878TEST_SVE(sve_fp_arith_pred_s) {
11879 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11880 START();
11881
11882 uint64_t inputs[] = {0x4080000040400000, 0x400000003f800000};
11883 uint64_t inputs_fmulx[] = {0x7f800000ff800000, 0x0000000080000000};
11884 uint64_t inputs_nans[] = {0x7fffffffffffffff, 0x41000000c1000000};
11885
11886 BasicFPArithHelper(&masm, kSRegSize, inputs, inputs_fmulx, inputs_nans);
11887
11888 END();
11889
11890 if (CAN_RUN()) {
11891 RUN();
11892 uint64_t expected_z2[] = {0x40a0000040a00000, 0x4000000040a00000};
11893 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11894 uint64_t expected_z3[] = {0x404000003f800000, 0x40000000c0400000};
11895 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11896 uint64_t expected_z4[] = {0xc0400000bf800000, 0x4000000040400000};
11897 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11898 uint64_t expected_z5[] = {0x404000003f800000, 0x4000000040400000};
11899 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11900 uint64_t expected_z6[] = {0x4080000040c00000, 0x4000000040800000};
11901 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11902 uint64_t expected_z7[] = {0xc0000000c0000000, 0x00000000c0000000};
11903 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11904 uint64_t expected_z9[] = {0x3f80000040000000, 0x41000000c1000000};
11905 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11906 uint64_t expected_z10[] = {0x3f80000040000000, 0x4100000040800000};
11907 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11908 }
11909}
11910
11911TEST_SVE(sve_fp_arith_pred_d) {
11912 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11913 START();
11914
11915 uint64_t inputs[] = {0x4000000000000000, 0x3ff0000000000000};
11916 uint64_t inputs_fmulx[] = {0x7ff0000000000000, 0x8000000000000000};
11917 uint64_t inputs_nans[] = {0x7fffffffffffffff, 0x4100000000000000};
11918
11919 BasicFPArithHelper(&masm, kDRegSize, inputs, inputs_fmulx, inputs_nans);
11920
11921 END();
11922
11923 if (CAN_RUN()) {
11924 RUN();
11925 uint64_t expected_z2[] = {0x4008000000000000, 0x4008000000000000};
11926 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
11927 uint64_t expected_z3[] = {0x3ff0000000000000, 0xbff0000000000000};
11928 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
11929 uint64_t expected_z4[] = {0xbff0000000000000, 0x3ff0000000000000};
11930 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
11931 uint64_t expected_z5[] = {0x3ff0000000000000, 0x3ff0000000000000};
11932 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
11933 uint64_t expected_z6[] = {0x4000000000000000, 0x4000000000000000};
11934 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
11935 uint64_t expected_z7[] = {0xc000000000000000, 0xc000000000000000};
11936 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
11937 uint64_t expected_z9[] = {0x3ff0000000000000, 0x4000000000000000};
11938 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
11939 uint64_t expected_z10[] = {0x3ff0000000000000, 0x4100000000000000};
11940 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
11941 }
11942}
11943
11944TEST_SVE(sve_fp_arith_pred_imm) {
11945 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
11946 START();
11947
11948 int pred[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1};
11949 Initialise(&masm, p0.VnB(), pred);
11950 PRegisterM p0m = p0.Merging();
11951 __ Ptrue(p1.VnB());
11952
11953 __ Fdup(z0.VnD(), 0.0);
11954
11955 __ Mov(z1, z0);
11956 __ Fdiv(z1.VnH(), p1.Merging(), z1.VnH(), z1.VnH());
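  // 0.0 / 0.0 fills z1 with the default NaN; z6 and z7 below use it to check
  // the NaN handling of Fminnm and Fmaxnm with immediate operands.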
11957 __ Mov(z2, z0);
11958 __ Fadd(z2.VnH(), p0m, z2.VnH(), 0.5);
11959 __ Mov(z3, z2);
11960 __ Fsub(z3.VnH(), p0m, z3.VnH(), 1.0);
11961 __ Mov(z4, z3);
11962 __ Fsub(z4.VnH(), p0m, 1.0, z4.VnH());
11963 __ Mov(z5, z4);
11964 __ Fmul(z5.VnH(), p0m, z5.VnH(), 2.0);
11965 __ Mov(z6, z1);
11966 __ Fminnm(z6.VnH(), p0m, z6.VnH(), 0.0);
11967 __ Mov(z7, z1);
11968 __ Fmaxnm(z7.VnH(), p0m, z7.VnH(), 1.0);
11969 __ Mov(z8, z5);
11970 __ Fmin(z8.VnH(), p0m, z8.VnH(), 1.0);
11971 __ Mov(z9, z5);
11972 __ Fmax(z9.VnH(), p0m, z9.VnH(), 0.0);
11973
11974 __ Mov(z11, z0);
11975 __ Fdiv(z11.VnS(), p1.Merging(), z11.VnS(), z11.VnS());
11976 __ Mov(z12, z0);
11977 __ Fadd(z12.VnS(), p0m, z12.VnS(), 0.5);
11978 __ Mov(z13, z12);
11979 __ Fsub(z13.VnS(), p0m, z13.VnS(), 1.0);
11980 __ Mov(z14, z13);
11981 __ Fsub(z14.VnS(), p0m, 1.0, z14.VnS());
11982 __ Mov(z15, z14);
11983 __ Fmul(z15.VnS(), p0m, z15.VnS(), 2.0);
11984 __ Mov(z16, z11);
11985 __ Fminnm(z16.VnS(), p0m, z16.VnS(), 0.0);
11986 __ Mov(z17, z11);
11987 __ Fmaxnm(z17.VnS(), p0m, z17.VnS(), 1.0);
11988 __ Mov(z18, z15);
11989 __ Fmin(z18.VnS(), p0m, z18.VnS(), 1.0);
11990 __ Mov(z19, z15);
11991 __ Fmax(z19.VnS(), p0m, z19.VnS(), 0.0);
11992
11993 __ Mov(z21, z0);
11994 __ Fdiv(z21.VnD(), p1.Merging(), z21.VnD(), z21.VnD());
11995 __ Mov(z22, z0);
11996 __ Fadd(z22.VnD(), p0m, z22.VnD(), 0.5);
11997 __ Mov(z23, z22);
11998 __ Fsub(z23.VnD(), p0m, z23.VnD(), 1.0);
11999 __ Mov(z24, z23);
12000 __ Fsub(z24.VnD(), p0m, 1.0, z24.VnD());
12001 __ Mov(z25, z24);
12002 __ Fmul(z25.VnD(), p0m, z25.VnD(), 2.0);
12003 __ Mov(z26, z21);
12004 __ Fminnm(z26.VnD(), p0m, z26.VnD(), 0.0);
12005 __ Mov(z27, z21);
12006 __ Fmaxnm(z27.VnD(), p0m, z27.VnD(), 1.0);
12007 __ Mov(z28, z25);
12008 __ Fmin(z28.VnD(), p0m, z28.VnD(), 1.0);
12009 __ Mov(z29, z25);
12010 __ Fmax(z29.VnD(), p0m, z29.VnD(), 0.0);
12011
12012 __ Index(z0.VnH(), -3, 1);
12013 __ Scvtf(z0.VnH(), p1.Merging(), z0.VnH());
12014 __ Fmax(z0.VnH(), p1.Merging(), z0.VnH(), 0.0);
12015 __ Index(z1.VnS(), -4, 2);
12016 __ Scvtf(z1.VnS(), p1.Merging(), z1.VnS());
12017 __ Fadd(z1.VnS(), p1.Merging(), z1.VnS(), 1.0);
12018
12019 END();
12020
12021 if (CAN_RUN()) {
12022 RUN();
12023 uint64_t expected_z2[] = {0x3800380038003800, 0x3800000038003800};
12024 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
12025 uint64_t expected_z3[] = {0xb800b800b800b800, 0xb8000000b800b800};
12026 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
12027 uint64_t expected_z4[] = {0x3e003e003e003e00, 0x3e0000003e003e00};
12028 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12029 uint64_t expected_z5[] = {0x4200420042004200, 0x4200000042004200};
12030 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12031 uint64_t expected_z6[] = {0x0000000000000000, 0x00007e0000000000};
12032 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12033 uint64_t expected_z7[] = {0x3c003c003c003c00, 0x3c007e003c003c00};
12034 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12035 uint64_t expected_z8[] = {0x3c003c003c003c00, 0x3c0000003c003c00};
12036 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12037 uint64_t expected_z9[] = {0x4200420042004200, 0x4200000042004200};
12038 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12039
12040 uint64_t expected_z12[] = {0x3f0000003f000000, 0x000000003f000000};
12041 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
12042 uint64_t expected_z13[] = {0xbf000000bf000000, 0x00000000bf000000};
12043 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
12044 uint64_t expected_z14[] = {0x3fc000003fc00000, 0x000000003fc00000};
12045 ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
12046 uint64_t expected_z15[] = {0x4040000040400000, 0x0000000040400000};
12047 ASSERT_EQUAL_SVE(expected_z15, z15.VnD());
12048 uint64_t expected_z16[] = {0x0000000000000000, 0x7fc0000000000000};
12049 ASSERT_EQUAL_SVE(expected_z16, z16.VnD());
12050 uint64_t expected_z17[] = {0x3f8000003f800000, 0x7fc000003f800000};
12051 ASSERT_EQUAL_SVE(expected_z17, z17.VnD());
12052 uint64_t expected_z18[] = {0x3f8000003f800000, 0x000000003f800000};
12053 ASSERT_EQUAL_SVE(expected_z18, z18.VnD());
12054 uint64_t expected_z19[] = {0x4040000040400000, 0x0000000040400000};
12055 ASSERT_EQUAL_SVE(expected_z19, z19.VnD());
12056
12057 uint64_t expected_z22[] = {0x3fe0000000000000, 0x3fe0000000000000};
12058 ASSERT_EQUAL_SVE(expected_z22, z22.VnD());
12059 uint64_t expected_z23[] = {0xbfe0000000000000, 0xbfe0000000000000};
12060 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
12061 uint64_t expected_z24[] = {0x3ff8000000000000, 0x3ff8000000000000};
12062 ASSERT_EQUAL_SVE(expected_z24, z24.VnD());
12063 uint64_t expected_z25[] = {0x4008000000000000, 0x4008000000000000};
12064 ASSERT_EQUAL_SVE(expected_z25, z25.VnD());
12065 uint64_t expected_z26[] = {0x0000000000000000, 0x0000000000000000};
12066 ASSERT_EQUAL_SVE(expected_z26, z26.VnD());
12067 uint64_t expected_z27[] = {0x3ff0000000000000, 0x3ff0000000000000};
12068 ASSERT_EQUAL_SVE(expected_z27, z27.VnD());
12069 uint64_t expected_z28[] = {0x3ff0000000000000, 0x3ff0000000000000};
12070 ASSERT_EQUAL_SVE(expected_z28, z28.VnD());
12071 uint64_t expected_z29[] = {0x4008000000000000, 0x4008000000000000};
12072 ASSERT_EQUAL_SVE(expected_z29, z29.VnD());
12073 uint64_t expected_z0[] = {0x4400420040003c00, 0x0000000000000000};
12074 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
12075 uint64_t expected_z1[] = {0x404000003f800000, 0xbf800000c0400000};
12076 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
12077 }
12078}
12079
12080TEST_SVE(sve_fscale) {
12081 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12082 START();
12083
12084 uint64_t inputs_h[] = {0x4800470046004500, 0x4400420040003c00};
12085 InsrHelper(&masm, z0.VnD(), inputs_h);
12086 uint64_t inputs_s[] = {0x4080000040400000, 0x400000003f800000};
12087 InsrHelper(&masm, z1.VnD(), inputs_s);
12088 uint64_t inputs_d[] = {0x40f0000000000000, 0x4000000000000000};
12089 InsrHelper(&masm, z2.VnD(), inputs_d);
12090
12091 uint64_t scales[] = {0x00080002fff8fffe, 0x00100001fff0ffff};
12092 InsrHelper(&masm, z3.VnD(), scales);
12093
12094 __ Ptrue(p0.VnB());
12095 int pred[] = {0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1};
12096 Initialise(&masm, p1.VnB(), pred);
12097
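  // Fscale multiplies each element of the first source by two raised to the
  // power of the corresponding (signed integer) element of the second source.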
12098 __ Mov(z4, z0);
12099 __ Fscale(z4.VnH(), p0.Merging(), z4.VnH(), z3.VnH());
12100 __ Mov(z5, z0);
12101 __ Fscale(z5.VnH(), p1.Merging(), z5.VnH(), z3.VnH());
12102
12103 __ Sunpklo(z3.VnS(), z3.VnH());
12104 __ Mov(z6, z1);
12105 __ Fscale(z6.VnS(), p0.Merging(), z6.VnS(), z3.VnS());
12106 __ Mov(z7, z1);
12107 __ Fscale(z7.VnS(), p1.Merging(), z7.VnS(), z3.VnS());
12108
12109 __ Sunpklo(z3.VnD(), z3.VnS());
12110 __ Mov(z8, z2);
12111 __ Fscale(z8.VnD(), p0.Merging(), z8.VnD(), z3.VnD());
12112 __ Mov(z9, z2);
12113 __ Fscale(z9.VnD(), p1.Merging(), z9.VnD(), z3.VnD());
12114
12115 // Test full double precision range scaling.
12116 __ Dup(z10.VnD(), 2045);
12117 __ Dup(z11.VnD(), 0x0010000000000000); // 2^-1022
12118 __ Fscale(z11.VnD(), p0.Merging(), z11.VnD(), z10.VnD());
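  // 2^-1022 scaled by 2^2045 gives 2^1023 (0x7fe0000000000000), the largest
  // power of two representable in a double.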
12119
12120 END();
12121
12122 if (CAN_RUN()) {
12123 RUN();
12124
12125 uint64_t expected_z4[] = {0x68004f0026003d00, 0x7c00460002003800};
12126 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
12127 uint64_t expected_z5[] = {0x68004f0026004500, 0x7c00420002003800};
12128 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
12129
12130 uint64_t expected_z6[] = {0x4880000040c00000, 0x380000003f000000};
12131 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
12132 uint64_t expected_z7[] = {0x4880000040400000, 0x400000003f000000};
12133 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
12134
12135 uint64_t expected_z8[] = {0x3ff0000000000000, 0x3ff0000000000000};
12136 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
12137 uint64_t expected_z9[] = {0x40f0000000000000, 0x3ff0000000000000};
12138 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
12139
12140 uint64_t expected_z11[] = {0x7fe0000000000000, 0x7fe0000000000000};
12141 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
12142 }
12143}
12144
12145typedef void (MacroAssembler::*FcvtFn)(const ZRegister& zd,
12146 const PRegisterM& pg,
12147 const ZRegister& zn);
12148
12149template <typename F, size_t N>
12150static void TestFcvtzsFcvtzuHelper(
12151 Test* config,
12152 FcvtFn macro,
12153 int dst_type_size_in_bits,
12154 int src_type_size_in_bits,
12155 const F (&zn_inputs)[N],
12156 const int (&pg_inputs)[N],
12157 const uint64_t (&zd_expected_all_active)[N]) {
12158 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12159 START();
12160
12161 // If the input and result types have a different size, the instruction
12162  // operates on elements of the largest specified type; the lane size used
12163  // below is therefore the larger of the two sizes.
12164 int lane_size_in_bits =
12165 std::max(dst_type_size_in_bits, src_type_size_in_bits);
12166
12167 ZRegister zd_all_active = z25;
12168 ZRegister zd_merged = z26;
12169 ZRegister zn = z27;
12170
12171 uint64_t zn_rawbits[N];
12172 FPToRawbitsWithSize(zn_inputs, zn_rawbits, src_type_size_in_bits);
12173 InsrHelper(&masm, zn.WithLaneSize(lane_size_in_bits), zn_rawbits);
12174
12175 PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
12176 __ Ptrue(pg_all_active);
12177
12178  // Test floating-point conversions with all lanes active.
12179 (masm.*macro)(zd_all_active.WithLaneSize(dst_type_size_in_bits),
12180 pg_all_active.Merging(),
12181 zn.WithLaneSize(src_type_size_in_bits));
12182
12183 PRegisterWithLaneSize pg_merged = p1.WithLaneSize(lane_size_in_bits);
12184 Initialise(&masm, pg_merged, pg_inputs);
12185
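  // Pre-fill the merged destination with a distinctive pattern so that inactive
  // lanes can be identified in the result.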
12186 __ Dup(zd_merged.VnD(), 0x0bad0bad0bad0bad);
12187
12188  // Use the same `zn` inputs to test floating-point conversions, but with some
12189  // lanes set inactive.
12190 (masm.*macro)(zd_merged.WithLaneSize(dst_type_size_in_bits),
12191 pg_merged.Merging(),
12192 zn.WithLaneSize(src_type_size_in_bits));
12193
12194 END();
12195
12196 if (CAN_RUN()) {
12197 RUN();
12198
12199 ASSERT_EQUAL_SVE(zd_expected_all_active,
12200 zd_all_active.WithLaneSize(lane_size_in_bits));
12201
12202 uint64_t zd_expected_merged[N];
12203 for (unsigned i = 0; i < N; i++) {
12204 zd_expected_merged[i] =
12205 pg_inputs[i] ? zd_expected_all_active[i]
12206 : 0x0bad0bad0bad0bad & GetUintMask(lane_size_in_bits);
12207 }
12208 ASSERT_EQUAL_SVE(zd_expected_merged,
12209 zd_merged.WithLaneSize(lane_size_in_bits));
12210 }
12211}
12212
12213TEST_SVE(fcvtzs_fcvtzu_float16) {
12214 // clang-format off
12215 const double h_max_float16 = kHMaxInt; // Largest float16 == INT16_MAX.
12216 const double h_min_float16 = -h_max_float16; // Smallest float16 > INT16_MIN.
12217 const double largest_float16 = 0xffe0; // 65504
12218 const double smallest_float16 = -largest_float16;
12219 const double h_max_int_sub_one = kHMaxInt - 1;
12220 const double h_min_int_add_one = kHMinInt + 1;
12221
12222 double zn_inputs[] = {1.0,
12223 1.1,
12224 1.5,
12225 -1.5,
12226 h_max_float16,
12227 h_min_float16,
12228 largest_float16,
12229 smallest_float16,
12230 kFP64PositiveInfinity,
12231 kFP64NegativeInfinity,
12232 h_max_int_sub_one,
12233 h_min_int_add_one};
12234
12235 int pg_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};
12236
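  // The conversions saturate: out-of-range values and infinities produce
  // INT16_MAX or INT16_MIN for fcvtzs, and UINT16_MAX or 0 for fcvtzu.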
12237 uint64_t expected_fcvtzs_fp162h[] = {1,
12238 1,
12239 1,
12240 0xffff,
12241 0x7fff,
12242 0x8000,
12243 0x7fff,
12244 0x8000,
12245 0x7fff,
12246 0x8000,
12247 0x7fff,
12248 0x8000};
12249
12250 uint64_t expected_fcvtzu_fp162h[] = {1,
12251 1,
12252 1,
12253 0,
12254 0x8000,
12255 0,
12256 0xffe0,
12257 0,
12258 0xffff,
12259 0,
12260 0x8000,
12261 0};
12262
12263 // Float16 to 16-bit integers.
12264 TestFcvtzsFcvtzuHelper(config,
12265 &MacroAssembler::Fcvtzs,
12266 kHRegSize,
12267 kHRegSize,
12268 zn_inputs,
12269 pg_inputs,
12270 expected_fcvtzs_fp162h);
12271
12272 TestFcvtzsFcvtzuHelper(config,
12273 &MacroAssembler::Fcvtzu,
12274 kHRegSize,
12275 kHRegSize,
12276 zn_inputs,
12277 pg_inputs,
12278 expected_fcvtzu_fp162h);
12279
12280 uint64_t expected_fcvtzs_fp162w[] = {1,
12281 1,
12282 1,
12283 0xffffffff,
12284 0x8000,
12285 0xffff8000,
12286 0xffe0,
12287 0xffff0020,
12288 0x7fffffff,
12289 0x80000000,
12290 0x8000,
12291 0xffff8000};
12292
12293 uint64_t expected_fcvtzu_fp162w[] = {1,
12294 1,
12295 1,
12296 0,
12297 0x8000,
12298 0,
12299 0xffe0,
12300 0,
12301 0xffffffff,
12302 0,
12303 0x8000,
12304 0};
12305
12306 // Float16 to 32-bit integers.
12307 TestFcvtzsFcvtzuHelper(config,
12308 &MacroAssembler::Fcvtzs,
12309 kSRegSize,
12310 kHRegSize,
12311 zn_inputs,
12312 pg_inputs,
12313 expected_fcvtzs_fp162w);
12314
12315 TestFcvtzsFcvtzuHelper(config,
12316 &MacroAssembler::Fcvtzu,
12317 kSRegSize,
12318 kHRegSize,
12319 zn_inputs,
12320 pg_inputs,
12321 expected_fcvtzu_fp162w);
12322
12323 uint64_t expected_fcvtzs_fp162x[] = {1,
12324 1,
12325 1,
12326 0xffffffffffffffff,
12327 0x8000,
12328 0xffffffffffff8000,
12329 0xffe0,
12330 0xffffffffffff0020,
12331 0x7fffffffffffffff,
12332 0x8000000000000000,
12333 0x8000,
12334 0xffffffffffff8000};
12335
12336 uint64_t expected_fcvtzu_fp162x[] = {1,
12337 1,
12338 1,
12339 0,
12340 0x8000,
12341 0,
12342 0xffe0,
12343 0,
12344 0xffffffffffffffff,
12345 0,
12346 0x8000,
12347 0};
12348
12349 // Float16 to 64-bit integers.
12350 TestFcvtzsFcvtzuHelper(config,
12351 &MacroAssembler::Fcvtzs,
12352 kDRegSize,
12353 kHRegSize,
12354 zn_inputs,
12355 pg_inputs,
12356 expected_fcvtzs_fp162x);
12357
12358 TestFcvtzsFcvtzuHelper(config,
12359 &MacroAssembler::Fcvtzu,
12360 kDRegSize,
12361 kHRegSize,
12362 zn_inputs,
12363 pg_inputs,
12364 expected_fcvtzu_fp162x);
12365 // clang-format on
12366}
12367
12368TEST_SVE(fcvtzs_fcvtzu_float) {
12369 const double w_max_float = 0x7fffff80; // Largest float < INT32_MAX.
12370 const double w_min_float = -w_max_float; // Smallest float > INT32_MIN.
12371 const double x_max_float = 0x7fffff8000000000; // Largest float < INT64_MAX.
12372 const double x_min_float = -x_max_float; // Smallest float > INT64_MIN.
12373 const double w_max_int_sub_one = kWMaxInt - 1;
12374 const double w_min_int_add_one = kWMinInt + 1;
12375 const double x_max_int_sub_one = kXMaxInt - 1;
12376 const double x_min_int_add_one = kXMinInt + 1;
12377
12378 // clang-format off
12379 double zn_inputs[] = {1.0,
12380 1.1,
12381 1.5,
12382 -1.5,
12383 w_max_float,
12384 w_min_float,
12385 x_max_float,
12386 x_min_float,
12387 kFP64PositiveInfinity,
12388 kFP64NegativeInfinity,
12389 w_max_int_sub_one,
12390 w_min_int_add_one,
12391 x_max_int_sub_one,
12392 x_min_int_add_one};
12393
12394 int pg_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0};
12395
12396 uint64_t expected_fcvtzs_s2w[] = {1,
12397 1,
12398 1,
12399 0xffffffff,
12400 0x7fffff80,
12401 0x80000080,
12402 0x7fffffff,
12403 0x80000000,
12404 0x7fffffff,
12405 0x80000000,
12406 0x7fffffff,
12407 0x80000000,
12408 0x7fffffff,
12409 0x80000000};
12410
12411 uint64_t expected_fcvtzu_s2w[] = {1,
12412 1,
12413 1,
12414 0,
12415 0x7fffff80,
12416 0,
12417 0xffffffff,
12418 0,
12419 0xffffffff,
12420 0,
12421 0x80000000,
12422 0,
12423 0xffffffff,
12424 0};
12425
12426 // Float to 32-bit integers.
12427 TestFcvtzsFcvtzuHelper(config,
12428 &MacroAssembler::Fcvtzs,
12429 kSRegSize,
12430 kSRegSize,
12431 zn_inputs,
12432 pg_inputs,
12433 expected_fcvtzs_s2w);
12434
12435 TestFcvtzsFcvtzuHelper(config,
12436 &MacroAssembler::Fcvtzu,
12437 kSRegSize,
12438 kSRegSize,
12439 zn_inputs,
12440 pg_inputs,
12441 expected_fcvtzu_s2w);
12442
12443 uint64_t expected_fcvtzs_s2x[] = {1,
12444 1,
12445 1,
12446 0xffffffffffffffff,
12447 0x7fffff80,
12448 0xffffffff80000080,
12449 0x7fffff8000000000,
12450 0x8000008000000000,
12451 0x7fffffffffffffff,
12452 0x8000000000000000,
12453 0x80000000,
12454 0xffffffff80000000,
12455 0x7fffffffffffffff,
12456 0x8000000000000000};
12457
12458 uint64_t expected_fcvtzu_s2x[] = {1,
12459 1,
12460 1,
12461 0,
12462 0x7fffff80,
12463 0,
12464 0x7fffff8000000000,
12465 0,
12466 0xffffffffffffffff,
12467 0,
12468 0x0000000080000000,
12469 0,
12470 0x8000000000000000,
12471 0};
12472
12473 // Float to 64-bit integers.
12474 TestFcvtzsFcvtzuHelper(config,
12475 &MacroAssembler::Fcvtzs,
12476 kDRegSize,
12477 kSRegSize,
12478 zn_inputs,
12479 pg_inputs,
12480 expected_fcvtzs_s2x);
12481
12482 TestFcvtzsFcvtzuHelper(config,
12483 &MacroAssembler::Fcvtzu,
12484 kDRegSize,
12485 kSRegSize,
12486 zn_inputs,
12487 pg_inputs,
12488 expected_fcvtzu_s2x);
12489 // clang-format on
12490}
12491
12492TEST_SVE(fcvtzs_fcvtzu_double) {
12493 // clang-format off
12494 const double w_max_float = 0x7fffff80; // Largest float < INT32_MAX.
12495 const double w_min_float = -w_max_float; // Smallest float > INT32_MIN.
12496 const double x_max_float = 0x7fffff8000000000; // Largest float < INT64_MAX.
12497 const double x_min_float = -x_max_float; // Smallest float > INT64_MIN.
12498 const double w_max_double = kWMaxInt; // Largest double == INT32_MAX.
12499 const double w_min_double = -w_max_double; // Smallest double > INT32_MIN.
12500 const double x_max_double = 0x7ffffffffffffc00; // Largest double < INT64_MAX.
12501 const double x_min_double = -x_max_double; // Smallest double > INT64_MIN.
12502 const double w_max_int_sub_one = kWMaxInt - 1;
12503 const double w_min_int_add_one = kWMinInt + 1;
12504 const double x_max_int_sub_one = kXMaxInt - 1;
12505 const double x_min_int_add_one = kXMinInt + 1;
12506
12507 double zn_inputs[] = {1.0,
12508 1.1,
12509 1.5,
12510 -1.5,
12511 w_max_float,
12512 w_min_float,
12513 x_max_float,
12514 x_min_float,
12515 w_max_double,
12516 w_min_double,
12517 x_max_double,
12518 x_min_double,
12519 kFP64PositiveInfinity,
12520 kFP64NegativeInfinity,
12521 w_max_int_sub_one,
12522 w_min_int_add_one,
12523 x_max_int_sub_one,
12524 x_min_int_add_one};
12525
12526 int pg_inputs[] = {1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0};
12527
12528 uint64_t expected_fcvtzs_d2w[] = {1,
12529 1,
12530 1,
12531 0xffffffffffffffff,
12532 0x7fffff80,
12533 0xffffffff80000080,
12534 0x7fffffff,
12535 0xffffffff80000000,
12536 0x7fffffff,
12537 0xffffffff80000001,
12538 0x7fffffff,
12539 0xffffffff80000000,
12540 0x7fffffff,
12541 0xffffffff80000000,
12542 0x7ffffffe,
12543 0xffffffff80000001,
12544 0x7fffffff,
12545 0xffffffff80000000};
12546
12547 uint64_t expected_fcvtzu_d2w[] = {1,
12548 1,
12549 1,
12550 0,
12551 0x7fffff80,
12552 0,
12553 0xffffffff,
12554 0,
12555 0x7fffffff,
12556 0,
12557 0xffffffff,
12558 0,
12559 0xffffffff,
12560 0,
12561 0x7ffffffe,
12562 0,
12563 0xffffffff,
12564 0};
12565
12566 // Double to 32-bit integers.
12567 TestFcvtzsFcvtzuHelper(config,
12568 &MacroAssembler::Fcvtzs,
12569 kSRegSize,
12570 kDRegSize,
12571 zn_inputs,
12572 pg_inputs,
12573 expected_fcvtzs_d2w);
12574
12575 TestFcvtzsFcvtzuHelper(config,
12576 &MacroAssembler::Fcvtzu,
12577 kSRegSize,
12578 kDRegSize,
12579 zn_inputs,
12580 pg_inputs,
12581 expected_fcvtzu_d2w);
12582
12583 uint64_t expected_fcvtzs_d2x[] = {1,
12584 1,
12585 1,
12586 0xffffffffffffffff,
12587 0x7fffff80,
12588 0xffffffff80000080,
12589 0x7fffff8000000000,
12590 0x8000008000000000,
12591 0x7fffffff,
12592 0xffffffff80000001,
12593 0x7ffffffffffffc00,
12594 0x8000000000000400,
12595 0x7fffffffffffffff,
12596 0x8000000000000000,
12597 0x7ffffffe,
12598 0xffffffff80000001,
12599 0x7fffffffffffffff,
12600 0x8000000000000000};
12601
12602 uint64_t expected_fcvtzu_d2x[] = {1,
12603 1,
12604 1,
12605 0,
12606 0x7fffff80,
12607 0,
12608 0x7fffff8000000000,
12609 0,
12610 0x7fffffff,
12611 0,
12612 0x7ffffffffffffc00,
12613 0,
12614 0xffffffffffffffff,
12615 0,
12616 0x000000007ffffffe,
12617 0,
12618 0x8000000000000000,
12619 0};
12620
12621 // Double to 64-bit integers.
12622 TestFcvtzsFcvtzuHelper(config,
12623 &MacroAssembler::Fcvtzs,
12624 kDRegSize,
12625 kDRegSize,
12626 zn_inputs,
12627 pg_inputs,
12628 expected_fcvtzs_d2x);
12629
12630 TestFcvtzsFcvtzuHelper(config,
12631 &MacroAssembler::Fcvtzu,
12632 kDRegSize,
12633 kDRegSize,
12634 zn_inputs,
12635 pg_inputs,
12636 expected_fcvtzu_d2x);
12637 // clang-format on
12638}
12639
12640struct CvtfTestDataSet {
12641 uint64_t int_value;
12642 uint64_t scvtf_result;
12643 uint64_t ucvtf_result;
12644};
12645
12646template <size_t N>
12647static void TestUScvtfHelper(Test* config,
12648 int dst_type_size_in_bits,
12649 int src_type_size_in_bits,
12650 const int (&pg_inputs)[N],
12651 const CvtfTestDataSet (&data_set)[N]) {
12652 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
12653 START();
12654
12655  // Unpack the data from the array of structs into individual arrays, to
12656  // simplify the testing below.
12657 uint64_t zn_inputs[N];
12658 uint64_t expected_zd_scvtf_all_active[N];
12659 uint64_t expected_zd_ucvtf_all_active[N];
12660 for (size_t i = 0; i < N; i++) {
12661 zn_inputs[i] = data_set[i].int_value;
12662 expected_zd_scvtf_all_active[i] = data_set[i].scvtf_result;
12663 expected_zd_ucvtf_all_active[i] = data_set[i].ucvtf_result;
12664 }
12665
12666 // If the input and result types have a different size, the instruction
12667 // operates on elements of the largest specified type.
12668 int lane_size_in_bits =
12669 std::max(dst_type_size_in_bits, src_type_size_in_bits);
12670
12671 ZRegister zd_scvtf_all_active = z25;
12672 ZRegister zd_ucvtf_all_active = z26;
12673 ZRegister zn = z27;
12674 InsrHelper(&masm, zn.WithLaneSize(lane_size_in_bits), zn_inputs);
12675
12676 PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
12677 __ Ptrue(pg_all_active);
12678
12679  // Test integer conversions with all lanes active.
12680 __ Scvtf(zd_scvtf_all_active.WithLaneSize(dst_type_size_in_bits),
12681 pg_all_active.Merging(),
12682 zn.WithLaneSize(src_type_size_in_bits));
12683 __ Ucvtf(zd_ucvtf_all_active.WithLaneSize(dst_type_size_in_bits),
12684 pg_all_active.Merging(),
12685 zn.WithLaneSize(src_type_size_in_bits));
12686
12687 ZRegister zd_scvtf_merged = z23;
12688 ZRegister zd_ucvtf_merged = z24;
12689
12690 PRegisterWithLaneSize pg_merged = p1.WithLaneSize(lane_size_in_bits);
12691 Initialise(&masm, pg_merged, pg_inputs);
12692
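  // Pre-fill the merged destinations with a signalling NaN pattern so that
  // inactive lanes are easy to identify in the results.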
12693 uint64_t snan;
12694 switch (lane_size_in_bits) {
12695 case kHRegSize:
12696 snan = 0x7c11;
12697 break;
12698 case kSRegSize:
12699 snan = 0x7f951111;
12700 break;
12701 case kDRegSize:
12702 snan = 0x7ff5555511111111;
12703 break;
12704 }
12705 __ Dup(zd_scvtf_merged.WithLaneSize(lane_size_in_bits), snan);
12706 __ Dup(zd_ucvtf_merged.WithLaneSize(lane_size_in_bits), snan);
12707
12708  // Use the same `zn` inputs to test integer conversions, but with some lanes
12709  // set inactive.
12710 __ Scvtf(zd_scvtf_merged.WithLaneSize(dst_type_size_in_bits),
12711 pg_merged.Merging(),
12712 zn.WithLaneSize(src_type_size_in_bits));
12713 __ Ucvtf(zd_ucvtf_merged.WithLaneSize(dst_type_size_in_bits),
12714 pg_merged.Merging(),
12715 zn.WithLaneSize(src_type_size_in_bits));
12716
12717 END();
12718
12719 if (CAN_RUN()) {
12720 RUN();
12721
12722 ASSERT_EQUAL_SVE(expected_zd_scvtf_all_active,
12723 zd_scvtf_all_active.WithLaneSize(lane_size_in_bits));
12724 ASSERT_EQUAL_SVE(expected_zd_ucvtf_all_active,
12725 zd_ucvtf_all_active.WithLaneSize(lane_size_in_bits));
12726
12727 uint64_t expected_zd_scvtf_merged[N];
12728 for (size_t i = 0; i < N; i++) {
12729 expected_zd_scvtf_merged[i] =
12730 pg_inputs[i] ? expected_zd_scvtf_all_active[i] : snan;
12731 }
12732 ASSERT_EQUAL_SVE(expected_zd_scvtf_merged,
12733 zd_scvtf_merged.WithLaneSize(lane_size_in_bits));
12734
12735 uint64_t expected_zd_ucvtf_merged[N];
12736 for (size_t i = 0; i < N; i++) {
12737 expected_zd_ucvtf_merged[i] =
12738 pg_inputs[i] ? expected_zd_ucvtf_all_active[i] : snan;
12739 }
12740 ASSERT_EQUAL_SVE(expected_zd_ucvtf_merged,
12741 zd_ucvtf_merged.WithLaneSize(lane_size_in_bits));
12742 }
12743}
12744
12745TEST_SVE(scvtf_ucvtf_h_s_d_to_float16) {
12746 // clang-format off
12747 CvtfTestDataSet data_set_1[] = {
12748 // Simple conversions of positive numbers which require no rounding; the
12749    // results should not depend on the rounding mode, and ucvtf and scvtf should
12750 // produce the same result.
12751 {0x0000, 0x0000, 0x0000},
12752 {0x0001, 0x3c00, 0x3c00},
12753 {0x0010, 0x4c00, 0x4c00},
12754 {0x0080, 0x5800, 0x5800},
12755 {0x0400, 0x6400, 0x6400},
12756 // Conversions which require rounding.
12757 {0x4000, 0x7400, 0x7400},
12758 {0x4001, 0x7400, 0x7400},
12759 // Round up to produce a result that's too big for the input to represent.
12760 {0x7ff0, 0x77ff, 0x77ff},
12761 {0x7ff1, 0x77ff, 0x77ff},
12762 {0x7ffe, 0x7800, 0x7800},
12763 {0x7fff, 0x7800, 0x7800}};
12764 int pg_1[] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
12765 TestUScvtfHelper(config, kHRegSize, kDRegSize, pg_1, data_set_1);
12766 TestUScvtfHelper(config, kHRegSize, kSRegSize, pg_1, data_set_1);
12767 TestUScvtfHelper(config, kHRegSize, kHRegSize, pg_1, data_set_1);
12768
12769 CvtfTestDataSet data_set_2[] = {
12770 // Test mantissa extremities.
12771 {0x0401, 0x6401, 0x6401},
12772 {0x4020, 0x7402, 0x7402},
12773    // The largest uint16_t that converts to a finite float16.
12774 {0xffef, 0xcc40, 0x7bff},
12775 // Values that would be negative if treated as an int16_t.
12776 {0xff00, 0xdc00, 0x7bf8},
12777 {0x8000, 0xf800, 0x7800},
12778 {0x8100, 0xf7f0, 0x7808},
12779 // Check for bit pattern reproduction.
12780 {0x0123, 0x5c8c, 0x5c8c},
12781 {0x0cde, 0x6a6f, 0x6a6f},
12782    // Simple conversions of negative int16_t values. These require no rounding,
12783 // and the results should not depend on the rounding mode.
12784 {0xf800, 0xe800, 0x7bc0},
12785 {0xfc00, 0xe400, 0x7be0},
12786 {0xc000, 0xf400, 0x7a00},
12787 // Check rounding of negative int16_t values.
12788 {0x8ffe, 0xf700, 0x7880},
12789 {0x8fff, 0xf700, 0x7880},
12790 {0xffee, 0xcc80, 0x7bff},
12791 {0xffef, 0xcc40, 0x7bff}};
12792 int pg_2[] = {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1};
12793  // The `32-bit to float16` and `64-bit to float16` variants of the above tests
12794  // have already been covered by the `16-bit to float16` `ucvtf` cases.
12795 TestUScvtfHelper(config, kHRegSize, kHRegSize, pg_2, data_set_2);
12796 // clang-format on
12797}
12798
12799TEST_SVE(scvtf_ucvtf_s_to_float) {
12800 // clang-format off
12801 int dst_lane_size = kSRegSize;
12802 int src_lane_size = kSRegSize;
12803
12804 // Simple conversions of positive numbers which require no rounding; the
12805  // results should not depend on the rounding mode, and ucvtf and scvtf should
12806 // produce the same result.
12807 CvtfTestDataSet data_set_1[] = {
12808 {0x00000000, 0x00000000, 0x00000000},
12809 {0x00000001, 0x3f800000, 0x3f800000},
12810 {0x00004000, 0x46800000, 0x46800000},
12811 {0x00010000, 0x47800000, 0x47800000},
12812 {0x40000000, 0x4e800000, 0x4e800000}};
12813 int pg_1[] = {1, 0, 1, 0, 0};
12814 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
12815
12816 CvtfTestDataSet data_set_2[] = {
12817 // Test mantissa extremities.
12818 {0x00800001, 0x4b000001, 0x4b000001},
12819 {0x40400000, 0x4e808000, 0x4e808000},
12820    // The largest int32_t that fits in a float.
12821 {0x7fffff80, 0x4effffff, 0x4effffff},
12822 // Values that would be negative if treated as an int32_t.
12823 {0xffffffff, 0xbf800000, 0x4f800000},
12824 {0xffffff00, 0xc3800000, 0x4f7fffff},
12825 {0x80000000, 0xcf000000, 0x4f000000},
12826 {0x80000001, 0xcf000000, 0x4f000000},
12827 // Check for bit pattern reproduction.
12828 {0x089abcde, 0x4d09abce, 0x4d09abce},
12829 {0x12345678, 0x4d91a2b4, 0x4d91a2b4}};
12830 int pg_2[] = {1, 0, 1, 0, 1, 1, 1, 0, 0};
12831 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
12832
12833 // Simple conversions of negative int32_t values. These require no rounding,
12834 // and the results should not depend on the rounding mode.
12835 CvtfTestDataSet data_set_3[] = {
12836 {0xffffc000, 0xc6800000, 0x4f7fffc0},
12837 {0xffff0000, 0xc7800000, 0x4f7fff00},
12838 {0xc0000000, 0xce800000, 0x4f400000},
12839 // Conversions which require rounding.
12840 {0x72800000, 0x4ee50000, 0x4ee50000},
12841 {0x72800001, 0x4ee50000, 0x4ee50000},
12842 {0x73000000, 0x4ee60000, 0x4ee60000},
12843 // Check rounding of negative int32_t values.
12844 {0x80000140, 0xcefffffe, 0x4f000001},
12845 {0x80000141, 0xcefffffd, 0x4f000001},
12846 {0x80000180, 0xcefffffd, 0x4f000002},
12847 // Round up to produce a result that's too big for the input to represent.
12848 {0x7fffffc0, 0x4f000000, 0x4f000000},
12849 {0x7fffffff, 0x4f000000, 0x4f000000}};
12850 int pg_3[] = {1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0};
12851 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
12852 // clang-format on
12853}
12854
12855TEST_SVE(scvtf_ucvtf_d_to_float) {
12856 // clang-format off
12857 int dst_lane_size = kSRegSize;
12858 int src_lane_size = kDRegSize;
12859
12860 // Simple conversions of positive numbers which require no rounding; the
12861  // results should not depend on the rounding mode, and ucvtf and scvtf should
12862 // produce the same result.
12863 CvtfTestDataSet data_set_1[] = {
12864 {0x0000000000000000, 0x00000000, 0x00000000},
12865 {0x0000000000000001, 0x3f800000, 0x3f800000},
12866 {0x0000000040000000, 0x4e800000, 0x4e800000},
12867 {0x0000000100000000, 0x4f800000, 0x4f800000},
12868 {0x4000000000000000, 0x5e800000, 0x5e800000}};
12869 int pg_1[] = {1, 1, 0, 1, 0};
12870 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
12871
12872 CvtfTestDataSet data_set_2[] = {
12873 // Test mantissa extremities.
12874 {0x0010000000000001, 0x59800000, 0x59800000},
12875 {0x4008000000000000, 0x5e801000, 0x5e801000},
12876 // The largest int32_t that fits in a float.
12877 {0x000000007fffff80, 0x4effffff, 0x4effffff},
12878 // Values that would be negative if treated as an int32_t.
12879 {0x00000000ffffffff, 0x4f800000, 0x4f800000},
12880 {0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff},
12881 {0x0000000080000000, 0x4f000000, 0x4f000000},
12882 {0x0000000080000100, 0x4f000001, 0x4f000001},
12883 // The largest int64_t that fits in a float.
12884 {0x7fffff8000000000, 0x5effffff, 0x5effffff},
12885 // Check for bit pattern reproduction.
12886 {0x0123456789abcde0, 0x5b91a2b4, 0x5b91a2b4},
12887 {0x0000000000876543, 0x4b076543, 0x4b076543}};
12888 int pg_2[] = {1, 0, 0, 0, 1, 0, 0, 0, 0, 1};
12889 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
12890
12891 CvtfTestDataSet data_set_3[] = {
12892 // Simple conversions of negative int64_t values. These require no rounding,
12893 // and the results should not depend on the rounding mode.
12894 {0xffffffffc0000000, 0xce800000, 0x5f800000},
12895 {0xffffffff00000000, 0xcf800000, 0x5f800000},
12896 {0xc000000000000000, 0xde800000, 0x5f400000},
12897 // Conversions which require rounding.
12898 {0x0000800002800000, 0x57000002, 0x57000002},
12899 {0x0000800002800001, 0x57000003, 0x57000003},
12900 {0x0000800003000000, 0x57000003, 0x57000003},
12901 // Check rounding of negative int64_t values.
12902 {0x8000014000000000, 0xdefffffe, 0x5f000001},
12903 {0x8000014000000001, 0xdefffffd, 0x5f000001},
12904 {0x8000018000000000, 0xdefffffd, 0x5f000002},
12905 // Round up to produce a result that's too big for the input to represent.
12906 {0x00000000ffffff80, 0x4f800000, 0x4f800000},
12907 {0x00000000ffffffff, 0x4f800000, 0x4f800000},
12908 {0xffffff8000000000, 0xd3000000, 0x5f800000},
12909 {0xffffffffffffffff, 0xbf800000, 0x5f800000}};
12910 int pg_3[] = {0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1};
12911 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
12912 // clang-format on
12913}
12914
12915TEST_SVE(scvtf_ucvtf_d_to_double) {
12916 // clang-format off
12917 int dst_lane_size = kDRegSize;
12918 int src_lane_size = kDRegSize;
12919
12920 // Simple conversions of positive numbers which require no rounding; the
12921  // results should not depend on the rounding mode, and ucvtf and scvtf should
12922 // produce the same result.
12923 CvtfTestDataSet data_set_1[] = {
12924 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
12925 {0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000},
12926 {0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000},
12927 {0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000},
12928 {0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000}};
12929 int pg_1[] = {0, 1, 1, 0, 0};
12930 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
12931
12932 CvtfTestDataSet data_set_2[] = {
12933 // Test mantissa extremities.
12934 {0x0010000000000001, 0x4330000000000001, 0x4330000000000001},
12935 {0x4008000000000000, 0x43d0020000000000, 0x43d0020000000000},
12936 // The largest int32_t that fits in a double.
12937 {0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000},
12938 // Values that would be negative if treated as an int32_t.
12939 {0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000},
12940 {0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000},
12941 {0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000},
12942 // The largest int64_t that fits in a double.
12943 {0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff},
12944 // Check for bit pattern reproduction.
12945 {0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde},
12946 {0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000}};
12947 int pg_2[] = {1, 1, 1, 1, 1, 0, 0, 0, 0};
12948 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
12949
12950 CvtfTestDataSet data_set_3[] = {
12951 // Simple conversions of negative int64_t values. These require no rounding,
12952 // and the results should not depend on the rounding mode.
12953 {0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000},
12954 {0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000},
12955 {0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000},
12956 // Conversions which require rounding.
12957 {0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002},
12958 {0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003},
12959 {0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003},
12960 // Check rounding of negative int64_t values.
12961 {0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001},
12962 {0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001},
12963 {0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002},
12964 // Round up to produce a result that's too big for the input to represent.
12965 {0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000},
12966 {0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000},
12967 {0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000},
12968 {0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000}};
12969 int pg_3[] = {1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0};
12970 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_3, data_set_3);
12971 // clang-format on
12972}
12973
12974TEST_SVE(scvtf_ucvtf_s_to_double) {
12975 // clang-format off
12976 int dst_lane_size = kDRegSize;
12977 int src_lane_size = kSRegSize;
12978
12979 // Simple conversions of positive numbers which require no rounding; the
12980  // results should not depend on the rounding mode, and ucvtf and scvtf should
12981 // produce the same result.
12982 CvtfTestDataSet data_set_1[] = {
12983 {0x00000000, 0x0000000000000000, 0x0000000000000000},
12984 {0x00000001, 0x3ff0000000000000, 0x3ff0000000000000},
12985 {0x00004000, 0x40d0000000000000, 0x40d0000000000000},
12986 {0x00010000, 0x40f0000000000000, 0x40f0000000000000},
12987 {0x40000000, 0x41d0000000000000, 0x41d0000000000000}};
12988 int pg_1[] = {1, 0, 0, 0, 1};
12989 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_1, data_set_1);
12990
12991 CvtfTestDataSet data_set_2[] = {
12992 // Test mantissa extremities.
12993 {0x40000400, 0x41d0000100000000, 0x41d0000100000000},
12994 // The largest int32_t that fits in a double.
12995 {0x7fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000},
12996 // Values that would be negative if treated as an int32_t.
12997 {0xffffffff, 0xbff0000000000000, 0x41efffffffe00000},
12998 {0x80000000, 0xc1e0000000000000, 0x41e0000000000000},
12999 {0x80000001, 0xc1dfffffffc00000, 0x41e0000000200000},
13000 // Check for bit pattern reproduction.
13001 {0x089abcde, 0x41a13579bc000000, 0x41a13579bc000000},
13002 {0x12345678, 0x41b2345678000000, 0x41b2345678000000},
13003 // Simple conversions of negative int32_t values. These require no rounding,
13004 // and the results should not depend on the rounding mode.
13005 {0xffffc000, 0xc0d0000000000000, 0x41effff800000000},
13006 {0xffff0000, 0xc0f0000000000000, 0x41efffe000000000},
13007 {0xc0000000, 0xc1d0000000000000, 0x41e8000000000000}};
13008 int pg_2[] = {1, 0, 1, 0, 0, 1, 1, 0, 1, 1};
13009 TestUScvtfHelper(config, dst_lane_size, src_lane_size, pg_2, data_set_2);
13010
13011  // Note that the IEEE 754 double-precision format has a 52-bit fraction, so
13012  // all 32-bit integers are exactly representable as doubles.
13013 // clang-format on
13014}
13015
13016TEST_SVE(sve_fadda) {
13017 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
13018 CPUFeatures::kFP,
13019 CPUFeatures::kFPHalf);
13020 START();
13021
13022 __ Ptrue(p0.VnB());
13023 __ Pfalse(p1.VnB());
13024 __ Zip1(p1.VnH(), p0.VnH(), p1.VnH());
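  // p1 now selects alternate H lanes (0, 2, 4, ...); with the Index(1, 1)
  // sequence below, these lanes hold the odd values 1, 3, 5, ...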
13025
13026 __ Index(z0.VnS(), 3, 3);
13027 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
13028 __ Fmov(s2, 2.0);
13029 __ Fadda(s2, p0, s2, z0.VnS());
13030
13031 __ Index(z0.VnD(), -7, -7);
13032 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
13033 __ Fmov(d3, 3.0);
13034 __ Fadda(d3, p0, d3, z0.VnD());
13035
13036 __ Index(z0.VnH(), 1, 1);
13037 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
13038 __ Fmov(h4, 0);
13039 __ Fadda(h4, p1, h4, z0.VnH());
13040 END();
13041
13042 if (CAN_RUN()) {
13043 RUN();
13044    // Sum of 1 .. n is n(n+1)/2, computed as (n + 1) * (n / 2) since n is even.
13045 int n = core.GetSVELaneCount(kSRegSize);
13046 ASSERT_EQUAL_FP32(2 + 3 * ((n + 1) * (n / 2)), s2);
13047
13048 n /= 2; // Half as many lanes.
13049 ASSERT_EQUAL_FP64(3 + -7 * ((n + 1) * (n / 2)), d3);
13050
13051 // Sum of first n odd numbers is n^2.
13052 n = core.GetSVELaneCount(kHRegSize) / 2; // Half are odd numbers.
13053 ASSERT_EQUAL_FP16(Float16(n * n), h4);
13054 }
13055}
13056
13057TEST_SVE(sve_extract) {
13058 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13059 START();
13060
13061 __ Index(z0.VnB(), 0, 1);
13062
13063 __ Mov(z1, z0);
13064 __ Mov(z2, z0);
13065 __ Mov(z3, z0);
13066 __ Mov(z4, z0);
13067 __ Mov(z5, z0);
13068 __ Mov(z6, z0);
13069
13070 __ Ext(z1, z1, z0, 0);
13071 __ Ext(z2, z2, z0, 1);
13072 __ Ext(z3, z3, z0, 15);
13073 __ Ext(z4, z4, z0, 31);
13074 __ Ext(z5, z5, z0, 47);
13075 __ Ext(z6, z6, z0, 255);
13076
13077 END();
13078
13079 if (CAN_RUN()) {
13080 RUN();
13081
13082 ASSERT_EQUAL_SVE(z1, z0);
13083
13084 int lane_count = core.GetSVELaneCount(kBRegSize);
13085 if (lane_count == 16) {
13086 uint64_t z2_expected[] = {0x000f0e0d0c0b0a09, 0x0807060504030201};
13087 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
13088 } else {
13089 uint64_t z2_expected[] = {0x100f0e0d0c0b0a09, 0x0807060504030201};
13090 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
13091 }
13092
13093 if (lane_count == 16) {
13094 uint64_t z3_expected[] = {0x0e0d0c0b0a090807, 0x060504030201000f};
13095 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13096 } else {
13097 uint64_t z3_expected[] = {0x1e1d1c1b1a191817, 0x161514131211100f};
13098 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13099 }
13100
13101 if (lane_count < 32) {
13102 ASSERT_EQUAL_SVE(z4, z0);
13103 } else if (lane_count == 32) {
13104 uint64_t z4_expected[] = {0x0e0d0c0b0a090807, 0x060504030201001f};
13105 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13106 } else {
13107 uint64_t z4_expected[] = {0x2e2d2c2b2a292827, 0x262524232221201f};
13108 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13109 }
13110
13111 if (lane_count < 48) {
13112 ASSERT_EQUAL_SVE(z5, z0);
13113 } else if (lane_count == 48) {
13114 uint64_t z5_expected[] = {0x0e0d0c0b0a090807, 0x060504030201002f};
13115 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13116 } else {
13117 uint64_t z5_expected[] = {0x3e3d3c3b3a393837, 0x363534333231302f};
13118 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13119 }
13120
13121 if (lane_count < 256) {
13122 ASSERT_EQUAL_SVE(z6, z0);
13123 } else {
13124 uint64_t z6_expected[] = {0x0e0d0c0b0a090807, 0x06050403020100ff};
13125 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
13126 }
13127 }
13128}
13129
13130TEST_SVE(sve_fp_paired_across) {
13131 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13132
13133 START();
13134
13135 __ Ptrue(p0.VnB());
13136 __ Pfalse(p1.VnB());
13137 __ Zip1(p2.VnS(), p0.VnS(), p1.VnS());
13138 __ Zip1(p3.VnD(), p0.VnD(), p1.VnD());
13139 __ Zip1(p4.VnH(), p0.VnH(), p1.VnH());
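  // p2, p3 and p4 select alternate S, D and H lanes respectively.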
13140
13141 __ Index(z0.VnS(), 3, 3);
13142 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
13143 __ Faddv(s1, p0, z0.VnS());
13144 __ Fminv(s2, p2, z0.VnS());
13145 __ Fmaxv(s3, p2, z0.VnS());
13146
13147 __ Index(z0.VnD(), -7, -7);
13148 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
13149 __ Faddv(d4, p0, z0.VnD());
13150 __ Fminv(d5, p3, z0.VnD());
13151 __ Fmaxv(d6, p3, z0.VnD());
13152
13153 __ Index(z0.VnH(), 1, 1);
13154 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
13155 __ Faddv(h7, p4, z0.VnH());
13156 __ Fminv(h8, p4, z0.VnH());
13157 __ Fmaxv(h9, p4, z0.VnH());
13158
13159 __ Dup(z10.VnH(), 0);
13160 __ Fdiv(z10.VnH(), p0.Merging(), z10.VnH(), z10.VnH());
13161 __ Insr(z10.VnH(), 0x5140);
13162 __ Insr(z10.VnH(), 0xd140);
13163 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 2);
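  // z10 now holds +42.0 in the lowest lane, -42.0 in the highest lane, and the
  // default NaN in every other lane.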
13164 __ Fmaxnmv(h11, p0, z10.VnH());
13165 __ Fmaxnmv(h12, p4, z10.VnH());
13166 __ Fminnmv(h13, p0, z10.VnH());
13167 __ Fminnmv(h14, p4, z10.VnH());
13168
13169 __ Dup(z10.VnS(), 0);
13170 __ Fdiv(z10.VnS(), p0.Merging(), z10.VnS(), z10.VnS());
13171 __ Insr(z10.VnS(), 0x42280000);
13172 __ Insr(z10.VnS(), 0xc2280000);
13173 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 4);
13174 __ Fmaxnmv(s15, p0, z10.VnS());
13175 __ Fmaxnmv(s16, p2, z10.VnS());
13176 __ Fminnmv(s17, p0, z10.VnS());
13177 __ Fminnmv(s18, p2, z10.VnS());
13178
13179 __ Dup(z10.VnD(), 0);
13180 __ Fdiv(z10.VnD(), p0.Merging(), z10.VnD(), z10.VnD());
13181 __ Insr(z10.VnD(), 0x4045000000000000);
13182 __ Insr(z10.VnD(), 0xc045000000000000);
13183 __ Ext(z10.VnB(), z10.VnB(), z10.VnB(), 8);
13184 __ Fmaxnmv(d19, p0, z10.VnD());
13185 __ Fmaxnmv(d20, p3, z10.VnD());
13186 __ Fminnmv(d21, p0, z10.VnD());
13187 __ Fminnmv(d22, p3, z10.VnD());
13188 END();
13189
13190 if (CAN_RUN()) {
13191 RUN();
13192    // Sum of 1 .. n is n(n+1)/2, computed as (n + 1) * (n / 2) since n is even.
13193 int n = core.GetSVELaneCount(kSRegSize);
13194 ASSERT_EQUAL_FP32(3 * ((n + 1) * (n / 2)), s1);
13195 ASSERT_EQUAL_FP32(3, s2);
13196 ASSERT_EQUAL_FP32(3 * n - 3, s3);
13197
13198 n /= 2; // Half as many lanes.
13199 ASSERT_EQUAL_FP64(-7 * ((n + 1) * (n / 2)), d4);
13200 ASSERT_EQUAL_FP64(-7 * (n - 1), d5);
13201 ASSERT_EQUAL_FP64(-7, d6);
13202
13203 // Sum of first n odd numbers is n^2.
13204 n = core.GetSVELaneCount(kHRegSize) / 2; // Half are odd numbers.
13205 ASSERT_EQUAL_FP16(Float16(n * n), h7);
13206 ASSERT_EQUAL_FP16(Float16(1), h8);
13207
13208 n = core.GetSVELaneCount(kHRegSize);
13209 ASSERT_EQUAL_FP16(Float16(n - 1), h9);
13210
13211 ASSERT_EQUAL_FP16(Float16(42), h11);
13212 ASSERT_EQUAL_FP16(Float16(42), h12);
13213 ASSERT_EQUAL_FP16(Float16(-42), h13);
13214 ASSERT_EQUAL_FP16(Float16(42), h14);
13215 ASSERT_EQUAL_FP32(42, s15);
13216 ASSERT_EQUAL_FP32(42, s16);
13217 ASSERT_EQUAL_FP32(-42, s17);
13218 ASSERT_EQUAL_FP32(42, s18);
13219 ASSERT_EQUAL_FP64(42, d19);
13220 ASSERT_EQUAL_FP64(42, d20);
13221 ASSERT_EQUAL_FP64(-42, d21);
13222 ASSERT_EQUAL_FP64(42, d22);
13223 }
13224}
13225
13226TEST_SVE(sve_frecpe_frsqrte) {
13227 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13228
13229 START();
13230
13231 __ Ptrue(p0.VnB());
13232
13233 __ Index(z0.VnH(), 0, 1);
13234 __ Fdup(z1.VnH(), Float16(1));
13235 __ Fscale(z1.VnH(), p0.Merging(), z1.VnH(), z0.VnH());
13236 __ Insr(z1.VnH(), 0);
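  // z1 now holds +0.0 in the lowest lane, followed by ascending powers of two
  // (1.0, 2.0, 4.0, ...).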
13237 __ Frsqrte(z2.VnH(), z1.VnH());
13238 __ Frecpe(z1.VnH(), z1.VnH());
13239
13240 __ Index(z0.VnS(), 0, 1);
13241 __ Fdup(z3.VnS(), Float16(1));
13242 __ Fscale(z3.VnS(), p0.Merging(), z3.VnS(), z0.VnS());
13243 __ Insr(z3.VnS(), 0);
13244 __ Frsqrte(z4.VnS(), z3.VnS());
13245 __ Frecpe(z3.VnS(), z3.VnS());
13246
13247 __ Index(z0.VnD(), 0, 1);
13248 __ Fdup(z5.VnD(), Float16(1));
13249 __ Fscale(z5.VnD(), p0.Merging(), z5.VnD(), z0.VnD());
13250 __ Insr(z5.VnD(), 0);
13251 __ Frsqrte(z6.VnD(), z5.VnD());
13252 __ Frecpe(z5.VnD(), z5.VnD());
13253 END();
13254
13255 if (CAN_RUN()) {
13256 RUN();
13257 uint64_t z1_expected[] = {0x23fc27fc2bfc2ffc, 0x33fc37fc3bfc7c00};
13258 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
13259 uint64_t z2_expected[] = {0x2ffc31a433fc35a4, 0x37fc39a43bfc7c00};
13260 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
13261
13262 uint64_t z3_expected[] = {0x3e7f80003eff8000, 0x3f7f80007f800000};
13263 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13264 uint64_t z4_expected[] = {0x3eff80003f348000, 0x3f7f80007f800000};
13265 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13266
13267 uint64_t z5_expected[] = {0x3feff00000000000, 0x7ff0000000000000};
13268 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13269 uint64_t z6_expected[] = {0x3feff00000000000, 0x7ff0000000000000};
13270 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
13271 }
13272}
13273
13274TEST_SVE(sve_frecps_frsqrts) {
13275 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13276
13277 START();
13278 __ Ptrue(p0.VnB());
13279
13280 __ Index(z0.VnH(), 0, -1);
13281 __ Fdup(z1.VnH(), Float16(1));
13282 __ Fscale(z1.VnH(), p0.Merging(), z1.VnH(), z0.VnH());
13283 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
13284 __ Insr(z1.VnH(), 0);
13285 __ Frsqrts(z2.VnH(), z1.VnH(), z0.VnH());
13286 __ Frecps(z1.VnH(), z1.VnH(), z0.VnH());
13287
13288 __ Index(z0.VnS(), 0, -1);
13289 __ Fdup(z3.VnS(), Float16(1));
13290 __ Fscale(z3.VnS(), p0.Merging(), z3.VnS(), z0.VnS());
13291 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
13292 __ Insr(z3.VnS(), 0);
13293 __ Frsqrts(z4.VnS(), z3.VnS(), z0.VnS());
13294 __ Frecps(z3.VnS(), z3.VnS(), z0.VnS());
13295
13296 __ Index(z0.VnD(), 0, -1);
13297 __ Fdup(z5.VnD(), Float16(1));
13298 __ Fscale(z5.VnD(), p0.Merging(), z5.VnD(), z0.VnD());
13299 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
13300 __ Insr(z5.VnD(), 0);
13301 __ Frsqrts(z6.VnD(), z5.VnD(), z0.VnD());
13302 __ Frecps(z5.VnD(), z5.VnD(), z0.VnD());
13303 END();
13304
13305 if (CAN_RUN()) {
13306 RUN();
13307 uint64_t z1_expected[] = {0x4038406040a04100, 0x4180420042004000};
13308 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
13309 uint64_t z2_expected[] = {0x3e383e603ea03f00, 0x3f80400040003e00};
13310 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
13311
13312 uint64_t z3_expected[] = {0x4030000040400000, 0x4040000040000000};
13313 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13314 uint64_t z4_expected[] = {0x3ff0000040000000, 0x400000003fc00000};
13315 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13316
13317 uint64_t z5_expected[] = {0x4008000000000000, 0x4000000000000000};
13318 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13319 uint64_t z6_expected[] = {0x4000000000000000, 0x3ff8000000000000};
13320 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
13321 }
13322}
13323
13324TEST_SVE(sve_ftsmul) {
13325 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13326
13327 START();
13328 __ Ptrue(p0.VnB());
13329
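  // Ftsmul computes the square of each element of the first source, negating
  // the result if bit 0 of the corresponding element of the second source is
  // set; NaN elements propagate through to the result.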
13330 __ Index(z0.VnH(), 0, 1);
13331 __ Rev(z1.VnH(), z0.VnH());
13332 __ Scvtf(z0.VnH(), p0.Merging(), z0.VnH());
13333 __ Dup(z2.VnH(), 0);
13334 __ Fdiv(z2.VnH(), p0.Merging(), z2.VnH(), z2.VnH());
13335 __ Ftsmul(z3.VnH(), z0.VnH(), z1.VnH());
13336 __ Ftsmul(z4.VnH(), z2.VnH(), z1.VnH());
13337
13338 __ Index(z0.VnS(), -7, 1);
13339 __ Rev(z1.VnS(), z0.VnS());
13340 __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
13341 __ Dup(z2.VnS(), 0);
13342 __ Fdiv(z2.VnS(), p0.Merging(), z2.VnS(), z2.VnS());
13343 __ Ftsmul(z5.VnS(), z0.VnS(), z1.VnS());
13344 __ Ftsmul(z6.VnS(), z2.VnS(), z1.VnS());
13345
13346 __ Index(z0.VnD(), 2, -1);
13347 __ Rev(z1.VnD(), z0.VnD());
13348 __ Scvtf(z0.VnD(), p0.Merging(), z0.VnD());
13349 __ Dup(z2.VnD(), 0);
13350 __ Fdiv(z2.VnD(), p0.Merging(), z2.VnD(), z2.VnD());
13351 __ Ftsmul(z7.VnD(), z0.VnD(), z1.VnD());
13352 __ Ftsmul(z8.VnD(), z2.VnD(), z1.VnD());
13353 END();
13354
13355 if (CAN_RUN()) {
13356 RUN();
13357 uint64_t z3_expected[] = {0x5220d0804e40cc00, 0x4880c4003c008000};
13358 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
13359 uint64_t z4_expected[] = {0x7e007e007e007e00, 0x7e007e007e007e00};
13360 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
13361
13362 uint64_t z5_expected[] = {0x41800000c1c80000, 0x42100000c2440000};
13363 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
13364 uint64_t z6_expected[] = {0x7fc000007fc00000, 0x7fc000007fc00000};
13365 ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
13366
13367 uint64_t z7_expected[] = {0x3ff0000000000000, 0xc010000000000000};
13368 ASSERT_EQUAL_SVE(z7_expected, z7.VnD());
13369 uint64_t z8_expected[] = {0x7ff8000000000000, 0x7ff8000000000000};
13370 ASSERT_EQUAL_SVE(z8_expected, z8.VnD());
13371 }
13372}
13373
13374typedef void (MacroAssembler::*FPMulAccFn)(
13375 const ZRegister& zd,
13376 const PRegisterM& pg,
13377 const ZRegister& za,
13378 const ZRegister& zn,
13379 const ZRegister& zm,
13380 FPMacroNaNPropagationOption nan_option);
13381
13382// `pg_inputs` is used to check that predication is applied correctly; it does
13383// not determine the `result` argument. `result` holds the expected values for
13384// an all-true predicate.
13385template <typename T, size_t N>
13386static void FPMulAccHelper(
13387 Test* config,
13388 FPMulAccFn macro,
13389 unsigned lane_size_in_bits,
13390 const int (&pg_inputs)[N],
13391 const T (&za_inputs)[N],
13392 const T (&zn_inputs)[N],
13393 const T (&zm_inputs)[N],
13394 const uint64_t (&result)[N],
13395 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
13396 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13397 START();
13398
13399 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
13400 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
13401 ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
13402 ZRegister zm = z3.WithLaneSize(lane_size_in_bits);
13403
13404 uint64_t za_rawbits[N];
13405 uint64_t zn_rawbits[N];
13406 uint64_t zm_rawbits[N];
13407
13408 FPToRawbitsWithSize(za_inputs, za_rawbits, lane_size_in_bits);
13409 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
13410 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
13411
13412 InsrHelper(&masm, za, za_rawbits);
13413 InsrHelper(&masm, zn, zn_rawbits);
13414 InsrHelper(&masm, zm, zm_rawbits);
13415
13416 uint64_t zd_rawbits[N];
13417 for (size_t i = 0; i < N; i++) {
13418 // Initialize `zd` with a signalling NaN.
13419 switch (lane_size_in_bits) {
13420 case kHRegSize:
13421 zd_rawbits[i] = 0x7c99;
13422 break;
13423 case kSRegSize:
13424 zd_rawbits[i] = 0x7f959999;
13425 break;
13426 case kDRegSize:
13427 zd_rawbits[i] = 0x7ff5555599999999;
13428 break;
13429 default:
13430 VIXL_UNIMPLEMENTED();
13431 break;
13432 }
13433 }
13434 InsrHelper(&masm, zd, zd_rawbits);
13435
13436 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
13437
13438 // Fmla macro automatically selects between fmla, fmad and movprfx + fmla
13439 // Fmls `ditto` fmls, fmsb and movprfx + fmls
13440 // Fnmla `ditto` fnmla, fnmad and movprfx + fnmla
13441 // Fnmls `ditto` fnmls, fnmsb and movprfx + fnmls
13442 // based on what registers are aliased.
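  // The four results below alias the accumulator (za), the first multiplicand
  // (zn), the second multiplicand (zm), and none of the inputs, respectively.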
13443 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
13444 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
13445 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
13446 ZRegister d_result = z13.WithLaneSize(lane_size_in_bits);
13447
13448 __ Mov(da_result, za);
13449 (masm.*macro)(da_result, p0.Merging(), da_result, zn, zm, nan_option);
13450
13451 __ Mov(dn_result, zn);
13452 (masm.*macro)(dn_result, p0.Merging(), za, dn_result, zm, nan_option);
13453
13454 __ Mov(dm_result, zm);
13455 (masm.*macro)(dm_result, p0.Merging(), za, zn, dm_result, nan_option);
13456
13457 __ Mov(d_result, zd);
13458 (masm.*macro)(d_result, p0.Merging(), za, zn, zm, nan_option);
13459
13460 END();
13461
13462 if (CAN_RUN()) {
13463 RUN();
13464
13465 ASSERT_EQUAL_SVE(za_rawbits, za);
13466 ASSERT_EQUAL_SVE(zn_rawbits, zn);
13467 ASSERT_EQUAL_SVE(zm_rawbits, zm);
13468
13469 uint64_t da_expected[N];
13470 uint64_t dn_expected[N];
13471 uint64_t dm_expected[N];
13472 uint64_t d_expected[N];
13473 for (size_t i = 0; i < N; i++) {
13474 da_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : za_rawbits[i];
13475 dn_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zn_rawbits[i];
13476 dm_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zm_rawbits[i];
13477 d_expected[i] = ((pg_inputs[i] & 1) != 0) ? result[i] : zd_rawbits[i];
13478 }
13479
13480 ASSERT_EQUAL_SVE(da_expected, da_result);
13481 ASSERT_EQUAL_SVE(dn_expected, dn_result);
13482 ASSERT_EQUAL_SVE(dm_expected, dm_result);
13483 ASSERT_EQUAL_SVE(d_expected, d_result);
13484 }
13485}
13486
13487TEST_SVE(sve_fmla_fmad) {
13488 // fmla : zd = za + zn * zm
13489 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
13490 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
13491 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
13492 int pg_inputs[] = {1, 1, 0, 1};
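  // For example, element 0: -39.0 + (-5.0 * 9.0) = -84.0.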
13493
13494 uint64_t fmla_result_h[] = {Float16ToRawbits(Float16(-84.0)),
13495 Float16ToRawbits(Float16(101.0)),
13496 Float16ToRawbits(Float16(33.0)),
13497 Float16ToRawbits(Float16(42.0))};
13498
13499 // `fmad` has been tested in the helper.
13500 FPMulAccHelper(config,
13501 &MacroAssembler::Fmla,
13502 kHRegSize,
13503 pg_inputs,
13504 za_inputs,
13505 zn_inputs,
13506 zm_inputs,
13507 fmla_result_h);
13508
13509 uint64_t fmla_result_s[] = {FloatToRawbits(-84.0f),
13510 FloatToRawbits(101.0f),
13511 FloatToRawbits(33.0f),
13512 FloatToRawbits(42.0f)};
13513
13514 FPMulAccHelper(config,
13515 &MacroAssembler::Fmla,
13516 kSRegSize,
13517 pg_inputs,
13518 za_inputs,
13519 zn_inputs,
13520 zm_inputs,
13521 fmla_result_s);
13522
13523 uint64_t fmla_result_d[] = {DoubleToRawbits(-84.0),
13524 DoubleToRawbits(101.0),
13525 DoubleToRawbits(33.0),
13526 DoubleToRawbits(42.0)};
13527
13528 FPMulAccHelper(config,
13529 &MacroAssembler::Fmla,
13530 kDRegSize,
13531 pg_inputs,
13532 za_inputs,
13533 zn_inputs,
13534 zm_inputs,
13535 fmla_result_d);
13536}
13537
13538TEST_SVE(sve_fmls_fmsb) {
13539 // fmls : zd = za - zn * zm
13540 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
13541 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
13542 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
13543 int pg_inputs[] = {1, 0, 1, 1};
13544
13545 uint64_t fmls_result_h[] = {Float16ToRawbits(Float16(6.0)),
13546 Float16ToRawbits(Float16(-99.0)),
13547 Float16ToRawbits(Float16(-39.0)),
13548 Float16ToRawbits(Float16(-38.0))};
13549
13550  // The `fmsb` form is also exercised by the helper, via register aliasing.
13551 FPMulAccHelper(config,
13552 &MacroAssembler::Fmls,
13553 kHRegSize,
13554 pg_inputs,
13555 za_inputs,
13556 zn_inputs,
13557 zm_inputs,
13558 fmls_result_h);
13559
13560 uint64_t fmls_result_s[] = {FloatToRawbits(6.0f),
13561 FloatToRawbits(-99.0f),
13562 FloatToRawbits(-39.0f),
13563 FloatToRawbits(-38.0f)};
13564
13565 FPMulAccHelper(config,
13566 &MacroAssembler::Fmls,
13567 kSRegSize,
13568 pg_inputs,
13569 za_inputs,
13570 zn_inputs,
13571 zm_inputs,
13572 fmls_result_s);
13573
13574 uint64_t fmls_result_d[] = {DoubleToRawbits(6.0),
13575 DoubleToRawbits(-99.0),
13576 DoubleToRawbits(-39.0),
13577 DoubleToRawbits(-38.0)};
13578
13579 FPMulAccHelper(config,
13580 &MacroAssembler::Fmls,
13581 kDRegSize,
13582 pg_inputs,
13583 za_inputs,
13584 zn_inputs,
13585 zm_inputs,
13586 fmls_result_d);
13587}
13588
13589TEST_SVE(sve_fnmla_fnmad) {
13590 // fnmla : zd = -za - zn * zm
13591 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
13592 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
13593 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
13594 int pg_inputs[] = {0, 1, 1, 1};
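  // For example, lane 1 computes -(1.0) - (-20.0 * -5.0) = -101.0, and lane 0
  // is masked off (pg = 0).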
13595
13596 uint64_t fnmla_result_h[] = {Float16ToRawbits(Float16(84.0)),
13597 Float16ToRawbits(Float16(-101.0)),
13598 Float16ToRawbits(Float16(-33.0)),
13599 Float16ToRawbits(Float16(-42.0))};
13600
13601  // The `fnmad` form is also exercised by the helper, via register aliasing.
13602 FPMulAccHelper(config,
13603 &MacroAssembler::Fnmla,
13604 kHRegSize,
13605 pg_inputs,
13606 za_inputs,
13607 zn_inputs,
13608 zm_inputs,
13609 fnmla_result_h);
13610
13611 uint64_t fnmla_result_s[] = {FloatToRawbits(84.0f),
13612 FloatToRawbits(-101.0f),
13613 FloatToRawbits(-33.0f),
13614 FloatToRawbits(-42.0f)};
13615
13616 FPMulAccHelper(config,
13617 &MacroAssembler::Fnmla,
13618 kSRegSize,
13619 pg_inputs,
13620 za_inputs,
13621 zn_inputs,
13622 zm_inputs,
13623 fnmla_result_s);
13624
13625 uint64_t fnmla_result_d[] = {DoubleToRawbits(84.0),
13626 DoubleToRawbits(-101.0),
13627 DoubleToRawbits(-33.0),
13628 DoubleToRawbits(-42.0)};
13629
13630 FPMulAccHelper(config,
13631 &MacroAssembler::Fnmla,
13632 kDRegSize,
13633 pg_inputs,
13634 za_inputs,
13635 zn_inputs,
13636 zm_inputs,
13637 fnmla_result_d);
13638}
13639
13640TEST_SVE(sve_fnmls_fnmsb) {
13641 // fnmls : zd = -za + zn * zm
13642 double za_inputs[] = {-39.0, 1.0, -3.0, 2.0};
13643 double zn_inputs[] = {-5.0, -20.0, 9.0, 8.0};
13644 double zm_inputs[] = {9.0, -5.0, 4.0, 5.0};
13645 int pg_inputs[] = {1, 1, 1, 0};
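  // For example, lane 0 computes -(-39.0) + (-5.0 * 9.0) = -6.0, and lane 3 is
  // masked off (pg = 0).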
13646
13647 uint64_t fnmls_result_h[] = {Float16ToRawbits(Float16(-6.0)),
13648 Float16ToRawbits(Float16(99.0)),
13649 Float16ToRawbits(Float16(39.0)),
13650 Float16ToRawbits(Float16(38.0))};
13651
13652  // The `fnmsb` form is also exercised by the helper, via register aliasing.
13653 FPMulAccHelper(config,
13654 &MacroAssembler::Fnmls,
13655 kHRegSize,
13656 pg_inputs,
13657 za_inputs,
13658 zn_inputs,
13659 zm_inputs,
13660 fnmls_result_h);
13661
13662 uint64_t fnmls_result_s[] = {FloatToRawbits(-6.0f),
13663 FloatToRawbits(99.0f),
13664 FloatToRawbits(39.0f),
13665 FloatToRawbits(38.0f)};
13666
13667 FPMulAccHelper(config,
13668 &MacroAssembler::Fnmls,
13669 kSRegSize,
13670 pg_inputs,
13671 za_inputs,
13672 zn_inputs,
13673 zm_inputs,
13674 fnmls_result_s);
13675
13676 uint64_t fnmls_result_d[] = {DoubleToRawbits(-6.0),
13677 DoubleToRawbits(99.0),
13678 DoubleToRawbits(39.0),
13679 DoubleToRawbits(38.0)};
13680
13681 FPMulAccHelper(config,
13682 &MacroAssembler::Fnmls,
13683 kDRegSize,
13684 pg_inputs,
13685 za_inputs,
13686 zn_inputs,
13687 zm_inputs,
13688 fnmls_result_d);
13689}
13690
13691typedef void (MacroAssembler::*FPMulAccIdxFn)(const ZRegister& zd,
13692 const ZRegister& za,
13693 const ZRegister& zn,
13694 const ZRegister& zm,
13695 int index);
13696
13697template <typename T, size_t N>
13698static void FPMulAccIdxHelper(Test* config,
13699 FPMulAccFn macro,
13700 FPMulAccIdxFn macro_idx,
13701 const T (&za_inputs)[N],
13702 const T (&zn_inputs)[N],
13703 const T (&zm_inputs)[N]) {
13704 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
13705 START();
13706
13707 InsrHelper(&masm, z0.VnD(), zm_inputs);
13708 InsrHelper(&masm, z1.VnD(), zn_inputs);
13709 InsrHelper(&masm, z2.VnD(), za_inputs);
13710
13711 __ Mov(z3, z0);
13712 (masm.*macro_idx)(z3.VnH(), z2.VnH(), z1.VnH(), z3.VnH(), 0); // zd == zm
13713 __ Mov(z4, z1);
13714 (masm.*macro_idx)(z4.VnH(), z2.VnH(), z4.VnH(), z0.VnH(), 1); // zd == zn
13715 __ Mov(z5, z2);
13716 (masm.*macro_idx)(z5.VnH(), z5.VnH(), z1.VnH(), z0.VnH(), 4); // zd == za
13717 (masm.*macro_idx)(z6.VnH(), z2.VnH(), z1.VnH(), z0.VnH(), 7);
13718
13719 __ Mov(z7, z0);
13720 (masm.*macro_idx)(z7.VnS(), z2.VnS(), z1.VnS(), z7.VnS(), 0); // zd == zm
13721 __ Mov(z8, z1);
13722 (masm.*macro_idx)(z8.VnS(), z2.VnS(), z8.VnS(), z0.VnS(), 1); // zd == zn
13723 __ Mov(z9, z2);
13724 (masm.*macro_idx)(z9.VnS(), z9.VnS(), z1.VnS(), z0.VnS(), 2); // zd == za
13725 (masm.*macro_idx)(z10.VnS(), z2.VnS(), z1.VnS(), z0.VnS(), 3);
13726
13727 __ Mov(z11, z0);
13728 (masm.*macro_idx)(z11.VnD(), z2.VnD(), z1.VnD(), z11.VnD(), 0); // zd == zm
13729 __ Mov(z12, z1);
13730 (masm.*macro_idx)(z12.VnD(), z2.VnD(), z12.VnD(), z0.VnD(), 1); // zd == zn
13731 __ Mov(z13, z2);
13732 (masm.*macro_idx)(z13.VnD(), z13.VnD(), z1.VnD(), z0.VnD(), 0); // zd == za
13733 __ Mov(z14, z0);
13734 // zd == zn == zm
13735 (masm.*macro_idx)(z14.VnD(), z2.VnD(), z14.VnD(), z14.VnD(), 1);
13736
13737 __ Ptrue(p0.VnB());
13738
13739  // The indexed forms of Fmla and Fmls never swap their arguments, so pass
13740  // strict NaN propagation mode to ensure that the vector-form macros below
13741  // never swap their arguments either.
13742 FPMacroNaNPropagationOption option = StrictNaNPropagation;
13743 // Compute the results using other instructions.
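  // Broadcasting the indexed lane of zm with Dup and then applying the vector
  // form should produce the same result as the indexed form; the assertions
  // below compare the two.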
13744 __ Dup(z31.VnH(), z0.VnH(), 0);
13745 (masm.*macro)(z15.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
13746 __ Dup(z31.VnH(), z0.VnH(), 1);
13747 (masm.*macro)(z16.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
13748 __ Dup(z31.VnH(), z0.VnH(), 4);
13749 (masm.*macro)(z17.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
13750 __ Dup(z31.VnH(), z0.VnH(), 7);
13751 (masm.*macro)(z18.VnH(), p0.Merging(), z2.VnH(), z1.VnH(), z31.VnH(), option);
13752
13753 __ Dup(z31.VnS(), z0.VnS(), 0);
13754 (masm.*macro)(z19.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
13755 __ Dup(z31.VnS(), z0.VnS(), 1);
13756 (masm.*macro)(z20.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
13757 __ Dup(z31.VnS(), z0.VnS(), 2);
13758 (masm.*macro)(z21.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
13759 __ Dup(z31.VnS(), z0.VnS(), 3);
13760 (masm.*macro)(z22.VnS(), p0.Merging(), z2.VnS(), z1.VnS(), z31.VnS(), option);
13761
13762 __ Dup(z31.VnD(), z0.VnD(), 0);
13763 (masm.*macro)(z23.VnD(), p0.Merging(), z2.VnD(), z1.VnD(), z31.VnD(), option);
13764 __ Dup(z31.VnD(), z0.VnD(), 1);
13765 (masm.*macro)(z24.VnD(), p0.Merging(), z2.VnD(), z1.VnD(), z31.VnD(), option);
13766 __ Dup(z31.VnD(), z0.VnD(), 1);
13767 (masm.*macro)(z25.VnD(), p0.Merging(), z2.VnD(), z0.VnD(), z31.VnD(), option);
13768
13769 END();
13770
13771 if (CAN_RUN()) {
13772 RUN();
13773
13774 ASSERT_EQUAL_SVE(z15.VnH(), z3.VnH());
13775 ASSERT_EQUAL_SVE(z16.VnH(), z4.VnH());
13776 ASSERT_EQUAL_SVE(z17.VnH(), z5.VnH());
13777 ASSERT_EQUAL_SVE(z18.VnH(), z6.VnH());
13778
13779 ASSERT_EQUAL_SVE(z19.VnS(), z7.VnS());
13780 ASSERT_EQUAL_SVE(z20.VnS(), z8.VnS());
13781 ASSERT_EQUAL_SVE(z21.VnS(), z9.VnS());
13782 ASSERT_EQUAL_SVE(z22.VnS(), z10.VnS());
13783
13784 ASSERT_EQUAL_SVE(z23.VnD(), z11.VnD());
13785 ASSERT_EQUAL_SVE(z24.VnD(), z12.VnD());
13786 ASSERT_EQUAL_SVE(z11.VnD(), z13.VnD());
13787 ASSERT_EQUAL_SVE(z25.VnD(), z14.VnD());
13788 }
13789}
13790
13791TEST_SVE(sve_fmla_fmls_index) {
13792 uint64_t zm_inputs_1[] = {0x3ff000003f803c00, 0xbff00000bf80bc00};
13793 uint64_t zn_inputs_1[] = {0x3ff012343ff03c76, 0xbff01234bff0bc76};
13794 uint64_t za_inputs_1[] = {0x3c004000bc00c000, 0x64006800e400e800};
13795
13796 // Using the vector form of Fmla and Fmls to verify the indexed form.
13797 FPMulAccIdxHelper(config,
13798 &MacroAssembler::Fmla, // vector form
13799 &MacroAssembler::Fmla, // indexed form
13800 za_inputs_1,
13801 zn_inputs_1,
13802 zm_inputs_1);
13803
13804 FPMulAccIdxHelper(config,
13805 &MacroAssembler::Fmls, // vector form
13806 &MacroAssembler::Fmls, // indexed form
13807 za_inputs_1,
13808 zn_inputs_1,
13809 zm_inputs_1);
13810
13811 uint64_t zm_inputs_2[] = {0x7ff5555511111111, // NaN
13812 0xfff0000000000000}; // Infinity
13813 uint64_t zn_inputs_2[] = {0x7f9511117fc00000, // NaN
13814 0x7f800000ff800000}; // Infinity
13815 uint64_t za_inputs_2[] = {0x7c11000000007e00, // NaN
13816 0x000000007c00fc00}; // Infinity
13817 FPMulAccIdxHelper(config,
13818 &MacroAssembler::Fmla, // vector form
13819 &MacroAssembler::Fmla, // indexed form
13820 za_inputs_2,
13821 zn_inputs_2,
13822 zm_inputs_2);
13823
13824 FPMulAccIdxHelper(config,
13825 &MacroAssembler::Fmls, // vector form
13826 &MacroAssembler::Fmls, // indexed form
13827 za_inputs_2,
13828 zn_inputs_2,
13829 zm_inputs_2);
13830}
13831
13832// Execute a number of instructions which all use ProcessNaNs, and check that
13833// they all propagate NaNs correctly.
13834template <typename Ti, typename Td, size_t N>
13835static void ProcessNaNsHelper(Test* config,
13836 int lane_size_in_bits,
13837 const Ti (&zn_inputs)[N],
13838 const Ti (&zm_inputs)[N],
13839 const Td (&zd_expected)[N],
13840 FPMacroNaNPropagationOption nan_option) {
13841 ArithFn arith_unpredicated_macro[] = {&MacroAssembler::Fadd,
13842 &MacroAssembler::Fsub,
13843 &MacroAssembler::Fmul};
13844
13845 for (size_t i = 0; i < ArrayLength(arith_unpredicated_macro); i++) {
13846 FPBinArithHelper(config,
13847 arith_unpredicated_macro[i],
13848 lane_size_in_bits,
13849 zn_inputs,
13850 zm_inputs,
13851 zd_expected);
13852 }
13853
13854 FPArithPredicatedFn arith_predicated_macro[] = {&MacroAssembler::Fmax,
13855 &MacroAssembler::Fmin};
13856 int pg_inputs[N];
13857  // Use an all-true predicate: this helper checks how special values are
13858  // handled, so no lane should be masked off.
13859 for (size_t i = 0; i < N; i++) {
13860 pg_inputs[i] = 1;
13861 }
13862
13863 // fdivr propagates the quotient (Zm) preferentially, so we don't actually
13864 // need any special handling for StrictNaNPropagation.
13865 FPBinArithHelper(config,
13866 NULL,
13867 &MacroAssembler::Fdiv,
13868 lane_size_in_bits,
13869 // With an all-true predicate, the value in zd is
13870 // irrelevant to the operations.
13871 zn_inputs,
13872 pg_inputs,
13873 zn_inputs,
13874 zm_inputs,
13875 zd_expected);
13876
13877 for (size_t i = 0; i < ArrayLength(arith_predicated_macro); i++) {
13878 FPBinArithHelper(config,
13879 arith_predicated_macro[i],
13880 NULL,
13881 lane_size_in_bits,
13882 // With an all-true predicate, the value in zd is
13883 // irrelevant to the operations.
13884 zn_inputs,
13885 pg_inputs,
13886 zn_inputs,
13887 zm_inputs,
13888 zd_expected,
13889 nan_option);
13890 }
13891}
13892
13893template <typename Ti, typename Td, size_t N>
13894static void ProcessNaNsHelper3(Test* config,
13895 int lane_size_in_bits,
13896 const Ti (&za_inputs)[N],
13897 const Ti (&zn_inputs)[N],
13898 const Ti (&zm_inputs)[N],
13899 const Td (&zd_expected_fmla)[N],
13900 const Td (&zd_expected_fmls)[N],
13901 const Td (&zd_expected_fnmla)[N],
13902 const Td (&zd_expected_fnmls)[N],
13903 FPMacroNaNPropagationOption nan_option) {
13904 int pg_inputs[N];
13905  // Use an all-true predicate: this helper checks how special values are
13906  // handled, so no lane should be masked off.
13907 for (size_t i = 0; i < N; i++) {
13908 pg_inputs[i] = 1;
13909 }
13910
13911 FPMulAccHelper(config,
13912 &MacroAssembler::Fmla,
13913 lane_size_in_bits,
13914 pg_inputs,
13915 za_inputs,
13916 zn_inputs,
13917 zm_inputs,
13918 zd_expected_fmla,
13919 nan_option);
13920
13921 FPMulAccHelper(config,
13922 &MacroAssembler::Fmls,
13923 lane_size_in_bits,
13924 pg_inputs,
13925 za_inputs,
13926 zn_inputs,
13927 zm_inputs,
13928 zd_expected_fmls,
13929 nan_option);
13930
13931 FPMulAccHelper(config,
13932 &MacroAssembler::Fnmla,
13933 lane_size_in_bits,
13934 pg_inputs,
13935 za_inputs,
13936 zn_inputs,
13937 zm_inputs,
13938 zd_expected_fnmla,
13939 nan_option);
13940
13941 FPMulAccHelper(config,
13942 &MacroAssembler::Fnmls,
13943 lane_size_in_bits,
13944 pg_inputs,
13945 za_inputs,
13946 zn_inputs,
13947 zm_inputs,
13948 zd_expected_fnmls,
13949 nan_option);
13950}
13951
13952TEST_SVE(sve_process_nans_double) {
13953 // Use non-standard NaNs to check that the payload bits are preserved.
13954 double sa = RawbitsToDouble(0x7ff5555511111111);
13955 double sn = RawbitsToDouble(0x7ff5555522222222);
13956 double sm = RawbitsToDouble(0x7ff5555533333333);
13957 double qa = RawbitsToDouble(0x7ffaaaaa11111111);
13958 double qn = RawbitsToDouble(0x7ffaaaaa22222222);
13959 double qm = RawbitsToDouble(0x7ffaaaaa33333333);
13960 VIXL_ASSERT(IsSignallingNaN(sa));
13961 VIXL_ASSERT(IsSignallingNaN(sn));
13962 VIXL_ASSERT(IsSignallingNaN(sm));
13963 VIXL_ASSERT(IsQuietNaN(qa));
13964 VIXL_ASSERT(IsQuietNaN(qn));
13965 VIXL_ASSERT(IsQuietNaN(qm));
13966
13967 // The input NaNs after passing through ProcessNaN.
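  // For example, the signalling sa (0x7ff5555511111111) becomes
  // 0x7ffd555511111111: the topmost mantissa bit is set to quieten it and the
  // payload is preserved. Quiet NaNs pass through unchanged.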
13968 uint64_t sa_proc = 0x7ffd555511111111;
13969 uint64_t sn_proc = 0x7ffd555522222222;
13970 uint64_t sm_proc = 0x7ffd555533333333;
13971 uint64_t qa_proc = DoubleToRawbits(qa);
13972 uint64_t qn_proc = DoubleToRawbits(qn);
13973 uint64_t qm_proc = DoubleToRawbits(qm);
13974 uint64_t sa_proc_n = sa_proc ^ kDSignMask;
13975 uint64_t sn_proc_n = sn_proc ^ kDSignMask;
13976 uint64_t qa_proc_n = qa_proc ^ kDSignMask;
13977 uint64_t qn_proc_n = qn_proc ^ kDSignMask;
13978
13979 // Quiet NaNs are propagated.
13980 double zn_inputs_1[] = {qn, 0.0, 0.0, qm, qn, qm};
13981 double zm_inputs_1[] = {0.0, qn, qm, 0.0, qm, qn};
13982 uint64_t zd_expected_1[] =
13983 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
13984
13985 ProcessNaNsHelper(config,
13986 kDRegSize,
13987 zn_inputs_1,
13988 zm_inputs_1,
13989 zd_expected_1,
13990 StrictNaNPropagation);
13991
13992 // Signalling NaNs are propagated.
13993 double zn_inputs_2[] = {sn, 0.0, 0.0, sm, sn, sm};
13994 double zm_inputs_2[] = {0.0, sn, sm, 0.0, sm, sn};
13995 uint64_t zd_expected_2[] =
13996 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
13997 ProcessNaNsHelper(config,
13998 kDRegSize,
13999 zn_inputs_2,
14000 zm_inputs_2,
14001 zd_expected_2,
14002 StrictNaNPropagation);
14003
14004 // Signalling NaNs take precedence over quiet NaNs.
14005 double zn_inputs_3[] = {sn, qn, sn, sn, qn};
14006 double zm_inputs_3[] = {qm, sm, sm, qn, sn};
14007 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
14008 ProcessNaNsHelper(config,
14009 kDRegSize,
14010 zn_inputs_3,
14011 zm_inputs_3,
14012 zd_expected_3,
14013 StrictNaNPropagation);
14014
14015 double za_inputs_4[] = {qa, qa, 0.0, 0.0, qa, qa};
14016 double zn_inputs_4[] = {qn, 0.0, 0.0, qn, qn, qn};
14017 double zm_inputs_4[] = {0.0, qm, qm, qm, qm, 0.0};
14018
14019 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
14020 // If `n` is propagated, its sign is inverted by fmls and fnmla.
14021 // If `m` is propagated, its sign is never inverted.
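  // For example, in lane 3 `n` is propagated: fmla and fnmls produce qn_proc,
  // while fmls and fnmla produce qn_proc_n (the sign-flipped encoding).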
14022 uint64_t zd_expected_fmla_4[] =
14023 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
14024 uint64_t zd_expected_fmls_4[] =
14025 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
14026 uint64_t zd_expected_fnmla_4[] =
14027 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
14028 uint64_t zd_expected_fnmls_4[] =
14029 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
14030
14031 ProcessNaNsHelper3(config,
14032 kDRegSize,
14033 za_inputs_4,
14034 zn_inputs_4,
14035 zm_inputs_4,
14036 zd_expected_fmla_4,
14037 zd_expected_fmls_4,
14038 zd_expected_fnmla_4,
14039 zd_expected_fnmls_4,
14040 StrictNaNPropagation);
14041
14042 // Signalling NaNs take precedence over quiet NaNs.
14043 double za_inputs_5[] = {qa, qa, sa, sa, sa};
14044 double zn_inputs_5[] = {qn, sn, sn, sn, qn};
14045 double zm_inputs_5[] = {sm, qm, sm, qa, sm};
14046 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
14047 uint64_t zd_expected_fmls_5[] = {sm_proc,
14048 sn_proc_n,
14049 sa_proc,
14050 sa_proc,
14051 sa_proc};
14052 uint64_t zd_expected_fnmla_5[] = {sm_proc,
14053 sn_proc_n,
14054 sa_proc_n,
14055 sa_proc_n,
14056 sa_proc_n};
14057 uint64_t zd_expected_fnmls_5[] = {sm_proc,
14058 sn_proc,
14059 sa_proc_n,
14060 sa_proc_n,
14061 sa_proc_n};
14062
14063 ProcessNaNsHelper3(config,
14064 kDRegSize,
14065 za_inputs_5,
14066 zn_inputs_5,
14067 zm_inputs_5,
14068 zd_expected_fmla_5,
14069 zd_expected_fmls_5,
14070 zd_expected_fnmla_5,
14071 zd_expected_fnmls_5,
14072 StrictNaNPropagation);
14073
14074 const double inf = kFP64PositiveInfinity;
14075 const double inf_n = kFP64NegativeInfinity;
14076 uint64_t inf_proc = DoubleToRawbits(inf);
14077 uint64_t inf_proc_n = DoubleToRawbits(inf_n);
14078 uint64_t d_inf_proc = DoubleToRawbits(kFP64DefaultNaN);
14079
14080  double za_inputs_6[] = {qa, qa, 0.0, -0.0, qa, sa};
14081  double zn_inputs_6[] = {inf, -0.0, -0.0, inf, inf_n, inf};
14082  double zm_inputs_6[] = {0.0, inf_n, inf, inf, inf, 0.0};
14083
14084 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
14085 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
14086 // quiet_nan.
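  // For example, lane 0 computes qa + (inf * 0.0) and so produces the default
  // NaN for every form, whereas lane 4 computes qa + (-inf * inf) and
  // propagates qa (negated by fnmla and fnmls).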
14087 uint64_t zd_expected_fmla_6[] =
14088 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
14089 uint64_t zd_expected_fmls_6[] =
14090 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
14091 uint64_t zd_expected_fnmla_6[] =
14092 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
14093 uint64_t zd_expected_fnmls_6[] =
14094 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
14095
14096 ProcessNaNsHelper3(config,
14097 kDRegSize,
14098 za_inputs_6,
14099 zn_inputs_6,
14100 zm_inputs_6,
14101 zd_expected_fmla_6,
14102 zd_expected_fmls_6,
14103 zd_expected_fnmla_6,
14104 zd_expected_fnmls_6,
14105 StrictNaNPropagation);
14106}
14107
14108TEST_SVE(sve_process_nans_float) {
14109 // Use non-standard NaNs to check that the payload bits are preserved.
14110 float sa = RawbitsToFloat(0x7f951111);
14111 float sn = RawbitsToFloat(0x7f952222);
14112 float sm = RawbitsToFloat(0x7f953333);
14113 float qa = RawbitsToFloat(0x7fea1111);
14114 float qn = RawbitsToFloat(0x7fea2222);
14115 float qm = RawbitsToFloat(0x7fea3333);
14116 VIXL_ASSERT(IsSignallingNaN(sa));
14117 VIXL_ASSERT(IsSignallingNaN(sn));
14118 VIXL_ASSERT(IsSignallingNaN(sm));
14119 VIXL_ASSERT(IsQuietNaN(qa));
14120 VIXL_ASSERT(IsQuietNaN(qn));
14121 VIXL_ASSERT(IsQuietNaN(qm));
14122
14123 // The input NaNs after passing through ProcessNaN.
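  // For example, the signalling sa (0x7f951111) is quietened to 0x7fd51111 by
  // setting the topmost mantissa bit; quiet NaNs are left unchanged.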
14124 uint32_t sa_proc = 0x7fd51111;
14125 uint32_t sn_proc = 0x7fd52222;
14126 uint32_t sm_proc = 0x7fd53333;
14127 uint32_t qa_proc = FloatToRawbits(qa);
14128 uint32_t qn_proc = FloatToRawbits(qn);
14129 uint32_t qm_proc = FloatToRawbits(qm);
14130 uint32_t sa_proc_n = sa_proc ^ kSSignMask;
14131 uint32_t sn_proc_n = sn_proc ^ kSSignMask;
14132 uint32_t qa_proc_n = qa_proc ^ kSSignMask;
14133 uint32_t qn_proc_n = qn_proc ^ kSSignMask;
14134
14135 // Quiet NaNs are propagated.
14136 float zn_inputs_1[] = {qn, 0.0f, 0.0f, qm, qn, qm};
14137 float zm_inputs_1[] = {0.0f, qn, qm, 0.0f, qm, qn};
14138 uint64_t zd_expected_1[] =
14139 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
14140
14141 ProcessNaNsHelper(config,
14142 kSRegSize,
14143 zn_inputs_1,
14144 zm_inputs_1,
14145 zd_expected_1,
14146 StrictNaNPropagation);
14147
14148 // Signalling NaNs are propagated.
14149 float zn_inputs_2[] = {sn, 0.0f, 0.0f, sm, sn, sm};
14150 float zm_inputs_2[] = {0.0f, sn, sm, 0.0f, sm, sn};
14151 uint64_t zd_expected_2[] =
14152 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
14153 ProcessNaNsHelper(config,
14154 kSRegSize,
14155 zn_inputs_2,
14156 zm_inputs_2,
14157 zd_expected_2,
14158 StrictNaNPropagation);
14159
14160 // Signalling NaNs take precedence over quiet NaNs.
14161 float zn_inputs_3[] = {sn, qn, sn, sn, qn};
14162 float zm_inputs_3[] = {qm, sm, sm, qn, sn};
14163 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
14164 ProcessNaNsHelper(config,
14165 kSRegSize,
14166 zn_inputs_3,
14167 zm_inputs_3,
14168 zd_expected_3,
14169 StrictNaNPropagation);
14170
14171 float za_inputs_4[] = {qa, qa, 0.0f, 0.0f, qa, qa};
14172 float zn_inputs_4[] = {qn, 0.0f, 0.0f, qn, qn, qn};
14173 float zm_inputs_4[] = {0.0f, qm, qm, qm, qm, 0.0f};
14174
14175 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
14176 // If `n` is propagated, its sign is inverted by fmls and fnmla.
14177 // If `m` is propagated, its sign is never inverted.
14178 uint64_t zd_expected_fmla_4[] =
14179 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
14180 uint64_t zd_expected_fmls_4[] =
14181 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
14182 uint64_t zd_expected_fnmla_4[] =
14183 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
14184 uint64_t zd_expected_fnmls_4[] =
14185 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
14186
14187 ProcessNaNsHelper3(config,
14188 kSRegSize,
14189 za_inputs_4,
14190 zn_inputs_4,
14191 zm_inputs_4,
14192 zd_expected_fmla_4,
14193 zd_expected_fmls_4,
14194 zd_expected_fnmla_4,
14195 zd_expected_fnmls_4,
14196 StrictNaNPropagation);
14197
14198 // Signalling NaNs take precedence over quiet NaNs.
14199 float za_inputs_5[] = {qa, qa, sa, sa, sa};
14200 float zn_inputs_5[] = {qn, sn, sn, sn, qn};
14201 float zm_inputs_5[] = {sm, qm, sm, qa, sm};
14202 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
14203 uint64_t zd_expected_fmls_5[] = {sm_proc,
14204 sn_proc_n,
14205 sa_proc,
14206 sa_proc,
14207 sa_proc};
14208 uint64_t zd_expected_fnmla_5[] = {sm_proc,
14209 sn_proc_n,
14210 sa_proc_n,
14211 sa_proc_n,
14212 sa_proc_n};
14213 uint64_t zd_expected_fnmls_5[] = {sm_proc,
14214 sn_proc,
14215 sa_proc_n,
14216 sa_proc_n,
14217 sa_proc_n};
14218
14219 ProcessNaNsHelper3(config,
14220 kSRegSize,
14221 za_inputs_5,
14222 zn_inputs_5,
14223 zm_inputs_5,
14224 zd_expected_fmla_5,
14225 zd_expected_fmls_5,
14226 zd_expected_fnmla_5,
14227 zd_expected_fnmls_5,
14228 StrictNaNPropagation);
14229
14230 const float inf = kFP32PositiveInfinity;
14231 const float inf_n = kFP32NegativeInfinity;
14232 uint32_t inf_proc = FloatToRawbits(inf);
14233 uint32_t inf_proc_n = FloatToRawbits(inf_n);
14234 uint32_t d_inf_proc = FloatToRawbits(kFP32DefaultNaN);
14235
14236 float za_inputs_6[] = {qa, qa, 0.0f, 0.0f, qa, sa};
14237 float zn_inputs_6[] = {inf, 0.0f, 0.0f, inf, inf_n, inf};
14238 float zm_inputs_6[] = {0.0f, inf_n, inf, inf, inf, 0.0f};
14239
14240 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
14241 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
14242 // quiet_nan.
14243 uint64_t zd_expected_fmla_6[] =
14244 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
14245 uint64_t zd_expected_fmls_6[] =
14246 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
14247 uint64_t zd_expected_fnmla_6[] =
14248 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
14249 uint64_t zd_expected_fnmls_6[] =
14250 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
14251
14252 ProcessNaNsHelper3(config,
14253 kSRegSize,
14254 za_inputs_6,
14255 zn_inputs_6,
14256 zm_inputs_6,
14257 zd_expected_fmla_6,
14258 zd_expected_fmls_6,
14259 zd_expected_fnmla_6,
14260 zd_expected_fnmls_6,
14261 StrictNaNPropagation);
14262}
14263
14264TEST_SVE(sve_process_nans_half) {
14265 // Use non-standard NaNs to check that the payload bits are preserved.
14266 Float16 sa(RawbitsToFloat16(0x7c11));
14267 Float16 sn(RawbitsToFloat16(0x7c22));
14268 Float16 sm(RawbitsToFloat16(0x7c33));
14269 Float16 qa(RawbitsToFloat16(0x7e44));
14270 Float16 qn(RawbitsToFloat16(0x7e55));
14271 Float16 qm(RawbitsToFloat16(0x7e66));
14272 VIXL_ASSERT(IsSignallingNaN(sa));
14273 VIXL_ASSERT(IsSignallingNaN(sn));
14274 VIXL_ASSERT(IsSignallingNaN(sm));
14275 VIXL_ASSERT(IsQuietNaN(qa));
14276 VIXL_ASSERT(IsQuietNaN(qn));
14277 VIXL_ASSERT(IsQuietNaN(qm));
14278
14279 // The input NaNs after passing through ProcessNaN.
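  // For example, the signalling sa (0x7c11) is quietened to 0x7e11 by setting
  // the topmost mantissa bit; quiet NaNs are left unchanged.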
14280 uint16_t sa_proc = 0x7e11;
14281 uint16_t sn_proc = 0x7e22;
14282 uint16_t sm_proc = 0x7e33;
14283 uint16_t qa_proc = Float16ToRawbits(qa);
14284 uint16_t qn_proc = Float16ToRawbits(qn);
14285 uint16_t qm_proc = Float16ToRawbits(qm);
14286 uint16_t sa_proc_n = sa_proc ^ kHSignMask;
14287 uint16_t sn_proc_n = sn_proc ^ kHSignMask;
14288 uint16_t qa_proc_n = qa_proc ^ kHSignMask;
14289 uint16_t qn_proc_n = qn_proc ^ kHSignMask;
14290 Float16 zero(0.0);
14291
14292 // Quiet NaNs are propagated.
14293 Float16 zn_inputs_1[] = {qn, zero, zero, qm, qn, qm};
14294 Float16 zm_inputs_1[] = {zero, qn, qm, zero, qm, qn};
14295 uint64_t zd_expected_1[] =
14296 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
14297
14298 ProcessNaNsHelper(config,
14299 kHRegSize,
14300 zn_inputs_1,
14301 zm_inputs_1,
14302 zd_expected_1,
14303 StrictNaNPropagation);
14304
14305 // Signalling NaNs are propagated.
14306 Float16 zn_inputs_2[] = {sn, zero, zero, sm, sn, sm};
14307 Float16 zm_inputs_2[] = {zero, sn, sm, zero, sm, sn};
14308 uint64_t zd_expected_2[] =
14309 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
14310 ProcessNaNsHelper(config,
14311 kHRegSize,
14312 zn_inputs_2,
14313 zm_inputs_2,
14314 zd_expected_2,
14315 StrictNaNPropagation);
14316
14317 // Signalling NaNs take precedence over quiet NaNs.
14318 Float16 zn_inputs_3[] = {sn, qn, sn, sn, qn};
14319 Float16 zm_inputs_3[] = {qm, sm, sm, qn, sn};
14320 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
14321 ProcessNaNsHelper(config,
14322 kHRegSize,
14323 zn_inputs_3,
14324 zm_inputs_3,
14325 zd_expected_3,
14326 StrictNaNPropagation);
14327
14328 Float16 za_inputs_4[] = {qa, qa, zero, zero, qa, qa};
14329 Float16 zn_inputs_4[] = {qn, zero, zero, qn, qn, qn};
14330 Float16 zm_inputs_4[] = {zero, qm, qm, qm, qm, zero};
14331
14332 // If `a` is propagated, its sign is inverted by fnmla and fnmls.
14333 // If `n` is propagated, its sign is inverted by fmls and fnmla.
14334 // If `m` is propagated, its sign is never inverted.
14335 uint64_t zd_expected_fmla_4[] =
14336 {qa_proc, qa_proc, qm_proc, qn_proc, qa_proc, qa_proc};
14337 uint64_t zd_expected_fmls_4[] =
14338 {qa_proc, qa_proc, qm_proc, qn_proc_n, qa_proc, qa_proc};
14339 uint64_t zd_expected_fnmla_4[] =
14340 {qa_proc_n, qa_proc_n, qm_proc, qn_proc_n, qa_proc_n, qa_proc_n};
14341 uint64_t zd_expected_fnmls_4[] =
14342 {qa_proc_n, qa_proc_n, qm_proc, qn_proc, qa_proc_n, qa_proc_n};
14343
14344 ProcessNaNsHelper3(config,
14345 kHRegSize,
14346 za_inputs_4,
14347 zn_inputs_4,
14348 zm_inputs_4,
14349 zd_expected_fmla_4,
14350 zd_expected_fmls_4,
14351 zd_expected_fnmla_4,
14352 zd_expected_fnmls_4,
14353 StrictNaNPropagation);
14354
14355 // Signalling NaNs take precedence over quiet NaNs.
14356 Float16 za_inputs_5[] = {qa, qa, sa, sa, sa};
14357 Float16 zn_inputs_5[] = {qn, sn, sn, sn, qn};
14358 Float16 zm_inputs_5[] = {sm, qm, sm, qa, sm};
14359 uint64_t zd_expected_fmla_5[] = {sm_proc, sn_proc, sa_proc, sa_proc, sa_proc};
14360 uint64_t zd_expected_fmls_5[] = {sm_proc,
14361 sn_proc_n,
14362 sa_proc,
14363 sa_proc,
14364 sa_proc};
14365 uint64_t zd_expected_fnmla_5[] = {sm_proc,
14366 sn_proc_n,
14367 sa_proc_n,
14368 sa_proc_n,
14369 sa_proc_n};
14370 uint64_t zd_expected_fnmls_5[] = {sm_proc,
14371 sn_proc,
14372 sa_proc_n,
14373 sa_proc_n,
14374 sa_proc_n};
14375
14376 ProcessNaNsHelper3(config,
14377 kHRegSize,
14378 za_inputs_5,
14379 zn_inputs_5,
14380 zm_inputs_5,
14381 zd_expected_fmla_5,
14382 zd_expected_fmls_5,
14383 zd_expected_fnmla_5,
14384 zd_expected_fnmls_5,
14385 StrictNaNPropagation);
14386
14387 const Float16 inf = kFP16PositiveInfinity;
14388 const Float16 inf_n = kFP16NegativeInfinity;
14389 uint64_t inf_proc = Float16ToRawbits(inf);
14390 uint64_t inf_proc_n = Float16ToRawbits(inf_n);
14391 uint64_t d_inf_proc = Float16ToRawbits(kFP16DefaultNaN);
14392
14393 Float16 za_inputs_6[] = {qa, qa, zero, zero, qa, sa};
14394 Float16 zn_inputs_6[] = {inf, zero, zero, inf, inf_n, inf};
14395 Float16 zm_inputs_6[] = {zero, inf_n, inf, inf, inf, zero};
14396
14397 // quiet_nan + (0.0 * inf) produces the default NaN, not quiet_nan. Ditto for
14398 // (inf * 0.0). On the other hand, quiet_nan + (inf * inf) propagates the
14399 // quiet_nan.
14400 uint64_t zd_expected_fmla_6[] =
14401 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc, sa_proc};
14402 uint64_t zd_expected_fmls_6[] =
14403 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc, sa_proc};
14404 uint64_t zd_expected_fnmla_6[] =
14405 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc_n, qa_proc_n, sa_proc_n};
14406 uint64_t zd_expected_fnmls_6[] =
14407 {d_inf_proc, d_inf_proc, d_inf_proc, inf_proc, qa_proc_n, sa_proc_n};
14408
14409 ProcessNaNsHelper3(config,
14410 kHRegSize,
14411 za_inputs_6,
14412 zn_inputs_6,
14413 zm_inputs_6,
14414 zd_expected_fmla_6,
14415 zd_expected_fmls_6,
14416 zd_expected_fnmla_6,
14417 zd_expected_fnmls_6,
14418 StrictNaNPropagation);
14419}
14420
14421}  // namespace aarch64
14422} // namespace vixl