// Copyright 2013, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "a64/macro-assembler-a64.h"

namespace vixl {

void MacroAssembler::And(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, (S == SetFlags) ? ANDS : AND);
}


void MacroAssembler::Tst(const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  And(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Bic(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, (S == SetFlags) ? BICS : BIC);
}


void MacroAssembler::Orr(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORR);
}


void MacroAssembler::Orn(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORN);
}


void MacroAssembler::Eor(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EOR);
}


void MacroAssembler::Eon(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EON);
}


void MacroAssembler::LogicalMacro(const Register& rd,
                                  const Register& rn,
                                  const Operand& operand,
                                  LogicalOp op) {
  if (operand.IsImmediate()) {
    int64_t immediate = operand.immediate();
    unsigned reg_size = rd.size();
    ASSERT(rd.Is64Bits() || is_uint32(immediate));

    // If the operation is NOT, invert the operation and immediate.
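    // For example (illustrative), Bic(w0, w1, 0x3) arrives here as an AND
    // with the NOT bit set; it is rewritten as an AND with immediate
    // 0xfffffffc, which can then be encoded as a logical immediate.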
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = ~immediate;
      if (rd.Is32Bits()) {
        immediate &= kWRegMask;
      }
    }

    // Special cases for all set or all clear immediates.
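    // For example (illustrative), And(x0, x1, 0) reduces to 'mov x0, #0',
    // Orr(w0, w1, 0xffffffff) to a move of the constant alone, and
    // Eor(x0, x1, -1) to 'mvn x0, x1'. The flag-setting forms fall through
    // because they must still update NZCV.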
    if (immediate == 0) {
      switch (op) {
        case AND:
          Mov(rd, 0);
          return;
        case ORR:  // Fall through.
        case EOR:
          Mov(rd, rn);
          return;
        case ANDS:  // Fall through.
        case BICS:
          break;
        default:
          UNREACHABLE();
      }
    } else if ((rd.Is64Bits() && (immediate == -1L)) ||
               (rd.Is32Bits() && (immediate == 0xffffffffL))) {
      switch (op) {
        case AND:
          Mov(rd, rn);
          return;
        case ORR:
          Mov(rd, immediate);
          return;
        case EOR:
          Mvn(rd, rn);
          return;
        case ANDS:  // Fall through.
        case BICS:
          break;
        default:
          UNREACHABLE();
      }
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // Immediate can't be encoded: synthesize using move immediate.
      Register temp = AppropriateTempFor(rn);
      Mov(temp, immediate);
      if (rd.Is(sp)) {
        // If rd is the stack pointer we cannot use it as the destination
        // register, so we use the temp register as an intermediate again.
        Logical(temp, rn, Operand(temp), op);
        Mov(sp, temp);
      } else {
        Logical(rd, rn, Operand(temp), op);
      }
    }
  } else if (operand.IsExtendedRegister()) {
    ASSERT(operand.reg().size() <= rd.size());
    // Add/sub extended supports a shift <= 4. We want to support exactly the
    // same modes here.
    ASSERT(operand.shift_amount() <= 4);
    ASSERT(operand.reg().Is64Bits() ||
           ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    Logical(rd, rn, Operand(temp), op);
  } else {
    // The operand can be encoded in the instruction.
    ASSERT(operand.IsShiftedRegister());
    Logical(rd, rn, operand, op);
  }
}


void MacroAssembler::Mov(const Register& rd, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(rd, operand.immediate());
  } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
    // Emit a shift instruction if moving a shifted register. This operation
    // could also be achieved using an orr instruction (like orn used by Mvn),
    // but using a shift instruction makes the disassembly clearer.
    EmitShift(rd, operand.reg(), operand.shift(), operand.shift_amount());
  } else if (operand.IsExtendedRegister()) {
    // Emit an extend instruction if moving an extended register. This handles
    // extend with post-shift operations, too.
    EmitExtendShift(rd, operand.reg(), operand.extend(),
                    operand.shift_amount());
  } else {
    // Otherwise, emit a register move only if the registers are distinct, or
    // if they are not X registers. Note that mov(w0, w0) is not a no-op
    // because it clears the top word of x0.
    // If sp is an operand, 'add #0' is emitted; otherwise, 'orr #0' is used.
    if (!rd.Is(operand.reg()) || !rd.Is64Bits()) {
      mov(rd, operand.reg());
    }
  }
}


void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mvn(rd, operand.immediate());
  } else if (operand.IsExtendedRegister()) {
    // Emit two instructions for the extend case. This differs from Mov, as
    // the extend and invert can't be achieved in one instruction.
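    // For example (illustrative), Mvn(x0, Operand(w1, SXTW)) expands to
    // 'sxtw <temp>, w1' followed by 'mvn x0, <temp>', where <temp> is
    // whichever scratch register AppropriateTempFor selects.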
    Register temp = AppropriateTempFor(rd, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    mvn(rd, Operand(temp));
  } else {
    // Otherwise, register and shifted register cases can be handled by the
    // assembler directly, using orn.
    mvn(rd, operand);
  }
}


void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
  ASSERT(allow_macro_instructions_);
  ASSERT(is_uint32(imm) || is_int32(imm) || rd.Is64Bits());

  // Immediates on AArch64 can be produced using an initial value, and zero to
  // three move-keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move negative (movn).
  //  3. 64-bit move negative.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit half-words.
  //
  // The code below supports all five initial value generators, but applies
  // move-keep operations only to move-zero initial values.
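  //
  // For example (illustrative), Mov(x0, 0x0000123400005678) fits none of the
  // initial value generators on its own, so it is built as
  // 'movz x0, #0x5678' followed by 'movk x0, #0x1234, lsl #32'; the two
  // zero half-words need no instruction.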

  unsigned reg_size = rd.size();
  unsigned n, imm_s, imm_r;
  if (IsImmMovz(imm, reg_size) && !rd.IsSP()) {
    // Immediate can be represented in a move zero instruction.
    movz(rd, imm);
  } else if (IsImmMovn(imm, reg_size) && !rd.IsSP()) {
    // Immediate can be represented in a move negative instruction. Movn can't
    // write to the stack pointer.
    movn(rd, rd.Is64Bits() ? ~imm : (~imm & kWRegMask));
  } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
    // Immediate can be represented in a logical orr instruction.
    ASSERT(!rd.IsZero());
    LogicalImmediate(rd, AppropriateZeroRegFor(rd), n, imm_s, imm_r, ORR);
  } else {
    // Generic immediate case. Imm will be represented by
    // [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    // A move-zero is generated for the first non-zero immX, and a move-keep
    // for subsequent non-zero immX.

    // Use a temporary register when moving to the stack pointer.
    Register temp = rd.IsSP() ? AppropriateTempFor(rd) : rd;

    ASSERT((reg_size % 16) == 0);
    bool first_mov_done = false;
    for (unsigned i = 0; i < (temp.size() / 16); i++) {
      uint64_t imm16 = (imm >> (16 * i)) & 0xffffL;
      if (imm16 != 0) {
        if (!first_mov_done) {
          // Move the first non-zero 16-bit chunk into the destination
          // register.
          movz(temp, imm16, 16 * i);
          first_mov_done = true;
        } else {
          // Construct a wider constant.
          movk(temp, imm16, 16 * i);
        }
      }
    }

    if (rd.IsSP()) {
      mov(rd, temp);
    }

    ASSERT(first_mov_done);
  }
}


// The movz instruction can generate immediates containing an arbitrary 16-bit
// value, with remaining bits clear, e.g. 0x00001234, 0x0000123400000000.
bool MacroAssembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
  if (reg_size == kXRegSize) {
    if (((imm & 0xffffffffffff0000UL) == 0UL) ||
        ((imm & 0xffffffff0000ffffUL) == 0UL) ||
        ((imm & 0xffff0000ffffffffUL) == 0UL) ||
        ((imm & 0x0000ffffffffffffUL) == 0UL)) {
      return true;
    }
  } else {
    ASSERT(reg_size == kWRegSize);
    imm &= kWRegMask;
    if (((imm & 0xffff0000) == 0) ||
        ((imm & 0x0000ffff) == 0)) {
      return true;
    }
  }
  return false;
}


// The movn instruction can generate immediates containing an arbitrary 16-bit
// value, with remaining bits set, e.g. 0xffff1234, 0xffff1234ffffffff.
bool MacroAssembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
  return IsImmMovz(~imm, reg_size);
}


void MacroAssembler::Ccmp(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  ASSERT(allow_macro_instructions_);
  ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
}


void MacroAssembler::Ccmn(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  ASSERT(allow_macro_instructions_);
  ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
}


void MacroAssembler::ConditionalCompareMacro(const Register& rn,
                                             const Operand& operand,
                                             StatusFlags nzcv,
                                             Condition cond,
                                             ConditionalCompareOp op) {
  if ((operand.IsShiftedRegister() && (operand.shift_amount() == 0)) ||
      (operand.IsImmediate() && IsImmConditionalCompare(operand.immediate()))) {
    // The immediate can be encoded in the instruction, or the operand is an
    // unshifted register: call the assembler.
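    // For example (illustrative), Ccmp(x0, 31, NoFlag, eq) fits the
    // five-bit conditional compare immediate, whereas Ccmp(x0, 32, NoFlag,
    // eq) does not and takes the temporary-register path below.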
    ConditionalCompare(rn, operand, nzcv, cond, op);
  } else {
    // The operand isn't directly supported by the instruction: perform the
    // operation on a temporary register.
    Register temp(NoReg);
    if (operand.IsImmediate()) {
      temp = AppropriateTempFor(rn);
      Mov(temp, operand.immediate());
    } else if (operand.IsShiftedRegister()) {
      ASSERT(operand.shift() != ROR);
      ASSERT(is_uintn(rn.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
                      operand.shift_amount()));
      temp = AppropriateTempFor(rn, operand.reg());
      EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
    } else {
      ASSERT(operand.IsExtendedRegister());
      ASSERT(operand.reg().size() <= rn.size());
      // Add/sub extended supports a shift <= 4. We want to support exactly
      // the same modes.
      ASSERT(operand.shift_amount() <= 4);
      ASSERT(operand.reg().Is64Bits() ||
             ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
      temp = AppropriateTempFor(rn, operand.reg());
      EmitExtendShift(temp, operand.reg(), operand.extend(),
                      operand.shift_amount());
    }
    ConditionalCompare(rn, Operand(temp), nzcv, cond, op);
  }
}


void MacroAssembler::Add(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
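  // A negative immediate is flipped into the equivalent subtraction so that
  // the unsigned add/sub immediate encoding can be used; Sub mirrors this
  // below. For example (illustrative), Add(x0, x1, -8) is emitted as
  // 'sub x0, x1, #8'.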
  if (operand.IsImmediate() && (operand.immediate() < 0)) {
    AddSubMacro(rd, rn, -operand.immediate(), S, SUB);
  } else {
    AddSubMacro(rd, rn, operand, S, ADD);
  }
}


void MacroAssembler::Sub(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.immediate() < 0)) {
    AddSubMacro(rd, rn, -operand.immediate(), S, ADD);
  } else {
    AddSubMacro(rd, rn, operand, S, SUB);
  }
}


void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  Add(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  Sub(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Neg(const Register& rd,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    Mov(rd, -operand.immediate());
  } else {
    Sub(rd, AppropriateZeroRegFor(rd), operand, S);
  }
}


void MacroAssembler::AddSubMacro(const Register& rd,
                                 const Register& rn,
                                 const Operand& operand,
                                 FlagsUpdate S,
                                 AddSubOp op) {
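  // The operand is moved into a temporary register when it cannot be encoded
  // directly: an immediate outside the add/sub immediate range, an rn of zr
  // combined with an immediate or extended-register operand (those encodings
  // treat register 31 as sp rather than zr), or a ROR-shifted register,
  // which add/sub does not accept. For example (illustrative),
  // Add(x0, x1, 0x123456) becomes Mov(<temp>, 0x123456) followed by
  // 'add x0, x1, <temp>'.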
  if ((operand.IsImmediate() && !IsImmAddSub(operand.immediate())) ||
      (rn.IsZero() && !operand.IsShiftedRegister()) ||
      (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
    Register temp = AppropriateTempFor(rn);
    Mov(temp, operand);
    AddSub(rd, rn, temp, S, op);
  } else {
    AddSub(rd, rn, operand, S, op);
  }
}


void MacroAssembler::Adc(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, S, ADC);
}


void MacroAssembler::Sbc(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, S, SBC);
}


void MacroAssembler::Ngc(const Register& rd,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  Register zr = AppropriateZeroRegFor(rd);
  Sbc(rd, zr, operand, S);
}


void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
                                          const Register& rn,
                                          const Operand& operand,
                                          FlagsUpdate S,
                                          AddSubWithCarryOp op) {
  ASSERT(rd.size() == rn.size());

  if (operand.IsImmediate() ||
      (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
    // Add/sub with carry (immediate or ROR shifted register).
    Register temp = AppropriateTempFor(rn);
    Mov(temp, operand);
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
    // Add/sub with carry (shifted register).
    ASSERT(operand.reg().size() == rd.size());
    ASSERT(operand.shift() != ROR);
    ASSERT(is_uintn(rd.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
                    operand.shift_amount()));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else if (operand.IsExtendedRegister()) {
    // Add/sub with carry (extended register).
    ASSERT(operand.reg().size() <= rd.size());
    // Add/sub extended supports a shift <= 4. We want to support exactly the
    // same modes.
    ASSERT(operand.shift_amount() <= 4);
    ASSERT(operand.reg().Is64Bits() ||
           ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else {
    // The addressing mode is directly supported by the instruction.
    AddSubWithCarry(rd, rn, operand, S, op);
  }
}


#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                        \
void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
  LoadStoreMacro(REG, addr, OP);                                     \
}
LS_MACRO_LIST(DEFINE_FUNCTION)
#undef DEFINE_FUNCTION

void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
                                    const MemOperand& addr,
                                    LoadStoreOp op) {
  int64_t offset = addr.offset();
  LSDataSize size = CalcLSDataSize(op);

  // Check if an immediate offset fits in the immediate field of the
  // appropriate instruction. If not, emit two instructions to perform
  // the operation.
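  // For example (illustrative), Ldr(x0, MemOperand(x1, 0x40000)) exceeds
  // both the scaled and unscaled immediate offset ranges, so it expands to
  // Mov(<temp>, 0x40000) followed by 'ldr x0, [x1, <temp>]'.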
  if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, size) &&
      !IsImmLSUnscaled(offset)) {
    // Immediate offset that can't be encoded using unsigned or unscaled
    // addressing modes.
    Register temp = AppropriateTempFor(addr.base());
    Mov(temp, addr.offset());
    LoadStore(rt, MemOperand(addr.base(), temp), op);
  } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
    // Post-index beyond unscaled addressing range.
    LoadStore(rt, MemOperand(addr.base()), op);
    Add(addr.base(), addr.base(), Operand(offset));
  } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
    // Pre-index beyond unscaled addressing range.
    Add(addr.base(), addr.base(), Operand(offset));
    LoadStore(rt, MemOperand(addr.base()), op);
  } else {
    // Encodable in one load/store instruction.
    LoadStore(rt, addr, op);
  }
}


void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1,
                          const CPURegister& src2, const CPURegister& src3) {
  ASSERT(allow_macro_instructions_);
  ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
  ASSERT(src0.IsValid());

  int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
  int size = src0.SizeInBytes();

  PrepareForPush(count, size);
  PushHelper(count, size, src0, src1, src2, src3);
}


void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
                         const CPURegister& dst2, const CPURegister& dst3) {
  // It is not valid to pop into the same register more than once in one
  // instruction, not even into the zero register.
  ASSERT(allow_macro_instructions_);
  ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
  ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  ASSERT(dst0.IsValid());

  int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
  int size = dst0.SizeInBytes();

  PrepareForPop(count, size);
  PopHelper(count, size, dst0, dst1, dst2, dst3);
}


void MacroAssembler::PushCPURegList(CPURegList registers) {
  int size = registers.RegisterSizeInBytes();

  PrepareForPush(registers.Count(), size);
  // Push up to four registers at a time because if the current stack pointer
  // is sp and reg_size is 32, registers must be pushed in blocks of four in
  // order to maintain the 16-byte alignment for sp.
  ASSERT(allow_macro_instructions_);
  while (!registers.IsEmpty()) {
    int count_before = registers.Count();
    const CPURegister& src0 = registers.PopHighestIndex();
    const CPURegister& src1 = registers.PopHighestIndex();
    const CPURegister& src2 = registers.PopHighestIndex();
    const CPURegister& src3 = registers.PopHighestIndex();
    int count = count_before - registers.Count();
    PushHelper(count, size, src0, src1, src2, src3);
  }
}


void MacroAssembler::PopCPURegList(CPURegList registers) {
  int size = registers.RegisterSizeInBytes();

  PrepareForPop(registers.Count(), size);
  // Pop up to four registers at a time because if the current stack pointer
  // is sp and reg_size is 32, registers must be popped in blocks of four in
  // order to maintain the 16-byte alignment for sp.
  ASSERT(allow_macro_instructions_);
  while (!registers.IsEmpty()) {
    int count_before = registers.Count();
    const CPURegister& dst0 = registers.PopLowestIndex();
    const CPURegister& dst1 = registers.PopLowestIndex();
    const CPURegister& dst2 = registers.PopLowestIndex();
    const CPURegister& dst3 = registers.PopLowestIndex();
    int count = count_before - registers.Count();
    PopHelper(count, size, dst0, dst1, dst2, dst3);
  }
}


void MacroAssembler::PushMultipleTimes(int count, Register src) {
  ASSERT(allow_macro_instructions_);
  int size = src.SizeInBytes();

  PrepareForPush(count, size);
  // Push up to four registers at a time if possible because if the current
  // stack pointer is sp and the register size is 32, registers must be pushed
  // in blocks of four in order to maintain the 16-byte alignment for sp.
  while (count >= 4) {
    PushHelper(4, size, src, src, src, src);
    count -= 4;
  }
  if (count >= 2) {
    PushHelper(2, size, src, src, NoReg, NoReg);
    count -= 2;
  }
  if (count == 1) {
    PushHelper(1, size, src, NoReg, NoReg, NoReg);
    count -= 1;
  }
  ASSERT(count == 0);
}


void MacroAssembler::PushHelper(int count, int size,
                                const CPURegister& src0,
                                const CPURegister& src1,
                                const CPURegister& src2,
                                const CPURegister& src3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  InstructionAccurateScope scope(this);

  ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
  ASSERT(size == src0.SizeInBytes());

  // When pushing multiple registers, the store order is chosen such that
  // Push(a, b) is equivalent to Push(a) followed by Push(b).
  switch (count) {
    case 1:
      ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
      str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
      break;
    case 2:
      ASSERT(src2.IsNone() && src3.IsNone());
      stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
      break;
    case 3:
      ASSERT(src3.IsNone());
      stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
      str(src0, MemOperand(StackPointer(), 2 * size));
      break;
    case 4:
      // Skip over 4 * size, then fill in the gap. This allows four W
      // registers to be pushed using sp, whilst maintaining 16-byte alignment
      // for sp at all times.
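      // For example (illustrative), with four W registers and sp as the
      // stack pointer: the pre-indexed stp claims all 16 bytes and writes
      // src3 and src2 to the low half, then the second stp fills src1 and
      // src0 into the high half at [sp, #8], so sp stays 16-byte aligned
      // throughout.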
      stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
      stp(src1, src0, MemOperand(StackPointer(), 2 * size));
      break;
    default:
      UNREACHABLE();
  }
}


void MacroAssembler::PopHelper(int count, int size,
                               const CPURegister& dst0,
                               const CPURegister& dst1,
                               const CPURegister& dst2,
                               const CPURegister& dst3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  InstructionAccurateScope scope(this);

  ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  ASSERT(size == dst0.SizeInBytes());

  // When popping multiple registers, the load order is chosen such that
  // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
  switch (count) {
    case 1:
      ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
      ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
      break;
    case 2:
      ASSERT(dst2.IsNone() && dst3.IsNone());
      ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
      break;
    case 3:
      ASSERT(dst3.IsNone());
      ldr(dst2, MemOperand(StackPointer(), 2 * size));
      ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
      break;
    case 4:
      // Load the higher addresses first, then load the lower addresses and
      // skip the whole block in the second instruction. This allows four W
      // registers to be popped using sp, whilst maintaining 16-byte alignment
      // for sp at all times.
      ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
      ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
      break;
    default:
      UNREACHABLE();
  }
}


void MacroAssembler::PrepareForPush(int count, int size) {
  if (sp.Is(StackPointer())) {
    // If the current stack pointer is sp, then it must be aligned to 16 bytes
    // on entry and the total size of the specified registers must also be a
    // multiple of 16 bytes.
    ASSERT((count * size) % 16 == 0);
  } else {
    // Even if the current stack pointer is not the system stack pointer (sp),
    // the system stack pointer will still be modified in order to comply with
    // ABI rules about accessing memory below the system stack pointer.
    BumpSystemStackPointer(count * size);
  }
}


void MacroAssembler::PrepareForPop(int count, int size) {
  USE(count);
  USE(size);
  if (sp.Is(StackPointer())) {
    // If the current stack pointer is sp, then it must be aligned to 16 bytes
    // on entry and the total size of the specified registers must also be a
    // multiple of 16 bytes.
    ASSERT((count * size) % 16 == 0);
  }
}


void MacroAssembler::Poke(const Register& src, const Operand& offset) {
  ASSERT(allow_macro_instructions_);
  if (offset.IsImmediate()) {
    ASSERT(offset.immediate() >= 0);
  }

  Str(src, MemOperand(StackPointer(), offset));
}


void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
  ASSERT(allow_macro_instructions_);
  if (offset.IsImmediate()) {
    ASSERT(offset.immediate() >= 0);
  }

  Ldr(dst, MemOperand(StackPointer(), offset));
}


void MacroAssembler::Claim(const Operand& size) {
  ASSERT(allow_macro_instructions_);
  if (size.IsImmediate()) {
    ASSERT(size.immediate() >= 0);
    if (sp.Is(StackPointer())) {
      ASSERT((size.immediate() % 16) == 0);
    }
  }

  if (!sp.Is(StackPointer())) {
    BumpSystemStackPointer(size);
  }

  Sub(StackPointer(), StackPointer(), size);
}


void MacroAssembler::Drop(const Operand& size) {
  ASSERT(allow_macro_instructions_);
  if (size.IsImmediate()) {
    ASSERT(size.immediate() >= 0);
    if (sp.Is(StackPointer())) {
      ASSERT((size.immediate() % 16) == 0);
    }
  }

  Add(StackPointer(), StackPointer(), size);
}


void MacroAssembler::PushCalleeSavedRegisters() {
  // Ensure that the macro-assembler doesn't use any scratch registers.
  InstructionAccurateScope scope(this);

  // This method must not be called unless the current stack pointer is sp.
  ASSERT(sp.Is(StackPointer()));

  MemOperand tos(sp, -2 * kXRegSizeInBytes, PreIndex);

  stp(d14, d15, tos);
  stp(d12, d13, tos);
  stp(d10, d11, tos);
  stp(d8, d9, tos);

  stp(x29, x30, tos);
  stp(x27, x28, tos);
  stp(x25, x26, tos);
  stp(x23, x24, tos);
  stp(x21, x22, tos);
  stp(x19, x20, tos);
}


void MacroAssembler::PopCalleeSavedRegisters() {
  // Ensure that the macro-assembler doesn't use any scratch registers.
  InstructionAccurateScope scope(this);

  // This method must not be called unless the current stack pointer is sp.
  ASSERT(sp.Is(StackPointer()));

  MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);

  ldp(x19, x20, tos);
  ldp(x21, x22, tos);
  ldp(x23, x24, tos);
  ldp(x25, x26, tos);
  ldp(x27, x28, tos);
  ldp(x29, x30, tos);

  ldp(d8, d9, tos);
  ldp(d10, d11, tos);
  ldp(d12, d13, tos);
  ldp(d14, d15, tos);
}


void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
  ASSERT(!sp.Is(StackPointer()));
  // TODO: Several callers rely on this not using scratch registers, so we use
  // the assembler directly here. However, this means that large immediate
  // values of 'space' cannot be handled.
  InstructionAccurateScope scope(this);
  sub(sp, StackPointer(), space);
}


// This is the main Printf implementation. All callee-saved registers are
// preserved, but NZCV and the caller-saved registers may be clobbered.
void MacroAssembler::PrintfNoPreserve(const char * format,
                                      const CPURegister& arg0,
                                      const CPURegister& arg1,
                                      const CPURegister& arg2,
                                      const CPURegister& arg3) {
  // We cannot handle a caller-saved stack pointer. It doesn't make much sense
  // in most cases anyway, so this restriction shouldn't be too serious.
  ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));

  // We cannot print Tmp0() or Tmp1() as they're used internally by the macro
  // assembler. We cannot print the stack pointer because it is typically used
  // to preserve caller-saved registers (using other Printf variants which
  // depend on this helper).
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg0));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg1));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg2));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg3));

  static const int kMaxArgCount = 4;
  // Assume that we have the maximum number of arguments until we know
  // otherwise.
  int arg_count = kMaxArgCount;

  // The provided arguments.
  CPURegister args[kMaxArgCount] = {arg0, arg1, arg2, arg3};

  // The PCS registers where the arguments need to end up.
  CPURegister pcs[kMaxArgCount];

  // Promote FP arguments to doubles, and integer arguments to X registers.
  // Note that FP and integer arguments cannot be mixed, but we'll check
  // AreSameSizeAndType once we've processed these promotions.
  for (int i = 0; i < kMaxArgCount; i++) {
    if (args[i].IsRegister()) {
      // Note that we use x1 onwards, because x0 will hold the format string.
      pcs[i] = Register::XRegFromCode(i + 1);
      // For simplicity, we handle all integer arguments as X registers. An X
      // register argument takes the same space as a W register argument in the
      // PCS anyway. The only limitation is that we must explicitly clear the
      // top word for W register arguments as the callee will expect it to be
      // clear.
      if (!args[i].Is64Bits()) {
        const Register& as_x = args[i].X();
        And(as_x, as_x, 0x00000000ffffffff);
        args[i] = as_x;
      }
    } else if (args[i].IsFPRegister()) {
      pcs[i] = FPRegister::DRegFromCode(i);
      // C and C++ varargs functions (such as printf) implicitly promote float
      // arguments to doubles.
      if (!args[i].Is64Bits()) {
        FPRegister s(args[i]);
        const FPRegister& as_d = args[i].D();
        Fcvt(as_d, s);
        args[i] = as_d;
      }
    } else {
      // This is the first empty (NoCPUReg) argument, so use it to set the
      // argument count and bail out.
      arg_count = i;
      break;
    }
  }
  ASSERT((arg_count >= 0) && (arg_count <= kMaxArgCount));
  // Check that every remaining argument is NoCPUReg.
  for (int i = arg_count; i < kMaxArgCount; i++) {
    ASSERT(args[i].IsNone());
  }
  ASSERT((arg_count == 0) || AreSameSizeAndType(args[0], args[1],
                                                args[2], args[3],
                                                pcs[0], pcs[1],
                                                pcs[2], pcs[3]));

  // Move the arguments into the appropriate PCS registers.
  //
  // Arranging an arbitrary list of registers into x1-x4 (or d0-d3) is
  // surprisingly complicated.
  //
  //  * For even numbers of registers, we push the arguments and then pop them
  //    into their final registers. This maintains 16-byte stack alignment in
  //    case sp is the stack pointer, since we're only handling X or D
  //    registers at this point.
  //
  //  * For odd numbers of registers, we push and pop all but one register in
  //    the same way, but the left-over register is moved directly, since we
  //    can always safely move one register without clobbering any source.
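  //
  // For example (illustrative), with three X-register arguments: args[1]
  // and args[0] are pushed, args[2] is moved directly into pcs[2], and the
  // pop then fills pcs[0] and pcs[1].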
  if (arg_count >= 4) {
    Push(args[3], args[2], args[1], args[0]);
  } else if (arg_count >= 2) {
    Push(args[1], args[0]);
  }

  if ((arg_count % 2) != 0) {
    // Move the left-over register directly.
    const CPURegister& leftover_arg = args[arg_count - 1];
    const CPURegister& leftover_pcs = pcs[arg_count - 1];
    if (leftover_arg.IsRegister()) {
      Mov(Register(leftover_pcs), Register(leftover_arg));
    } else {
      Fmov(FPRegister(leftover_pcs), FPRegister(leftover_arg));
    }
  }

  if (arg_count >= 4) {
    Pop(pcs[0], pcs[1], pcs[2], pcs[3]);
  } else if (arg_count >= 2) {
    Pop(pcs[0], pcs[1]);
  }

  // Load the format string into x0, as per the procedure-call standard.
  //
  // To make the code as portable as possible, the format string is encoded
  // directly in the instruction stream. It might be cleaner to encode it in a
  // literal pool, but since Printf is usually used for debugging, it is
  // beneficial for it to be minimally dependent on other features.
  Label format_address;
  Adr(x0, &format_address);

  // Emit the format string directly in the instruction stream.
  { BlockLiteralPoolScope scope(this);
    Label after_data;
    B(&after_data);
    Bind(&format_address);
    EmitStringData(format);
    Unreachable();
    Bind(&after_data);
  }

  // We don't pass any arguments on the stack, but we still need to align the
  // C stack pointer to a 16-byte boundary for PCS compliance.
  if (!sp.Is(StackPointer())) {
    Bic(sp, StackPointer(), 0xf);
  }

  // Actually call printf. This part needs special handling for the simulator,
  // since the system printf function will use a different instruction set and
  // the procedure-call standard will not be compatible.
#ifdef USE_SIMULATOR
  { InstructionAccurateScope scope(this, kPrintfLength / kInstructionSize);
    hlt(kPrintfOpcode);
    dc32(pcs[0].type());
  }
#else
  Mov(Tmp0(), reinterpret_cast<uintptr_t>(printf));
  Blr(Tmp0());
#endif
}


void MacroAssembler::Printf(const char * format,
                            const CPURegister& arg0,
                            const CPURegister& arg1,
                            const CPURegister& arg2,
                            const CPURegister& arg3) {
  // Preserve all caller-saved registers as well as NZCV.
  // If sp is the stack pointer, PushCPURegList asserts that the size of each
  // list is a multiple of 16 bytes.
  PushCPURegList(kCallerSaved);
  PushCPURegList(kCallerSavedFP);
  // Use Tmp0() as a scratch register. It is not accepted by Printf so it will
  // never overlap an argument register.
  Mrs(Tmp0(), NZCV);
  Push(Tmp0(), xzr);

  PrintfNoPreserve(format, arg0, arg1, arg2, arg3);

  Pop(xzr, Tmp0());
  Msr(NZCV, Tmp0());
  PopCPURegList(kCallerSavedFP);
  PopCPURegList(kCallerSaved);
}


void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
  ASSERT(allow_macro_instructions_);

#ifdef USE_SIMULATOR
  // The arguments to the trace pseudo instruction need to be contiguous in
  // memory, so make sure we don't try to emit a literal pool.
  InstructionAccurateScope scope(this, kTraceLength / kInstructionSize);

  Label start;
  bind(&start);

  // Refer to instructions-a64.h for a description of the marker and its
  // arguments.
  hlt(kTraceOpcode);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
  dc32(parameters);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
  dc32(command);
#else
  // Emit nothing on real hardware.
  USE(parameters);
  USE(command);
#endif
}


void MacroAssembler::Log(TraceParameters parameters) {
  ASSERT(allow_macro_instructions_);

#ifdef USE_SIMULATOR
  // The arguments to the log pseudo instruction need to be contiguous in
  // memory, so make sure we don't try to emit a literal pool.
  InstructionAccurateScope scope(this, kLogLength / kInstructionSize);

  Label start;
  bind(&start);

  // Refer to instructions-a64.h for a description of the marker and its
  // arguments.
  hlt(kLogOpcode);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
  dc32(parameters);
#else
  // Emit nothing on real hardware.
  USE(parameters);
#endif
}

}  // namespace vixl