// Copyright 2013, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include "a64/macro-assembler-a64.h"

namespace vixl {

void MacroAssembler::And(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, (S == SetFlags) ? ANDS : AND);
}


void MacroAssembler::Tst(const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  And(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Bic(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, (S == SetFlags) ? BICS : BIC);
}


void MacroAssembler::Orr(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORR);
}


void MacroAssembler::Orn(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORN);
}


void MacroAssembler::Eor(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EOR);
}


void MacroAssembler::Eon(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EON);
}


void MacroAssembler::LogicalMacro(const Register& rd,
                                  const Register& rn,
                                  const Operand& operand,
                                  LogicalOp op) {
  if (operand.IsImmediate()) {
    int64_t immediate = operand.immediate();
    unsigned reg_size = rd.size();
    ASSERT(rd.Is64Bits() || is_uint32(immediate));

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = ~immediate;
      if (rd.Is32Bits()) {
        immediate &= kWRegMask;
      }
    }

    // Special cases for all set or all clear immediates.
    if (immediate == 0) {
      switch (op) {
        case AND:
          Mov(rd, 0);
          return;
        case ORR:  // Fall through.
        case EOR:
          Mov(rd, rn);
          return;
        case ANDS:  // Fall through.
        case BICS:
          break;
        default:
          UNREACHABLE();
      }
    } else if ((rd.Is64Bits() && (immediate == -1L)) ||
               (rd.Is32Bits() && (immediate == 0xffffffffL))) {
      switch (op) {
        case AND:
          Mov(rd, rn);
          return;
        case ORR:
          Mov(rd, immediate);
          return;
        case EOR:
          Mvn(rd, rn);
          return;
        case ANDS:  // Fall through.
        case BICS:
          break;
        default:
          UNREACHABLE();
      }
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // Immediate can't be encoded: synthesize using move immediate.
      Register temp = AppropriateTempFor(rn);
      Mov(temp, immediate);
      if (rd.Is(sp)) {
        // If rd is the stack pointer we cannot use it as the destination
        // register, so we use the temp register as an intermediate again.
        Logical(temp, rn, Operand(temp), op);
        Mov(sp, temp);
      } else {
        Logical(rd, rn, Operand(temp), op);
      }
    }
  } else if (operand.IsExtendedRegister()) {
    ASSERT(operand.reg().size() <= rd.size());
    // Add/sub extended supports shift <= 4. We want to support exactly the
    // same modes here.
    ASSERT(operand.shift_amount() <= 4);
    ASSERT(operand.reg().Is64Bits() ||
           ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    Logical(rd, rn, Operand(temp), op);
  } else {
    // The operand can be encoded in the instruction.
    ASSERT(operand.IsShiftedRegister());
    Logical(rd, rn, operand, op);
  }
}
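

// Illustrative sketch (not part of the original source): 0x12345678 has no
// logical-immediate encoding, so a call such as
//   And(x0, x1, 0x12345678);
// is expanded by LogicalMacro along the lines of:
//   movz temp, #0x5678
//   movk temp, #0x1234, lsl #16   // Mov(temp, immediate)
//   and  x0, x1, temp             // Logical(rd, rn, temp, op)
// where 'temp' is chosen by AppropriateTempFor(rn).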


void MacroAssembler::Mov(const Register& rd, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(rd, operand.immediate());
  } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
    // Emit a shift instruction if moving a shifted register. This operation
    // could also be achieved using an orr instruction (like orn used by Mvn),
    // but using a shift instruction makes the disassembly clearer.
    EmitShift(rd, operand.reg(), operand.shift(), operand.shift_amount());
  } else if (operand.IsExtendedRegister()) {
    // Emit an extend instruction if moving an extended register. This handles
    // extend with post-shift operations, too.
    EmitExtendShift(rd, operand.reg(), operand.extend(),
                    operand.shift_amount());
  } else {
    // Otherwise, emit a register move only if the registers are distinct, or
    // if they are not X registers. Note that mov(w0, w0) is not a no-op
    // because it clears the top word of x0.
    // If sp is an operand, add #0 is emitted; otherwise, orr #0.
    if (!rd.Is(operand.reg()) || !rd.Is64Bits()) {
      mov(rd, operand.reg());
    }
  }
}


void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mvn(rd, operand.immediate());
  } else if (operand.IsExtendedRegister()) {
    // Emit two instructions for the extend case. This differs from Mov, as
    // the extend and invert can't be achieved in one instruction.
    Register temp = AppropriateTempFor(rd, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    mvn(rd, Operand(temp));
  } else {
    // Otherwise, register and shifted register cases can be handled by the
    // assembler directly, using orn.
    mvn(rd, operand);
  }
}
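

// Illustrative sketch (not part of the original source): for an extended
// register operand the invert cannot be folded into one instruction, so
//   Mvn(x0, Operand(w1, SXTW));
// becomes roughly:
//   sxtw temp, w1        // EmitExtendShift
//   mvn  x0, temp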


void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
  ASSERT(allow_macro_instructions_);
  ASSERT(is_uint32(imm) || is_int32(imm) || rd.Is64Bits());

  // Immediates on AArch64 can be produced using an initial value, and zero to
  // three move-keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move negative (movn).
  //  3. 64-bit move negative.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit halfwords.
  //
  // The code below supports all five initial value generators, and
  // applying move-keep operations to move-zero initial values only.
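  //
  // For example (an illustrative sketch, not in the original source), the
  // value 0x0000cafe00001234 fits none of the five generators, so the generic
  // case below emits:
  //   movz x0, #0x1234
  //   movk x0, #0xcafe, lsl #32
  // Zero halfwords are simply skipped.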

  unsigned reg_size = rd.size();
  unsigned n, imm_s, imm_r;
  if (IsImmMovz(imm, reg_size) && !rd.IsSP()) {
    // Immediate can be represented in a move zero instruction.
    movz(rd, imm);
  } else if (IsImmMovn(imm, reg_size) && !rd.IsSP()) {
    // Immediate can be represented in a move negative instruction. Movn can't
    // write to the stack pointer.
    movn(rd, rd.Is64Bits() ? ~imm : (~imm & kWRegMask));
  } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
    // Immediate can be represented in a logical orr instruction.
    ASSERT(!rd.IsZero());
    LogicalImmediate(rd, AppropriateZeroRegFor(rd), n, imm_s, imm_r, ORR);
  } else {
    // Generic immediate case. Imm will be represented by
    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    // A move-zero is generated for the first non-zero immX, and a move-keep
    // for subsequent non-zero immX.

    // Use a temporary register when moving to the stack pointer.
    Register temp = rd.IsSP() ? AppropriateTempFor(rd) : rd;

    ASSERT((reg_size % 16) == 0);
    bool first_mov_done = false;
    for (unsigned i = 0; i < (temp.size() / 16); i++) {
      uint64_t imm16 = (imm >> (16 * i)) & 0xffffL;
      if (imm16 != 0) {
        if (!first_mov_done) {
          // Move the first non-zero 16-bit chunk into the destination
          // register.
          movz(temp, imm16, 16 * i);
          first_mov_done = true;
        } else {
          // Construct a wider constant.
          movk(temp, imm16, 16 * i);
        }
      }
    }

    if (rd.IsSP()) {
      mov(rd, temp);
    }

    ASSERT(first_mov_done);
  }
}


// The movz instruction can generate immediates containing an arbitrary 16-bit
// value, with remaining bits clear, e.g. 0x00001234, 0x0000123400000000.
bool MacroAssembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
  if (reg_size == kXRegSize) {
    if (((imm & 0xffffffffffff0000UL) == 0UL) ||
        ((imm & 0xffffffff0000ffffUL) == 0UL) ||
        ((imm & 0xffff0000ffffffffUL) == 0UL) ||
        ((imm & 0x0000ffffffffffffUL) == 0UL)) {
      return true;
    }
  } else {
    ASSERT(reg_size == kWRegSize);
    imm &= kWRegMask;
    if (((imm & 0xffff0000) == 0) ||
        ((imm & 0x0000ffff) == 0)) {
      return true;
    }
  }
  return false;
}


// The movn instruction can generate immediates containing an arbitrary 16-bit
// value, with remaining bits set, e.g. 0xffff1234, 0xffff1234ffffffff.
bool MacroAssembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
  return IsImmMovz(~imm, reg_size);
}


void MacroAssembler::Ccmp(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  ASSERT(allow_macro_instructions_);
  ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
}


void MacroAssembler::Ccmn(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  ASSERT(allow_macro_instructions_);
  ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
}


void MacroAssembler::ConditionalCompareMacro(const Register& rn,
                                             const Operand& operand,
                                             StatusFlags nzcv,
                                             Condition cond,
                                             ConditionalCompareOp op) {
  ASSERT((cond != al) && (cond != nv));
  if ((operand.IsShiftedRegister() && (operand.shift_amount() == 0)) ||
      (operand.IsImmediate() && IsImmConditionalCompare(operand.immediate()))) {
    // The immediate can be encoded in the instruction, or the operand is an
    // unshifted register: call the assembler.
    ConditionalCompare(rn, operand, nzcv, cond, op);
  } else {
    // The operand isn't directly supported by the instruction: perform the
    // operation on a temporary register.
    Register temp(NoReg);
    if (operand.IsImmediate()) {
      temp = AppropriateTempFor(rn);
      Mov(temp, operand.immediate());
    } else if (operand.IsShiftedRegister()) {
      ASSERT(operand.shift() != ROR);
      ASSERT(is_uintn(rn.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
                      operand.shift_amount()));
      temp = AppropriateTempFor(rn, operand.reg());
      EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
    } else {
      ASSERT(operand.IsExtendedRegister());
      ASSERT(operand.reg().size() <= rn.size());
      // Add/sub extended supports a shift <= 4. We want to support exactly
      // the same modes.
      ASSERT(operand.shift_amount() <= 4);
      ASSERT(operand.reg().Is64Bits() ||
             ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
      temp = AppropriateTempFor(rn, operand.reg());
      EmitExtendShift(temp, operand.reg(), operand.extend(),
                      operand.shift_amount());
    }
    ConditionalCompare(rn, Operand(temp), nzcv, cond, op);
  }
}
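

// Illustrative sketch (not part of the original source): the conditional
// compare immediate field is only five bits, so a call such as
//   Ccmp(x0, 291, NoFlag, eq);
// is expanded to something like:
//   mov  temp, #291
//   ccmp x0, temp, #nzcv, eq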


void MacroAssembler::Add(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.immediate() < 0)) {
    AddSubMacro(rd, rn, -operand.immediate(), S, SUB);
  } else {
    AddSubMacro(rd, rn, operand, S, ADD);
  }
}


void MacroAssembler::Sub(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.immediate() < 0)) {
    AddSubMacro(rd, rn, -operand.immediate(), S, ADD);
  } else {
    AddSubMacro(rd, rn, operand, S, SUB);
  }
}


void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  Add(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
  ASSERT(allow_macro_instructions_);
  Sub(AppropriateZeroRegFor(rn), rn, operand, SetFlags);
}


void MacroAssembler::Neg(const Register& rd,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    Mov(rd, -operand.immediate());
  } else {
    Sub(rd, AppropriateZeroRegFor(rd), operand, S);
  }
}


void MacroAssembler::AddSubMacro(const Register& rd,
                                 const Register& rn,
                                 const Operand& operand,
                                 FlagsUpdate S,
                                 AddSubOp op) {
  if ((operand.IsImmediate() && !IsImmAddSub(operand.immediate())) ||
      (rn.IsZero() && !operand.IsShiftedRegister()) ||
      (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
    Register temp = AppropriateTempFor(rn);
    Mov(temp, operand);
    AddSub(rd, rn, temp, S, op);
  } else {
    AddSub(rd, rn, operand, S, op);
  }
}
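

// Illustrative sketch (not part of the original source): add/sub immediates
// must fit in 12 bits, optionally shifted left by 12, so
//   Add(x0, x1, 0x123456);
// cannot be encoded directly, and AddSubMacro emits roughly:
//   mov temp, #0x123456   // synthesized with movz/movk
//   add x0, x1, temp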


void MacroAssembler::Adc(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, S, ADC);
}


void MacroAssembler::Sbc(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, S, SBC);
}


void MacroAssembler::Ngc(const Register& rd,
                         const Operand& operand,
                         FlagsUpdate S) {
  ASSERT(allow_macro_instructions_);
  Register zr = AppropriateZeroRegFor(rd);
  Sbc(rd, zr, operand, S);
}


void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
                                          const Register& rn,
                                          const Operand& operand,
                                          FlagsUpdate S,
                                          AddSubWithCarryOp op) {
  ASSERT(rd.size() == rn.size());

  if (operand.IsImmediate() ||
      (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
    // Add/sub with carry (immediate or ROR shifted register).
    Register temp = AppropriateTempFor(rn);
    Mov(temp, operand);
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
    // Add/sub with carry (shifted register).
    ASSERT(operand.reg().size() == rd.size());
    ASSERT(operand.shift() != ROR);
    ASSERT(is_uintn(rd.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
                    operand.shift_amount()));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else if (operand.IsExtendedRegister()) {
    // Add/sub with carry (extended register).
    ASSERT(operand.reg().size() <= rd.size());
    // Add/sub extended supports a shift <= 4. We want to support exactly the
    // same modes.
    ASSERT(operand.shift_amount() <= 4);
    ASSERT(operand.reg().Is64Bits() ||
           ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
    Register temp = AppropriateTempFor(rn, operand.reg());
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else {
    // The addressing mode is directly supported by the instruction.
    AddSubWithCarry(rd, rn, operand, S, op);
  }
}


#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                         \
void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) {  \
  LoadStoreMacro(REG, addr, OP);                                      \
}
LS_MACRO_LIST(DEFINE_FUNCTION)
#undef DEFINE_FUNCTION

void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
                                    const MemOperand& addr,
                                    LoadStoreOp op) {
  int64_t offset = addr.offset();
  LSDataSize size = CalcLSDataSize(op);

  // Check if an immediate offset fits in the immediate field of the
  // appropriate instruction. If not, emit two instructions to perform
  // the operation.
  if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, size) &&
      !IsImmLSUnscaled(offset)) {
    // Immediate offset that can't be encoded using unsigned or unscaled
    // addressing modes.
    Register temp = AppropriateTempFor(addr.base());
    Mov(temp, addr.offset());
    LoadStore(rt, MemOperand(addr.base(), temp), op);
  } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
    // Post-index beyond unscaled addressing range.
    LoadStore(rt, MemOperand(addr.base()), op);
    Add(addr.base(), addr.base(), Operand(offset));
  } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
    // Pre-index beyond unscaled addressing range.
    Add(addr.base(), addr.base(), Operand(offset));
    LoadStore(rt, MemOperand(addr.base()), op);
  } else {
    // Encodable in one load/store instruction.
    LoadStore(rt, addr, op);
  }
}
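

// Illustrative sketch (not part of the original source): an offset that fits
// neither the scaled-unsigned nor the unscaled form, as in
//   Ldr(x0, MemOperand(x1, 0x123456));
// is expanded along the lines of:
//   mov temp, #0x123456
//   ldr x0, [x1, temp]   // register-offset addressing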


void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1,
                          const CPURegister& src2, const CPURegister& src3) {
  ASSERT(allow_macro_instructions_);
  ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
  ASSERT(src0.IsValid());

  int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
  int size = src0.SizeInBytes();

  PrepareForPush(count, size);
  PushHelper(count, size, src0, src1, src2, src3);
}


void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
                         const CPURegister& dst2, const CPURegister& dst3) {
  // It is not valid to pop into the same register more than once in one
  // instruction, not even into the zero register.
  ASSERT(allow_macro_instructions_);
  ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
  ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  ASSERT(dst0.IsValid());

  int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
  int size = dst0.SizeInBytes();

  PrepareForPop(count, size);
  PopHelper(count, size, dst0, dst1, dst2, dst3);
}


void MacroAssembler::PushCPURegList(CPURegList registers) {
  int size = registers.RegisterSizeInBytes();

  PrepareForPush(registers.Count(), size);
  // Push up to four registers at a time because if the current stack pointer
  // is sp and reg_size is 32, registers must be pushed in blocks of four in
  // order to maintain the 16-byte alignment for sp.
  ASSERT(allow_macro_instructions_);
  while (!registers.IsEmpty()) {
    int count_before = registers.Count();
    const CPURegister& src0 = registers.PopHighestIndex();
    const CPURegister& src1 = registers.PopHighestIndex();
    const CPURegister& src2 = registers.PopHighestIndex();
    const CPURegister& src3 = registers.PopHighestIndex();
    int count = count_before - registers.Count();
    PushHelper(count, size, src0, src1, src2, src3);
  }
}


void MacroAssembler::PopCPURegList(CPURegList registers) {
  int size = registers.RegisterSizeInBytes();

  PrepareForPop(registers.Count(), size);
  // Pop up to four registers at a time because if the current stack pointer
  // is sp and reg_size is 32, registers must be popped in blocks of four in
  // order to maintain the 16-byte alignment for sp.
  ASSERT(allow_macro_instructions_);
  while (!registers.IsEmpty()) {
    int count_before = registers.Count();
    const CPURegister& dst0 = registers.PopLowestIndex();
    const CPURegister& dst1 = registers.PopLowestIndex();
    const CPURegister& dst2 = registers.PopLowestIndex();
    const CPURegister& dst3 = registers.PopLowestIndex();
    int count = count_before - registers.Count();
    PopHelper(count, size, dst0, dst1, dst2, dst3);
  }
}


void MacroAssembler::PushMultipleTimes(int count, Register src) {
  ASSERT(allow_macro_instructions_);
  int size = src.SizeInBytes();

  PrepareForPush(count, size);
  // Push up to four registers at a time if possible because if the current
  // stack pointer is sp and the register size is 32, registers must be pushed
  // in blocks of four in order to maintain the 16-byte alignment for sp.
  while (count >= 4) {
    PushHelper(4, size, src, src, src, src);
    count -= 4;
  }
  if (count >= 2) {
    PushHelper(2, size, src, src, NoReg, NoReg);
    count -= 2;
  }
  if (count == 1) {
    PushHelper(1, size, src, NoReg, NoReg, NoReg);
    count -= 1;
  }
  ASSERT(count == 0);
}


void MacroAssembler::PushHelper(int count, int size,
                                const CPURegister& src0,
                                const CPURegister& src1,
                                const CPURegister& src2,
                                const CPURegister& src3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  InstructionAccurateScope scope(this);

  ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
  ASSERT(size == src0.SizeInBytes());

  // When pushing multiple registers, the store order is chosen such that
  // Push(a, b) is equivalent to Push(a) followed by Push(b).
  switch (count) {
    case 1:
      ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
      str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
      break;
    case 2:
      ASSERT(src2.IsNone() && src3.IsNone());
      stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
      break;
    case 3:
      ASSERT(src3.IsNone());
      stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
      str(src0, MemOperand(StackPointer(), 2 * size));
      break;
    case 4:
      // Skip over 4 * size, then fill in the gap. This allows four W
      // registers to be pushed using sp, whilst maintaining 16-byte alignment
      // for sp at all times.
      stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
      stp(src1, src0, MemOperand(StackPointer(), 2 * size));
      break;
    default:
      UNREACHABLE();
  }
}
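

// Illustrative note (not part of the original source): with the ordering
// above, Push(x0, x1) emits
//   stp x1, x0, [sp, #-16]!
// leaving x0 at the higher address, exactly as if x0 had been pushed before
// x1 by two separate str instructions.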


void MacroAssembler::PopHelper(int count, int size,
                               const CPURegister& dst0,
                               const CPURegister& dst1,
                               const CPURegister& dst2,
                               const CPURegister& dst3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  InstructionAccurateScope scope(this);

  ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  ASSERT(size == dst0.SizeInBytes());

  // When popping multiple registers, the load order is chosen such that
  // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
  switch (count) {
    case 1:
      ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
      ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
      break;
    case 2:
      ASSERT(dst2.IsNone() && dst3.IsNone());
      ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
      break;
    case 3:
      ASSERT(dst3.IsNone());
      ldr(dst2, MemOperand(StackPointer(), 2 * size));
      ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
      break;
    case 4:
      // Load the higher addresses first, then load the lower addresses and
      // skip the whole block in the second instruction. This allows four W
      // registers to be popped using sp, whilst maintaining 16-byte alignment
      // for sp at all times.
      ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
      ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
      break;
    default:
      UNREACHABLE();
  }
}


void MacroAssembler::PrepareForPush(int count, int size) {
  if (sp.Is(StackPointer())) {
    // If the current stack pointer is sp, then it must be aligned to 16 bytes
    // on entry and the total size of the specified registers must also be a
    // multiple of 16 bytes.
    ASSERT((count * size) % 16 == 0);
  } else {
    // Even if the current stack pointer is not the system stack pointer (sp),
    // the system stack pointer will still be modified in order to comply with
    // ABI rules about accessing memory below the system stack pointer.
    BumpSystemStackPointer(count * size);
  }
}


void MacroAssembler::PrepareForPop(int count, int size) {
  USE(count);
  USE(size);
  if (sp.Is(StackPointer())) {
    // If the current stack pointer is sp, then it must be aligned to 16 bytes
    // on entry and the total size of the specified registers must also be a
    // multiple of 16 bytes.
    ASSERT((count * size) % 16 == 0);
  }
}


void MacroAssembler::Poke(const Register& src, const Operand& offset) {
  ASSERT(allow_macro_instructions_);
  if (offset.IsImmediate()) {
    ASSERT(offset.immediate() >= 0);
  }

  Str(src, MemOperand(StackPointer(), offset));
}


void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
  ASSERT(allow_macro_instructions_);
  if (offset.IsImmediate()) {
    ASSERT(offset.immediate() >= 0);
  }

  Ldr(dst, MemOperand(StackPointer(), offset));
}


void MacroAssembler::Claim(const Operand& size) {
  ASSERT(allow_macro_instructions_);
  if (size.IsImmediate()) {
    ASSERT(size.immediate() >= 0);
    if (sp.Is(StackPointer())) {
      ASSERT((size.immediate() % 16) == 0);
    }
  }

  if (!sp.Is(StackPointer())) {
    BumpSystemStackPointer(size);
  }

  Sub(StackPointer(), StackPointer(), size);
}


void MacroAssembler::Drop(const Operand& size) {
  ASSERT(allow_macro_instructions_);
  if (size.IsImmediate()) {
    ASSERT(size.immediate() >= 0);
    if (sp.Is(StackPointer())) {
      ASSERT((size.immediate() % 16) == 0);
    }
  }

  Add(StackPointer(), StackPointer(), size);
}


void MacroAssembler::PushCalleeSavedRegisters() {
  // Ensure that the macro-assembler doesn't use any scratch registers.
  InstructionAccurateScope scope(this);

  // This method must not be called unless the current stack pointer is sp.
  ASSERT(sp.Is(StackPointer()));

  MemOperand tos(sp, -2 * kXRegSizeInBytes, PreIndex);

  stp(d14, d15, tos);
  stp(d12, d13, tos);
  stp(d10, d11, tos);
  stp(d8, d9, tos);

  stp(x29, x30, tos);
  stp(x27, x28, tos);
  stp(x25, x26, tos);
  stp(x23, x24, tos);
  stp(x21, x22, tos);
  stp(x19, x20, tos);
}


void MacroAssembler::PopCalleeSavedRegisters() {
  // Ensure that the macro-assembler doesn't use any scratch registers.
  InstructionAccurateScope scope(this);

  // This method must not be called unless the current stack pointer is sp.
  ASSERT(sp.Is(StackPointer()));

  MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);

  ldp(x19, x20, tos);
  ldp(x21, x22, tos);
  ldp(x23, x24, tos);
  ldp(x25, x26, tos);
  ldp(x27, x28, tos);
  ldp(x29, x30, tos);

  ldp(d8, d9, tos);
  ldp(d10, d11, tos);
  ldp(d12, d13, tos);
  ldp(d14, d15, tos);
}


void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
  ASSERT(!sp.Is(StackPointer()));
  // TODO: Several callers rely on this not using scratch registers, so we use
  // the assembler directly here. However, this means that large immediate
  // values of 'space' cannot be handled.
  InstructionAccurateScope scope(this);
  sub(sp, StackPointer(), space);
}


// This is the main Printf implementation. All callee-saved registers are
// preserved, but NZCV and the caller-saved registers may be clobbered.
void MacroAssembler::PrintfNoPreserve(const char * format,
                                      const CPURegister& arg0,
                                      const CPURegister& arg1,
                                      const CPURegister& arg2,
                                      const CPURegister& arg3) {
  // We cannot handle a caller-saved stack pointer. It doesn't make much sense
  // in most cases anyway, so this restriction shouldn't be too serious.
  ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));

  // We cannot print Tmp0() or Tmp1() as they're used internally by the macro
  // assembler. We cannot print the stack pointer because it is typically used
  // to preserve caller-saved registers (using other Printf variants which
  // depend on this helper).
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg0));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg1));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg2));
  ASSERT(!AreAliased(Tmp0(), Tmp1(), StackPointer(), arg3));

  static const int kMaxArgCount = 4;
  // Assume that we have the maximum number of arguments until we know
  // otherwise.
  int arg_count = kMaxArgCount;

  // The provided arguments.
  CPURegister args[kMaxArgCount] = {arg0, arg1, arg2, arg3};

  // The PCS registers where the arguments need to end up.
  CPURegister pcs[kMaxArgCount];

  // Promote FP arguments to doubles, and integer arguments to X registers.
  // Note that FP and integer arguments cannot be mixed, but we'll check
  // AreSameSizeAndType once we've processed these promotions.
  for (int i = 0; i < kMaxArgCount; i++) {
    if (args[i].IsRegister()) {
      // Note that we use x1 onwards, because x0 will hold the format string.
      pcs[i] = Register::XRegFromCode(i + 1);
      // For simplicity, we handle all integer arguments as X registers. An X
      // register argument takes the same space as a W register argument in
      // the PCS anyway. The only limitation is that we must explicitly clear
      // the top word for W register arguments as the callee will expect it to
      // be clear.
      if (!args[i].Is64Bits()) {
        const Register& as_x = args[i].X();
        And(as_x, as_x, 0x00000000ffffffff);
        args[i] = as_x;
      }
    } else if (args[i].IsFPRegister()) {
      pcs[i] = FPRegister::DRegFromCode(i);
      // C and C++ varargs functions (such as printf) implicitly promote float
      // arguments to doubles.
      if (!args[i].Is64Bits()) {
        FPRegister s(args[i]);
        const FPRegister& as_d = args[i].D();
        Fcvt(as_d, s);
        args[i] = as_d;
      }
    } else {
      // This is the first empty (NoCPUReg) argument, so use it to set the
      // argument count and bail out.
      arg_count = i;
      break;
    }
  }
  ASSERT((arg_count >= 0) && (arg_count <= kMaxArgCount));
  // Check that every remaining argument is NoCPUReg.
  for (int i = arg_count; i < kMaxArgCount; i++) {
    ASSERT(args[i].IsNone());
  }
  ASSERT((arg_count == 0) || AreSameSizeAndType(args[0], args[1],
                                                args[2], args[3],
                                                pcs[0], pcs[1],
                                                pcs[2], pcs[3]));

  // Move the arguments into the appropriate PCS registers.
  //
  // Arranging an arbitrary list of registers into x1-x4 (or d0-d3) is
  // surprisingly complicated.
  //
  //  * For even numbers of registers, we push the arguments and then pop them
  //    into their final registers. This maintains 16-byte stack alignment in
  //    case sp is the stack pointer, since we're only handling X or D
  //    registers at this point.
  //
  //  * For odd numbers of registers, we push and pop all but one register in
  //    the same way, but the left-over register is moved directly, since we
  //    can always safely move one register without clobbering any source.
  if (arg_count >= 4) {
    Push(args[3], args[2], args[1], args[0]);
  } else if (arg_count >= 2) {
    Push(args[1], args[0]);
  }

  if ((arg_count % 2) != 0) {
    // Move the left-over register directly.
    const CPURegister& leftover_arg = args[arg_count - 1];
    const CPURegister& leftover_pcs = pcs[arg_count - 1];
    if (leftover_arg.IsRegister()) {
      Mov(Register(leftover_pcs), Register(leftover_arg));
    } else {
      Fmov(FPRegister(leftover_pcs), FPRegister(leftover_arg));
    }
  }

  if (arg_count >= 4) {
    Pop(pcs[0], pcs[1], pcs[2], pcs[3]);
  } else if (arg_count >= 2) {
    Pop(pcs[0], pcs[1]);
  }

  // Load the format string into x0, as per the procedure-call standard.
  //
  // To make the code as portable as possible, the format string is encoded
  // directly in the instruction stream. It might be cleaner to encode it in a
  // literal pool, but since Printf is usually used for debugging, it is
  // beneficial for it to be minimally dependent on other features.
  Label format_address;
  Adr(x0, &format_address);

  // Emit the format string directly in the instruction stream.
  { BlockLiteralPoolScope scope(this);
    Label after_data;
    B(&after_data);
    Bind(&format_address);
    EmitStringData(format);
    Unreachable();
    Bind(&after_data);
  }

  // We don't pass any arguments on the stack, but we still need to align the
  // C stack pointer to a 16-byte boundary for PCS compliance.
  if (!sp.Is(StackPointer())) {
    Bic(sp, StackPointer(), 0xf);
  }

  // Actually call printf. This part needs special handling for the simulator,
  // since the system printf function will use a different instruction set and
  // the procedure-call standard will not be compatible.
#ifdef USE_SIMULATOR
  { InstructionAccurateScope scope(this, kPrintfLength / kInstructionSize);
    hlt(kPrintfOpcode);
    dc32(pcs[0].type());
  }
#else
  Mov(Tmp0(), reinterpret_cast<uintptr_t>(printf));
  Blr(Tmp0());
#endif
}


void MacroAssembler::Printf(const char * format,
                            const CPURegister& arg0,
                            const CPURegister& arg1,
                            const CPURegister& arg2,
                            const CPURegister& arg3) {
  // Preserve all caller-saved registers as well as NZCV.
  // If sp is the stack pointer, PushCPURegList asserts that the size of each
  // list is a multiple of 16 bytes.
  PushCPURegList(kCallerSaved);
  PushCPURegList(kCallerSavedFP);
  // Use Tmp0() as a scratch register. It is not accepted by Printf so it will
  // never overlap an argument register.
  Mrs(Tmp0(), NZCV);
  Push(Tmp0(), xzr);

  PrintfNoPreserve(format, arg0, arg1, arg2, arg3);

  Pop(xzr, Tmp0());
  Msr(NZCV, Tmp0());
  PopCPURegList(kCallerSavedFP);
  PopCPURegList(kCallerSaved);
}
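

// Illustrative usage sketch (not part of the original source):
//   Mov(x10, 42);
//   Printf("The answer is %" PRId64 ".\n", x10);
// Caller-saved registers and NZCV are preserved across the call, at the cost
// of the extra pushes and pops around PrintfNoPreserve.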


void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
  ASSERT(allow_macro_instructions_);

#ifdef USE_SIMULATOR
  // The arguments to the trace pseudo instruction need to be contiguous in
  // memory, so make sure we don't try to emit a literal pool.
  InstructionAccurateScope scope(this, kTraceLength / kInstructionSize);

  Label start;
  bind(&start);

  // Refer to instructions-a64.h for a description of the marker and its
  // arguments.
  hlt(kTraceOpcode);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
  dc32(parameters);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
  dc32(command);
#else
  // Emit nothing on real hardware.
  USE(parameters);
  USE(command);
#endif
}


void MacroAssembler::Log(TraceParameters parameters) {
  ASSERT(allow_macro_instructions_);

#ifdef USE_SIMULATOR
  // The arguments to the log pseudo instruction need to be contiguous in
  // memory, so make sure we don't try to emit a literal pool.
  InstructionAccurateScope scope(this, kLogLength / kInstructionSize);

  Label start;
  bind(&start);

  // Refer to instructions-a64.h for a description of the marker and its
  // arguments.
  hlt(kLogOpcode);

  ASSERT(SizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
  dc32(parameters);
#else
  // Emit nothing on real hardware.
  USE(parameters);
#endif
}


void MacroAssembler::EnableInstrumentation() {
  ASSERT(!isprint(InstrumentStateEnable));
  InstructionAccurateScope scope(this, 1);
  movn(xzr, InstrumentStateEnable);
}


void MacroAssembler::DisableInstrumentation() {
  ASSERT(!isprint(InstrumentStateDisable));
  InstructionAccurateScope scope(this, 1);
  movn(xzr, InstrumentStateDisable);
}


void MacroAssembler::AnnotateInstrumentation(const char* marker_name) {
  ASSERT(strlen(marker_name) == 2);

  // We allow only printable characters in the marker names. Unprintable
  // characters are reserved for controlling features of the instrumentation.
  ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));

  InstructionAccurateScope scope(this, 1);
  movn(xzr, (marker_name[1] << 8) | marker_name[0]);
}

}  // namespace vixl