VIXL Release 1.9
Refer to the README.md and LICENCE files for details.
diff --git a/README.md b/README.md
index a2f6e4f..6a57632 100644
--- a/README.md
+++ b/README.md
@@ -1,44 +1,24 @@
-VIXL: AArch64 Runtime Code Generation Library Version 1.8
+VIXL: AArch64 Runtime Code Generation Library Version 1.9
=========================================================
Contents:
- * Requirements
* Overview
+ * Requirements
* Known limitations
* Usage
-Requirements
-============
-
-To build VIXL the following software is required:
-
- 1. Python 2.7
- 2. SCons 2.0
- 3. GCC 4.6+
-
-A 64-bit host machine is required, implementing an LP64 data model. VIXL has
-only been tested using GCC on AArch64 Debian and amd64 Ubuntu systems.
-
-To run the linter stage of the tests, the following software is also required:
-
- 1. Git
- 2. [Google's `cpplint.py`][cpplint]
-
-Refer to the 'Usage' section for details.
-
-
Overview
========
-VIXL is made of three components.
+VIXL contains three components.
- 1. A programmatic assembler to generate A64 code at runtime. The assembler
+ 1. A programmatic **assembler** to generate A64 code at runtime. The assembler
abstracts some of the constraints of the A64 ISA; for example, most
instructions support any immediate.
- 2. A disassembler which can print any instruction emitted by the assembler.
- 3. A simulator which can simulate any instruction emitted by the assembler.
+ 2. A **disassembler** that can print any instruction emitted by the assembler.
+ 3. A **simulator** that can simulate any instruction emitted by the assembler.
The simulator allows generated code to be run on another architecture
without the need for a full ISA model.
@@ -48,11 +28,32 @@
[Changelog](doc/changelog.md).
+Requirements
+============
+
+To build VIXL the following software is required:
+
+ 1. Python 2.7
+ 2. SCons 2.0
+ 3. GCC 4.8+ or Clang 3.4+
+
+A 64-bit host machine is required, implementing an LP64 data model. VIXL has
+been tested with GCC on AArch64 Debian, and with GCC and Clang on amd64
+Ubuntu systems.
+
+To run the linter stage of the tests, the following software is also required:
+
+ 1. Git
+ 2. [Google's `cpplint.py`][cpplint]
+
+Refer to the 'Usage' section for details.
+
+
Known Limitations
=================
-VIXL was developed to target JavaScript engines so a number of features from A64
-were deemed unnecessary:
+VIXL was developed for JavaScript engines, so a number of features from A64
+were deemed unnecessary:
* Limited rounding mode support for floating point.
* Limited support for synchronisation instructions.
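
The Overview section of the updated README above describes how the assembler, disassembler and simulator fit together. As an illustration, here is a minimal sketch, modelled loosely on `examples/getting-started.cc`, of generating a tiny function with the MacroAssembler and running it on the simulator. The buffer size, the generated snippet and the use of `GetLabelAddress` to obtain the entry point are illustrative assumptions, not part of this release.

```cpp
#include <cinttypes>
#include <cstdio>

#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/simulator-a64.h"

using namespace vixl;

int main() {
  // Assemble a tiny function: x0 = x0 + 42.
  byte buffer[4096];                          // Buffer size is an assumption.
  MacroAssembler masm(buffer, sizeof(buffer));
  Label entry;
  masm.Bind(&entry);
  masm.Add(x0, x0, 42);
  masm.Ret();
  masm.FinalizeCode();

  // Run the generated code on the simulator component. This path only exists
  // in builds compiled with the USE_SIMULATOR define.
  Decoder decoder;
  Simulator simulator(&decoder);
  simulator.set_xreg(0, 100);
  // GetLabelAddress is the accessor used by the bundled examples; treat the
  // exact spelling as an assumption.
  simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&entry));
  printf("x0 = %" PRId64 "\n", simulator.xreg(0));
  return 0;
}
```

Building with the SCons setup shown below and `simulator=on` provides the `USE_SIMULATOR` define that this sketch relies on.
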
diff --git a/SConstruct b/SConstruct
index 47e3e86..e29e9a7 100644
--- a/SConstruct
+++ b/SConstruct
@@ -49,18 +49,19 @@
# Global configuration.
PROJ_SRC_DIR = 'src'
PROJ_SRC_FILES = '''
-src/a64/assembler-a64.cc
-src/a64/cpu-a64.cc
-src/a64/debugger-a64.cc
-src/a64/decoder-a64.cc
-src/a64/disasm-a64.cc
-src/a64/instructions-a64.cc
-src/a64/instrument-a64.cc
-src/a64/logic-a64.cc
-src/a64/macro-assembler-a64.cc
-src/a64/simulator-a64.cc
-src/code-buffer.cc
-src/utils.cc
+src/vixl/a64/assembler-a64.cc
+src/vixl/a64/cpu-a64.cc
+src/vixl/a64/debugger-a64.cc
+src/vixl/a64/decoder-a64.cc
+src/vixl/a64/disasm-a64.cc
+src/vixl/a64/instructions-a64.cc
+src/vixl/a64/instrument-a64.cc
+src/vixl/a64/logic-a64.cc
+src/vixl/a64/macro-assembler-a64.cc
+src/vixl/a64/simulator-a64.cc
+src/vixl/code-buffer.cc
+src/vixl/compiler-intrinsics.cc
+src/vixl/utils.cc
'''.split()
PROJ_EXAMPLES_DIR = 'examples'
PROJ_EXAMPLES_SRC_FILES = '''
@@ -119,9 +120,7 @@
benchmarks/bench-branch-link-masm.cc
'''.split()
}
-RELEASE_OBJ_DIR = 'obj/release'
-DEBUG_OBJ_DIR = 'obj/debug'
-
+OBJ_DIR = 'obj'
# Helper functions.
def abort(message):
@@ -133,6 +132,10 @@
return map(lambda x: os.path.join(obj_dir, x), src_files)
+def is_compiler(compiler):
+ return env['CXX'].find(compiler) == 0
+
+
def create_variant(obj_dir, targets_dir):
VariantDir(os.path.join(obj_dir, PROJ_SRC_DIR), PROJ_SRC_DIR)
for directory in targets_dir.itervalues():
@@ -146,10 +149,9 @@
sim_default = 'off' if platform.machine() == 'aarch64' else 'on'
args.Add(EnumVariable('simulator', 'build for the simulator', sim_default,
allowed_values = ['on', 'off']))
+args.Add('std', 'c++ standard')
# Configure the environment.
-create_variant(RELEASE_OBJ_DIR, TARGET_SRC_DIR)
-create_variant(DEBUG_OBJ_DIR, TARGET_SRC_DIR)
env = Environment(variables=args)
# Commandline help.
@@ -175,18 +177,32 @@
env.Append(LINKFLAGS = os.environ.get('LINKFLAGS').split())
# Always look in 'src' for include files.
+# TODO: Restore the '-Wunreachable-code' flag. This flag breaks builds for clang
+# 3.4 with std=c++98. So we need to re-enable this conditionally when clang is at
+# version 3.5 or later.
env.Append(CPPPATH = [PROJ_SRC_DIR])
env.Append(CPPFLAGS = ['-Wall',
'-Werror',
'-fdiagnostics-show-option',
'-Wextra',
+ '-Wredundant-decls',
'-pedantic',
# Explicitly enable the write-strings warning. VIXL uses
# const correctly when handling string constants.
'-Wwrite-strings'])
build_suffix = ''
+std_path = 'default-std'
+if 'std' in env:
+ env.Append(CPPFLAGS = ['-std=' + env['std']])
+ std_path = env['std']
+
+if is_compiler('clang++'):
+ # This warning only works for Clang, when compiling the code base as C++11
+ # or newer. The compiler does not complain if the option is passed when
+ # compiling earlier C++ standards.
+ env.Append(CPPFLAGS = ['-Wimplicit-fallthrough'])
if env['simulator'] == 'on':
env.Append(CPPFLAGS = ['-DUSE_SIMULATOR'])
@@ -196,11 +212,9 @@
env.Append(CPPFLAGS = ['-g', '-DVIXL_DEBUG'])
# Append the debug mode suffix to the executable name.
build_suffix += '_g'
- build_dir = DEBUG_OBJ_DIR
else:
# Release mode.
env.Append(CPPFLAGS = ['-O3'])
- build_dir = RELEASE_OBJ_DIR
process = subprocess.Popen(env['CXX'] + ' --version | grep "gnu.*4\.8"',
shell = True,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
@@ -214,6 +228,9 @@
# GCC 4.8.
env.Append(CPPFLAGS = ['-Wno-maybe-uninitialized'])
+# Configure build directory
+build_dir = os.path.join(OBJ_DIR, env['mode'], env['CXX'], std_path, '')
+create_variant(build_dir, TARGET_SRC_DIR)
# The lists of available targets and target names.
targets = []
@@ -226,7 +243,7 @@
# The vixl library.
-libvixl = env.Library('vixl' + build_suffix,
+libvixl = env.Library(build_dir + 'vixl' + build_suffix,
list_target(build_dir, PROJ_SRC_FILES))
create_alias('libvixl', libvixl)
@@ -238,7 +255,7 @@
VariantDir(test_ex_vdir, '.')
test_ex_obj = env.Object(list_target(test_ex_vdir, PROJ_EXAMPLES_SRC_FILES),
CPPFLAGS = env['CPPFLAGS'] + ['-DTEST_EXAMPLES'])
-test = env.Program('test-runner' + build_suffix,
+test = env.Program(build_dir + 'test-runner' + build_suffix,
list_target(build_dir, TARGET_SRC_FILES['test']) +
test_ex_obj + libvixl,
CPPPATH = env['CPPPATH'] + [PROJ_EXAMPLES_DIR])
@@ -248,7 +265,7 @@
benchmarks = ['bench-dataop', 'bench-branch', 'bench-branch-link',
'bench-branch-masm', 'bench-branch-link-masm']
for bench in benchmarks:
- prog = env.Program(bench + build_suffix,
+ prog = env.Program(build_dir + bench + build_suffix,
list_target(build_dir, TARGET_SRC_FILES[bench]) + libvixl)
create_alias(bench, prog)
# Alias to build all benchmarks.
@@ -258,7 +275,7 @@
examples = []
for example in PROJ_EXAMPLES_SRC_FILES:
example_name = "example-" + os.path.splitext(os.path.basename(example))[0]
- prog = env.Program(example_name,
+ prog = env.Program(build_dir + example_name,
[os.path.join(build_dir, example)] + libvixl,
CPPPATH = env['CPPPATH'] + [PROJ_EXAMPLES_DIR])
create_alias(example_name, prog)
diff --git a/benchmarks/bench-branch-link-masm.cc b/benchmarks/bench-branch-link-masm.cc
index 2f6c65e..115f402 100644
--- a/benchmarks/bench-branch-link-masm.cc
+++ b/benchmarks/bench-branch-link-masm.cc
@@ -24,9 +24,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
-#include "globals.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/globals.h"
using namespace vixl;
diff --git a/benchmarks/bench-branch-link.cc b/benchmarks/bench-branch-link.cc
index 6448566..b18cd8b 100644
--- a/benchmarks/bench-branch-link.cc
+++ b/benchmarks/bench-branch-link.cc
@@ -24,9 +24,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
-#include "globals.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/globals.h"
using namespace vixl;
diff --git a/benchmarks/bench-branch-masm.cc b/benchmarks/bench-branch-masm.cc
index 910403c..23cbd1e 100644
--- a/benchmarks/bench-branch-masm.cc
+++ b/benchmarks/bench-branch-masm.cc
@@ -24,10 +24,10 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "globals.h"
+#include "vixl/globals.h"
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
using namespace vixl;
diff --git a/benchmarks/bench-branch.cc b/benchmarks/bench-branch.cc
index 227ba95..706ecfd 100644
--- a/benchmarks/bench-branch.cc
+++ b/benchmarks/bench-branch.cc
@@ -24,9 +24,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
-#include "globals.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/globals.h"
using namespace vixl;
diff --git a/benchmarks/bench-dataop.cc b/benchmarks/bench-dataop.cc
index 431e991..cf9faed 100644
--- a/benchmarks/bench-dataop.cc
+++ b/benchmarks/bench-dataop.cc
@@ -24,9 +24,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
-#include "globals.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/globals.h"
using namespace vixl;
diff --git a/doc/changelog.md b/doc/changelog.md
index 4881ef4..55943e5 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,6 +1,13 @@
VIXL Change Log
===============
+* 1.9
+ + Improved compatibility with Android build system.
+ + Improved compatibility with Clang toolchain.
+ + Added support for `umulh` instruction.
+ + Added support for `fcmpe` and `fccmpe` instructions.
+ + Other small bug fixes and improvements.
+
* 1.8
+ Complete NEON instruction set support.
+ Support long branches using veneers.
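
To make the new 1.9 instructions listed above concrete, here is a small hedged sketch that emits `umulh`, `fcmpe` and `fccmpe` with the raw Assembler (the corresponding assembler changes appear later in this diff) and prints them with the disassembler. The buffer size and the hard-coded instruction count in the loop are illustrative assumptions.

```cpp
#include <cstdio>

#include "vixl/a64/assembler-a64.h"
#include "vixl/a64/decoder-a64.h"
#include "vixl/a64/disasm-a64.h"

using namespace vixl;

int main() {
  byte buffer[1024];                 // Buffer size is an assumption.
  Assembler assm(buffer, sizeof(buffer));

  assm.umulh(x0, x1, x2);            // New in 1.9: high 64 bits of x1 * x2.
  assm.fcmpe(s0, 0.0);               // New in 1.9: signalling compare with +0.0.
  assm.fccmpe(d1, d2, NoFlag, eq);   // New in 1.9: conditional signalling compare.
  assm.FinalizeCode();

  // Decode and print the three emitted instructions.
  Decoder decoder;
  PrintDisassembler disasm(stdout);
  decoder.AppendVisitor(&disasm);
  for (unsigned i = 0; i < 3; i++) {
    decoder.Decode(reinterpret_cast<Instruction*>(buffer + i * kInstructionSize));
  }
  return 0;
}
```
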
diff --git a/examples/custom-disassembler.h b/examples/custom-disassembler.h
index 382a55d..4fcc693 100644
--- a/examples/custom-disassembler.h
+++ b/examples/custom-disassembler.h
@@ -27,7 +27,7 @@
#ifndef VIXL_EXAMPLES_CUSTOM_DISASSEMBLER_H_
#define VIXL_EXAMPLES_CUSTOM_DISASSEMBLER_H_
-#include "a64/disasm-a64.h"
+#include "vixl/a64/disasm-a64.h"
using namespace vixl;
diff --git a/examples/examples.h b/examples/examples.h
index 5d74e29..8c51589 100644
--- a/examples/examples.h
+++ b/examples/examples.h
@@ -27,9 +27,9 @@
#ifndef VIXL_EXAMPLE_EXAMPLES_H_
# define VIXL_EXAMPLE_EXAMPLES_H_
-#include "a64/simulator-a64.h"
-#include "a64/debugger-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/simulator-a64.h"
+#include "vixl/a64/debugger-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
using namespace vixl;
diff --git a/examples/getting-started.cc b/examples/getting-started.cc
index 27e9dd7..0987429 100644
--- a/examples/getting-started.cc
+++ b/examples/getting-started.cc
@@ -24,8 +24,8 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/simulator-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/simulator-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
#define BUF_SIZE (4096)
#define __ masm->
diff --git a/examples/neon-matrix-multiply.cc b/examples/neon-matrix-multiply.cc
index 88123fb..6a27150 100644
--- a/examples/neon-matrix-multiply.cc
+++ b/examples/neon-matrix-multiply.cc
@@ -117,7 +117,7 @@
float mat1[kLength], mat2[kLength], output[kLength];
// Initialise the output matrix to the zero matrix.
- memset(output, 0, sizeof(float)*kLength);
+ memset(output, 0, sizeof(output[0]) * kLength);
// Fill the two input matrices with some 32 bit floating point values.
// Array initialisation using curly brackets is also possible like so:
diff --git a/src/a64/assembler-a64.cc b/src/vixl/a64/assembler-a64.cc
similarity index 98%
rename from src/a64/assembler-a64.cc
rename to src/vixl/a64/assembler-a64.cc
index 6af2291..9f85e8f 100644
--- a/src/a64/assembler-a64.cc
+++ b/src/vixl/a64/assembler-a64.cc
@@ -26,7 +26,7 @@
#include <cmath>
-#include "a64/assembler-a64.h"
+#include "vixl/a64/assembler-a64.h"
namespace vixl {
@@ -35,7 +35,7 @@
if (IsEmpty()) {
return NoCPUReg;
}
- int index = CountTrailingZeros(list_, kRegListSizeInBits);
+ int index = CountTrailingZeros(list_);
VIXL_ASSERT((1 << index) & list_);
Remove(index);
return CPURegister(index, size_, type_);
@@ -47,7 +47,7 @@
if (IsEmpty()) {
return NoCPUReg;
}
- int index = CountLeadingZeros(list_, kRegListSizeInBits);
+ int index = CountLeadingZeros(list_);
index = kRegListSizeInBits - 1 - index;
VIXL_ASSERT((1 << index) & list_);
Remove(index);
@@ -463,6 +463,12 @@
}
+void MemOperand::AddOffset(int64_t offset) {
+ VIXL_ASSERT(IsImmediateOffset());
+ offset_ += offset;
+}
+
+
// Assembler
Assembler::Assembler(byte* buffer, size_t capacity,
PositionIndependentCodeOption pic)
@@ -1349,6 +1355,14 @@
}
+void Assembler::umulh(const Register& xd,
+ const Register& xn,
+ const Register& xm) {
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits());
+ DataProcessing3Source(xd, xn, xm, xzr, UMULH_x);
+}
+
+
void Assembler::udiv(const Register& rd,
const Register& rn,
const Register& rm) {
@@ -2628,33 +2642,78 @@
}
-void Assembler::fcmp(const VRegister& vn,
- const VRegister& vm) {
+void Assembler::FPCompareMacro(const VRegister& vn,
+ double value,
+ FPTrapFlags trap) {
+ USE(value);
+ // Although the fcmp{e} instructions can strictly only take an immediate
+ // value of +0.0, we don't need to check for -0.0 because the sign of 0.0
+ // doesn't affect the result of the comparison.
+ VIXL_ASSERT(value == 0.0);
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ Instr op = (trap == EnableTrap) ? FCMPE_zero : FCMP_zero;
+ Emit(FPType(vn) | op | Rn(vn));
+}
+
+
+void Assembler::FPCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ FPTrapFlags trap) {
VIXL_ASSERT(vn.Is1S() || vn.Is1D());
VIXL_ASSERT(vn.IsSameSizeAndType(vm));
- Emit(FPType(vn) | FCMP | Rm(vm) | Rn(vn));
+ Instr op = (trap == EnableTrap) ? FCMPE : FCMP;
+ Emit(FPType(vn) | op | Rm(vm) | Rn(vn));
+}
+
+
+void Assembler::fcmp(const VRegister& vn,
+ const VRegister& vm) {
+ FPCompareMacro(vn, vm, DisableTrap);
+}
+
+
+void Assembler::fcmpe(const VRegister& vn,
+ const VRegister& vm) {
+ FPCompareMacro(vn, vm, EnableTrap);
}
void Assembler::fcmp(const VRegister& vn,
double value) {
- USE(value);
- // Although the fcmp instruction can strictly only take an immediate value of
- // +0.0, we don't need to check for -0.0 because the sign of 0.0 doesn't
- // affect the result of the comparison.
- VIXL_ASSERT(value == 0.0);
- VIXL_ASSERT(vn.Is1S() || vn.Is1D());
- Emit(FPType(vn) | FCMP_zero | Rn(vn));
+ FPCompareMacro(vn, value, DisableTrap);
}
+void Assembler::fcmpe(const VRegister& vn,
+ double value) {
+ FPCompareMacro(vn, value, EnableTrap);
+}
+
+
+void Assembler::FPCCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond,
+ FPTrapFlags trap) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(vn.IsSameSizeAndType(vm));
+ Instr op = (trap == EnableTrap) ? FCCMPE : FCCMP;
+ Emit(FPType(vn) | op | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv));
+}
+
void Assembler::fccmp(const VRegister& vn,
const VRegister& vm,
StatusFlags nzcv,
Condition cond) {
- VIXL_ASSERT(vn.Is1S() || vn.Is1D());
- VIXL_ASSERT(vn.IsSameSizeAndType(vm));
- Emit(FPType(vn) | FCCMP | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv));
+ FPCCompareMacro(vn, vm, nzcv, cond, DisableTrap);
+}
+
+
+void Assembler::fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond) {
+ FPCCompareMacro(vn, vm, nzcv, cond, EnableTrap);
}
@@ -4948,6 +5007,7 @@
bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) {
+ VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
bool offset_is_size_multiple =
(((offset >> access_size) << access_size) == offset);
return offset_is_size_multiple && is_int7(offset >> access_size);
@@ -4955,6 +5015,7 @@
bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) {
+ VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
bool offset_is_size_multiple =
(((offset >> access_size) << access_size) == offset);
return offset_is_size_multiple && is_uint12(offset >> access_size);
@@ -5319,10 +5380,8 @@
}
}
- int number_of_unique_regs =
- CountSetBits(unique_regs, sizeof(unique_regs) * 8);
- int number_of_unique_fpregs =
- CountSetBits(unique_fpregs, sizeof(unique_fpregs) * 8);
+ int number_of_unique_regs = CountSetBits(unique_regs);
+ int number_of_unique_fpregs = CountSetBits(unique_fpregs);
VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
VIXL_ASSERT(number_of_valid_fpregs >= number_of_unique_fpregs);
diff --git a/src/a64/assembler-a64.h b/src/vixl/a64/assembler-a64.h
similarity index 98%
rename from src/a64/assembler-a64.h
rename to src/vixl/a64/assembler-a64.h
index 5f24105..39763b3 100644
--- a/src/a64/assembler-a64.h
+++ b/src/vixl/a64/assembler-a64.h
@@ -28,11 +28,11 @@
#define VIXL_A64_ASSEMBLER_A64_H_
-#include "globals.h"
-#include "invalset.h"
-#include "utils.h"
-#include "code-buffer.h"
-#include "a64/instructions-a64.h"
+#include "vixl/globals.h"
+#include "vixl/invalset.h"
+#include "vixl/utils.h"
+#include "vixl/code-buffer.h"
+#include "vixl/a64/instructions-a64.h"
namespace vixl {
@@ -55,6 +55,7 @@
kInvalid = 0,
kRegister,
kVRegister,
+ kFPRegister = kVRegister,
kNoRegister
};
@@ -556,6 +557,10 @@
const CPURegList& list_3,
const CPURegList& list_4);
+ bool Overlaps(const CPURegList& other) const {
+ return (type_ == other.type_) && ((list_ & other.list_) != 0);
+ }
+
RegList list() const {
VIXL_ASSERT(IsValid());
return list_;
@@ -600,7 +605,7 @@
int Count() const {
VIXL_ASSERT(IsValid());
- return CountSetBits(list_, kRegListSizeInBits);
+ return CountSetBits(list_);
}
unsigned RegisterSizeInBits() const {
@@ -630,7 +635,7 @@
// AAPCS64 callee-saved registers.
extern const CPURegList kCalleeSaved;
-extern const CPURegList kCalleeSavedFP;
+extern const CPURegList kCalleeSavedV;
// AAPCS64 caller-saved registers. Note that this includes lr.
@@ -710,17 +715,17 @@
explicit MemOperand(Register base,
int64_t offset = 0,
AddrMode addrmode = Offset);
- explicit MemOperand(Register base,
- Register regoffset,
- Shift shift = LSL,
- unsigned shift_amount = 0);
- explicit MemOperand(Register base,
- Register regoffset,
- Extend extend,
- unsigned shift_amount = 0);
- explicit MemOperand(Register base,
- const Operand& offset,
- AddrMode addrmode = Offset);
+ MemOperand(Register base,
+ Register regoffset,
+ Shift shift = LSL,
+ unsigned shift_amount = 0);
+ MemOperand(Register base,
+ Register regoffset,
+ Extend extend,
+ unsigned shift_amount = 0);
+ MemOperand(Register base,
+ const Operand& offset,
+ AddrMode addrmode = Offset);
const Register& base() const { return base_; }
const Register& regoffset() const { return regoffset_; }
@@ -734,6 +739,8 @@
bool IsPreIndex() const;
bool IsPostIndex() const;
+ void AddOffset(int64_t offset);
+
private:
Register base_;
Register regoffset_;
@@ -1606,6 +1613,11 @@
umaddl(rd, rn, rm, xzr);
}
+ // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
+ void umulh(const Register& xd,
+ const Register& xn,
+ const Register& xm);
+
// Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
void smsubl(const Register& rd,
const Register& rn,
@@ -2022,18 +2034,44 @@
// FP round to integer, towards zero.
void frintz(const VRegister& vd, const VRegister& vn);
+ void FPCompareMacro(const VRegister& vn,
+ double value,
+ FPTrapFlags trap);
+
+ void FPCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ FPTrapFlags trap);
+
// FP compare registers.
void fcmp(const VRegister& vn, const VRegister& vm);
// FP compare immediate.
void fcmp(const VRegister& vn, double value);
+ void FPCCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond,
+ FPTrapFlags trap);
+
// FP conditional compare.
void fccmp(const VRegister& vn,
const VRegister& vm,
StatusFlags nzcv,
Condition cond);
+ // FP signaling compare registers.
+ void fcmpe(const VRegister& vn, const VRegister& vm);
+
+ // FP signaling compare immediate.
+ void fcmpe(const VRegister& vn, double value);
+
+ // FP conditional signaling compare.
+ void fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond);
+
// FP conditional select.
void fcsel(const VRegister& vd,
const VRegister& vn,
@@ -3949,8 +3987,8 @@
unsigned* n = NULL,
unsigned* imm_s = NULL,
unsigned* imm_r = NULL);
- static bool IsImmLSPair(int64_t offset, unsigned size);
- static bool IsImmLSScaled(int64_t offset, unsigned size);
+ static bool IsImmLSPair(int64_t offset, unsigned access_size);
+ static bool IsImmLSScaled(int64_t offset, unsigned access_size);
static bool IsImmLSUnscaled(int64_t offset);
static bool IsImmMovn(uint64_t imm, unsigned reg_size);
static bool IsImmMovz(uint64_t imm, unsigned reg_size);
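
The header changes above also add `CPURegList::Overlaps` and `MemOperand::AddOffset`. The following hedged sketch shows one way calling code might use them; the helper functions and register choices are hypothetical, not part of VIXL.

```cpp
#include "vixl/a64/macro-assembler-a64.h"

using namespace vixl;

// Hypothetical helper: load two consecutive 32-bit words by bumping the
// immediate offset in place with the new MemOperand::AddOffset().
// AddOffset() asserts that the operand uses an immediate-offset addressing
// mode, so 'addr' must not be register-offset or pre/post-indexed.
void LoadTwoWords(MacroAssembler* masm,
                  const Register& lo,
                  const Register& hi,
                  MemOperand addr) {
  masm->Ldr(lo, addr);
  addr.AddOffset(kWRegSizeInBytes);  // Advance past the first word.
  masm->Ldr(hi, addr);
}

// Overlaps() reports whether two lists of the same register type share any
// register. The AAPCS64 caller- and callee-saved X-register lists are
// disjoint, so this is expected to return false.
bool CallerCalleeSavedOverlap() {
  return kCallerSaved.Overlaps(kCalleeSaved);
}
```
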
diff --git a/src/a64/constants-a64.h b/src/vixl/a64/constants-a64.h
similarity index 99%
rename from src/a64/constants-a64.h
rename to src/vixl/a64/constants-a64.h
index 0791575..2caa73a 100644
--- a/src/a64/constants-a64.h
+++ b/src/vixl/a64/constants-a64.h
@@ -225,6 +225,11 @@
return static_cast<Condition>(cond ^ 1);
}
+enum FPTrapFlags {
+ EnableTrap = 1,
+ DisableTrap = 0
+};
+
enum FlagsUpdate {
SetFlags = 1,
LeaveFlags = 0
@@ -1092,8 +1097,10 @@
FCMP_zero = FCMP_s_zero,
FCMPE_s = FPCompareFixed | 0x00000010,
FCMPE_d = FPCompareFixed | FP64 | 0x00000010,
+ FCMPE = FCMPE_s,
FCMPE_s_zero = FPCompareFixed | 0x00000018,
- FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018
+ FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018,
+ FCMPE_zero = FCMPE_s_zero
};
// Floating point conditional compare.
diff --git a/src/a64/cpu-a64.cc b/src/vixl/a64/cpu-a64.cc
similarity index 98%
rename from src/a64/cpu-a64.cc
rename to src/vixl/a64/cpu-a64.cc
index f71a065..7a33551 100644
--- a/src/a64/cpu-a64.cc
+++ b/src/vixl/a64/cpu-a64.cc
@@ -24,8 +24,8 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "utils.h"
-#include "a64/cpu-a64.h"
+#include "vixl/utils.h"
+#include "vixl/a64/cpu-a64.h"
namespace vixl {
diff --git a/src/a64/cpu-a64.h b/src/vixl/a64/cpu-a64.h
similarity index 97%
rename from src/a64/cpu-a64.h
rename to src/vixl/a64/cpu-a64.h
index 71e7fd4..cdf09a6 100644
--- a/src/a64/cpu-a64.h
+++ b/src/vixl/a64/cpu-a64.h
@@ -27,8 +27,8 @@
#ifndef VIXL_CPU_A64_H
#define VIXL_CPU_A64_H
-#include "globals.h"
-#include "instructions-a64.h"
+#include "vixl/globals.h"
+#include "vixl/a64/instructions-a64.h"
namespace vixl {
diff --git a/src/a64/debugger-a64.cc b/src/vixl/a64/debugger-a64.cc
similarity index 99%
rename from src/a64/debugger-a64.cc
rename to src/vixl/a64/debugger-a64.cc
index e412e9c..1a65bd3 100644
--- a/src/a64/debugger-a64.cc
+++ b/src/vixl/a64/debugger-a64.cc
@@ -26,7 +26,7 @@
#ifdef USE_SIMULATOR
-#include "a64/debugger-a64.h"
+#include "vixl/a64/debugger-a64.h"
namespace vixl {
@@ -645,7 +645,8 @@
case BRK:
DoBreakpoint(instr);
return;
- case HLT: // Fall through.
+ case HLT:
+ VIXL_FALLTHROUGH();
default: Simulator::VisitException(instr);
}
}
@@ -994,6 +995,7 @@
break;
case 'i':
if (length == 1) return new Format<uint32_t>("%08" PRIx32, 'i');
+ VIXL_FALLTHROUGH();
default: return NULL;
}
diff --git a/src/a64/debugger-a64.h b/src/vixl/a64/debugger-a64.h
similarity index 96%
rename from src/a64/debugger-a64.h
rename to src/vixl/a64/debugger-a64.h
index fbc5b59..aecd620 100644
--- a/src/a64/debugger-a64.h
+++ b/src/vixl/a64/debugger-a64.h
@@ -32,10 +32,10 @@
#include <errno.h>
#include <vector>
-#include "globals.h"
-#include "utils.h"
-#include "a64/constants-a64.h"
-#include "a64/simulator-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/constants-a64.h"
+#include "vixl/a64/simulator-a64.h"
namespace vixl {
diff --git a/src/a64/decoder-a64.cc b/src/vixl/a64/decoder-a64.cc
similarity index 99%
rename from src/a64/decoder-a64.cc
rename to src/vixl/a64/decoder-a64.cc
index 58834be..5ba2d3c 100644
--- a/src/a64/decoder-a64.cc
+++ b/src/vixl/a64/decoder-a64.cc
@@ -24,9 +24,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "globals.h"
-#include "utils.h"
-#include "a64/decoder-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/decoder-a64.h"
namespace vixl {
@@ -488,6 +488,7 @@
case 6: {
if (instr->Bit(29) == 0x1) {
VisitUnallocated(instr);
+ VIXL_FALLTHROUGH();
} else {
if (instr->Bit(30) == 0) {
if ((instr->Bit(15) == 0x1) ||
diff --git a/src/a64/decoder-a64.h b/src/vixl/a64/decoder-a64.h
similarity index 99%
rename from src/a64/decoder-a64.h
rename to src/vixl/a64/decoder-a64.h
index 81cd0c2..4f4f19c 100644
--- a/src/a64/decoder-a64.h
+++ b/src/vixl/a64/decoder-a64.h
@@ -29,8 +29,8 @@
#include <list>
-#include "globals.h"
-#include "a64/instructions-a64.h"
+#include "vixl/globals.h"
+#include "vixl/a64/instructions-a64.h"
// List macro containing all visitors needed by the decoder class.
diff --git a/src/a64/disasm-a64.cc b/src/vixl/a64/disasm-a64.cc
similarity index 99%
rename from src/a64/disasm-a64.cc
rename to src/vixl/a64/disasm-a64.cc
index 37b4b5c..a12d028 100644
--- a/src/a64/disasm-a64.cc
+++ b/src/vixl/a64/disasm-a64.cc
@@ -25,7 +25,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstdlib>
-#include "a64/disasm-a64.h"
+#include "vixl/a64/disasm-a64.h"
namespace vixl {
@@ -890,9 +890,9 @@
case LDUR_s: mnemonic = "ldur"; form = form_s; break;
case LDUR_d: mnemonic = "ldur"; form = form_d; break;
case LDUR_q: mnemonic = "ldur"; form = form_q; break;
- case LDURSB_x: form = form_x; // Fall through.
+ case LDURSB_x: form = form_x; VIXL_FALLTHROUGH();
case LDURSB_w: mnemonic = "ldursb"; break;
- case LDURSH_x: form = form_x; // Fall through.
+ case LDURSH_x: form = form_x; VIXL_FALLTHROUGH();
case LDURSH_w: mnemonic = "ldursh"; break;
case LDURSW_x: mnemonic = "ldursw"; form = form_x; break;
case PRFUM: mnemonic = "prfum"; form = form_prefetch; break;
@@ -1054,9 +1054,13 @@
switch (instr->Mask(FPCompareMask)) {
case FCMP_s_zero:
- case FCMP_d_zero: form = form_zero; // Fall through.
+ case FCMP_d_zero: form = form_zero; VIXL_FALLTHROUGH();
case FCMP_s:
case FCMP_d: mnemonic = "fcmp"; break;
+ case FCMPE_s_zero:
+ case FCMPE_d_zero: form = form_zero; VIXL_FALLTHROUGH();
+ case FCMPE_s:
+ case FCMPE_d: mnemonic = "fcmpe"; break;
default: form = "(FPCompare)";
}
Format(instr, mnemonic, form);
@@ -2884,8 +2888,8 @@
field_len = 3;
}
- CPURegister::RegisterType reg_type;
- unsigned reg_size;
+ CPURegister::RegisterType reg_type = CPURegister::kRegister;
+ unsigned reg_size = kXRegSize;
if (reg_prefix == 'R') {
reg_prefix = instr->SixtyFourBits() ? 'X' : 'W';
@@ -2913,8 +2917,6 @@
return field_len;
default:
VIXL_UNREACHABLE();
- reg_type = CPURegister::kRegister;
- reg_size = kXRegSize;
}
if ((reg_type == CPURegister::kRegister) &&
@@ -3087,6 +3089,7 @@
return 0;
}
}
+ VIXL_FALLTHROUGH();
}
case 'L': { // IVLSLane[0123] - suffix indicates access size shift.
AppendToOutput("%d", instr->NEONLSIndex(format[8] - '0'));
@@ -3236,7 +3239,8 @@
switch (format[1]) {
case 'D': { // HDP.
VIXL_ASSERT(instr->ShiftDP() != ROR);
- } // Fall through.
+ VIXL_FALLTHROUGH();
+ }
case 'L': { // HLo.
if (instr->ImmDPShift() != 0) {
const char* shift_type[] = {"lsl", "lsr", "asr", "ror"};
diff --git a/src/a64/disasm-a64.h b/src/vixl/a64/disasm-a64.h
similarity index 97%
rename from src/a64/disasm-a64.h
rename to src/vixl/a64/disasm-a64.h
index dcb6f08..e203156 100644
--- a/src/a64/disasm-a64.h
+++ b/src/vixl/a64/disasm-a64.h
@@ -27,11 +27,11 @@
#ifndef VIXL_A64_DISASM_A64_H
#define VIXL_A64_DISASM_A64_H
-#include "globals.h"
-#include "utils.h"
-#include "instructions-a64.h"
-#include "decoder-a64.h"
-#include "assembler-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/a64/decoder-a64.h"
+#include "vixl/a64/assembler-a64.h"
namespace vixl {
diff --git a/src/a64/instructions-a64.cc b/src/vixl/a64/instructions-a64.cc
similarity index 99%
rename from src/a64/instructions-a64.cc
rename to src/vixl/a64/instructions-a64.cc
index f9f4a42..6f6b5d2 100644
--- a/src/a64/instructions-a64.cc
+++ b/src/vixl/a64/instructions-a64.cc
@@ -24,8 +24,8 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/instructions-a64.h"
-#include "a64/assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/a64/assembler-a64.h"
namespace vixl {
diff --git a/src/a64/instructions-a64.h b/src/vixl/a64/instructions-a64.h
similarity index 99%
rename from src/a64/instructions-a64.h
rename to src/vixl/a64/instructions-a64.h
index d3bbd9c..64da966 100644
--- a/src/a64/instructions-a64.h
+++ b/src/vixl/a64/instructions-a64.h
@@ -27,9 +27,9 @@
#ifndef VIXL_A64_INSTRUCTIONS_A64_H_
#define VIXL_A64_INSTRUCTIONS_A64_H_
-#include "globals.h"
-#include "utils.h"
-#include "a64/constants-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/constants-a64.h"
namespace vixl {
// ISA constants. --------------------------------------------------------------
diff --git a/src/a64/instrument-a64.cc b/src/vixl/a64/instrument-a64.cc
similarity index 97%
rename from src/a64/instrument-a64.cc
rename to src/vixl/a64/instrument-a64.cc
index 36923e7..21ec604 100644
--- a/src/a64/instrument-a64.cc
+++ b/src/vixl/a64/instrument-a64.cc
@@ -24,7 +24,7 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/instrument-a64.h"
+#include "vixl/a64/instrument-a64.h"
namespace vixl {
@@ -421,22 +421,26 @@
static Counter* store_fp_counter = GetCounter("Store FP");
switch (instr->Mask(LoadStoreMask)) {
- case STRB_w: // Fall through.
- case STRH_w: // Fall through.
- case STR_w: // Fall through.
+ case STRB_w:
+ case STRH_w:
+ case STR_w:
+ VIXL_FALLTHROUGH();
case STR_x: store_int_counter->Increment(); break;
- case STR_s: // Fall through.
+ case STR_s:
+ VIXL_FALLTHROUGH();
case STR_d: store_fp_counter->Increment(); break;
- case LDRB_w: // Fall through.
- case LDRH_w: // Fall through.
- case LDR_w: // Fall through.
- case LDR_x: // Fall through.
- case LDRSB_x: // Fall through.
- case LDRSH_x: // Fall through.
- case LDRSW_x: // Fall through.
- case LDRSB_w: // Fall through.
+ case LDRB_w:
+ case LDRH_w:
+ case LDR_w:
+ case LDR_x:
+ case LDRSB_x:
+ case LDRSH_x:
+ case LDRSW_x:
+ case LDRSB_w:
+ VIXL_FALLTHROUGH();
case LDRSH_w: load_int_counter->Increment(); break;
- case LDR_s: // Fall through.
+ case LDR_s:
+ VIXL_FALLTHROUGH();
case LDR_d: load_fp_counter->Increment(); break;
}
}
diff --git a/src/a64/instrument-a64.h b/src/vixl/a64/instrument-a64.h
similarity index 95%
rename from src/a64/instrument-a64.h
rename to src/vixl/a64/instrument-a64.h
index a55369a..8468ceb 100644
--- a/src/a64/instrument-a64.h
+++ b/src/vixl/a64/instrument-a64.h
@@ -27,11 +27,11 @@
#ifndef VIXL_A64_INSTRUMENT_A64_H_
#define VIXL_A64_INSTRUMENT_A64_H_
-#include "globals.h"
-#include "utils.h"
-#include "a64/decoder-a64.h"
-#include "a64/constants-a64.h"
-#include "a64/instrument-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/decoder-a64.h"
+#include "vixl/a64/constants-a64.h"
+#include "vixl/a64/instrument-a64.h"
namespace vixl {
diff --git a/src/a64/logic-a64.cc b/src/vixl/a64/logic-a64.cc
similarity index 90%
rename from src/a64/logic-a64.cc
rename to src/vixl/a64/logic-a64.cc
index c367b35..2b62443 100644
--- a/src/a64/logic-a64.cc
+++ b/src/vixl/a64/logic-a64.cc
@@ -24,9 +24,365 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/simulator-a64.h"
+#include <cmath>
+#include "vixl/a64/simulator-a64.h"
namespace vixl {
+
+template<> double Simulator::FPDefaultNaN<double>() {
+ return kFP64DefaultNaN;
+}
+
+
+template<> float Simulator::FPDefaultNaN<float>() {
+ return kFP32DefaultNaN;
+}
+
+// See FPRound for a description of this function.
+static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
+ uint64_t mantissa, FPRounding round_mode) {
+ int64_t bits =
+ FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
+ exponent,
+ mantissa,
+ round_mode);
+ return rawbits_to_double(bits);
+}
+
+
+// See FPRound for a description of this function.
+static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
+ uint64_t mantissa, FPRounding round_mode) {
+ int32_t bits =
+ FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
+ exponent,
+ mantissa,
+ round_mode);
+ return rawbits_to_float(bits);
+}
+
+
+// See FPRound for a description of this function.
+static inline float16 FPRoundToFloat16(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
+ sign, exponent, mantissa, round_mode);
+}
+
+
+double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
+ if (src >= 0) {
+ return UFixedToDouble(src, fbits, round);
+ } else {
+ // This works for all negative values, including INT64_MIN.
+ return -UFixedToDouble(-src, fbits, round);
+ }
+}
+
+
+double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
+ // An input of 0 is a special case because the result is effectively
+ // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
+ if (src == 0) {
+ return 0.0;
+ }
+
+ // Calculate the exponent. The highest significant bit will have the value
+ // 2^exponent.
+ const int highest_significant_bit = 63 - CountLeadingZeros(src);
+ const int64_t exponent = highest_significant_bit - fbits;
+
+ return FPRoundToDouble(0, exponent, src, round);
+}
+
+
+float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
+ if (src >= 0) {
+ return UFixedToFloat(src, fbits, round);
+ } else {
+ // This works for all negative values, including INT64_MIN.
+ return -UFixedToFloat(-src, fbits, round);
+ }
+}
+
+
+float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
+ // An input of 0 is a special case because the result is effectively
+ // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
+ if (src == 0) {
+ return 0.0f;
+ }
+
+ // Calculate the exponent. The highest significant bit will have the value
+ // 2^exponent.
+ const int highest_significant_bit = 63 - CountLeadingZeros(src);
+ const int32_t exponent = highest_significant_bit - fbits;
+
+ return FPRoundToFloat(0, exponent, src, round);
+}
+
+
+double Simulator::FPToDouble(float value) {
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ FPProcessException();
+ }
+ if (DN()) return kFP64DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred entirely, except that the top
+ // bit is forced to '1', making the result a quiet NaN. The unused
+ // (low-order) payload bits are set to 0.
+ uint32_t raw = float_to_rawbits(value);
+
+ uint64_t sign = raw >> 31;
+ uint64_t exponent = (1 << 11) - 1;
+ uint64_t payload = unsigned_bitextract_64(21, 0, raw);
+ payload <<= (52 - 23); // The unused low-order bits should be 0.
+ payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
+
+ return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
+ }
+
+ case FP_ZERO:
+ case FP_NORMAL:
+ case FP_SUBNORMAL:
+ case FP_INFINITE: {
+ // All other inputs are preserved in a standard cast, because every value
+ // representable using an IEEE-754 float is also representable using an
+ // IEEE-754 double.
+ return static_cast<double>(value);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return static_cast<double>(value);
+}
+
+
+float Simulator::FPToFloat(float16 value) {
+ uint32_t sign = value >> 15;
+ uint32_t exponent = unsigned_bitextract_32(
+ kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
+ value);
+ uint32_t mantissa = unsigned_bitextract_32(
+ kFloat16MantissaBits - 1, 0, value);
+
+ switch (float16classify(value)) {
+ case FP_ZERO:
+ return (sign == 0) ? 0.0f : -0.0f;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
+
+ case FP_SUBNORMAL: {
+ // Calculate shift required to put mantissa into the most-significant bits
+ // of the destination mantissa.
+ int shift = CountLeadingZeros(mantissa << (32 - 10));
+
+ // Shift mantissa and discard implicit '1'.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
+ mantissa &= (1 << kFloatMantissaBits) - 1;
+
+ // Adjust the exponent for the shift applied, and rebias.
+ exponent = exponent - shift + (-15 + 127);
+ break;
+ }
+
+ case FP_NAN:
+ if (IsSignallingNaN(value)) {
+ FPProcessException();
+ }
+ if (DN()) return kFP32DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred entirely, except that the top
+ // bit is forced to '1', making the result a quiet NaN. The unused
+ // (low-order) payload bits are set to 0.
+ exponent = (1 << kFloatExponentBits) - 1;
+
+ // Increase bits in mantissa, making low-order bits 0.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+ mantissa |= 1 << 22; // Force a quiet NaN.
+ break;
+
+ case FP_NORMAL:
+ // Increase bits in mantissa, making low-order bits 0.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+
+ // Change exponent bias.
+ exponent += (-15 + 127);
+ break;
+
+ default: VIXL_UNREACHABLE();
+ }
+ return rawbits_to_float((sign << 31) |
+ (exponent << kFloatMantissaBits) |
+ mantissa);
+}
+
+
+float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT(round_mode == FPTieEven);
+ USE(round_mode);
+
+ uint32_t raw = float_to_rawbits(value);
+ int32_t sign = raw >> 31;
+ int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
+ uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ FPProcessException();
+ }
+ if (DN()) return kFP16DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ float16 result = (sign == 0) ? kFP16PositiveInfinity
+ : kFP16NegativeInfinity;
+ result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
+ result |= (1 << 9); // Force a quiet NaN;
+ return result;
+ }
+
+ case FP_ZERO:
+ return (sign == 0) ? 0 : 0x8000;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert float-to-half as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+
+ // Add the implicit '1' bit to the mantissa.
+ mantissa += (1 << 23);
+ return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+
+float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT(round_mode == FPTieEven);
+ USE(round_mode);
+
+ uint64_t raw = double_to_rawbits(value);
+ int32_t sign = raw >> 63;
+ int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
+ uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ FPProcessException();
+ }
+ if (DN()) return kFP16DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ float16 result = (sign == 0) ? kFP16PositiveInfinity
+ : kFP16NegativeInfinity;
+ result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
+ result |= (1 << 9); // Force a quiet NaN;
+ return result;
+ }
+
+ case FP_ZERO:
+ return (sign == 0) ? 0 : 0x8000;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert double-to-half as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+
+ // Add the implicit '1' bit to the mantissa.
+ mantissa += (UINT64_C(1) << 52);
+ return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+
+float Simulator::FPToFloat(double value, FPRounding round_mode) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+ USE(round_mode);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ FPProcessException();
+ }
+ if (DN()) return kFP32DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint64_t raw = double_to_rawbits(value);
+
+ uint32_t sign = raw >> 63;
+ uint32_t exponent = (1 << 8) - 1;
+ uint32_t payload = unsigned_bitextract_64(50, 52 - 23, raw);
+ payload |= (1 << 22); // Force a quiet NaN.
+
+ return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
+ }
+
+ case FP_ZERO:
+ case FP_INFINITE: {
+ // In a C++ cast, any value representable in the target type will be
+ // unchanged. This is always the case for +/-0.0 and infinities.
+ return static_cast<float>(value);
+ }
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert double-to-float as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+ uint64_t raw = double_to_rawbits(value);
+ // Extract the IEEE-754 double components.
+ uint32_t sign = raw >> 63;
+ // Extract the exponent and remove the IEEE-754 encoding bias.
+ int32_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
+ // Extract the mantissa and add the implicit '1' bit.
+ uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
+ if (std::fpclassify(value) == FP_NORMAL) {
+ mantissa |= (UINT64_C(1) << 52);
+ }
+ return FPRoundToFloat(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return value;
+}
+
+
void Simulator::ld1(VectorFormat vform,
LogicVRegister dst,
uint64_t addr) {
@@ -1524,7 +1880,7 @@
int64_t lj_src_val = src1.IntLeftJustified(vform, i);
// Set signed saturation state.
- if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
+ if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
(lj_src_val != 0)) {
dst.SetSignedSat(i, lj_src_val >= 0);
}
@@ -1532,7 +1888,7 @@
// Set unsigned saturation state.
if (lj_src_val < 0) {
dst.SetUnsignedSat(i, false);
- } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
+ } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
(lj_src_val != 0)) {
dst.SetUnsignedSat(i, true);
}
@@ -1570,7 +1926,7 @@
uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
// Set saturation state.
- if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
+ if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
dst.SetUnsignedSat(i, true);
}
@@ -3153,9 +3509,9 @@
template <typename T>
T Simulator::FPAdd(T op1, T op2) {
T result = FPProcessNaNs(op1, op2);
- if (isnan(result)) return result;
+ if (std::isnan(result)) return result;
- if (isinf(op1) && isinf(op2) && (op1 != op2)) {
+ if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
// inf + -inf returns the default NaN.
FPProcessException();
return FPDefaultNaN<T>();
@@ -3169,9 +3525,9 @@
template <typename T>
T Simulator::FPSub(T op1, T op2) {
// NaNs should be handled elsewhere.
- VIXL_ASSERT(!isnan(op1) && !isnan(op2));
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
- if (isinf(op1) && isinf(op2) && (op1 == op2)) {
+ if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
// inf - inf returns the default NaN.
FPProcessException();
return FPDefaultNaN<T>();
@@ -3185,9 +3541,9 @@
template <typename T>
T Simulator::FPMul(T op1, T op2) {
// NaNs should be handled elsewhere.
- VIXL_ASSERT(!isnan(op1) && !isnan(op2));
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
- if ((isinf(op1) && (op2 == 0.0)) || (isinf(op2) && (op1 == 0.0))) {
+ if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
// inf * 0.0 returns the default NaN.
FPProcessException();
return FPDefaultNaN<T>();
@@ -3200,7 +3556,7 @@
template<typename T>
T Simulator::FPMulx(T op1, T op2) {
- if ((isinf(op1) && (op2 == 0.0)) || (isinf(op2) && (op1 == 0.0))) {
+ if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
// inf * 0.0 returns +/-2.0.
T two = 2.0;
return copysign(1.0, op1) * copysign(1.0, op2) * two;
@@ -3215,13 +3571,13 @@
T sign_a = copysign(1.0, a);
T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
- bool isinf_prod = isinf(op1) || isinf(op2);
+ bool isinf_prod = std::isinf(op1) || std::isinf(op2);
bool operation_generates_nan =
- (isinf(op1) && (op2 == 0.0)) || // inf * 0.0
- (isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
- (isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
+ (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
+ (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
+ (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
- if (isnan(result)) {
+ if (std::isnan(result)) {
// Generated NaNs override quiet NaNs propagated from a.
if (operation_generates_nan && IsQuietNaN(a)) {
FPProcessException();
@@ -3244,7 +3600,7 @@
}
result = FusedMultiplyAdd(op1, op2, a);
- VIXL_ASSERT(!isnan(result));
+ VIXL_ASSERT(!std::isnan(result));
// Work around broken fma implementations for rounded zero results: If a is
// 0.0, the sign of the result is the sign of op1 * op2 before rounding.
@@ -3259,9 +3615,9 @@
template <typename T>
T Simulator::FPDiv(T op1, T op2) {
// NaNs should be handled elsewhere.
- VIXL_ASSERT(!isnan(op1) && !isnan(op2));
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
- if ((isinf(op1) && isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
+ if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
// inf / inf and 0.0 / 0.0 return the default NaN.
FPProcessException();
return FPDefaultNaN<T>();
@@ -3276,7 +3632,7 @@
template <typename T>
T Simulator::FPSqrt(T op) {
- if (isnan(op)) {
+ if (std::isnan(op)) {
return FPProcessNaN(op);
} else if (op < 0.0) {
FPProcessException();
@@ -3290,7 +3646,7 @@
template <typename T>
T Simulator::FPMax(T a, T b) {
T result = FPProcessNaNs(a, b);
- if (isnan(result)) return result;
+ if (std::isnan(result)) return result;
if ((a == 0.0) && (b == 0.0) &&
(copysign(1.0, a) != copysign(1.0, b))) {
@@ -3311,14 +3667,14 @@
}
T result = FPProcessNaNs(a, b);
- return isnan(result) ? result : FPMax(a, b);
+ return std::isnan(result) ? result : FPMax(a, b);
}
template <typename T>
T Simulator::FPMin(T a, T b) {
T result = FPProcessNaNs(a, b);
- if (isnan(result)) return result;
+ if (std::isnan(result)) return result;
if ((a == 0.0) && (b == 0.0) &&
(copysign(1.0, a) != copysign(1.0, b))) {
@@ -3339,16 +3695,17 @@
}
T result = FPProcessNaNs(a, b);
- return isnan(result) ? result : FPMin(a, b);
+ return std::isnan(result) ? result : FPMin(a, b);
}
template <typename T>
T Simulator::FPRecipStepFused(T op1, T op2) {
const T two = 2.0;
- if ((isinf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (isinf(op2)))) {
+ if ((std::isinf(op1) && (op2 == 0.0))
+ || ((op1 == 0.0) && (std::isinf(op2)))) {
return two;
- } else if (isinf(op1) || isinf(op2)) {
+ } else if (std::isinf(op1) || std::isinf(op2)) {
// Return +inf if signs match, otherwise -inf.
return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
: kFP64NegativeInfinity;
@@ -3363,9 +3720,10 @@
const T one_point_five = 1.5;
const T two = 2.0;
- if ((isinf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (isinf(op2)))) {
+ if ((std::isinf(op1) && (op2 == 0.0))
+ || ((op1 == 0.0) && (std::isinf(op2)))) {
return one_point_five;
- } else if (isinf(op1) || isinf(op2)) {
+ } else if (std::isinf(op1) || std::isinf(op2)) {
// Return +inf if signs match, otherwise -inf.
return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
: kFP64NegativeInfinity;
@@ -3373,9 +3731,9 @@
// The multiply-add-halve operation must be fully fused, so avoid interim
// rounding by checking which operand can be losslessly divided by two
// before doing the multiply-add.
- if (isnormal(op1 / two)) {
+ if (std::isnormal(op1 / two)) {
return FusedMultiplyAdd(op1 / two, op2, one_point_five);
- } else if (isnormal(op2 / two)) {
+ } else if (std::isnormal(op2 / two)) {
return FusedMultiplyAdd(op1, op2 / two, one_point_five);
} else {
// Neither operand is normal after halving: the result is dominated by
@@ -3390,11 +3748,11 @@
if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
(value == kFP64NegativeInfinity)) {
return value;
- } else if (isnan(value)) {
+ } else if (std::isnan(value)) {
return FPProcessNaN(value);
}
- double int_result = floor(value);
+ double int_result = std::floor(value);
double error = value - int_result;
switch (round_mode) {
case FPTieAway: {
@@ -3419,7 +3777,7 @@
// If the error is greater than 0.5, or is equal to 0.5 and the integer
// result is odd, round up.
} else if ((error > 0.5) ||
- ((error == 0.5) && (fmod(int_result, 2) != 0))) {
+ ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
int_result++;
}
break;
@@ -3461,7 +3819,7 @@
} else if (value < kWMinInt) {
return kWMinInt;
}
- return isnan(value) ? 0 : static_cast<int32_t>(value);
+ return std::isnan(value) ? 0 : static_cast<int32_t>(value);
}
@@ -3472,7 +3830,7 @@
} else if (value < kXMinInt) {
return kXMinInt;
}
- return isnan(value) ? 0 : static_cast<int64_t>(value);
+ return std::isnan(value) ? 0 : static_cast<int64_t>(value);
}
@@ -3483,7 +3841,7 @@
} else if (value < 0.0) {
return 0;
}
- return isnan(value) ? 0 : static_cast<uint32_t>(value);
+ return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
}
@@ -3494,7 +3852,7 @@
} else if (value < 0.0) {
return 0;
}
- return isnan(value) ? 0 : static_cast<uint64_t>(value);
+ return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
}
@@ -3511,7 +3869,7 @@
T result; \
if (PROCNAN) { \
result = FPProcessNaNs(op1, op2); \
- if (!isnan(result)) { \
+ if (!std::isnan(result)) { \
result = OP(op1, op2); \
} \
} else { \
@@ -3558,7 +3916,7 @@
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T result = FPProcessNaNs(op1, op2);
- dst.SetFloat(i, isnan(result) ? result : FPRecipStepFused(op1, op2));
+ dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
}
return dst;
}
@@ -3588,7 +3946,7 @@
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T result = FPProcessNaNs(op1, op2);
- dst.SetFloat(i, isnan(result) ? result : FPRSqrtStepFused(op1, op2));
+ dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
}
return dst;
}
@@ -3620,7 +3978,7 @@
T op1 = src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T nan_result = FPProcessNaNs(op1, op2);
- if (!isnan(nan_result)) {
+ if (!std::isnan(nan_result)) {
switch (cond) {
case eq: result = (op1 == op2); break;
case ge: result = (op1 >= op2); break;
@@ -4001,7 +4359,7 @@
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float input = src.Float<float>(i);
float rounded = FPRoundInt(input, rounding_mode);
- if (inexact_exception && !isnan(input) && (input != rounded)) {
+ if (inexact_exception && !std::isnan(input) && (input != rounded)) {
FPProcessException();
}
dst.SetFloat<float>(i, rounded);
@@ -4011,7 +4369,7 @@
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
double input = src.Float<double>(i);
double rounded = FPRoundInt(input, rounding_mode);
- if (inexact_exception && !isnan(input) && (input != rounded)) {
+ if (inexact_exception && !std::isnan(input) && (input != rounded)) {
FPProcessException();
}
dst.SetFloat<double>(i, rounded);
@@ -4029,13 +4387,13 @@
dst.ClearForWrite(vform);
if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- float op = src.Float<float>(i) * powf(2.0f, fbits);
+ float op = src.Float<float>(i) * std::pow(2.0f, fbits);
dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- double op = src.Float<double>(i) * pow(2.0, fbits);
+ double op = src.Float<double>(i) * std::pow(2.0, fbits);
dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
}
}
@@ -4051,13 +4409,13 @@
dst.ClearForWrite(vform);
if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- float op = src.Float<float>(i) * powf(2.0f, fbits);
+ float op = src.Float<float>(i) * std::pow(2.0f, fbits);
dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- double op = src.Float<double>(i) * pow(2.0, fbits);
+ double op = src.Float<double>(i) * std::pow(2.0, fbits);
dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
}
}
@@ -4182,7 +4540,7 @@
template <typename T>
T Simulator::FPRecipSqrtEstimate(T op) {
- if (isnan(op)) {
+ if (std::isnan(op)) {
return FPProcessNaN(op);
} else if (op == 0.0) {
if (copysign(1.0, op) < 0.0) {
@@ -4193,7 +4551,7 @@
} else if (copysign(1.0, op) < 0.0) {
FPProcessException();
return FPDefaultNaN<T>();
- } else if (isinf(op)) {
+ } else if (std::isinf(op)) {
return 0.0;
} else {
uint64_t fraction;
@@ -4271,17 +4629,17 @@
sign = double_sign(op);
}
- if (isnan(op)) {
+ if (std::isnan(op)) {
return FPProcessNaN(op);
- } else if (isinf(op)) {
+ } else if (std::isinf(op)) {
return (sign == 1) ? -0.0 : 0.0;
} else if (op == 0.0) {
FPProcessException(); // FPExc_DivideByZero exception.
return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
} else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
- (fabsf(op) < pow(2.0, -128))) ||
+ (std::fabs(op) < std::pow(2.0, -128.0))) ||
((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
- (fabs(op) < pow(2.0, -1024)))) {
+ (std::fabs(op) < std::pow(2.0, -1024.0)))) {
bool overflow_to_inf = false;
switch (rounding) {
case FPTieEven: overflow_to_inf = true; break;
@@ -4338,9 +4696,9 @@
fraction = double_mantissa(estimate);
if (result_exp == 0) {
- fraction = (1L << 51) | Bits(fraction, 51, 1);
+ fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
} else if (result_exp == -1) {
- fraction = (1L << 50) | Bits(fraction, 51, 2);
+ fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
result_exp = 0;
}
if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
@@ -4384,8 +4742,8 @@
if (operand <= 0x3FFFFFFF) {
result = 0xFFFFFFFF;
} else {
- dp_operand = operand * pow(2.0, -32);
- dp_result = recip_sqrt_estimate(dp_operand) * pow(2.0, 31);
+ dp_operand = operand * std::pow(2.0, -32);
+ dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
result = static_cast<uint32_t>(dp_result);
}
dst.SetUint(vform, i, result);
@@ -4416,8 +4774,8 @@
if (operand <= 0x7FFFFFFF) {
result = 0xFFFFFFFF;
} else {
- dp_operand = operand * pow(2.0, -32);
- dp_result = recip_estimate(dp_operand) * pow(2.0, 31);
+ dp_operand = operand * std::pow(2.0, -32);
+ dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
result = static_cast<uint32_t>(dp_result);
}
dst.SetUint(vform, i, result);
@@ -4433,7 +4791,7 @@
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
T op = src.Float<T>(i);
T result;
- if (isnan(op)) {
+ if (std::isnan(op)) {
result = FPProcessNaN(op);
} else {
int exp;
diff --git a/src/a64/macro-assembler-a64.cc b/src/vixl/a64/macro-assembler-a64.cc
similarity index 93%
rename from src/a64/macro-assembler-a64.cc
rename to src/vixl/a64/macro-assembler-a64.cc
index 41b571a..49218b4 100644
--- a/src/a64/macro-assembler-a64.cc
+++ b/src/vixl/a64/macro-assembler-a64.cc
@@ -24,7 +24,7 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
namespace vixl {
@@ -43,8 +43,8 @@
}
-LiteralPool::LiteralPool(MacroAssembler* masm) :
- Pool(masm), size_(0), first_use_(-1) {
+LiteralPool::LiteralPool(MacroAssembler* masm)
+ : Pool(masm), size_(0), first_use_(-1) {
}
@@ -718,11 +718,13 @@
case AND:
Mov(rd, 0);
return;
- case ORR: // Fall through.
+ case ORR:
+ VIXL_FALLTHROUGH();
case EOR:
Mov(rd, rn);
return;
- case ANDS: // Fall through.
+ case ANDS:
+ VIXL_FALLTHROUGH();
case BICS:
break;
default:
@@ -740,7 +742,8 @@
case EOR:
Mvn(rd, rn);
return;
- case ANDS: // Fall through.
+ case ANDS:
+ VIXL_FALLTHROUGH();
case BICS:
break;
default:
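Note: the explicit `// Fall through.` comments are replaced with `VIXL_FALLTHROUGH()` here and throughout the simulator below, so compilers that understand fall-through annotations can check the intent. The actual macro lives in VIXL's globals header; a hypothetical definition, shown only to illustrate the idea, might be:

    // Hypothetical definition -- the real one may differ.
    #if __cplusplus >= 201703L
      #define VIXL_FALLTHROUGH() [[fallthrough]]
    #elif defined(__clang__)
      #define VIXL_FALLTHROUGH() [[clang::fallthrough]]
    #else
      #define VIXL_FALLTHROUGH() ((void)0)
    #endif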
@@ -1131,13 +1134,14 @@
void MacroAssembler::Add(const Register& rd,
const Register& rn,
- const Operand& operand) {
+ const Operand& operand,
+ FlagsUpdate S) {
VIXL_ASSERT(allow_macro_instructions_);
if (operand.IsImmediate() && (operand.immediate() < 0) &&
IsImmAddSub(-operand.immediate())) {
- AddSubMacro(rd, rn, -operand.immediate(), LeaveFlags, SUB);
+ AddSubMacro(rd, rn, -operand.immediate(), S, SUB);
} else {
- AddSubMacro(rd, rn, operand, LeaveFlags, ADD);
+ AddSubMacro(rd, rn, operand, S, ADD);
}
}
@@ -1145,25 +1149,20 @@
void MacroAssembler::Adds(const Register& rd,
const Register& rn,
const Operand& operand) {
- VIXL_ASSERT(allow_macro_instructions_);
- if (operand.IsImmediate() && (operand.immediate() < 0) &&
- IsImmAddSub(-operand.immediate())) {
- AddSubMacro(rd, rn, -operand.immediate(), SetFlags, SUB);
- } else {
- AddSubMacro(rd, rn, operand, SetFlags, ADD);
- }
+ Add(rd, rn, operand, SetFlags);
}
void MacroAssembler::Sub(const Register& rd,
const Register& rn,
- const Operand& operand) {
+ const Operand& operand,
+ FlagsUpdate S) {
VIXL_ASSERT(allow_macro_instructions_);
if (operand.IsImmediate() && (operand.immediate() < 0) &&
IsImmAddSub(-operand.immediate())) {
- AddSubMacro(rd, rn, -operand.immediate(), LeaveFlags, ADD);
+ AddSubMacro(rd, rn, -operand.immediate(), S, ADD);
} else {
- AddSubMacro(rd, rn, operand, LeaveFlags, SUB);
+ AddSubMacro(rd, rn, operand, S, SUB);
}
}
@@ -1171,13 +1170,7 @@
void MacroAssembler::Subs(const Register& rd,
const Register& rn,
const Operand& operand) {
- VIXL_ASSERT(allow_macro_instructions_);
- if (operand.IsImmediate() && (operand.immediate() < 0) &&
- IsImmAddSub(-operand.immediate())) {
- AddSubMacro(rd, rn, -operand.immediate(), SetFlags, ADD);
- } else {
- AddSubMacro(rd, rn, operand, SetFlags, SUB);
- }
+ Sub(rd, rn, operand, SetFlags);
}
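Note: with the new `FlagsUpdate` parameter, `Adds` and `Subs` simply forward to `Add` and `Sub`, so the negative-immediate folding is written only once. For example (hedged usage sketch, assuming a configured MacroAssembler `masm`), a flag-setting add of a negative immediate is still emitted as a flag-setting subtract:

    masm.Adds(x0, x1, Operand(-16));  // folds to AddSubMacro(x0, x1, 16, SetFlags, SUB)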
@@ -1193,23 +1186,29 @@
}
-void MacroAssembler::Fcmp(const FPRegister& fn, double value) {
+void MacroAssembler::Fcmp(const FPRegister& fn, double value,
+ FPTrapFlags trap) {
VIXL_ASSERT(allow_macro_instructions_);
// The worst case for size is:
// * 1 to materialise the constant, using literal pool if necessary
- // * 1 instruction for fcmp
+ // * 1 instruction for fcmp{e}
MacroEmissionCheckScope guard(this);
if (value != 0.0) {
UseScratchRegisterScope temps(this);
FPRegister tmp = temps.AcquireSameSizeAs(fn);
Fmov(tmp, value);
- fcmp(fn, tmp);
+ FPCompareMacro(fn, tmp, trap);
} else {
- fcmp(fn, value);
+ FPCompareMacro(fn, value, trap);
}
}
+void MacroAssembler::Fcmpe(const FPRegister& fn, double value) {
+ Fcmp(fn, value, EnableTrap);
+}
+
+
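Note: `Fcmpe` reuses the `Fcmp` macro with `EnableTrap`, so comparing against an arbitrary immediate keeps the same worst-case size (one instruction to materialise the constant plus one fcmp{e}). A hedged usage sketch, assuming a configured MacroAssembler `masm`:

    masm.Fcmp(d0, 1.5);   // materialises 1.5 in a scratch register, then fcmp
    masm.Fcmpe(d0, 0.0);  // uses the compare-with-zero form; signals on any NaN input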
void MacroAssembler::Fmov(VRegister vd, double imm) {
VIXL_ASSERT(allow_macro_instructions_);
// Floating point immediates are loaded through the literal pool.
@@ -1637,41 +1636,67 @@
void MacroAssembler::PushCPURegList(CPURegList registers) {
- int size = registers.RegisterSizeInBytes();
-
- PrepareForPush(registers.Count(), size);
- // Push up to four registers at a time because if the current stack pointer is
- // sp and reg_size is 32, registers must be pushed in blocks of four in order
- // to maintain the 16-byte alignment for sp.
+ VIXL_ASSERT(!registers.Overlaps(*TmpList()));
+ VIXL_ASSERT(!registers.Overlaps(*FPTmpList()));
VIXL_ASSERT(allow_macro_instructions_);
+
+ int reg_size = registers.RegisterSizeInBytes();
+ PrepareForPush(registers.Count(), reg_size);
+
+ // Bump the stack pointer and store two registers at the bottom.
+ int size = registers.TotalSizeInBytes();
+ const CPURegister& bottom_0 = registers.PopLowestIndex();
+ const CPURegister& bottom_1 = registers.PopLowestIndex();
+ if (bottom_0.IsValid() && bottom_1.IsValid()) {
+ Stp(bottom_0, bottom_1, MemOperand(StackPointer(), -size, PreIndex));
+ } else if (bottom_0.IsValid()) {
+ Str(bottom_0, MemOperand(StackPointer(), -size, PreIndex));
+ }
+
+ int offset = 2 * reg_size;
while (!registers.IsEmpty()) {
- int count_before = registers.Count();
- const CPURegister& src0 = registers.PopHighestIndex();
- const CPURegister& src1 = registers.PopHighestIndex();
- const CPURegister& src2 = registers.PopHighestIndex();
- const CPURegister& src3 = registers.PopHighestIndex();
- int count = count_before - registers.Count();
- PushHelper(count, size, src0, src1, src2, src3);
+ const CPURegister& src0 = registers.PopLowestIndex();
+ const CPURegister& src1 = registers.PopLowestIndex();
+ if (src1.IsValid()) {
+ Stp(src0, src1, MemOperand(StackPointer(), offset));
+ } else {
+ Str(src0, MemOperand(StackPointer(), offset));
+ }
+ offset += 2 * reg_size;
}
}
void MacroAssembler::PopCPURegList(CPURegList registers) {
- int size = registers.RegisterSizeInBytes();
-
- PrepareForPop(registers.Count(), size);
- // Pop up to four registers at a time because if the current stack pointer is
- // sp and reg_size is 32, registers must be pushed in blocks of four in order
- // to maintain the 16-byte alignment for sp.
+ VIXL_ASSERT(!registers.Overlaps(*TmpList()));
+ VIXL_ASSERT(!registers.Overlaps(*FPTmpList()));
VIXL_ASSERT(allow_macro_instructions_);
+
+ int reg_size = registers.RegisterSizeInBytes();
+ PrepareForPop(registers.Count(), reg_size);
+
+
+ int size = registers.TotalSizeInBytes();
+ const CPURegister& bottom_0 = registers.PopLowestIndex();
+ const CPURegister& bottom_1 = registers.PopLowestIndex();
+
+ int offset = 2 * reg_size;
while (!registers.IsEmpty()) {
- int count_before = registers.Count();
const CPURegister& dst0 = registers.PopLowestIndex();
const CPURegister& dst1 = registers.PopLowestIndex();
- const CPURegister& dst2 = registers.PopLowestIndex();
- const CPURegister& dst3 = registers.PopLowestIndex();
- int count = count_before - registers.Count();
- PopHelper(count, size, dst0, dst1, dst2, dst3);
+ if (dst1.IsValid()) {
+ Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
+ } else {
+ Ldr(dst0, MemOperand(StackPointer(), offset));
+ }
+ offset += 2 * reg_size;
+ }
+
+ // Load the two registers at the bottom and drop the stack pointer.
+ if (bottom_0.IsValid() && bottom_1.IsValid()) {
+ Ldp(bottom_0, bottom_1, MemOperand(StackPointer(), size, PostIndex));
+ } else if (bottom_0.IsValid()) {
+ Ldr(bottom_0, MemOperand(StackPointer(), size, PostIndex));
}
}
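Note: `PushCPURegList` now claims the whole area with a single pre-indexed store of the lowest-indexed pair and fills in the remaining registers at positive offsets; `PopCPURegList` mirrors it, so the stack pointer moves only once per list. A hypothetical emitted sequence for pushing x0..x3 with sp as the stack pointer (illustrative, not captured generator output):

    stp x0, x1, [sp, #-32]!   // claim 32 bytes and store the bottom pair
    stp x2, x3, [sp, #16]     // store the next pair above it
    ...
    ldp x2, x3, [sp, #16]     // pop: load the upper pair first
    ldp x0, x1, [sp], #32     // then the bottom pair, dropping the stack pointer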
@@ -1831,42 +1856,6 @@
}
-void MacroAssembler::PeekCPURegList(CPURegList registers, int offset) {
- VIXL_ASSERT(!registers.IncludesAliasOf(StackPointer()));
- VIXL_ASSERT(offset >= 0);
- int size = registers.RegisterSizeInBytes();
-
- while (registers.Count() >= 2) {
- const CPURegister& dst0 = registers.PopLowestIndex();
- const CPURegister& dst1 = registers.PopLowestIndex();
- Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
- offset += 2 * size;
- }
- if (!registers.IsEmpty()) {
- Ldr(registers.PopLowestIndex(),
- MemOperand(StackPointer(), offset));
- }
-}
-
-
-void MacroAssembler::PokeCPURegList(CPURegList registers, int offset) {
- VIXL_ASSERT(!registers.IncludesAliasOf(StackPointer()));
- VIXL_ASSERT(offset >= 0);
- int size = registers.RegisterSizeInBytes();
-
- while (registers.Count() >= 2) {
- const CPURegister& dst0 = registers.PopLowestIndex();
- const CPURegister& dst1 = registers.PopLowestIndex();
- Stp(dst0, dst1, MemOperand(StackPointer(), offset));
- offset += 2 * size;
- }
- if (!registers.IsEmpty()) {
- Str(registers.PopLowestIndex(),
- MemOperand(StackPointer(), offset));
- }
-}
-
-
void MacroAssembler::Claim(const Operand& size) {
VIXL_ASSERT(allow_macro_instructions_);
@@ -1956,6 +1945,80 @@
ldp(x29, x30, tos);
}
+void MacroAssembler::LoadCPURegList(CPURegList registers,
+ const MemOperand& src) {
+ LoadStoreCPURegListHelper(kLoad, registers, src);
+}
+
+void MacroAssembler::StoreCPURegList(CPURegList registers,
+ const MemOperand& dst) {
+ LoadStoreCPURegListHelper(kStore, registers, dst);
+}
+
+
+void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
+ CPURegList registers,
+ const MemOperand& mem) {
+ // We do not handle pre-indexing or post-indexing.
+ VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
+ VIXL_ASSERT(!registers.Overlaps(tmp_list_));
+ VIXL_ASSERT(!registers.Overlaps(fptmp_list_));
+ VIXL_ASSERT(!registers.IncludesAliasOf(sp));
+
+ UseScratchRegisterScope temps(this);
+
+ MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers,
+ mem,
+ &temps);
+
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ if (op == kStore) {
+ Stp(dst0, dst1, loc);
+ } else {
+ VIXL_ASSERT(op == kLoad);
+ Ldp(dst0, dst1, loc);
+ }
+ loc.AddOffset(2 * registers.RegisterSizeInBytes());
+ }
+ if (!registers.IsEmpty()) {
+ if (op == kStore) {
+ Str(registers.PopLowestIndex(), loc);
+ } else {
+ VIXL_ASSERT(op == kLoad);
+ Ldr(registers.PopLowestIndex(), loc);
+ }
+ }
+}
+
+MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
+ const CPURegList& registers,
+ const MemOperand& mem,
+ UseScratchRegisterScope* scratch_scope) {
+ // If necessary, pre-compute the base address for the accesses.
+ if (mem.IsRegisterOffset()) {
+ Register reg_base = scratch_scope->AcquireX();
+ ComputeAddress(reg_base, mem);
+ return MemOperand(reg_base);
+
+ } else if (mem.IsImmediateOffset()) {
+ int reg_size = registers.RegisterSizeInBytes();
+ int total_size = registers.TotalSizeInBytes();
+ int64_t min_offset = mem.offset();
+ int64_t max_offset = mem.offset() + std::max(0, total_size - 2 * reg_size);
+ if ((registers.Count() >= 2) &&
+ (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
+ !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
+ Register reg_base = scratch_scope->AcquireX();
+ ComputeAddress(reg_base, mem);
+ return MemOperand(reg_base);
+ }
+ }
+
+ return mem;
+}
+
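Note: the helper above falls back to materialising the base address whenever the first or last pair offset would not encode in the LDP/STP immediate, which is a signed 7-bit field scaled by the access size. A standalone sketch of that encodability check (illustrative; the real test is Assembler::IsImmLSPair):

    #include <cstdint>

    // True if `offset` fits the signed, scaled 7-bit immediate of LDP/STP
    // for an access of (1 << access_size_log2) bytes.
    bool FitsLoadStorePairImm(int64_t offset, unsigned access_size_log2) {
      if ((offset % (INT64_C(1) << access_size_log2)) != 0) return false;  // must be a multiple
      int64_t scaled = offset >> access_size_log2;
      return (scaled >= -64) && (scaled <= 63);                            // imm7 range
    }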
void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
VIXL_ASSERT(!sp.Is(StackPointer()));
// TODO: Several callers rely on this not using scratch registers, so we use
diff --git a/src/a64/macro-assembler-a64.h b/src/vixl/a64/macro-assembler-a64.h
similarity index 97%
rename from src/a64/macro-assembler-a64.h
rename to src/vixl/a64/macro-assembler-a64.h
index ecc4c27..e94933c 100644
--- a/src/a64/macro-assembler-a64.h
+++ b/src/vixl/a64/macro-assembler-a64.h
@@ -30,9 +30,9 @@
#include <algorithm>
#include <limits>
-#include "globals.h"
-#include "a64/assembler-a64.h"
-#include "a64/debugger-a64.h"
+#include "vixl/globals.h"
+#include "vixl/a64/assembler-a64.h"
+#include "vixl/a64/debugger-a64.h"
#define LS_MACRO_LIST(V) \
@@ -56,6 +56,7 @@
// Forward declaration
class MacroAssembler;
+class UseScratchRegisterScope;
class Pool {
public:
@@ -631,13 +632,15 @@
// Add and sub macros.
void Add(const Register& rd,
const Register& rn,
- const Operand& operand);
+ const Operand& operand,
+ FlagsUpdate S = LeaveFlags);
void Adds(const Register& rd,
const Register& rn,
const Operand& operand);
void Sub(const Register& rd,
const Register& rn,
- const Operand& operand);
+ const Operand& operand,
+ FlagsUpdate S = LeaveFlags);
void Subs(const Register& rd,
const Register& rn,
const Operand& operand);
@@ -844,39 +847,43 @@
// supported.
//
// Otherwise, (Peek|Poke)(CPU|X|W|D|S)RegList is preferred.
- void PeekCPURegList(CPURegList registers, int offset);
- void PokeCPURegList(CPURegList registers, int offset);
+ void PeekCPURegList(CPURegList registers, int64_t offset) {
+ LoadCPURegList(registers, MemOperand(StackPointer(), offset));
+ }
+ void PokeCPURegList(CPURegList registers, int64_t offset) {
+ StoreCPURegList(registers, MemOperand(StackPointer(), offset));
+ }
- void PeekSizeRegList(RegList registers, int offset, unsigned reg_size,
+ void PeekSizeRegList(RegList registers, int64_t offset, unsigned reg_size,
CPURegister::RegisterType type = CPURegister::kRegister) {
PeekCPURegList(CPURegList(type, reg_size, registers), offset);
}
- void PokeSizeRegList(RegList registers, int offset, unsigned reg_size,
+ void PokeSizeRegList(RegList registers, int64_t offset, unsigned reg_size,
CPURegister::RegisterType type = CPURegister::kRegister) {
PokeCPURegList(CPURegList(type, reg_size, registers), offset);
}
- void PeekXRegList(RegList regs, int offset) {
+ void PeekXRegList(RegList regs, int64_t offset) {
PeekSizeRegList(regs, offset, kXRegSize);
}
- void PokeXRegList(RegList regs, int offset) {
+ void PokeXRegList(RegList regs, int64_t offset) {
PokeSizeRegList(regs, offset, kXRegSize);
}
- void PeekWRegList(RegList regs, int offset) {
+ void PeekWRegList(RegList regs, int64_t offset) {
PeekSizeRegList(regs, offset, kWRegSize);
}
- void PokeWRegList(RegList regs, int offset) {
+ void PokeWRegList(RegList regs, int64_t offset) {
PokeSizeRegList(regs, offset, kWRegSize);
}
- void PeekDRegList(RegList regs, int offset) {
+ void PeekDRegList(RegList regs, int64_t offset) {
PeekSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister);
}
- void PokeDRegList(RegList regs, int offset) {
+ void PokeDRegList(RegList regs, int64_t offset) {
PokeSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister);
}
- void PeekSRegList(RegList regs, int offset) {
+ void PeekSRegList(RegList regs, int64_t offset) {
PeekSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister);
}
- void PokeSRegList(RegList regs, int offset) {
+ void PokeSRegList(RegList regs, int64_t offset) {
PokeSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister);
}
@@ -911,6 +918,9 @@
// aligned to 16 bytes.
void PopCalleeSavedRegisters();
+ void LoadCPURegList(CPURegList registers, const MemOperand& src);
+ void StoreCPURegList(CPURegList registers, const MemOperand& dst);
+
// Remaining instructions are simple pass-through calls to the assembler.
void Adr(const Register& rd, Label* label) {
VIXL_ASSERT(allow_macro_instructions_);
@@ -1135,18 +1145,31 @@
void Fccmp(const VRegister& vn,
const VRegister& vm,
StatusFlags nzcv,
- Condition cond) {
+ Condition cond,
+ FPTrapFlags trap = DisableTrap) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT((cond != al) && (cond != nv));
SingleEmissionCheckScope guard(this);
- fccmp(vn, vm, nzcv, cond);
+ FPCCompareMacro(vn, vm, nzcv, cond, trap);
}
- void Fcmp(const VRegister& vn, const VRegister& vm) {
+ void Fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond) {
+ Fccmp(vn, vm, nzcv, cond, EnableTrap);
+ }
+ void Fcmp(const VRegister& vn, const VRegister& vm,
+ FPTrapFlags trap = DisableTrap) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
- fcmp(vn, vm);
+ FPCompareMacro(vn, vm, trap);
}
- void Fcmp(const VRegister& vn, double value);
+ void Fcmp(const VRegister& vn, double value,
+ FPTrapFlags trap = DisableTrap);
+ void Fcmpe(const VRegister& vn, double value);
+ void Fcmpe(const VRegister& vn, const VRegister& vm) {
+ Fcmp(vn, vm, EnableTrap);
+ }
void Fcsel(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
@@ -2000,6 +2023,14 @@
SingleEmissionCheckScope guard(this);
umull(rd, rn, rm);
}
+ void Umulh(const Register& xd, const Register& xn, const Register& xm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!xd.IsZero());
+ VIXL_ASSERT(!xn.IsZero());
+ VIXL_ASSERT(!xm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umulh(xd, xn, xm);
+ }
void Umsubl(const Register& rd,
const Register& rn,
const Register& rm,
@@ -2989,6 +3020,23 @@
void PrepareForPush(int count, int size);
void PrepareForPop(int count, int size);
+ // The actual implementation of load and store operations for CPURegList.
+ enum LoadStoreCPURegListAction {
+ kLoad,
+ kStore
+ };
+ void LoadStoreCPURegListHelper(LoadStoreCPURegListAction operation,
+ CPURegList registers,
+ const MemOperand& mem);
+ // Returns a MemOperand suitable for loading or storing a CPURegList at `dst`.
+ // This helper may allocate registers from `scratch_scope` and generate code
+ // to compute an intermediate address. The resulting MemOperand is only valid
+ // as long as `scratch_scope` remains valid.
+ MemOperand BaseMemOperandForLoadStoreCPURegList(
+ const CPURegList& registers,
+ const MemOperand& mem,
+ UseScratchRegisterScope* scratch_scope);
+
bool LabelIsOutOfRange(Label* label, ImmBranchType branch_type) {
return !Instruction::IsValidImmPCOffset(branch_type,
label->location() - CursorOffset());
diff --git a/src/a64/simulator-a64.cc b/src/vixl/a64/simulator-a64.cc
similarity index 85%
rename from src/a64/simulator-a64.cc
rename to src/vixl/a64/simulator-a64.cc
index 3f3f3e2..79256bb 100644
--- a/src/a64/simulator-a64.cc
+++ b/src/vixl/a64/simulator-a64.cc
@@ -27,8 +27,8 @@
#ifdef USE_SIMULATOR
#include <string.h>
-#include <math.h>
-#include "a64/simulator-a64.h"
+#include <cmath>
+#include "vixl/a64/simulator-a64.h"
namespace vixl {
@@ -396,23 +396,18 @@
}
-template<> double Simulator::FPDefaultNaN<double>() const {
- return kFP64DefaultNaN;
-}
-
-
-template<> float Simulator::FPDefaultNaN<float>() const {
- return kFP32DefaultNaN;
-}
-
-
-void Simulator::FPCompare(double val0, double val1) {
+void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
AssertSupportedFPCR();
// TODO: This assumes that the C++ implementation handles comparisons in the
// way that we expect (as per AssertSupportedFPCR()).
- if ((isnan(val0) != 0) || (isnan(val1) != 0)) {
+ bool process_exception = false;
+ if ((std::isnan(val0) != 0) || (std::isnan(val1) != 0)) {
nzcv().SetRawValue(FPUnorderedFlag);
+ if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
+ (trap == EnableTrap)) {
+ process_exception = true;
+ }
} else if (val0 < val1) {
nzcv().SetRawValue(FPLessThanFlag);
} else if (val0 > val1) {
@@ -423,6 +418,7 @@
VIXL_UNREACHABLE();
}
LogSystemRegister(NZCV);
+ if (process_exception) FPProcessException();
}
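Note: FPCompare now reports unordered results and calls the (currently empty) FPProcessException hook when either input is a signalling NaN, or when any NaN is seen in a trapping compare (FCMPE/FCCMPE). For reference, an IEEE-754 binary64 signalling NaN is a NaN whose quiet bit (the top mantissa bit, bit 51) is clear; a standalone check might look like this (illustrative, not VIXL's IsSignallingNaN):

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    bool IsSignallingNaNExample(double value) {
      if (!std::isnan(value)) return false;
      uint64_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      return (bits & (UINT64_C(1) << 51)) == 0;  // quiet bit clear => signalling
    }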
@@ -440,7 +436,7 @@
}
switch (lane_size) {
- default: VIXL_UNREACHABLE();
+ default: VIXL_UNREACHABLE(); break;
case kQRegSizeInBytes: format |= kPrintReg1Q; break;
case kDRegSizeInBytes: format |= kPrintReg1D; break;
case kSRegSizeInBytes: format |= kPrintReg1S; break;
@@ -460,7 +456,7 @@
Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
VectorFormat vform) {
switch (vform) {
- default: VIXL_UNREACHABLE();
+ default: VIXL_UNREACHABLE(); return kPrintReg16B;
case kFormat16B: return kPrintReg16B;
case kFormat8B: return kPrintReg8B;
case kFormat8H: return kPrintReg8H;
@@ -841,7 +837,7 @@
switch (instr->Mask(UnconditionalBranchMask)) {
case BL:
set_lr(instr->NextInstruction());
- // Fall through.
+ VIXL_FALLTHROUGH();
case B:
set_pc(instr->ImmPCOffsetTarget());
break;
@@ -864,7 +860,7 @@
switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
case BLR:
set_lr(instr->NextInstruction());
- // Fall through.
+ VIXL_FALLTHROUGH();
case BR:
case RET: set_pc(target); break;
default: VIXL_UNREACHABLE();
@@ -1007,7 +1003,7 @@
// Switch on the logical operation, stripping out the NOT bit, as it has a
// different meaning for logical immediate instructions.
switch (instr->Mask(LogicalOpMask & ~NOT)) {
- case ANDS: update_flags = true; // Fall through.
+ case ANDS: update_flags = true; VIXL_FALLTHROUGH();
case AND: result = op1 & op2; break;
case ORR: result = op1 | op2; break;
case EOR: result = op1 ^ op2; break;
@@ -1616,14 +1612,14 @@
case REV_w: set_wreg(dst, ReverseBytes(wreg(src), Reverse32)); break;
case REV32_x: set_xreg(dst, ReverseBytes(xreg(src), Reverse32)); break;
case REV_x: set_xreg(dst, ReverseBytes(xreg(src), Reverse64)); break;
- case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src), kWRegSize)); break;
- case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src), kXRegSize)); break;
+ case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src))); break;
+ case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src))); break;
case CLS_w: {
- set_wreg(dst, CountLeadingSignBits(wreg(src), kWRegSize));
+ set_wreg(dst, CountLeadingSignBits(wreg(src)));
break;
}
case CLS_x: {
- set_xreg(dst, CountLeadingSignBits(xreg(src), kXRegSize));
+ set_xreg(dst, CountLeadingSignBits(xreg(src)));
break;
}
default: VIXL_UNIMPLEMENTED();
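Note: CLZ and CLS now call CountLeadingZeros and CountLeadingSignBits without an explicit width; the helpers (moved into the new compiler-intrinsics files added by this patch) presumably deduce the width from the operand type. A rough standalone sketch of a width-inferring count (illustrative only):

    #include <cstdint>

    template <typename V>
    int CountLeadingZerosExample(V value) {
      const int width = sizeof(V) * 8;
      int count = 0;
      // Walk down from the most significant bit until a set bit is found.
      for (int i = width - 1; i >= 0; i--) {
        if ((static_cast<uint64_t>(value) >> i) & 1) break;
        count++;
      }
      return count;
    }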
@@ -1831,9 +1827,13 @@
// The algorithm used is adapted from the one described in section 8.2 of
// Hacker's Delight, by Henry S. Warren, Jr.
// It assumes that a right shift on a signed integer is an arithmetic shift.
-static int64_t MultiplyHighSigned(int64_t u, int64_t v) {
+// Type T must be either uint64_t or int64_t.
+template <typename T>
+static T MultiplyHigh(T u, T v) {
uint64_t u0, v0, w0;
- int64_t u1, v1, w1, w2, t;
+ T u1, v1, w1, w2, t;
+
+ VIXL_ASSERT(sizeof(u) == sizeof(u0));
u0 = u & 0xffffffff;
u1 = u >> 32;
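Note: the templated MultiplyHigh now serves both SMULH and UMULH below. On compilers with a 128-bit integer type the results can be cross-checked directly; a reference implementation (illustrative, relying on the GCC/Clang __int128 extension):

    #include <cstdint>

    uint64_t UMulHighRef(uint64_t u, uint64_t v) {
      return static_cast<uint64_t>((static_cast<unsigned __int128>(u) * v) >> 64);
    }

    int64_t SMulHighRef(int64_t u, int64_t v) {
      return static_cast<int64_t>((static_cast<__int128>(u) * v) >> 64);
    }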
@@ -1872,8 +1872,12 @@
case SMSUBL_x: result = xreg(instr->Ra()) - (rn_s32 * rm_s32); break;
case UMADDL_x: result = xreg(instr->Ra()) + (rn_u32 * rm_u32); break;
case UMSUBL_x: result = xreg(instr->Ra()) - (rn_u32 * rm_u32); break;
+ case UMULH_x:
+ result = MultiplyHigh(reg<uint64_t>(instr->Rn()),
+ reg<uint64_t>(instr->Rm()));
+ break;
case SMULH_x:
- result = MultiplyHighSigned(xreg(instr->Rn()), xreg(instr->Rm()));
+ result = MultiplyHigh(xreg(instr->Rn()), xreg(instr->Rm()));
break;
default: VIXL_UNIMPLEMENTED();
}
@@ -2112,28 +2116,28 @@
break;
}
case FCVTZS_xd_fixed:
- set_xreg(dst, FPToInt64(dreg(src) * pow(2.0, fbits), FPZero));
+ set_xreg(dst, FPToInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
break;
case FCVTZS_wd_fixed:
- set_wreg(dst, FPToInt32(dreg(src) * pow(2.0, fbits), FPZero));
+ set_wreg(dst, FPToInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
break;
case FCVTZU_xd_fixed:
- set_xreg(dst, FPToUInt64(dreg(src) * pow(2.0, fbits), FPZero));
+ set_xreg(dst, FPToUInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
break;
case FCVTZU_wd_fixed:
- set_wreg(dst, FPToUInt32(dreg(src) * pow(2.0, fbits), FPZero));
+ set_wreg(dst, FPToUInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
break;
case FCVTZS_xs_fixed:
- set_xreg(dst, FPToInt64(sreg(src) * powf(2.0f, fbits), FPZero));
+ set_xreg(dst, FPToInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
break;
case FCVTZS_ws_fixed:
- set_wreg(dst, FPToInt32(sreg(src) * powf(2.0f, fbits), FPZero));
+ set_wreg(dst, FPToInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
break;
case FCVTZU_xs_fixed:
- set_xreg(dst, FPToUInt64(sreg(src) * powf(2.0f, fbits), FPZero));
+ set_xreg(dst, FPToUInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
break;
case FCVTZU_ws_fixed:
- set_wreg(dst, FPToUInt32(sreg(src) * powf(2.0f, fbits), FPZero));
+ set_wreg(dst, FPToUInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
break;
default: VIXL_UNREACHABLE();
}
@@ -2143,11 +2147,16 @@
void Simulator::VisitFPCompare(const Instruction* instr) {
AssertSupportedFPCR();
+ FPTrapFlags trap = DisableTrap;
switch (instr->Mask(FPCompareMask)) {
- case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm())); break;
- case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm())); break;
- case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f); break;
- case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0); break;
+ case FCMPE_s: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap); break;
+ case FCMPE_d: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap); break;
+ case FCMPE_s_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f, trap); break;
+ case FCMPE_d_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0, trap); break;
default: VIXL_UNIMPLEMENTED();
}
}
@@ -2156,18 +2165,23 @@
void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
AssertSupportedFPCR();
+ FPTrapFlags trap = DisableTrap;
switch (instr->Mask(FPConditionalCompareMask)) {
+ case FCCMPE_s: trap = EnableTrap;
+ VIXL_FALLTHROUGH();
case FCCMP_s:
if (ConditionPassed(instr->Condition())) {
- FPCompare(sreg(instr->Rn()), sreg(instr->Rm()));
+ FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap);
} else {
nzcv().SetFlags(instr->Nzcv());
LogSystemRegister(NZCV);
}
break;
+ case FCCMPE_d: trap = EnableTrap;
+ VIXL_FALLTHROUGH();
case FCCMP_d:
if (ConditionPassed(instr->Condition())) {
- FPCompare(dreg(instr->Rn()), dreg(instr->Rm()));
+ FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap);
} else {
nzcv().SetFlags(instr->Nzcv());
LogSystemRegister(NZCV);
@@ -2245,547 +2259,6 @@
}
-// Assemble the specified IEEE-754 components into the target type and apply
-// appropriate rounding.
-// sign: 0 = positive, 1 = negative
-// exponent: Unbiased IEEE-754 exponent.
-// mantissa: The mantissa of the input. The top bit (which is not encoded for
-// normal IEEE-754 values) must not be omitted. This bit has the
-// value 'pow(2, exponent)'.
-//
-// The input value is assumed to be a normalized value. That is, the input may
-// not be infinity or NaN. If the source value is subnormal, it must be
-// normalized before calling this function such that the highest set bit in the
-// mantissa has the value 'pow(2, exponent)'.
-//
-// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
-// calling a templated FPRound.
-template <class T, int ebits, int mbits>
-static T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,
- FPRounding round_mode) {
- VIXL_ASSERT((sign == 0) || (sign == 1));
-
- // Only FPTieEven and FPRoundOdd rounding modes are implemented.
- VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
-
- // Rounding can promote subnormals to normals, and normals to infinities. For
- // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
- // encodable as a float, but rounding based on the low-order mantissa bits
- // could make it overflow. With ties-to-even rounding, this value would become
- // an infinity.
-
- // ---- Rounding Method ----
- //
- // The exponent is irrelevant in the rounding operation, so we treat the
- // lowest-order bit that will fit into the result ('onebit') as having
- // the value '1'. Similarly, the highest-order bit that won't fit into
- // the result ('halfbit') has the value '0.5'. The 'point' sits between
- // 'onebit' and 'halfbit':
- //
- // These bits fit into the result.
- // |---------------------|
- // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- // ||
- // / |
- // / halfbit
- // onebit
- //
- // For subnormal outputs, the range of representable bits is smaller and
- // the position of onebit and halfbit depends on the exponent of the
- // input, but the method is otherwise similar.
- //
- // onebit(frac)
- // |
- // | halfbit(frac) halfbit(adjusted)
- // | / /
- // | | |
- // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00
- // 0b00.0... -> 0b00.0... -> 0b00
- // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00
- // 0b00.1... -> 0b00.1... -> 0b01
- // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01
- // 0b01.0... -> 0b01.0... -> 0b01
- // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10
- // 0b01.1... -> 0b01.1... -> 0b10
- // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10
- // 0b10.0... -> 0b10.0... -> 0b10
- // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10
- // 0b10.1... -> 0b10.1... -> 0b11
- // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11
- // ... / | / |
- // / | / |
- // / |
- // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
- //
- // mantissa = (mantissa >> shift) + halfbit(adjusted);
-
- static const int mantissa_offset = 0;
- static const int exponent_offset = mantissa_offset + mbits;
- static const int sign_offset = exponent_offset + ebits;
- VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
-
- // Bail out early for zero inputs.
- if (mantissa == 0) {
- return sign << sign_offset;
- }
-
- // If all bits in the exponent are set, the value is infinite or NaN.
- // This is true for all binary IEEE-754 formats.
- static const int infinite_exponent = (1 << ebits) - 1;
- static const int max_normal_exponent = infinite_exponent - 1;
-
- // Apply the exponent bias to encode it for the result. Doing this early makes
- // it easy to detect values that will be infinite or subnormal.
- exponent += max_normal_exponent >> 1;
-
- if (exponent > max_normal_exponent) {
- // Overflow: the input is too large for the result type to represent.
- if (round_mode == FPTieEven) {
- // FPTieEven rounding mode handles overflows using infinities.
- exponent = infinite_exponent;
- mantissa = 0;
- } else {
- VIXL_ASSERT(round_mode == FPRoundOdd);
- // FPRoundOdd rounding mode handles overflows using the largest magnitude
- // normal number.
- exponent = max_normal_exponent;
- mantissa = (UINT64_C(1) << exponent_offset) - 1;
- }
- return (sign << sign_offset) |
- (exponent << exponent_offset) |
- (mantissa << mantissa_offset);
- }
-
- // Calculate the shift required to move the top mantissa bit to the proper
- // place in the destination type.
- const int highest_significant_bit = 63 - CountLeadingZeros(mantissa, 64);
- int shift = highest_significant_bit - mbits;
-
- if (exponent <= 0) {
- // The output will be subnormal (before rounding).
- // For subnormal outputs, the shift must be adjusted by the exponent. The +1
- // is necessary because the exponent of a subnormal value (encoded as 0) is
- // the same as the exponent of the smallest normal value (encoded as 1).
- shift += -exponent + 1;
-
- // Handle inputs that would produce a zero output.
- //
- // Shifts higher than highest_significant_bit+1 will always produce a zero
- // result. A shift of exactly highest_significant_bit+1 might produce a
- // non-zero result after rounding.
- if (shift > (highest_significant_bit + 1)) {
- if (round_mode == FPTieEven) {
- // The result will always be +/-0.0.
- return sign << sign_offset;
- } else {
- VIXL_ASSERT(round_mode == FPRoundOdd);
- VIXL_ASSERT(mantissa != 0);
- // For FPRoundOdd, if the mantissa is too small to represent and
- // non-zero return the next "odd" value.
- return (sign << sign_offset) | 1;
- }
- }
-
- // Properly encode the exponent for a subnormal output.
- exponent = 0;
- } else {
- // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
- // normal values.
- mantissa &= ~(UINT64_C(1) << highest_significant_bit);
- }
-
- if (shift > 0) {
- if (round_mode == FPTieEven) {
- // We have to shift the mantissa to the right. Some precision is lost, so
- // we need to apply rounding.
- uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
- uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1;
- uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
- uint64_t adjusted = mantissa - adjustment;
- T halfbit_adjusted = (adjusted >> (shift-1)) & 1;
-
- T result = (sign << sign_offset) |
- (exponent << exponent_offset) |
- ((mantissa >> shift) << mantissa_offset);
-
- // A very large mantissa can overflow during rounding. If this happens,
- // the exponent should be incremented and the mantissa set to 1.0
- // (encoded as 0). Applying halfbit_adjusted after assembling the float
- // has the nice side-effect that this case is handled for free.
- //
- // This also handles cases where a very large finite value overflows to
- // infinity, or where a very large subnormal value overflows to become
- // normal.
- return result + halfbit_adjusted;
- } else {
- VIXL_ASSERT(round_mode == FPRoundOdd);
- // If any bits at position halfbit or below are set, onebit (ie. the
- // bottom bit of the resulting mantissa) must be set.
- uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
- if (fractional_bits != 0) {
- mantissa |= UINT64_C(1) << shift;
- }
-
- return (sign << sign_offset) |
- (exponent << exponent_offset) |
- ((mantissa >> shift) << mantissa_offset);
- }
- } else {
- // We have to shift the mantissa to the left (or not at all). The input
- // mantissa is exactly representable in the output mantissa, so apply no
- // rounding correction.
- return (sign << sign_offset) |
- (exponent << exponent_offset) |
- ((mantissa << -shift) << mantissa_offset);
- }
-}
-
-
-// See FPRound for a description of this function.
-static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
- uint64_t mantissa, FPRounding round_mode) {
- int64_t bits =
- FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
- exponent,
- mantissa,
- round_mode);
- return rawbits_to_double(bits);
-}
-
-
-// See FPRound for a description of this function.
-static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
- uint64_t mantissa, FPRounding round_mode) {
- int32_t bits =
- FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
- exponent,
- mantissa,
- round_mode);
- return rawbits_to_float(bits);
-}
-
-
-// See FPRound for a description of this function.
-static inline float16 FPRoundToFloat16(int64_t sign,
- int64_t exponent,
- uint64_t mantissa,
- FPRounding round_mode) {
- return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
- sign, exponent, mantissa, round_mode);
-}
-
-
-double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
- if (src >= 0) {
- return UFixedToDouble(src, fbits, round);
- } else {
- // This works for all negative values, including INT64_MIN.
- return -UFixedToDouble(-src, fbits, round);
- }
-}
-
-
-double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
- // An input of 0 is a special case because the result is effectively
- // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
- if (src == 0) {
- return 0.0;
- }
-
- // Calculate the exponent. The highest significant bit will have the value
- // 2^exponent.
- const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
- const int64_t exponent = highest_significant_bit - fbits;
-
- return FPRoundToDouble(0, exponent, src, round);
-}
-
-
-float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
- if (src >= 0) {
- return UFixedToFloat(src, fbits, round);
- } else {
- // This works for all negative values, including INT64_MIN.
- return -UFixedToFloat(-src, fbits, round);
- }
-}
-
-
-float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
- // An input of 0 is a special case because the result is effectively
- // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
- if (src == 0) {
- return 0.0f;
- }
-
- // Calculate the exponent. The highest significant bit will have the value
- // 2^exponent.
- const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
- const int32_t exponent = highest_significant_bit - fbits;
-
- return FPRoundToFloat(0, exponent, src, round);
-}
-
-
-double Simulator::FPToDouble(float value) {
- switch (fpclassify(value)) {
- case FP_NAN: {
- if (IsSignallingNaN(value)) {
- FPProcessException();
- }
- if (DN()) return kFP64DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred entirely, except that the top
- // bit is forced to '1', making the result a quiet NaN. The unused
- // (low-order) payload bits are set to 0.
- uint32_t raw = float_to_rawbits(value);
-
- uint64_t sign = raw >> 31;
- uint64_t exponent = (1 << 11) - 1;
- uint64_t payload = unsigned_bitextract_64(21, 0, raw);
- payload <<= (52 - 23); // The unused low-order bits should be 0.
- payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
-
- return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
- }
-
- case FP_ZERO:
- case FP_NORMAL:
- case FP_SUBNORMAL:
- case FP_INFINITE: {
- // All other inputs are preserved in a standard cast, because every value
- // representable using an IEEE-754 float is also representable using an
- // IEEE-754 double.
- return static_cast<double>(value);
- }
- }
-
- VIXL_UNREACHABLE();
- return static_cast<double>(value);
-}
-
-
-float Simulator::FPToFloat(float16 value) {
- uint32_t sign = value >> 15;
- uint32_t exponent = unsigned_bitextract_32(
- kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
- value);
- uint32_t mantissa = unsigned_bitextract_32(
- kFloat16MantissaBits - 1, 0, value);
-
- switch (float16classify(value)) {
- case FP_ZERO:
- return (sign == 0) ? 0.0f : -0.0f;
-
- case FP_INFINITE:
- return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
-
- case FP_SUBNORMAL: {
- // Calculate shift required to put mantissa into the most-significant bits
- // of the destination mantissa.
- int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
-
- // Shift mantissa and discard implicit '1'.
- mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
- mantissa &= (1 << kFloatMantissaBits) - 1;
-
- // Adjust the exponent for the shift applied, and rebias.
- exponent = exponent - shift + (-15 + 127);
- break;
- }
-
- case FP_NAN:
- if (IsSignallingNaN(value)) {
- FPProcessException();
- }
- if (DN()) return kFP32DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred entirely, except that the top
- // bit is forced to '1', making the result a quiet NaN. The unused
- // (low-order) payload bits are set to 0.
- exponent = (1 << kFloatExponentBits) - 1;
-
- // Increase bits in mantissa, making low-order bits 0.
- mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
- mantissa |= 1 << 22; // Force a quiet NaN.
- break;
-
- case FP_NORMAL:
- // Increase bits in mantissa, making low-order bits 0.
- mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
-
- // Change exponent bias.
- exponent += (-15 + 127);
- break;
-
- default: VIXL_UNREACHABLE();
- }
- return rawbits_to_float((sign << 31) |
- (exponent << kFloatMantissaBits) |
- mantissa);
-}
-
-
-float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
- // Only the FPTieEven rounding mode is implemented.
- VIXL_ASSERT(round_mode == FPTieEven);
- USE(round_mode);
-
- uint32_t raw = float_to_rawbits(value);
- int32_t sign = raw >> 31;
- int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
- uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
-
- switch (fpclassify(value)) {
- case FP_NAN: {
- if (IsSignallingNaN(value)) {
- FPProcessException();
- }
- if (DN()) return kFP16DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred as much as possible, except
- // that the top bit is forced to '1', making the result a quiet NaN.
- float16 result = (sign == 0) ? kFP16PositiveInfinity
- : kFP16NegativeInfinity;
- result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
- result |= (1 << 9); // Force a quiet NaN;
- return result;
- }
-
- case FP_ZERO:
- return (sign == 0) ? 0 : 0x8000;
-
- case FP_INFINITE:
- return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
-
- case FP_NORMAL:
- case FP_SUBNORMAL: {
- // Convert float-to-half as the processor would, assuming that FPCR.FZ
- // (flush-to-zero) is not set.
-
- // Add the implicit '1' bit to the mantissa.
- mantissa += (1 << 23);
- return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
- }
- }
-
- VIXL_UNREACHABLE();
- return 0;
-}
-
-
-float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
- // Only the FPTieEven rounding mode is implemented.
- VIXL_ASSERT(round_mode == FPTieEven);
- USE(round_mode);
-
- uint64_t raw = double_to_rawbits(value);
- int32_t sign = raw >> 63;
- int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
- uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
-
- switch (fpclassify(value)) {
- case FP_NAN: {
- if (IsSignallingNaN(value)) {
- FPProcessException();
- }
- if (DN()) return kFP16DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred as much as possible, except
- // that the top bit is forced to '1', making the result a quiet NaN.
- float16 result = (sign == 0) ? kFP16PositiveInfinity
- : kFP16NegativeInfinity;
- result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
- result |= (1 << 9); // Force a quiet NaN;
- return result;
- }
-
- case FP_ZERO:
- return (sign == 0) ? 0 : 0x8000;
-
- case FP_INFINITE:
- return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
-
- case FP_NORMAL:
- case FP_SUBNORMAL: {
- // Convert double-to-half as the processor would, assuming that FPCR.FZ
- // (flush-to-zero) is not set.
-
- // Add the implicit '1' bit to the mantissa.
- mantissa += (UINT64_C(1) << 52);
- return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
- }
- }
-
- VIXL_UNREACHABLE();
- return 0;
-}
-
-
-float Simulator::FPToFloat(double value, FPRounding round_mode) {
- // Only the FPTieEven rounding mode is implemented.
- VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
- USE(round_mode);
-
- switch (fpclassify(value)) {
- case FP_NAN: {
- if (IsSignallingNaN(value)) {
- FPProcessException();
- }
- if (DN()) return kFP32DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred as much as possible, except
- // that the top bit is forced to '1', making the result a quiet NaN.
- uint64_t raw = double_to_rawbits(value);
-
- uint32_t sign = raw >> 63;
- uint32_t exponent = (1 << 8) - 1;
- uint32_t payload = unsigned_bitextract_64(50, 52 - 23, raw);
- payload |= (1 << 22); // Force a quiet NaN.
-
- return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
- }
-
- case FP_ZERO:
- case FP_INFINITE: {
- // In a C++ cast, any value representable in the target type will be
- // unchanged. This is always the case for +/-0.0 and infinities.
- return static_cast<float>(value);
- }
-
- case FP_NORMAL:
- case FP_SUBNORMAL: {
- // Convert double-to-float as the processor would, assuming that FPCR.FZ
- // (flush-to-zero) is not set.
- uint64_t raw = double_to_rawbits(value);
- // Extract the IEEE-754 double components.
- uint32_t sign = raw >> 63;
- // Extract the exponent and remove the IEEE-754 encoding bias.
- int32_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
- // Extract the mantissa and add the implicit '1' bit.
- uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
- if (fpclassify(value) == FP_NORMAL) {
- mantissa |= (UINT64_C(1) << 52);
- }
- return FPRoundToFloat(sign, exponent, mantissa, round_mode);
- }
- }
-
- VIXL_UNREACHABLE();
- return value;
-}
-
-
void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
AssertSupportedFPCR();
@@ -2851,63 +2324,6 @@
}
-template <typename T>
-T Simulator::FPProcessNaN(T op) {
- VIXL_ASSERT(isnan(op));
- if (IsSignallingNaN(op)) {
- FPProcessException();
- }
- return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
-}
-
-template float Simulator::FPProcessNaN(float op);
-template double Simulator::FPProcessNaN(double op);
-
-template <typename T>
-T Simulator::FPProcessNaNs(T op1, T op2) {
- if (IsSignallingNaN(op1)) {
- return FPProcessNaN(op1);
- } else if (IsSignallingNaN(op2)) {
- return FPProcessNaN(op2);
- } else if (isnan(op1)) {
- VIXL_ASSERT(IsQuietNaN(op1));
- return FPProcessNaN(op1);
- } else if (isnan(op2)) {
- VIXL_ASSERT(IsQuietNaN(op2));
- return FPProcessNaN(op2);
- } else {
- return 0.0;
- }
-}
-
-template float Simulator::FPProcessNaNs(float op1, float op2);
-template double Simulator::FPProcessNaNs(double op1, double op2);
-
-template <typename T>
-T Simulator::FPProcessNaNs3(T op1, T op2, T op3) {
- if (IsSignallingNaN(op1)) {
- return FPProcessNaN(op1);
- } else if (IsSignallingNaN(op2)) {
- return FPProcessNaN(op2);
- } else if (IsSignallingNaN(op3)) {
- return FPProcessNaN(op3);
- } else if (isnan(op1)) {
- VIXL_ASSERT(IsQuietNaN(op1));
- return FPProcessNaN(op1);
- } else if (isnan(op2)) {
- VIXL_ASSERT(IsQuietNaN(op2));
- return FPProcessNaN(op2);
- } else if (isnan(op3)) {
- VIXL_ASSERT(IsQuietNaN(op3));
- return FPProcessNaN(op3);
- } else {
- return 0.0;
- }
-}
-
-template float Simulator::FPProcessNaNs3(float op1, float op2, float op3);
-template double Simulator::FPProcessNaNs3(double op1, double op2, double op3);
-
bool Simulator::FPProcessNaNs(const Instruction* instr) {
unsigned fd = instr->Rd();
unsigned fn = instr->Rn();
@@ -2916,13 +2332,13 @@
if (instr->Mask(FP64) == FP64) {
double result = FPProcessNaNs(dreg(fn), dreg(fm));
- if (isnan(result)) {
+ if (std::isnan(result)) {
set_dreg(fd, result);
done = true;
}
} else {
float result = FPProcessNaNs(sreg(fn), sreg(fm));
- if (isnan(result)) {
+ if (std::isnan(result)) {
set_sreg(fd, result);
done = true;
}
@@ -3618,13 +3034,13 @@
switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
case NEON_LD1_4v:
case NEON_LD1_4v_post: ld1(vf, vreg(reg[3]), addr[3]); count++;
- // Fall through.
+ VIXL_FALLTHROUGH();
case NEON_LD1_3v:
case NEON_LD1_3v_post: ld1(vf, vreg(reg[2]), addr[2]); count++;
- // Fall through.
+ VIXL_FALLTHROUGH();
case NEON_LD1_2v:
case NEON_LD1_2v_post: ld1(vf, vreg(reg[1]), addr[1]); count++;
- // Fall through.
+ VIXL_FALLTHROUGH();
case NEON_LD1_1v:
case NEON_LD1_1v_post:
ld1(vf, vreg(reg[0]), addr[0]);
@@ -3632,13 +3048,13 @@
break;
case NEON_ST1_4v:
case NEON_ST1_4v_post: st1(vf, vreg(reg[3]), addr[3]); count++;
- // Fall through.
+ VIXL_FALLTHROUGH();
case NEON_ST1_3v:
case NEON_ST1_3v_post: st1(vf, vreg(reg[2]), addr[2]); count++;
- // Fall through.
+ VIXL_FALLTHROUGH();
case NEON_ST1_2v:
case NEON_ST1_2v_post: st1(vf, vreg(reg[1]), addr[1]); count++;
- // Fall through.
+ VIXL_FALLTHROUGH();
case NEON_ST1_1v:
case NEON_ST1_1v_post:
st1(vf, vreg(reg[0]), addr[0]);
@@ -3745,6 +3161,7 @@
case NEON_LD3_b_post:
case NEON_LD4_b:
case NEON_LD4_b_post: do_load = true;
+ VIXL_FALLTHROUGH();
case NEON_ST1_b:
case NEON_ST1_b_post:
case NEON_ST2_b:
@@ -3762,6 +3179,7 @@
case NEON_LD3_h_post:
case NEON_LD4_h:
case NEON_LD4_h_post: do_load = true;
+ VIXL_FALLTHROUGH();
case NEON_ST1_h:
case NEON_ST1_h_post:
case NEON_ST2_h:
@@ -3778,6 +3196,7 @@
case NEON_LD3_s_post:
case NEON_LD4_s:
case NEON_LD4_s_post: do_load = true;
+ VIXL_FALLTHROUGH();
case NEON_ST1_s:
case NEON_ST1_s_post:
case NEON_ST2_s:
diff --git a/src/a64/simulator-a64.h b/src/vixl/a64/simulator-a64.h
similarity index 89%
rename from src/a64/simulator-a64.h
rename to src/vixl/a64/simulator-a64.h
index c314f2a..34dd5a1 100644
--- a/src/a64/simulator-a64.h
+++ b/src/vixl/a64/simulator-a64.h
@@ -27,12 +27,12 @@
#ifndef VIXL_A64_SIMULATOR_A64_H_
#define VIXL_A64_SIMULATOR_A64_H_
-#include "globals.h"
-#include "utils.h"
-#include "a64/instructions-a64.h"
-#include "a64/assembler-a64.h"
-#include "a64/disasm-a64.h"
-#include "a64/instrument-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/a64/assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/instrument-a64.h"
namespace vixl {
@@ -150,6 +150,201 @@
const unsigned kLogLength = 2 * kInstructionSize;
+// Assemble the specified IEEE-754 components into the target type and apply
+// appropriate rounding.
+// sign: 0 = positive, 1 = negative
+// exponent: Unbiased IEEE-754 exponent.
+// mantissa: The mantissa of the input. The top bit (which is not encoded for
+// normal IEEE-754 values) must not be omitted. This bit has the
+// value 'pow(2, exponent)'.
+//
+// The input value is assumed to be a normalized value. That is, the input may
+// not be infinity or NaN. If the source value is subnormal, it must be
+// normalized before calling this function such that the highest set bit in the
+// mantissa has the value 'pow(2, exponent)'.
+//
+// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
+// calling a templated FPRound.
+template <class T, int ebits, int mbits>
+T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,
+ FPRounding round_mode) {
+ VIXL_ASSERT((sign == 0) || (sign == 1));
+
+ // Only FPTieEven and FPRoundOdd rounding modes are implemented.
+ VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+
+ // Rounding can promote subnormals to normals, and normals to infinities. For
+ // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
+ // encodable as a float, but rounding based on the low-order mantissa bits
+ // could make it overflow. With ties-to-even rounding, this value would become
+ // an infinity.
+
+ // ---- Rounding Method ----
+ //
+ // The exponent is irrelevant in the rounding operation, so we treat the
+ // lowest-order bit that will fit into the result ('onebit') as having
+ // the value '1'. Similarly, the highest-order bit that won't fit into
+ // the result ('halfbit') has the value '0.5'. The 'point' sits between
+ // 'onebit' and 'halfbit':
+ //
+ // These bits fit into the result.
+ // |---------------------|
+ // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ // ||
+ // / |
+ // / halfbit
+ // onebit
+ //
+ // For subnormal outputs, the range of representable bits is smaller and
+ // the position of onebit and halfbit depends on the exponent of the
+ // input, but the method is otherwise similar.
+ //
+ // onebit(frac)
+ // |
+ // | halfbit(frac) halfbit(adjusted)
+ // | / /
+ // | | |
+ // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00
+ // 0b00.0... -> 0b00.0... -> 0b00
+ // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00
+ // 0b00.1... -> 0b00.1... -> 0b01
+ // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01
+ // 0b01.0... -> 0b01.0... -> 0b01
+ // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10
+ // 0b01.1... -> 0b01.1... -> 0b10
+ // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10
+ // 0b10.0... -> 0b10.0... -> 0b10
+ // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10
+ // 0b10.1... -> 0b10.1... -> 0b11
+ // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11
+ // ... / | / |
+ // / | / |
+ // / |
+ // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
+ //
+ // mantissa = (mantissa >> shift) + halfbit(adjusted);
+
+ static const int mantissa_offset = 0;
+ static const int exponent_offset = mantissa_offset + mbits;
+ static const int sign_offset = exponent_offset + ebits;
+ VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
+
+ // Bail out early for zero inputs.
+ if (mantissa == 0) {
+ return sign << sign_offset;
+ }
+
+ // If all bits in the exponent are set, the value is infinite or NaN.
+ // This is true for all binary IEEE-754 formats.
+ static const int infinite_exponent = (1 << ebits) - 1;
+ static const int max_normal_exponent = infinite_exponent - 1;
+
+ // Apply the exponent bias to encode it for the result. Doing this early makes
+ // it easy to detect values that will be infinite or subnormal.
+ exponent += max_normal_exponent >> 1;
+
+ if (exponent > max_normal_exponent) {
+ // Overflow: the input is too large for the result type to represent.
+ if (round_mode == FPTieEven) {
+ // FPTieEven rounding mode handles overflows using infinities.
+ exponent = infinite_exponent;
+ mantissa = 0;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ // FPRoundOdd rounding mode handles overflows using the largest magnitude
+ // normal number.
+ exponent = max_normal_exponent;
+ mantissa = (UINT64_C(1) << exponent_offset) - 1;
+ }
+ return (sign << sign_offset) |
+ (exponent << exponent_offset) |
+ (mantissa << mantissa_offset);
+ }
+
+ // Calculate the shift required to move the top mantissa bit to the proper
+ // place in the destination type.
+ const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
+ int shift = highest_significant_bit - mbits;
+
+ if (exponent <= 0) {
+ // The output will be subnormal (before rounding).
+ // For subnormal outputs, the shift must be adjusted by the exponent. The +1
+ // is necessary because the exponent of a subnormal value (encoded as 0) is
+ // the same as the exponent of the smallest normal value (encoded as 1).
+ shift += -exponent + 1;
+
+ // Handle inputs that would produce a zero output.
+ //
+ // Shifts higher than highest_significant_bit+1 will always produce a zero
+ // result. A shift of exactly highest_significant_bit+1 might produce a
+ // non-zero result after rounding.
+ if (shift > (highest_significant_bit + 1)) {
+ if (round_mode == FPTieEven) {
+ // The result will always be +/-0.0.
+ return sign << sign_offset;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ VIXL_ASSERT(mantissa != 0);
+ // For FPRoundOdd, if the mantissa is too small to represent and
+ // non-zero return the next "odd" value.
+ return (sign << sign_offset) | 1;
+ }
+ }
+
+ // Properly encode the exponent for a subnormal output.
+ exponent = 0;
+ } else {
+ // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
+ // normal values.
+ mantissa &= ~(UINT64_C(1) << highest_significant_bit);
+ }
+
+ if (shift > 0) {
+ if (round_mode == FPTieEven) {
+ // We have to shift the mantissa to the right. Some precision is lost, so
+ // we need to apply rounding.
+ uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
+ uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1;
+ uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
+ uint64_t adjusted = mantissa - adjustment;
+ T halfbit_adjusted = (adjusted >> (shift-1)) & 1;
+
+ T result = (sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset);
+
+ // A very large mantissa can overflow during rounding. If this happens,
+ // the exponent should be incremented and the mantissa set to 1.0
+ // (encoded as 0). Applying halfbit_adjusted after assembling the float
+ // has the nice side-effect that this case is handled for free.
+ //
+ // This also handles cases where a very large finite value overflows to
+ // infinity, or where a very large subnormal value overflows to become
+ // normal.
+ return result + halfbit_adjusted;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ // If any bits at position halfbit or below are set, onebit (ie. the
+ // bottom bit of the resulting mantissa) must be set.
+ uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
+ if (fractional_bits != 0) {
+ mantissa |= UINT64_C(1) << shift;
+ }
+
+ return (sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset);
+ }
+ } else {
+ // We have to shift the mantissa to the left (or not at all). The input
+ // mantissa is exactly representable in the output mantissa, so apply no
+ // rounding correction.
+ return (sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa << -shift) << mantissa_offset);
+ }
+}
+
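Note: FPRound is now a header template, so callers outside the simulator source file can assemble values from their IEEE-754 components. As a worked example, a mantissa of 0b11 with unbiased exponent 0 encodes 1.5 (the leading set bit carries the value 2^exponent), so the following hedged sketch should yield 1.5f, assuming rawbits_to_float from VIXL's utils as used elsewhere in the simulator:

    int32_t bits = FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(
        0, 0, UINT64_C(0x3), FPTieEven);
    float value = rawbits_to_float(bits);  // 1.5f; nothing is lost, so no rounding occurs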
// Representation of memory, with typed getters and setters for access.
class Memory {
@@ -988,7 +1183,7 @@
PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
switch (size) {
- default: VIXL_UNREACHABLE();
+ default: VIXL_UNREACHABLE(); return kPrintDReg;
case kDRegSizeInBytes: return kPrintDReg;
case kSRegSizeInBytes: return kPrintSReg;
}
@@ -1170,7 +1365,8 @@
return !Z() && (N() == V());
case le:
return !(!Z() && (N() == V()));
- case nv: // Fall through.
+ case nv:
+ VIXL_FALLTHROUGH();
case al:
return true;
default:
@@ -2318,15 +2514,13 @@
void SysOp_W(int op, int64_t val);
template <typename T>
- T FPDefaultNaN() const;
- template <typename T>
T FPRecipSqrtEstimate(T op);
template <typename T>
T FPRecipEstimate(T op, FPRounding rounding);
template <typename T, typename R>
R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);
- void FPCompare(double val0, double val1);
+ void FPCompare(double val0, double val1, FPTrapFlags trap);
double FPRoundInt(double value, FPRounding round_mode);
double FPToDouble(float value);
float FPToFloat(double value, FPRounding round_mode);
@@ -2389,18 +2583,8 @@
// for cumulative exception bits or floating-point exceptions.
void FPProcessException() { }
- // Standard NaN processing.
- template <typename T>
- T FPProcessNaN(T op);
-
bool FPProcessNaNs(const Instruction* instr);
- template <typename T>
- T FPProcessNaNs(T op1, T op2);
-
- template <typename T>
- T FPProcessNaNs3(T op1, T op2, T op3);
-
// Pseudo Printf instruction
void DoPrintf(const Instruction* instr);
@@ -2478,6 +2662,58 @@
static const Instruction* kEndOfSimAddress;
private:
+ template <typename T>
+ static T FPDefaultNaN();
+
+ // Standard NaN processing.
+ template <typename T>
+ T FPProcessNaN(T op) {
+ VIXL_ASSERT(std::isnan(op));
+ if (IsSignallingNaN(op)) {
+ FPProcessException();
+ }
+ return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
+ }
+
+ template <typename T>
+ T FPProcessNaNs(T op1, T op2) {
+ if (IsSignallingNaN(op1)) {
+ return FPProcessNaN(op1);
+ } else if (IsSignallingNaN(op2)) {
+ return FPProcessNaN(op2);
+ } else if (std::isnan(op1)) {
+ VIXL_ASSERT(IsQuietNaN(op1));
+ return FPProcessNaN(op1);
+ } else if (std::isnan(op2)) {
+ VIXL_ASSERT(IsQuietNaN(op2));
+ return FPProcessNaN(op2);
+ } else {
+ return 0.0;
+ }
+ }
+
+ template <typename T>
+ T FPProcessNaNs3(T op1, T op2, T op3) {
+ if (IsSignallingNaN(op1)) {
+ return FPProcessNaN(op1);
+ } else if (IsSignallingNaN(op2)) {
+ return FPProcessNaN(op2);
+ } else if (IsSignallingNaN(op3)) {
+ return FPProcessNaN(op3);
+ } else if (std::isnan(op1)) {
+ VIXL_ASSERT(IsQuietNaN(op1));
+ return FPProcessNaN(op1);
+ } else if (std::isnan(op2)) {
+ VIXL_ASSERT(IsQuietNaN(op2));
+ return FPProcessNaN(op2);
+ } else if (std::isnan(op3)) {
+ VIXL_ASSERT(IsQuietNaN(op3));
+ return FPProcessNaN(op3);
+ } else {
+ return 0.0;
+ }
+ }
+
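
The ordering above gives signalling NaNs priority over quiet NaNs regardless of operand position, and any propagated NaN is quietened (or replaced by the default NaN when FPCR.DN is set). An illustrative sketch using the bit patterns from the tests, assuming FPCR.DN is clear:

    double snan = rawbits_to_double(UINT64_C(0x7ff0000000000001));  // Signalling NaN.
    double qnan = rawbits_to_double(UINT64_C(0x7ff8000000000001));  // Quiet NaN.
    // FPProcessNaNs(qnan, snan) checks signalling NaNs first, so it returns
    // ToQuietNaN(snan) rather than propagating qnan.
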
bool coloured_trace_;
// A set of TraceParameters flags.
diff --git a/src/code-buffer.cc b/src/vixl/code-buffer.cc
similarity index 98%
rename from src/code-buffer.cc
rename to src/vixl/code-buffer.cc
index bc86e75..bb83975 100644
--- a/src/code-buffer.cc
+++ b/src/vixl/code-buffer.cc
@@ -24,8 +24,8 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "code-buffer.h"
-#include "utils.h"
+#include "vixl/code-buffer.h"
+#include "vixl/utils.h"
namespace vixl {
diff --git a/src/code-buffer.h b/src/vixl/code-buffer.h
similarity index 98%
rename from src/code-buffer.h
rename to src/vixl/code-buffer.h
index da6233d..f93ebb6 100644
--- a/src/code-buffer.h
+++ b/src/vixl/code-buffer.h
@@ -28,7 +28,7 @@
#define VIXL_CODE_BUFFER_H
#include <string.h>
-#include "globals.h"
+#include "vixl/globals.h"
namespace vixl {
diff --git a/src/vixl/compiler-intrinsics.cc b/src/vixl/compiler-intrinsics.cc
new file mode 100644
index 0000000..005bd55
--- /dev/null
+++ b/src/vixl/compiler-intrinsics.cc
@@ -0,0 +1,144 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "compiler-intrinsics.h"
+
+namespace vixl {
+
+
+int CountLeadingSignBitsFallBack(int64_t value, int width) {
+ VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
+ if (value >= 0) {
+ return CountLeadingZeros(value, width) - 1;
+ } else {
+ return CountLeadingZeros(~value, width) - 1;
+ }
+}
+
+
+int CountLeadingZerosFallBack(uint64_t value, int width) {
+ VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
+ if (value == 0) {
+ return width;
+ }
+ int count = 0;
+ value = value << (64 - width);
+ if ((value & UINT64_C(0xffffffff00000000)) == 0) {
+ count += 32;
+ value = value << 32;
+ }
+ if ((value & UINT64_C(0xffff000000000000)) == 0) {
+ count += 16;
+ value = value << 16;
+ }
+ if ((value & UINT64_C(0xff00000000000000)) == 0) {
+ count += 8;
+ value = value << 8;
+ }
+ if ((value & UINT64_C(0xf000000000000000)) == 0) {
+ count += 4;
+ value = value << 4;
+ }
+ if ((value & UINT64_C(0xc000000000000000)) == 0) {
+ count += 2;
+ value = value << 2;
+ }
+ if ((value & UINT64_C(0x8000000000000000)) == 0) {
+ count += 1;
+ }
+ count += (value == 0);
+ return count;
+}
+
+
+int CountSetBitsFallBack(uint64_t value, int width) {
+ VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
+
+ // Mask out unused bits to ensure that they are not counted.
+ value &= (UINT64_C(0xffffffffffffffff) >> (64 - width));
+
+ // Add up the set bits.
+ // The algorithm works by adding pairs of bit fields together iteratively,
+ // where the size of each bit field doubles each time.
+ // An example for an 8-bit value:
+ // Bits: h g f e d c b a
+ // \ | \ | \ | \ |
+ // value = h+g f+e d+c b+a
+ // \ | \ |
+ // value = h+g+f+e d+c+b+a
+ // \ |
+ // value = h+g+f+e+d+c+b+a
+ const uint64_t kMasks[] = {
+ UINT64_C(0x5555555555555555),
+ UINT64_C(0x3333333333333333),
+ UINT64_C(0x0f0f0f0f0f0f0f0f),
+ UINT64_C(0x00ff00ff00ff00ff),
+ UINT64_C(0x0000ffff0000ffff),
+ UINT64_C(0x00000000ffffffff),
+ };
+
+ for (unsigned i = 0; i < (sizeof(kMasks) / sizeof(kMasks[0])); i++) {
+ int shift = 1 << i;
+ value = ((value >> shift) & kMasks[i]) + (value & kMasks[i]);
+ }
+
+ return value;
+}
+
+
+int CountTrailingZerosFallBack(uint64_t value, int width) {
+ VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
+ int count = 0;
+ value = value << (64 - width);
+ if ((value & UINT64_C(0xffffffff)) == 0) {
+ count += 32;
+ value = value >> 32;
+ }
+ if ((value & 0xffff) == 0) {
+ count += 16;
+ value = value >> 16;
+ }
+ if ((value & 0xff) == 0) {
+ count += 8;
+ value = value >> 8;
+ }
+ if ((value & 0xf) == 0) {
+ count += 4;
+ value = value >> 4;
+ }
+ if ((value & 0x3) == 0) {
+ count += 2;
+ value = value >> 2;
+ }
+ if ((value & 0x1) == 0) {
+ count += 1;
+ }
+ count += (value == 0);
+ return count - (64 - width);
+}
+
+
+} // namespace vixl
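
A worked trace of the pairwise-sum popcount in CountSetBitsFallBack, assuming the illustrative 8-bit input 0x6c (binary 01101100, four set bits); the remaining mask iterations leave such a small value unchanged:

    uint64_t v = 0x6c;
    v = ((v >> 1) & UINT64_C(0x5555555555555555)) + (v & UINT64_C(0x5555555555555555));
    // v == 0x58: per-pair counts 01|01|10|00.
    v = ((v >> 2) & UINT64_C(0x3333333333333333)) + (v & UINT64_C(0x3333333333333333));
    // v == 0x22: per-nibble counts 0010|0010.
    v = ((v >> 4) & UINT64_C(0x0f0f0f0f0f0f0f0f)) + (v & UINT64_C(0x0f0f0f0f0f0f0f0f));
    // v == 0x04: four set bits.
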
diff --git a/src/vixl/compiler-intrinsics.h b/src/vixl/compiler-intrinsics.h
new file mode 100644
index 0000000..534f7e6
--- /dev/null
+++ b/src/vixl/compiler-intrinsics.h
@@ -0,0 +1,155 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#ifndef VIXL_COMPILER_INTRINSICS_H
+#define VIXL_COMPILER_INTRINSICS_H
+
+#include "globals.h"
+
+namespace vixl {
+
+// Helper to check whether the version of GCC used is at least the specified
+// requirement.
+#define MAJOR 1000000
+#define MINOR 1000
+#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
+ ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR + __GNUC_PATCHLEVEL__) >= \
+ ((major) * MAJOR + (minor) * MINOR + (patchlevel)))
+#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
+ ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR) >= \
+ ((major) * MAJOR + (minor) * MINOR + (patchlevel)))
+#else
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) 0
+#endif
+
+
+#if defined(__clang__) && !defined(VIXL_NO_COMPILER_BUILTINS)
+
+#define COMPILER_HAS_BUILTIN_CLRSB (__has_builtin(__builtin_clrsb))
+#define COMPILER_HAS_BUILTIN_CLZ (__has_builtin(__builtin_clz))
+#define COMPILER_HAS_BUILTIN_CTZ (__has_builtin(__builtin_ctz))
+#define COMPILER_HAS_BUILTIN_FFS (__has_builtin(__builtin_ffs))
+#define COMPILER_HAS_BUILTIN_POPCOUNT (__has_builtin(__builtin_popcount))
+
+#elif defined(__GNUC__) && !defined(VIXL_NO_COMPILER_BUILTINS)
+// The documentation for these builtins is available at:
+// https://gcc.gnu.org/onlinedocs/gcc-$MAJOR.$MINOR.$PATCHLEVEL/gcc//Other-Builtins.html
+
+# define COMPILER_HAS_BUILTIN_CLRSB (GCC_VERSION_OR_NEWER(4, 7, 0))
+# define COMPILER_HAS_BUILTIN_CLZ (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_CTZ (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_FFS (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_POPCOUNT (GCC_VERSION_OR_NEWER(3, 4, 0))
+
+#else
+// One can define VIXL_NO_COMPILER_BUILTINS to force using the manually
+// implemented C++ methods.
+
+#define COMPILER_HAS_BUILTIN_BSWAP false
+#define COMPILER_HAS_BUILTIN_CLRSB false
+#define COMPILER_HAS_BUILTIN_CLZ false
+#define COMPILER_HAS_BUILTIN_CTZ false
+#define COMPILER_HAS_BUILTIN_FFS false
+#define COMPILER_HAS_BUILTIN_POPCOUNT false
+
+#endif
+
+
+template<typename V>
+inline bool IsPowerOf2(V value) {
+ return (value != 0) && ((value & (value - 1)) == 0);
+}
+
+
+// Declaration of fallback functions.
+int CountLeadingSignBitsFallBack(int64_t value, int width);
+int CountLeadingZerosFallBack(uint64_t value, int width);
+int CountSetBitsFallBack(uint64_t value, int width);
+int CountTrailingZerosFallBack(uint64_t value, int width);
+
+
+// Implementation of intrinsics functions.
+// TODO: The implementations could be improved for sizes different from 32-bit
+// and 64-bit: we could mask the values and call the appropriate builtin.
+
+template<typename V>
+inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CLRSB
+ if (width == 32) {
+ return __builtin_clrsb(value);
+ } else if (width == 64) {
+ return __builtin_clrsbll(value);
+ }
+#endif
+ return CountLeadingSignBitsFallBack(value, width);
+}
+
+
+template<typename V>
+inline int CountLeadingZeros(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CLZ
+ if (width == 32) {
+ return (value == 0) ? 32 : __builtin_clz(value);
+ } else if (width == 64) {
+ return (value == 0) ? 64 : __builtin_clzll(value);
+ }
+#endif
+ return CountLeadingZerosFallBack(value, width);
+}
+
+
+template<typename V>
+inline int CountSetBits(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_POPCOUNT
+ if (width == 32) {
+ return __builtin_popcount(value);
+ } else if (width == 64) {
+ return __builtin_popcountll(value);
+ }
+#endif
+ return CountSetBitsFallBack(value, width);
+}
+
+
+template<typename V>
+inline int CountTrailingZeros(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CTZ
+ if (width == 32) {
+ return (value == 0) ? 32 : __builtin_ctz(value);
+ } else if (width == 64) {
+ return (value == 0) ? 64 : __builtin_ctzll(value);
+ }
+#endif
+ return CountTrailingZerosFallBack(value, width);
+}
+
+} // namespace vixl
+
+#endif // VIXL_COMPILER_INTRINSICS_H
+
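
A brief usage sketch for the new wrappers, assuming an illustrative 32-bit input; each call dispatches to a compiler builtin when one is detected above and otherwise to the fallback defined in compiler-intrinsics.cc:

    #include "vixl/compiler-intrinsics.h"

    // 0xf0 in a 32-bit word: 24 leading zeros, 4 trailing zeros, 4 set bits.
    uint32_t value = 0xf0;
    int clz = vixl::CountLeadingZeros(value);   // 24
    int ctz = vixl::CountTrailingZeros(value);  // 4
    int pop = vixl::CountSetBits(value);        // 4
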
diff --git a/src/globals.h b/src/vixl/globals.h
similarity index 84%
rename from src/globals.h
rename to src/vixl/globals.h
index 35d9b05..6dfd000 100644
--- a/src/globals.h
+++ b/src/vixl/globals.h
@@ -49,7 +49,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
-#include "platform.h"
+#include "vixl/platform.h"
typedef uint8_t byte;
@@ -88,4 +88,20 @@
#define VIXL_ALIGNMENT_EXCEPTION() printf("ALIGNMENT EXCEPTION\t"); VIXL_ABORT()
+// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough
+// argument to annotate intentional fall-through between switch labels.
+// For more information please refer to:
+// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
+#ifndef __has_warning
+ #define __has_warning(x) 0
+#endif
+
+// Note: This option is only available for Clang, and will only be enabled for
+// C++11 (201103L).
+#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
+ #define VIXL_FALLTHROUGH() [[clang::fallthrough]] //NOLINT
+#else
+ #define VIXL_FALLTHROUGH() do {} while (0)
+#endif
+
#endif // VIXL_GLOBALS_H
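
A minimal usage sketch for VIXL_FALLTHROUGH (the switch is illustrative, not taken from the sources); under Clang with C++11 it expands to [[clang::fallthrough]], and to a no-op otherwise:

    int Describe(int tag) {
      switch (tag) {
        case 0:
          VIXL_FALLTHROUGH();  // Intentional: 0 is handled exactly like 1.
        case 1:
          return 1;
        default:
          return 0;
      }
    }
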
diff --git a/src/invalset.h b/src/vixl/invalset.h
similarity index 98%
rename from src/invalset.h
rename to src/vixl/invalset.h
index c18353a..ffdc023 100644
--- a/src/invalset.h
+++ b/src/vixl/invalset.h
@@ -32,7 +32,7 @@
#include <algorithm>
#include <vector>
-#include "globals.h"
+#include "vixl/globals.h"
namespace vixl {
@@ -250,7 +250,7 @@
// Indicates if the iterator is looking at the vector or at the preallocated
// elements.
- bool using_vector_;
+ const bool using_vector_;
// Used when looking at the preallocated elements, or in debug mode when using
// the vector to track how many times the iterator has advanced.
size_t index_;
@@ -657,13 +657,14 @@
template<class S>
InvalSetIterator<S>::InvalSetIterator(S* inval_set)
- : using_vector_(false), index_(0), inval_set_(inval_set) {
+ : using_vector_((inval_set != NULL) && inval_set->IsUsingVector()),
+ index_(0),
+ inval_set_(inval_set) {
if (inval_set != NULL) {
inval_set->Sort(S::kSoftSort);
#ifdef VIXL_DEBUG
inval_set->Acquire();
#endif
- using_vector_ = inval_set->IsUsingVector();
if (using_vector_) {
iterator_ = typename std::vector<ElementType>::iterator(
inval_set_->vector_->begin());
diff --git a/src/platform.h b/src/vixl/platform.h
similarity index 100%
rename from src/platform.h
rename to src/vixl/platform.h
diff --git a/src/utils.cc b/src/vixl/utils.cc
similarity index 63%
rename from src/utils.cc
rename to src/vixl/utils.cc
index e026c2d..867001d 100644
--- a/src/utils.cc
+++ b/src/vixl/utils.cc
@@ -24,7 +24,7 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "utils.h"
+#include "vixl/utils.h"
#include <stdio.h>
namespace vixl {
@@ -127,91 +127,6 @@
}
-int CountLeadingZeros(uint64_t value, int width) {
- VIXL_ASSERT((width == 8) || (width == 16) || (width == 32) || (width == 64));
- int count = 0;
- uint64_t bit_test = UINT64_C(1) << (width - 1);
- while ((count < width) && ((bit_test & value) == 0)) {
- count++;
- bit_test >>= 1;
- }
- return count;
-}
-
-
-int CountLeadingSignBits(int64_t value, int width) {
- VIXL_ASSERT((width == 8) || (width == 16) || (width == 32) || (width == 64));
- if (value >= 0) {
- return CountLeadingZeros(value, width) - 1;
- } else {
- return CountLeadingZeros(~value, width) - 1;
- }
-}
-
-
-int CountTrailingZeros(uint64_t value, int width) {
- VIXL_ASSERT((width == 32) || (width == 64));
- int count = 0;
- while ((count < width) && (((value >> count) & 1) == 0)) {
- count++;
- }
- return count;
-}
-
-
-int CountSetBits(uint64_t value, int width) {
- // TODO: Other widths could be added here, as the implementation already
- // supports them.
- VIXL_ASSERT((width == 32) || (width == 64));
-
- // Mask out unused bits to ensure that they are not counted.
- value &= (UINT64_C(0xffffffffffffffff) >> (64-width));
-
- // Add up the set bits.
- // The algorithm works by adding pairs of bit fields together iteratively,
- // where the size of each bit field doubles each time.
- // An example for an 8-bit value:
- // Bits: h g f e d c b a
- // \ | \ | \ | \ |
- // value = h+g f+e d+c b+a
- // \ | \ |
- // value = h+g+f+e d+c+b+a
- // \ |
- // value = h+g+f+e+d+c+b+a
- const uint64_t kMasks[] = {
- UINT64_C(0x5555555555555555),
- UINT64_C(0x3333333333333333),
- UINT64_C(0x0f0f0f0f0f0f0f0f),
- UINT64_C(0x00ff00ff00ff00ff),
- UINT64_C(0x0000ffff0000ffff),
- UINT64_C(0x00000000ffffffff),
- };
-
- for (unsigned i = 0; i < (sizeof(kMasks) / sizeof(kMasks[0])); i++) {
- int shift = 1 << i;
- value = ((value >> shift) & kMasks[i]) + (value & kMasks[i]);
- }
-
- return value;
-}
-
-
-uint64_t LowestSetBit(uint64_t value) {
- return value & -value;
-}
-
-
-int HighestSetBitPosition(uint64_t number) {
- VIXL_ASSERT(number != 0);
- return 63 - CountLeadingZeros(number, 64);
-}
-
-
-bool IsPowerOf2(int64_t value) {
- return (value != 0) && ((value & (value - 1)) == 0);
-}
-
-
unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
VIXL_ASSERT((reg_size % 8) == 0);
int count = 0;
diff --git a/src/utils.h b/src/vixl/utils.h
similarity index 91%
rename from src/utils.h
rename to src/vixl/utils.h
index 6b9b72f..d7488b7 100644
--- a/src/utils.h
+++ b/src/vixl/utils.h
@@ -27,9 +27,10 @@
#ifndef VIXL_UTILS_H
#define VIXL_UTILS_H
-#include <math.h>
#include <string.h>
-#include "globals.h"
+#include <cmath>
+#include "vixl/globals.h"
+#include "vixl/compiler-intrinsics.h"
namespace vixl {
@@ -121,7 +122,7 @@
inline bool IsSignallingNaN(double num) {
const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
uint64_t raw = double_to_rawbits(num);
- if (isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) {
+ if (std::isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) {
return true;
}
return false;
@@ -131,7 +132,7 @@
inline bool IsSignallingNaN(float num) {
const uint32_t kFP32QuietNaNMask = 0x00400000;
uint32_t raw = float_to_rawbits(num);
- if (isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) {
+ if (std::isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) {
return true;
}
return false;
@@ -147,21 +148,21 @@
template <typename T>
inline bool IsQuietNaN(T num) {
- return isnan(num) && !IsSignallingNaN(num);
+ return std::isnan(num) && !IsSignallingNaN(num);
}
// Convert the NaN in 'num' to a quiet NaN.
inline double ToQuietNaN(double num) {
const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
- VIXL_ASSERT(isnan(num));
+ VIXL_ASSERT(std::isnan(num));
return rawbits_to_double(double_to_rawbits(num) | kFP64QuietNaNMask);
}
inline float ToQuietNaN(float num) {
const uint32_t kFP32QuietNaNMask = 0x00400000;
- VIXL_ASSERT(isnan(num));
+ VIXL_ASSERT(std::isnan(num));
return rawbits_to_float(float_to_rawbits(num) | kFP32QuietNaNMask);
}
@@ -177,14 +178,23 @@
}
-// Bit counting.
-int CountLeadingZeros(uint64_t value, int width);
-int CountLeadingSignBits(int64_t value, int width);
-int CountTrailingZeros(uint64_t value, int width);
-int CountSetBits(uint64_t value, int width);
-uint64_t LowestSetBit(uint64_t value);
-int HighestSetBitPosition(uint64_t value);
-bool IsPowerOf2(int64_t value);
+inline uint64_t LowestSetBit(uint64_t value) {
+ return value & -value;
+}
+
+
+template<typename T>
+inline int HighestSetBitPosition(T value) {
+ VIXL_ASSERT(value != 0);
+ return (sizeof(value) * 8 - 1) - CountLeadingZeros(value);
+}
+
+
+template<typename V>
+inline int WhichPowerOf2(V value) {
+ VIXL_ASSERT(IsPowerOf2(value));
+ return CountTrailingZeros(value);
+}
unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);
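
A quick sketch of the rewritten helpers, assuming an illustrative power-of-two input:

    uint64_t value = UINT64_C(0x80);                     // 1 << 7
    uint64_t lowest = vixl::LowestSetBit(value);         // 0x80
    int highest = vixl::HighestSetBitPosition(value);    // 7
    int log2 = vixl::WhichPowerOf2(value);               // 7; the input must be a power of two.
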
diff --git a/test/examples/test-examples.cc b/test/examples/test-examples.cc
index 61a31fb..7777c20 100644
--- a/test/examples/test-examples.cc
+++ b/test/examples/test-examples.cc
@@ -24,9 +24,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "a64/macro-assembler-a64.h"
-#include "a64/debugger-a64.h"
-#include "a64/simulator-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/debugger-a64.h"
+#include "vixl/a64/simulator-a64.h"
#include "examples.h"
#include "non-const-visitor.h"
#include "custom-disassembler.h"
diff --git a/test/test-assembler-a64.cc b/test/test-assembler-a64.cc
index ed55ae9..55e42ab 100644
--- a/test/test-assembler-a64.cc
+++ b/test/test-assembler-a64.cc
@@ -27,16 +27,16 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <math.h>
#include <float.h>
+#include <cmath>
#include "test-runner.h"
#include "test-utils-a64.h"
-#include "a64/macro-assembler-a64.h"
-#include "a64/simulator-a64.h"
-#include "a64/debugger-a64.h"
-#include "a64/disasm-a64.h"
-#include "a64/cpu-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/simulator-a64.h"
+#include "vixl/a64/debugger-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/cpu-a64.h"
namespace vixl {
@@ -1072,28 +1072,28 @@
SETUP();
START();
- __ Mov(x16, 0);
- __ Mov(x17, 1);
+ __ Mov(x25, 0);
+ __ Mov(x26, 1);
__ Mov(x18, 0xffffffff);
__ Mov(x19, 0xffffffffffffffff);
- __ Mul(w0, w16, w16);
- __ Mul(w1, w16, w17);
- __ Mul(w2, w17, w18);
+ __ Mul(w0, w25, w25);
+ __ Mul(w1, w25, w26);
+ __ Mul(w2, w26, w18);
__ Mul(w3, w18, w19);
- __ Mul(x4, x16, x16);
- __ Mul(x5, x17, x18);
+ __ Mul(x4, x25, x25);
+ __ Mul(x5, x26, x18);
__ Mul(x6, x18, x19);
__ Mul(x7, x19, x19);
- __ Smull(x8, w17, w18);
+ __ Smull(x8, w26, w18);
__ Smull(x9, w18, w18);
__ Smull(x10, w19, w19);
- __ Mneg(w11, w16, w16);
- __ Mneg(w12, w16, w17);
- __ Mneg(w13, w17, w18);
+ __ Mneg(w11, w25, w25);
+ __ Mneg(w12, w25, w26);
+ __ Mneg(w13, w26, w18);
__ Mneg(w14, w18, w19);
- __ Mneg(x20, x16, x16);
- __ Mneg(x21, x17, x18);
+ __ Mneg(x20, x25, x25);
+ __ Mneg(x21, x26, x18);
__ Mneg(x22, x18, x19);
__ Mneg(x23, x19, x19);
END();
@@ -1333,6 +1333,54 @@
}
+TEST(umulh) {
+ SETUP();
+
+ START();
+ __ Mov(x20, 0);
+ __ Mov(x21, 1);
+ __ Mov(x22, 0x0000000100000000);
+ __ Mov(x23, 0x0000000012345678);
+ __ Mov(x24, 0x0123456789abcdef);
+ __ Mov(x25, 0x0000000200000000);
+ __ Mov(x26, 0x8000000000000000);
+ __ Mov(x27, 0xffffffffffffffff);
+ __ Mov(x28, 0x5555555555555555);
+ __ Mov(x29, 0xaaaaaaaaaaaaaaaa);
+
+ __ Umulh(x0, x20, x24);
+ __ Umulh(x1, x21, x24);
+ __ Umulh(x2, x22, x23);
+ __ Umulh(x3, x22, x24);
+ __ Umulh(x4, x24, x25);
+ __ Umulh(x5, x23, x27);
+ __ Umulh(x6, x26, x26);
+ __ Umulh(x7, x26, x27);
+ __ Umulh(x8, x27, x27);
+ __ Umulh(x9, x28, x28);
+ __ Umulh(x10, x28, x29);
+ __ Umulh(x11, x29, x29);
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_64(0, x0);
+ ASSERT_EQUAL_64(0, x1);
+ ASSERT_EQUAL_64(0, x2);
+ ASSERT_EQUAL_64(0x0000000001234567, x3);
+ ASSERT_EQUAL_64(0x0000000002468acf, x4);
+ ASSERT_EQUAL_64(0x0000000012345677, x5);
+ ASSERT_EQUAL_64(0x4000000000000000, x6);
+ ASSERT_EQUAL_64(0x7fffffffffffffff, x7);
+ ASSERT_EQUAL_64(0xfffffffffffffffe, x8);
+ ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
+ ASSERT_EQUAL_64(0x38e38e38e38e38e3, x10);
+ ASSERT_EQUAL_64(0x71c71c71c71c71c6, x11);
+
+ TEARDOWN();
+}
+
+
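The expected values in TEST(umulh) are the high 64 bits of the full 128-bit unsigned product. A host-side check for one of the cases, assuming the GCC/Clang unsigned __int128 extension:

    unsigned __int128 product =
        (unsigned __int128)UINT64_C(0x8000000000000000) * UINT64_C(0xffffffffffffffff);
    uint64_t high = (uint64_t)(product >> 64);  // 0x7fffffffffffffff, as checked for x7.
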
TEST(smaddl_umaddl_umull) {
SETUP();
@@ -9446,26 +9494,26 @@
uint32_t raw_n = float_to_rawbits(n);
uint32_t raw_m = float_to_rawbits(m);
- if (isnan(n) && ((raw_n & kFP32QuietNaNMask) == 0)) {
+ if (std::isnan(n) && ((raw_n & kFP32QuietNaNMask) == 0)) {
// n is signalling NaN.
return rawbits_to_float(raw_n | kFP32QuietNaNMask);
- } else if (isnan(m) && ((raw_m & kFP32QuietNaNMask) == 0)) {
+ } else if (std::isnan(m) && ((raw_m & kFP32QuietNaNMask) == 0)) {
// m is signalling NaN.
return rawbits_to_float(raw_m | kFP32QuietNaNMask);
} else if (quiet_nan_substitute == 0.0) {
- if (isnan(n)) {
+ if (std::isnan(n)) {
// n is quiet NaN.
return n;
- } else if (isnan(m)) {
+ } else if (std::isnan(m)) {
// m is quiet NaN.
return m;
}
} else {
// Substitute n or m if one is quiet, but not both.
- if (isnan(n) && !isnan(m)) {
+ if (std::isnan(n) && !std::isnan(m)) {
// n is quiet NaN: replace with substitute.
n = quiet_nan_substitute;
- } else if (!isnan(n) && isnan(m)) {
+ } else if (!std::isnan(n) && std::isnan(m)) {
// m is quiet NaN: replace with substitute.
m = quiet_nan_substitute;
}
@@ -9488,26 +9536,26 @@
uint64_t raw_n = double_to_rawbits(n);
uint64_t raw_m = double_to_rawbits(m);
- if (isnan(n) && ((raw_n & kFP64QuietNaNMask) == 0)) {
+ if (std::isnan(n) && ((raw_n & kFP64QuietNaNMask) == 0)) {
// n is signalling NaN.
return rawbits_to_double(raw_n | kFP64QuietNaNMask);
- } else if (isnan(m) && ((raw_m & kFP64QuietNaNMask) == 0)) {
+ } else if (std::isnan(m) && ((raw_m & kFP64QuietNaNMask) == 0)) {
// m is signalling NaN.
return rawbits_to_double(raw_m | kFP64QuietNaNMask);
} else if (quiet_nan_substitute == 0.0) {
- if (isnan(n)) {
+ if (std::isnan(n)) {
// n is quiet NaN.
return n;
- } else if (isnan(m)) {
+ } else if (std::isnan(m)) {
// m is quiet NaN.
return m;
}
} else {
// Substitute n or m if one is quiet, but not both.
- if (isnan(n) && !isnan(m)) {
+ if (std::isnan(n) && !std::isnan(m)) {
// n is quiet NaN: replace with substitute.
n = quiet_nan_substitute;
- } else if (!isnan(n) && isnan(m)) {
+ } else if (!std::isnan(n) && std::isnan(m)) {
// m is quiet NaN: replace with substitute.
m = quiet_nan_substitute;
}
@@ -9700,6 +9748,10 @@
__ Fmov(d18, -0.5);
__ Fmov(d19, -1.0);
__ Mov(x20, 0);
+ __ Mov(x21, 0x7ff0000000000001); // Double precision NaN.
+ __ Fmov(d21, x21);
+ __ Mov(w22, 0x7f800001); // Single precision NaN.
+ __ Fmov(s22, w22);
__ Cmp(x20, 0);
__ Fccmp(s16, s16, NoFlag, eq);
@@ -9739,6 +9791,22 @@
__ fccmp(d18, d18, NFlag, nv);
__ Mrs(x9, NZCV);
+
+ __ Cmp(x20, 0);
+ __ Fccmpe(s16, s16, NoFlag, eq);
+ __ Mrs(x10, NZCV);
+
+ __ Cmp(x20, 0);
+ __ Fccmpe(d18, d19, ZCVFlag, ls);
+ __ Mrs(x11, NZCV);
+
+ __ Cmp(x20, 0);
+ __ Fccmpe(d21, d21, NoFlag, eq);
+ __ Mrs(x12, NZCV);
+
+ __ Cmp(x20, 0);
+ __ Fccmpe(s22, s22, NoFlag, eq);
+ __ Mrs(x13, NZCV);
END();
RUN();
@@ -9753,6 +9821,10 @@
ASSERT_EQUAL_32(NFlag, w7);
ASSERT_EQUAL_32(ZCFlag, w8);
ASSERT_EQUAL_32(ZCFlag, w9);
+ ASSERT_EQUAL_32(ZCFlag, w10);
+ ASSERT_EQUAL_32(CFlag, w11);
+ ASSERT_EQUAL_32(CVFlag, w12);
+ ASSERT_EQUAL_32(CVFlag, w13);
TEARDOWN();
}
@@ -9813,6 +9885,19 @@
__ Fcmp(d19, 12.3456);
temps.Exclude(d0);
__ Mrs(x16, NZCV);
+
+ __ Fcmpe(s8, s8);
+ __ Mrs(x22, NZCV);
+ __ Fcmpe(s8, 0.0);
+ __ Mrs(x23, NZCV);
+ __ Fcmpe(d19, d19);
+ __ Mrs(x24, NZCV);
+ __ Fcmpe(d19, 0.0);
+ __ Mrs(x25, NZCV);
+ __ Fcmpe(s18, s18);
+ __ Mrs(x26, NZCV);
+ __ Fcmpe(d21, d21);
+ __ Mrs(x27, NZCV);
}
END();
@@ -9833,6 +9918,12 @@
ASSERT_EQUAL_32(CVFlag, w14);
ASSERT_EQUAL_32(ZCFlag, w15);
ASSERT_EQUAL_32(NFlag, w16);
+ ASSERT_EQUAL_32(ZCFlag, w22);
+ ASSERT_EQUAL_32(ZCFlag, w23);
+ ASSERT_EQUAL_32(ZCFlag, w24);
+ ASSERT_EQUAL_32(ZCFlag, w25);
+ ASSERT_EQUAL_32(CVFlag, w26);
+ ASSERT_EQUAL_32(CVFlag, w27);
TEARDOWN();
}
@@ -11869,16 +11960,16 @@
double expected_ucvtf_base = rawbits_to_double(expected_ucvtf_bits);
for (int fbits = 0; fbits <= 32; fbits++) {
- double expected_scvtf = expected_scvtf_base / pow(2, fbits);
- double expected_ucvtf = expected_ucvtf_base / pow(2, fbits);
+ double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
+ double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
if (cvtf_s32) ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_w[fbits]);
if (cvtf_u32) ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_w[fbits]);
}
for (int fbits = 33; fbits <= 64; fbits++) {
- double expected_scvtf = expected_scvtf_base / pow(2, fbits);
- double expected_ucvtf = expected_ucvtf_base / pow(2, fbits);
+ double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
+ double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
}
@@ -12023,18 +12114,16 @@
float expected_ucvtf_base = rawbits_to_float(expected_ucvtf_bits);
for (int fbits = 0; fbits <= 32; fbits++) {
- float expected_scvtf = expected_scvtf_base / powf(2, fbits);
- float expected_ucvtf = expected_ucvtf_base / powf(2, fbits);
+ float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
+ float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
if (cvtf_s32) ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_w[fbits]);
if (cvtf_u32) ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_w[fbits]);
- break;
}
for (int fbits = 33; fbits <= 64; fbits++) {
- break;
- float expected_scvtf = expected_scvtf_base / powf(2, fbits);
- float expected_ucvtf = expected_ucvtf_base / powf(2, fbits);
+ float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
+ float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
}
@@ -12617,6 +12706,10 @@
SETUP();
START();
+ // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
+ UseScratchRegisterScope temps(&masm);
+ temps.ExcludeAll();
+
// The literal base is chosen to have two useful properties:
// * When multiplied by small values (such as a register index), this value
// is clearly readable in the result.
@@ -12687,6 +12780,10 @@
SETUP();
START();
+ // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
+ UseScratchRegisterScope temps(&masm);
+ temps.ExcludeAll();
+
// The literal base is chosen to have two useful properties:
// * When multiplied by small values (such as a register index), this value
// is clearly readable in the result.
@@ -12769,6 +12866,121 @@
}
+TEST(load_store_reglist) {
+ SETUP();
+ START();
+
+ // The literal base is chosen to have two useful properties:
+ // * When multiplied by small values (such as a register index), this value
+ // is clearly readable in the result.
+ // * The value is not formed from repeating fixed-size smaller values, so it
+ // can be used to detect endianness-related errors.
+ uint64_t high_base = UINT32_C(0x01000010);
+ uint64_t low_base = UINT32_C(0x00100101);
+ uint64_t base = (high_base << 32) | low_base;
+ uint64_t array[21];
+ memset(array, 0, sizeof(array));
+
+ // Initialize the registers.
+ __ Mov(x1, base);
+ __ Add(x2, x1, x1);
+ __ Add(x3, x2, x1);
+ __ Add(x4, x3, x1);
+ __ Fmov(d1, x1);
+ __ Fmov(d2, x2);
+ __ Fmov(d3, x3);
+ __ Fmov(d4, x4);
+ __ Fmov(d5, x1);
+ __ Fmov(d6, x2);
+ __ Fmov(d7, x3);
+ __ Fmov(d8, x4);
+
+ Register reg_base = x20;
+ Register reg_index = x21;
+ int size_stored = 0;
+
+ __ Mov(reg_base, reinterpret_cast<uintptr_t>(&array));
+
+ // Test aligned accesses.
+ CPURegList list_src(w1, w2, w3, w4);
+ CPURegList list_dst(w11, w12, w13, w14);
+ CPURegList list_fp_src_1(d1, d2, d3, d4);
+ CPURegList list_fp_dst_1(d11, d12, d13, d14);
+
+ __ StoreCPURegList(list_src, MemOperand(reg_base, 0 * sizeof(uint64_t)));
+ __ LoadCPURegList(list_dst, MemOperand(reg_base, 0 * sizeof(uint64_t)));
+ size_stored += 4 * kWRegSizeInBytes;
+
+ __ Mov(reg_index, size_stored);
+ __ StoreCPURegList(list_src, MemOperand(reg_base, reg_index));
+ __ LoadCPURegList(list_dst, MemOperand(reg_base, reg_index));
+ size_stored += 4 * kWRegSizeInBytes;
+
+ __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, size_stored));
+ __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, size_stored));
+ size_stored += 4 * kDRegSizeInBytes;
+
+ __ Mov(reg_index, size_stored);
+ __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, reg_index));
+ __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, reg_index));
+ size_stored += 4 * kDRegSizeInBytes;
+
+ // Test unaligned accesses.
+ CPURegList list_fp_src_2(d5, d6, d7, d8);
+ CPURegList list_fp_dst_2(d15, d16, d17, d18);
+
+ __ Str(wzr, MemOperand(reg_base, size_stored));
+ size_stored += 1 * kWRegSizeInBytes;
+ __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, size_stored));
+ __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, size_stored));
+ size_stored += 4 * kDRegSizeInBytes;
+
+ __ Mov(reg_index, size_stored);
+ __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, reg_index));
+ __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, reg_index));
+
+ END();
+ RUN();
+
+ VIXL_CHECK(array[0] == (1 * low_base) + (2 * low_base << kWRegSize));
+ VIXL_CHECK(array[1] == (3 * low_base) + (4 * low_base << kWRegSize));
+ VIXL_CHECK(array[2] == (1 * low_base) + (2 * low_base << kWRegSize));
+ VIXL_CHECK(array[3] == (3 * low_base) + (4 * low_base << kWRegSize));
+ VIXL_CHECK(array[4] == 1 * base);
+ VIXL_CHECK(array[5] == 2 * base);
+ VIXL_CHECK(array[6] == 3 * base);
+ VIXL_CHECK(array[7] == 4 * base);
+ VIXL_CHECK(array[8] == 1 * base);
+ VIXL_CHECK(array[9] == 2 * base);
+ VIXL_CHECK(array[10] == 3 * base);
+ VIXL_CHECK(array[11] == 4 * base);
+ VIXL_CHECK(array[12] == ((1 * low_base) << kSRegSize));
+ VIXL_CHECK(array[13] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
+ VIXL_CHECK(array[14] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
+ VIXL_CHECK(array[15] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
+ VIXL_CHECK(array[16] == (((1 * low_base) << kSRegSize) | (4 * high_base)));
+ VIXL_CHECK(array[17] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
+ VIXL_CHECK(array[18] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
+ VIXL_CHECK(array[19] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
+ VIXL_CHECK(array[20] == (4 * high_base));
+
+ ASSERT_EQUAL_64(1 * low_base, x11);
+ ASSERT_EQUAL_64(2 * low_base, x12);
+ ASSERT_EQUAL_64(3 * low_base, x13);
+ ASSERT_EQUAL_64(4 * low_base, x14);
+ ASSERT_EQUAL_FP64(rawbits_to_double(1 * base), d11);
+ ASSERT_EQUAL_FP64(rawbits_to_double(2 * base), d12);
+ ASSERT_EQUAL_FP64(rawbits_to_double(3 * base), d13);
+ ASSERT_EQUAL_FP64(rawbits_to_double(4 * base), d14);
+ ASSERT_EQUAL_FP64(rawbits_to_double(1 * base), d15);
+ ASSERT_EQUAL_FP64(rawbits_to_double(2 * base), d16);
+ ASSERT_EQUAL_FP64(rawbits_to_double(3 * base), d17);
+ ASSERT_EQUAL_FP64(rawbits_to_double(4 * base), d18);
+
+ TEARDOWN();
+}
+
+
// This enum is used only as an argument to the push-pop test helpers.
enum PushPopMethod {
// Push or Pop using the Push and Pop methods, with blocks of up to four
@@ -12814,6 +13026,10 @@
RegList list = PopulateRegisterArray(NULL, x, r, reg_size, reg_count,
allowed);
+ // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
+ UseScratchRegisterScope temps(&masm);
+ temps.ExcludeAll();
+
// The literal base is chosen to have two useful properties:
// * When multiplied by small values (such as a register index), this value
// is clearly readable in the result.
@@ -12993,6 +13209,10 @@
// Arbitrarily pick a register to use as a stack pointer.
const Register& stack_pointer = x10;
+ // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
+ UseScratchRegisterScope temps(&masm);
+ temps.ExcludeAll();
+
// The literal base is chosen to have two useful properties:
// * When multiplied (using an integer) by small values (such as a register
// index), this value is clearly readable in the result.
@@ -13167,6 +13387,10 @@
r6_to_r9 |= x[i].Bit();
}
+ // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
+ UseScratchRegisterScope temps(&masm);
+ temps.ExcludeAll();
+
// The literal base is chosen to have two useful properties:
// * When multiplied by small values (such as a register index), this value
// is clearly readable in the result.
@@ -13267,6 +13491,10 @@
stack[i] = 0xdeadbeef;
}
+ // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
+ UseScratchRegisterScope temps(&masm);
+ temps.ExcludeAll();
+
// The literal base is chosen to have two useful properties:
// * When multiplied by small values (such as a register index), this value
// is clearly readable in the result.
@@ -13446,6 +13674,10 @@
VIXL_ASSERT(sp.Is(__ StackPointer()));
+ // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
+ UseScratchRegisterScope temps(&masm);
+ temps.ExcludeAll();
+
__ Mov(x3, 0x3333333333333333);
__ Mov(x2, 0x2222222222222222);
__ Mov(x1, 0x1111111111111111);
@@ -14154,8 +14386,8 @@
static void ProcessNaNsHelper(double n, double m, double expected) {
- VIXL_ASSERT(isnan(n) || isnan(m));
- VIXL_ASSERT(isnan(expected));
+ VIXL_ASSERT(std::isnan(n) || std::isnan(m));
+ VIXL_ASSERT(std::isnan(expected));
SETUP();
START();
@@ -14225,8 +14457,8 @@
static void ProcessNaNsHelper(float n, float m, float expected) {
- VIXL_ASSERT(isnan(n) || isnan(m));
- VIXL_ASSERT(isnan(expected));
+ VIXL_ASSERT(std::isnan(n) || std::isnan(m));
+ VIXL_ASSERT(std::isnan(expected));
SETUP();
START();
@@ -14296,10 +14528,10 @@
static void DefaultNaNHelper(float n, float m, float a) {
- VIXL_ASSERT(isnan(n) || isnan(m) || isnan(a));
+ VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a));
- bool test_1op = isnan(n);
- bool test_2op = isnan(n) || isnan(m);
+ bool test_1op = std::isnan(n);
+ bool test_2op = std::isnan(n) || std::isnan(m);
SETUP();
START();
@@ -14423,10 +14655,10 @@
static void DefaultNaNHelper(double n, double m, double a) {
- VIXL_ASSERT(isnan(n) || isnan(m) || isnan(a));
+ VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a));
- bool test_1op = isnan(n);
- bool test_2op = isnan(n) || isnan(m);
+ bool test_1op = std::isnan(n);
+ bool test_2op = std::isnan(n) || std::isnan(m);
SETUP();
START();
diff --git a/test/test-disasm-a64.cc b/test/test-disasm-a64.cc
index 63f1b41..036d755 100644
--- a/test/test-disasm-a64.cc
+++ b/test/test-disasm-a64.cc
@@ -28,8 +28,8 @@
#include <cstring>
#include "test-runner.h"
-#include "a64/macro-assembler-a64.h"
-#include "a64/disasm-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
#define TEST(name) TEST_(DISASM_##name)
@@ -457,6 +457,7 @@
COMPARE(smull(x0, w0, w1), "smull x0, w0, w1");
COMPARE(smull(x30, w30, w0), "smull x30, w30, w0");
COMPARE(smulh(x0, x1, x2), "smulh x0, x1, x2");
+ COMPARE(umulh(x0, x2, x1), "umulh x0, x2, x1");
COMPARE(sdiv(w0, w1, w2), "sdiv w0, w1, w2");
COMPARE(sdiv(x3, x4, x5), "sdiv x3, x4, x5");
@@ -2361,6 +2362,13 @@
COMPARE(fcmp(s12, 0), "fcmp s12, #0.0");
COMPARE(fcmp(d12, 0), "fcmp d12, #0.0");
+ COMPARE(fcmpe(s0, s1), "fcmpe s0, s1");
+ COMPARE(fcmpe(s31, s30), "fcmpe s31, s30");
+ COMPARE(fcmpe(d0, d1), "fcmpe d0, d1");
+ COMPARE(fcmpe(d31, d30), "fcmpe d31, d30");
+ COMPARE(fcmpe(s12, 0), "fcmpe s12, #0.0");
+ COMPARE(fcmpe(d12, 0), "fcmpe d12, #0.0");
+
CLEANUP();
}
@@ -2379,6 +2387,17 @@
COMPARE(fccmp(s14, s15, CVFlag, al), "fccmp s14, s15, #nzCV, al");
COMPARE(fccmp(d16, d17, CFlag, nv), "fccmp d16, d17, #nzCv, nv");
+ COMPARE(fccmpe(s0, s1, NoFlag, eq), "fccmpe s0, s1, #nzcv, eq");
+ COMPARE(fccmpe(s2, s3, ZVFlag, ne), "fccmpe s2, s3, #nZcV, ne");
+ COMPARE(fccmpe(s30, s16, NCFlag, pl), "fccmpe s30, s16, #NzCv, pl");
+ COMPARE(fccmpe(s31, s31, NZCVFlag, le), "fccmpe s31, s31, #NZCV, le");
+ COMPARE(fccmpe(d4, d5, VFlag, gt), "fccmpe d4, d5, #nzcV, gt");
+ COMPARE(fccmpe(d6, d7, NFlag, vs), "fccmpe d6, d7, #Nzcv, vs");
+ COMPARE(fccmpe(d30, d0, NZFlag, vc), "fccmpe d30, d0, #NZcv, vc");
+ COMPARE(fccmpe(d31, d31, ZFlag, hs), "fccmpe d31, d31, #nZcv, hs");
+ COMPARE(fccmpe(s14, s15, CVFlag, al), "fccmpe s14, s15, #nzCV, al");
+ COMPARE(fccmpe(d16, d17, CFlag, nv), "fccmpe d16, d17, #nzCv, nv");
+
CLEANUP();
}
@@ -2655,6 +2674,12 @@
COMPARE(Add(w19, w3, -0x344), "sub w19, w3, #0x344 (836)");
COMPARE(Add(w20, w4, -2000), "sub w20, w4, #0x7d0 (2000)");
+ COMPARE(Add(w0, w1, 5, LeaveFlags), "add w0, w1, #0x5 (5)");
+ COMPARE(Add(w1, w2, 15, SetFlags), "adds w1, w2, #0xf (15)");
+
+ COMPARE(Sub(w0, w1, 5, LeaveFlags), "sub w0, w1, #0x5 (5)");
+ COMPARE(Sub(w1, w2, 15, SetFlags), "subs w1, w2, #0xf (15)");
+
COMPARE(Sub(w21, w3, -0xbc), "add w21, w3, #0xbc (188)");
COMPARE(Sub(w22, w4, -2000), "add w22, w4, #0x7d0 (2000)");
diff --git a/test/test-fuzz-a64.cc b/test/test-fuzz-a64.cc
index f488201..c73d71b 100644
--- a/test/test-fuzz-a64.cc
+++ b/test/test-fuzz-a64.cc
@@ -27,8 +27,8 @@
#include <stdlib.h>
#include "test-runner.h"
-#include "a64/decoder-a64.h"
-#include "a64/disasm-a64.h"
+#include "vixl/a64/decoder-a64.h"
+#include "vixl/a64/disasm-a64.h"
#define TEST(name) TEST_(FUZZ_##name)
diff --git a/test/test-invalset.cc b/test/test-invalset.cc
index a17755e..2fb2b34 100644
--- a/test/test-invalset.cc
+++ b/test/test-invalset.cc
@@ -26,7 +26,7 @@
#include "test-runner.h"
-#include "invalset.h"
+#include "vixl/invalset.h"
namespace vixl {
diff --git a/test/test-runner.h b/test/test-runner.h
index 40709c1..3acf053 100644
--- a/test/test-runner.h
+++ b/test/test-runner.h
@@ -27,7 +27,7 @@
#ifndef TEST_TEST_H_
#define TEST_TEST_H_
-#include "utils.h"
+#include "vixl/utils.h"
namespace vixl {
diff --git a/test/test-simulator-a64.cc b/test/test-simulator-a64.cc
index f389ece..b83642c 100644
--- a/test/test-simulator-a64.cc
+++ b/test/test-simulator-a64.cc
@@ -31,8 +31,8 @@
#include "test-utils-a64.h"
#include "test-simulator-inputs-a64.h"
#include "test-simulator-traces-a64.h"
-#include "a64/macro-assembler-a64.h"
-#include "a64/simulator-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/simulator-a64.h"
namespace vixl {
diff --git a/test/test-utils-a64.cc b/test/test-utils-a64.cc
index 10923a1..9de5b44 100644
--- a/test/test-utils-a64.cc
+++ b/test/test-utils-a64.cc
@@ -26,13 +26,13 @@
#include "test-utils-a64.h"
-#include <math.h> // Needed for isnan().
+#include <cmath>
#include "test-runner.h"
-#include "a64/macro-assembler-a64.h"
-#include "a64/simulator-a64.h"
-#include "a64/disasm-a64.h"
-#include "a64/cpu-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/simulator-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/cpu-a64.h"
#define __ masm->
@@ -85,7 +85,7 @@
if (float_to_rawbits(expected) == float_to_rawbits(result)) {
return true;
} else {
- if (isnan(expected) || (expected == 0.0)) {
+ if (std::isnan(expected) || (expected == 0.0)) {
printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
float_to_rawbits(expected), float_to_rawbits(result));
} else {
@@ -104,7 +104,7 @@
return true;
}
- if (isnan(expected) || (expected == 0.0)) {
+ if (std::isnan(expected) || (expected == 0.0)) {
printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
double_to_rawbits(expected), double_to_rawbits(result));
} else {
diff --git a/test/test-utils-a64.h b/test/test-utils-a64.h
index 3612809..94d7745 100644
--- a/test/test-utils-a64.h
+++ b/test/test-utils-a64.h
@@ -28,10 +28,10 @@
#define VIXL_A64_TEST_UTILS_A64_H_
#include "test-runner.h"
-#include "a64/macro-assembler-a64.h"
-#include "a64/simulator-a64.h"
-#include "a64/disasm-a64.h"
-#include "a64/cpu-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/simulator-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/cpu-a64.h"
namespace vixl {
diff --git a/tools/presubmit.py b/tools/presubmit.py
index 4342aa6..bd9f2b5 100755
--- a/tools/presubmit.py
+++ b/tools/presubmit.py
@@ -40,6 +40,10 @@
import util
+SUPPORTED_COMPILERS = ['g++', 'clang++']
+OBJ_DIR = './obj'
+
+
def BuildOptions():
result = argparse.ArgumentParser(
description='Run the linter and unit tests.',
@@ -53,9 +57,11 @@
help='Do not run the linter. Run the tests only.')
result.add_argument('--noclean', action='store_true',
help='Do not clean before build.')
+ result.add_argument('--fast', action='store_true',
+ help='Only test with one toolchain')
result.add_argument('--jobs', '-j', metavar='N', type=int, nargs='?',
default=1, const=multiprocessing.cpu_count(),
- help='''Runs the tests using N jobs. If the option is set
+ help='''Run the tests using N jobs. If the option is set
but no value is provided, the script will use as many jobs
as it thinks useful.''')
sim_default = 'off' if platform.machine() == 'aarch64' else 'on'
@@ -65,30 +71,72 @@
return result.parse_args()
-def CleanBuildSystem():
- def clean(mode):
- if args.verbose: print('Cleaning ' + mode + ' mode test...')
- command = 'scons mode=%s simulator=%s all --clean' % \
- (mode, args.simulator)
+def check_supported(compiler, mode, std):
+ if compiler not in SUPPORTED_COMPILERS:
+ print 'Invalid compiler.'
+ sys.exit(1)
+ if mode not in ['release', 'debug']:
+ print 'Invalid mode.'
+ sys.exit(1)
+ if std not in ['c++98', 'c++11']:
+ print 'Invalid c++ standard.'
+ sys.exit(1)
+
+
+def initialize_compiler_list():
+ compiler_list = []
+ for compiler in SUPPORTED_COMPILERS:
+ if util.has_compiler(compiler) and (len(compiler_list) == 0 or not args.fast):
+ compiler_list.append(compiler)
+ else:
+ # This warning suffices for args.fast too.
+ print 'WARNING: Skipping ' + compiler + ' tests.'
+ if len(compiler_list) == 0:
+ util.abort('Found no supported compilers')
+ return compiler_list
+
+
+def CleanBuildSystem(compiler):
+ def clean(compiler, mode, std):
+ check_supported(compiler, mode, std)
+ os.environ['CXX'] = compiler
+ if args.verbose:
+ print 'Cleaning ' + compiler + ' ' + std + ' ' \
+ + mode + ' mode test...'
+ command = 'scons mode=%s std=%s simulator=%s all --clean' % \
+ (mode, std, args.simulator)
status, output = util.getstatusoutput(command)
if status != 0:
print(output)
util.abort('Failed cleaning test: ' + command)
- clean('debug')
- clean('release')
+
+ clean(compiler, 'debug', 'c++98')
+ clean(compiler, 'debug', 'c++11')
+ clean(compiler, 'release', 'c++98')
+ clean(compiler, 'release', 'c++11')
-def BuildEverything():
- def build(mode):
- if args.verbose: print('Building ' + mode + ' mode test...')
- command = 'scons mode=%s simulator=%s all -j%u' % \
- (mode, args.simulator, args.jobs)
+def BuildEverything(compiler):
+ def build(compiler, mode, std):
+ check_supported(compiler, mode, std)
+ os.environ['CXX'] = compiler
+ if args.verbose:
+ print 'Building ' + compiler + ' ' + std + ' ' \
+ + mode + ' mode test...'
+ if args.jobs == 1:
+ print '- This may take a while. Pass `-j` to use multiple threads.'
+ command = 'scons mode=%s std=%s simulator=%s all -j%u' % \
+ (mode, std, args.simulator, args.jobs)
status, output = util.getstatusoutput(command)
if status != 0:
print(output)
util.abort('Failed building test: ' + command)
- build('debug')
- build('release')
+
+ print 'Building ' + compiler + ' tests...'
+ build(compiler, 'debug', 'c++98')
+ build(compiler, 'debug', 'c++11')
+ build(compiler, 'release', 'c++98')
+ build(compiler, 'release', 'c++11')
NOT_RUN = 'NOT RUN'
@@ -101,7 +149,7 @@
self.status = NOT_RUN
def name_prefix(self):
- return '%-26s : ' % self.name
+ return '%-40s : ' % self.name
class Tester:
@@ -121,33 +169,36 @@
class VIXLTest(Test):
- def __init__(self, mode, simulator, debugger = False, verbose = False):
- if not mode in ['release', 'debug']:
- print 'Invalid mode.'
- sys.exit(1)
-
- self.debugger = debugger
+ def __init__(self, compiler, mode, std, simulator, debugger = False, verbose = False):
+ check_supported(compiler, mode, std)
self.verbose = verbose
+ self.debugger = debugger
+ self.compiler = compiler
+ self.mode = mode
+ self.std = std
- name = 'test ' + mode
+ name = 'test ' + compiler + ' ' + std + ' ' + mode
if simulator:
name += ' (%s)' % ('debugger' if debugger else 'simulator')
Test.__init__(self, name)
- self.exe = './test-runner'
+ self.exe = 'test-runner'
if simulator:
self.exe += '_sim'
if mode == 'debug':
self.exe += '_g'
def Run(self):
- manifest = test.ReadManifest(self.exe, [], self.debugger,
- False, self.verbose)
+ self.status = PASSED
+ command = os.path.join(OBJ_DIR, self.mode, self.compiler,
+ self.std, self.exe)
+ manifest = test.ReadManifest(command, [], self.debugger, False, self.verbose)
retcode = test.RunTests(manifest, jobs = args.jobs,
verbose = self.verbose, debugger = self.debugger,
progress_prefix = self.name_prefix())
printer.EnsureNewLine()
- self.status = PASSED if retcode == 0 else FAILED
+ if retcode != 0:
+ self.status = FAILED
class LintTest(Test):
@@ -167,13 +218,17 @@
n_errors = lint.LintFiles(lint.default_tracked_files,
jobs = args.jobs, verbose = args.verbose,
progress_prefix = self.name_prefix())
-
self.status = PASSED if n_errors == 0 else FAILED
class BenchTest(Test):
- def __init__(self, mode, simulator):
- name = 'benchmarks ' + mode
+ def __init__(self, compiler, mode, std, simulator):
+ check_supported(compiler, mode, std)
+ self.compiler = compiler
+ self.mode = mode
+ self.std = std
+
+ name = 'benchmarks ' + compiler + ' ' + std + ' ' + mode
Test.__init__(self, name)
self.exe_suffix = ''
if simulator:
@@ -186,7 +241,8 @@
'bench-branch-masm', 'bench-branch-link-masm']
self.status = PASSED
for bench in benchmarks:
- command = './' + bench + self.exe_suffix
+ command = os.path.join(OBJ_DIR, self.mode, self.compiler, self.std,
+ bench + self.exe_suffix)
(rc, out) = util.getstatusoutput(command)
if rc != 0:
self.status = FAILED
@@ -206,31 +262,44 @@
print 'WARNING: This is not a Git repository. The linter will not run.'
args.nolint = True
- tester = Tester()
if not args.nolint:
import lint
- tester.AddTest(LintTest())
+ LintTest().Run()
if not args.notest:
- if not args.noclean:
- CleanBuildSystem()
- BuildEverything()
+ tester = Tester()
+    compiler_list = initialize_compiler_list()
- if args.simulator == 'on':
- # mode, sim, debugger, verbose
- tester.AddTest(VIXLTest('release', True, True, args.verbose))
- tester.AddTest(VIXLTest('debug', True, True, args.verbose))
- tester.AddTest(VIXLTest('release', True, False, args.verbose))
- tester.AddTest(VIXLTest('debug', True, False, args.verbose))
- tester.AddTest(BenchTest('release', True))
- tester.AddTest(BenchTest('debug', True))
- else:
- tester.AddTest(VIXLTest('release', False, False, args.verbose))
- tester.AddTest(VIXLTest('debug', False, False, args.verbose))
- tester.AddTest(BenchTest('release', False))
- tester.AddTest(BenchTest('debug', False))
+ for compiler in compiler_list:
+ if not args.noclean:
+ CleanBuildSystem(compiler)
+ BuildEverything(compiler)
- tester.RunAll()
+ if args.simulator == 'on':
+ # mode, std, sim, debugger, verbose
+ tester.AddTest(VIXLTest(compiler, 'release', 'c++98', True, True, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'debug', 'c++98', True, True, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'release', 'c++98', True, False, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'debug', 'c++98', True, False, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'release', 'c++11', True, True, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'debug', 'c++11', True, True, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'release', 'c++11', True, False, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'debug', 'c++11', True, False, args.verbose))
+ tester.AddTest(BenchTest(compiler,'release', 'c++98', True))
+ tester.AddTest(BenchTest(compiler,'debug', 'c++98', True))
+ tester.AddTest(BenchTest(compiler,'release', 'c++11', True))
+ tester.AddTest(BenchTest(compiler,'debug', 'c++11', True))
+ else:
+ tester.AddTest(VIXLTest(compiler, 'release', 'c++98', False, False, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'debug', 'c++98', False, False, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'release', 'c++11', False, False, args.verbose))
+ tester.AddTest(VIXLTest(compiler, 'debug', 'c++11', False, False, args.verbose))
+ tester.AddTest(BenchTest(compiler,'release', 'c++98', False))
+ tester.AddTest(BenchTest(compiler,'debug', 'c++98', False))
+ tester.AddTest(BenchTest(compiler,'release', 'c++11', False))
+ tester.AddTest(BenchTest(compiler,'debug', 'c++11', False))
+
+ tester.RunAll()
if git.is_git_repository_root():
untracked_files = git.get_untracked_files()
diff --git a/tools/util.py b/tools/util.py
index db4a07d..1c127de 100644
--- a/tools/util.py
+++ b/tools/util.py
@@ -24,6 +24,7 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import os
import sys
import subprocess
import shlex
@@ -49,3 +50,8 @@
lines = text.split('\n')
last = lines[-1].split('\r')
return last[-1]
+
+
+def has_compiler(compiler):
+ status, output = getstatusoutput('which ' + compiler)
+ return status == 0