VIXL Release 1.7

Refer to the README.md and LICENCE files for details. The complete diff from release 1.6 to release 1.7 follows.
diff --git a/.gitignore b/.gitignore
index 6945723..5a6c08c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
 *.pyc
 .sconsign.dblite
 obj/
-cctest*
+test-*
 bench-*
 libvixl*
 example-*
diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py
index 0525e4f..bac1def 100644
--- a/.ycm_extra_conf.py
+++ b/.ycm_extra_conf.py
@@ -18,6 +18,7 @@
   '-Werror',
   '-Wextra',
   '-pedantic',
+  '-Wno-newline-eof',
   '-Wwrite-strings',
   '-std=c++',
   '-x', 'c++'
diff --git a/README.md b/README.md
index d4baa86..014442d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-VIXL: AArch64 Runtime Code Generation Library Version 1.6
+VIXL: AArch64 Runtime Code Generation Library Version 1.7
 =========================================================
 
 Contents:
@@ -69,11 +69,24 @@
 floating-point operations which do not work correctly, and a number of tests
 fail as a result.
 
+Debug Builds
+------------
+
+Your project's build system must define `VIXL_DEBUG` (eg. `-DVIXL_DEBUG`)
+when using a VIXL library that has been built with debug enabled.
+
+Some classes defined in VIXL header files contain fields that are only present
+in debug builds, so if `VIXL_DEBUG` is defined when the library is built, but
+not defined for the header files included in your project, you will see runtime
+failures.
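
As a purely hypothetical sketch of that mechanism (the class and field names are illustrative, not taken from VIXL):

    class Example {
     public:
      void Method();
     private:
    #ifdef VIXL_DEBUG
      int debug_only_field_;   // Only exists when VIXL_DEBUG is defined.
    #endif
      int always_present_field_;
    };

If the library is compiled with `-DVIXL_DEBUG` but user code includes the header without it, the two sides disagree about `sizeof(Example)` and the offset of `always_present_field_`, which is the kind of mismatch that produces the runtime failures described above.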
+
 Exclusive-Access Instructions
 -----------------------------
 
 All exclusive-access instructions are supported, but the simulator cannot
-accurately simulate their behaviour as described in the ARMv8 ARM.
+accurately simulate their behaviour as described in the ARMv8 Architecture
+Reference Manual.
+
  * A local monitor is simulated, so simulated exclusive loads and stores execute
    as expected in a single-threaded environment.
  * The global monitor is simulated by occasionally causing exclusive-access
@@ -84,6 +97,7 @@
 
 The simulator tries to be strict, and implements the following restrictions that
 the ARMv8 ARM allows:
+
  * A pair of load-/store-exclusive instructions will only succeed if they have
    the same address and access size.
  * Most of the time, cache-maintenance operations or explicit memory accesses
@@ -92,9 +106,9 @@
       exclusive monitor will sometimes be left intact after these instructions.
 
 Instructions affected by these limitations:
-  stxrb, stxrh, stxr, ldxrb, ldxrh, ldxr, stxp, ldxp, stlxrb, stlxrh, stlxr,
-  ldaxrb, ldaxrh, ldaxr, stlxp, ldaxp, stlrb, stlrh, stlr, ldarb, ldarh, ldar,
-  clrex.
+  `stxrb`, `stxrh`, `stxr`, `ldxrb`, `ldxrh`, `ldxr`, `stxp`, `ldxp`, `stlxrb`,
+  `stlxrh`, `stlxr`, `ldaxrb`, `ldaxrh`, `ldaxr`, `stlxp`, `ldaxp`, `stlrb`,
+  `stlrh`, `stlr`, `ldarb`, `ldarh`, `ldar`, `clrex`.
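
Generated code should therefore retry exclusive sequences, just as on real hardware. A minimal sketch (assuming the usual `a64/macro-assembler-a64.h` include path and the `vixl` namespace) of an atomic increment that tolerates the monitor being cleared:

    #include "a64/macro-assembler-a64.h"

    using namespace vixl;

    // Atomically increment the 64-bit value at [x0]. If the exclusive monitor
    // is cleared between the load and the store, stxr writes a non-zero status
    // to w2 and the sequence is retried.
    void GenerateAtomicIncrement(MacroAssembler* masm) {
      Label retry;
      InstructionAccurateScope scope(masm, 5);
      masm->bind(&retry);
      masm->ldxr(x1, MemOperand(x0));
      masm->add(x1, x1, Operand(1));
      masm->stxr(w2, x1, MemOperand(x0));
      masm->cbnz(w2, &retry);
      masm->ret();
    }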
 
 
 Usage
@@ -116,7 +130,7 @@
  1. Git must be installed, and the VIXL project must be in a valid Git
     repository, such as one produced using `git clone`.
  2. `cpplint.py`, [as provided by Google][cpplint], must be available (and
-    executable) on the `PATH`. Only revision 104 has been tested with VIXL.
+    executable) on the `PATH`.
 
 It is possible to tell `tools/presubmit.py` to skip the linter stage by passing
 `--nolint`. This removes the dependency on `cpplint.py` and Git. The `--nolint`
@@ -148,23 +162,23 @@
 ---------------
 
 A short introduction to using VIXL can be found [here](doc/getting-started.md).
-Example source code is provided in the `examples` directory. You can build all
-the examples with `scons examples` from the root directory, or use
+Example source code is provided in the [examples](examples) directory. You can
+build all the examples with `scons examples` from the root directory, or use
 `scons --help` to get a detailed list of available build targets.
 
 
 Using VIXL
 ----------
 
-On top of the [here](doc/getting-started) page and the examples, you can find
-documentation and guides on various topics that may be of help
-[here](doc/topics/index.md).
+In addition to [getting started](doc/getting-started.md) and the
+[examples](examples), you can find documentation and guides on various topics
+that may be helpful [here](doc/topics/index.md).
 
 
 
 
 
-[cpplint]: https://google-styleguide.googlecode.com/svn-history/r104/trunk/cpplint/cpplint.py
+[cpplint]: http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
            "Google's cpplint.py script."
 
 [vixl]: https://github.com/armvixl/vixl
diff --git a/SConstruct b/SConstruct
index bca5651..334e73c 100644
--- a/SConstruct
+++ b/SConstruct
@@ -41,7 +41,6 @@
 Some common build targets are:
     scons            # Build the VIXL library and test utility.
     scons examples   # Build all the examples.
-    scons benchmarks # Build all the benchmarks.
     scons all        # Build everything.
 
 ''')
@@ -50,52 +49,52 @@
 # Global configuration.
 PROJ_SRC_DIR   = 'src'
 PROJ_SRC_FILES = '''
-src/utils.cc
-src/code-buffer.cc
 src/a64/assembler-a64.cc
-src/a64/macro-assembler-a64.cc
-src/a64/instructions-a64.cc
-src/a64/decoder-a64.cc
-src/a64/debugger-a64.cc
-src/a64/disasm-a64.cc
 src/a64/cpu-a64.cc
-src/a64/simulator-a64.cc
+src/a64/debugger-a64.cc
+src/a64/decoder-a64.cc
+src/a64/disasm-a64.cc
+src/a64/instructions-a64.cc
 src/a64/instrument-a64.cc
+src/a64/macro-assembler-a64.cc
+src/a64/simulator-a64.cc
+src/code-buffer.cc
+src/utils.cc
 '''.split()
 PROJ_EXAMPLES_DIR = 'examples'
 PROJ_EXAMPLES_SRC_FILES = '''
-examples/debugger.cc
+examples/abs.cc
 examples/add3-double.cc
 examples/add4-double.cc
+examples/check-bounds.cc
+examples/custom-disassembler.cc
+examples/debugger.cc
 examples/factorial-rec.cc
 examples/factorial.cc
-examples/sum-array.cc
-examples/abs.cc
-examples/swap4.cc
-examples/swap-int32.cc
-examples/check-bounds.cc
 examples/getting-started.cc
 examples/non-const-visitor.cc
-examples/custom-disassembler.cc
+examples/sum-array.cc
+examples/swap-int32.cc
+examples/swap4.cc
 '''.split()
 # List target specific files.
 # Target names are used as dictionary entries.
 TARGET_SRC_DIR = {
-  'cctest': 'test',
+  'test': 'test',
   'bench-dataop': 'benchmarks',
   'bench-branch': 'benchmarks',
   'bench-branch-link': 'benchmarks',
   'examples': 'examples'
 }
 TARGET_SRC_FILES = {
-  'cctest': '''
-    test/cctest.cc
-    test/test-utils-a64.cc
+  'test': '''
+    test/test-runner.cc
+    test/examples/test-examples.cc
     test/test-assembler-a64.cc
-    test/test-simulator-a64.cc
     test/test-disasm-a64.cc
     test/test-fuzz-a64.cc
-    test/examples/test-examples.cc
+    test/test-simulator-a64.cc
+    test/test-utils-a64.cc
     '''.split(),
   'bench-dataop': '''
     benchmarks/bench-dataop.cc
@@ -181,7 +180,7 @@
   build_suffix += '_sim'
 
 if env['mode'] == 'debug':
-  env.Append(CPPFLAGS = ['-g', '-DDEBUG'])
+  env.Append(CPPFLAGS = ['-g', '-DVIXL_DEBUG'])
   # Append the debug mode suffix to the executable name.
   build_suffix += '_g'
   build_dir = DEBUG_OBJ_DIR
@@ -219,18 +218,18 @@
 create_alias('libvixl', libvixl)
 
 
-# The cctest executable.
-# The cctest requires building the example files with specific options, so we
+# The test executable.
+# The test requires building the example files with specific options, so we
 # create a separate variant dir for the example objects built this way.
-cctest_ex_vdir = os.path.join(build_dir, 'cctest_examples')
-VariantDir(cctest_ex_vdir, '.')
-cctest_ex_obj = env.Object(list_target(cctest_ex_vdir, PROJ_EXAMPLES_SRC_FILES),
-                           CPPFLAGS = env['CPPFLAGS'] + ['-DTEST_EXAMPLES'])
-cctest = env.Program('cctest' + build_suffix,
-                     list_target(build_dir, TARGET_SRC_FILES['cctest']) +
-                     cctest_ex_obj + libvixl,
-                     CPPPATH = env['CPPPATH'] + [PROJ_EXAMPLES_DIR])
-create_alias('cctest', cctest)
+test_ex_vdir = os.path.join(build_dir, 'test_examples')
+VariantDir(test_ex_vdir, '.')
+test_ex_obj = env.Object(list_target(test_ex_vdir, PROJ_EXAMPLES_SRC_FILES),
+                         CPPFLAGS = env['CPPFLAGS'] + ['-DTEST_EXAMPLES'])
+test = env.Program('test-runner' + build_suffix,
+                   list_target(build_dir, TARGET_SRC_FILES['test']) +
+                   test_ex_obj + libvixl,
+                   CPPPATH = env['CPPPATH'] + [PROJ_EXAMPLES_DIR])
+create_alias('test', test)
 
 # The benchmarks.
 for bench in ['bench-dataop', 'bench-branch', 'bench-branch-link']:
@@ -256,5 +255,5 @@
 
 Help('Available top level targets:\n' + '\t' + '\n\t'.join(target_alias_names) + '\n')
 
-# By default, only build the cctests.
-Default(libvixl, cctest)
+# By default, only build the tests.
+Default(libvixl, test)
diff --git a/benchmarks/bench-branch-link.cc b/benchmarks/bench-branch-link.cc
index 0eee012..ba29ff7 100644
--- a/benchmarks/bench-branch-link.cc
+++ b/benchmarks/bench-branch-link.cc
@@ -45,13 +45,10 @@
       exit(1);
   }
 
-  // Emitting on the last word of the buffer will trigger an assert.
-  const unsigned buffer_size = (instructions + 1) * kInstructionSize;
+  MacroAssembler masm(instructions * kInstructionSize);
+  InstructionAccurateScope scope(&masm, instructions);
 
-  byte* assm_buffer = new byte[buffer_size];
-  MacroAssembler* masm = new MacroAssembler(assm_buffer, buffer_size);
-
-  #define __ masm->
+  #define __ masm.
 
   Label target;
   for (unsigned i = 0; i < instructions; i++) {
@@ -59,9 +56,7 @@
   }
   __ bind(&target);
 
-  masm->FinalizeCode();
-  delete masm;
-  delete assm_buffer;
+  masm.FinalizeCode();
 
   return 0;
 }
diff --git a/benchmarks/bench-branch.cc b/benchmarks/bench-branch.cc
index 8e49b7c..219f358 100644
--- a/benchmarks/bench-branch.cc
+++ b/benchmarks/bench-branch.cc
@@ -49,35 +49,36 @@
   }
 
   const unsigned buffer_size = 256 * KBytes;
-  // Emitting on the last word of the buffer will trigger an assert.
-  const unsigned buffer_instruction_count = buffer_size / kInstructionSize - 1;
+  const unsigned buffer_instruction_count = buffer_size / kInstructionSize;
+  MacroAssembler masm(buffer_size);
 
-  byte* assm_buffer = new byte[buffer_size];
-  MacroAssembler* masm = new MacroAssembler(assm_buffer, buffer_size);
-
-  #define __ masm->
+  #define __ masm.
   // We emit a branch to the next instruction.
 
   unsigned rounds = instructions / buffer_instruction_count;
   for (unsigned i = 0; i < rounds; ++i) {
-    for (unsigned j = 0; j < buffer_instruction_count; ++j) {
+    {
+      InstructionAccurateScope scope(&masm, buffer_instruction_count);
+      for (unsigned j = 0; j < buffer_instruction_count; ++j) {
+        Label target;
+        __ b(&target);
+        __ bind(&target);
+      }
+    }
+    masm.Reset();
+  }
+
+  unsigned remaining = instructions % buffer_instruction_count;
+  {
+    InstructionAccurateScope scope(&masm, remaining);
+    for (unsigned i = 0; i < remaining; ++i) {
       Label target;
       __ b(&target);
       __ bind(&target);
     }
-    masm->Reset();
   }
 
-  unsigned remaining = instructions % buffer_instruction_count;
-  for (unsigned i = 0; i < remaining; ++i) {
-    Label target;
-    __ b(&target);
-    __ bind(&target);
-  }
-
-  masm->FinalizeCode();
-  delete masm;
-  delete assm_buffer;
+  masm.FinalizeCode();
 
   return 0;
 }
diff --git a/benchmarks/bench-dataop.cc b/benchmarks/bench-dataop.cc
index 393f96d..4d6504a 100644
--- a/benchmarks/bench-dataop.cc
+++ b/benchmarks/bench-dataop.cc
@@ -49,30 +49,31 @@
   }
 
   const unsigned buffer_size = 256 * KBytes;
-  // Emitting on the last word of the buffer will trigger an assert.
-  const unsigned buffer_instruction_count = buffer_size / kInstructionSize - 1;
+  const unsigned buffer_instruction_count = buffer_size / kInstructionSize;
+  MacroAssembler masm(buffer_size);
 
-  byte* assm_buffer = new byte[buffer_size];
-  MacroAssembler* masm = new MacroAssembler(assm_buffer, buffer_size);
-
-  #define __ masm->
+  #define __ masm.
 
   unsigned rounds = instructions / buffer_instruction_count;
   for (unsigned i = 0; i < rounds; ++i) {
-    for (unsigned j = 0; j < buffer_instruction_count; ++j) {
-      __ add(x0, x1, Operand(x2));
+    {
+      InstructionAccurateScope scope(&masm, buffer_instruction_count);
+      for (unsigned j = 0; j < buffer_instruction_count; ++j) {
+        __ add(x0, x1, Operand(x2));
+      }
     }
-    masm->Reset();
+    masm.Reset();
   }
 
   unsigned remaining = instructions % buffer_instruction_count;
-  for (unsigned i = 0; i < remaining; ++i) {
-    __ add(x0, x1, Operand(x2));
+  {
+    InstructionAccurateScope scope(&masm, remaining);
+    for (unsigned i = 0; i < remaining; ++i) {
+      __ add(x0, x1, Operand(x2));
+    }
   }
 
-  masm->FinalizeCode();
-  delete masm;
-  delete assm_buffer;
+  masm.FinalizeCode();
 
   return 0;
 }
diff --git a/doc/changelog.md b/doc/changelog.md
index bc249a2..2340aad 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,6 +1,17 @@
 VIXL Change Log
 ===============
 
+* 1.7
+    + Added support for `prfm` prefetch instructions.
+    + Added support for all `frint` instruction variants.
+    + Add support for disassembling as an offset from a given address.
+    + Fixed the disassembly of `movz` and `movn`.
+    + Provide static helpers for immediate generation.
+    + Provide helpers to create CPURegList from list unions or intersections.
+    + Improved register value tracing.
+    + Multithreading test fixes.
+    + Other small bug fixes and build system improvements.
+
 * 1.6
     + Make literal pool management the responsibility of the macro assembler.
     + Move code buffer management out of the Assembler.
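
As a minimal usage sketch for the new `CPURegList` union and intersection helpers listed under 1.7 above (assuming the usual `a64/assembler-a64.h` include path):

    #include "a64/assembler-a64.h"

    using namespace vixl;

    // Given a set of registers that some generated code clobbers, return the
    // subset that must be preserved because it is callee-saved. The clobber
    // list here is purely illustrative.
    CPURegList RegistersToPreserve() {
      CPURegList clobbered(x0, x1, x19, x20);
      CPURegList callee_saved = CPURegList::GetCalleeSaved(kXRegSize);
      // x19 and x20 are callee-saved; x0 and x1 are not.
      return CPURegList::Intersection(clobbered, callee_saved);
    }

`CPURegList::Union` works the same way, and both helpers also have three- and four-list overloads.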
diff --git a/doc/supported-instructions.md b/doc/supported-instructions.md
index a5bde8b..ca4f438 100644
--- a/doc/supported-instructions.md
+++ b/doc/supported-instructions.md
@@ -95,7 +95,7 @@
 
 Arithmetic shift right.
 
-    inline void asr(const Register& rd, const Register& rn, unsigned shift)
+    void asr(const Register& rd, const Register& rn, unsigned shift)
 
 
 ### asrv ###
@@ -137,10 +137,10 @@
 
 Bitfield insert.
 
-    inline void bfi(const Register& rd,
-                    const Register& rn,
-                    unsigned lsb,
-                    unsigned width)
+    void bfi(const Register& rd,
+             const Register& rn,
+             unsigned lsb,
+             unsigned width)
 
 
 ### bfm ###
@@ -157,10 +157,10 @@
 
 Bitfield extract and insert low.
 
-    inline void bfxil(const Register& rd,
-                      const Register& rn,
-                      unsigned lsb,
-                      unsigned width)
+    void bfxil(const Register& rd,
+               const Register& rn,
+               unsigned lsb,
+               unsigned width)
 
 
 ### bic ###
@@ -661,7 +661,7 @@
 
 Logical shift left.
 
-    inline void lsl(const Register& rd, const Register& rn, unsigned shift)
+    void lsl(const Register& rd, const Register& rn, unsigned shift)
 
 
 ### lslv ###
@@ -675,7 +675,7 @@
 
 Logical shift right.
 
-    inline void lsr(const Register& rd, const Register& rn, unsigned shift)
+    void lsr(const Register& rd, const Register& rn, unsigned shift)
 
 
 ### lsrv ###
@@ -821,6 +821,36 @@
     void orr(const Register& rd, const Register& rn, const Operand& operand)
 
 
+### prfm ###
+
+Prefetch from pc + imm19 << 2.
+
+    void prfm(PrefetchOperation op, int imm19)
+
+
+### prfm ###
+
+Prefetch memory in the literal pool.
+
+    void prfm(PrefetchOperation op, RawLiteral* literal)
+
+
+### prfm ###
+
+Prefetch memory.
+
+    void prfm(PrefetchOperation op, const MemOperand& addr,
+              LoadStoreScalingOption option = PreferScaledOffset)
+
+
+### prfum ###
+
+Prefetch memory (with unscaled offset).
+
+    void prfum(PrefetchOperation op, const MemOperand& addr,
+               LoadStoreScalingOption option = PreferUnscaledOffset)
+
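
A minimal usage sketch for these prefetch helpers (the operation names such as `PLDL1KEEP` are assumed to match the architectural names used by the `PrefetchOperation` enumeration):

    #include "a64/macro-assembler-a64.h"

    using namespace vixl;

    void GeneratePrefetches(MacroAssembler* masm) {
      InstructionAccurateScope scope(masm, 3);
      masm->prfm(PLDL1KEEP, MemOperand(x0));        // Scaled immediate offset.
      masm->prfm(PLDL2STRM, MemOperand(x0, x1));    // Register offset.
      masm->prfum(PSTL1KEEP, MemOperand(x0, -16));  // Unscaled offset.
    }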
+
 ### rbit ###
 
 Bit reverse.
@@ -860,7 +890,7 @@
 
 Rotate right.
 
-    inline void ror(const Register& rd, const Register& rs, unsigned shift)
+    void ror(const Register& rd, const Register& rs, unsigned shift)
 
 
 ### rorv ###
@@ -892,10 +922,10 @@
 
 Signed bitfield insert with zero at right.
 
-    inline void sbfiz(const Register& rd,
-                      const Register& rn,
-                      unsigned lsb,
-                      unsigned width)
+    void sbfiz(const Register& rd,
+               const Register& rn,
+               unsigned lsb,
+               unsigned width)
 
 
 ### sbfm ###
@@ -912,10 +942,10 @@
 
 Signed bitfield extract.
 
-    inline void sbfx(const Register& rd,
-                     const Register& rn,
-                     unsigned lsb,
-                     unsigned width)
+    void sbfx(const Register& rd,
+              const Register& rn,
+              unsigned lsb,
+              unsigned width)
 
 
 ### scvtf ###
@@ -1135,21 +1165,21 @@
 
 Signed extend byte.
 
-    inline void sxtb(const Register& rd, const Register& rn)
+    void sxtb(const Register& rd, const Register& rn)
 
 
 ### sxth ###
 
 Signed extend halfword.
 
-    inline void sxth(const Register& rd, const Register& rn)
+    void sxth(const Register& rd, const Register& rn)
 
 
 ### sxtw ###
 
 Signed extend word.
 
-    inline void sxtw(const Register& rd, const Register& rn)
+    void sxtw(const Register& rd, const Register& rn)
 
 
 ### tbnz ###
@@ -1191,10 +1221,10 @@
 
 Unsigned bitfield insert with zero at right.
 
-    inline void ubfiz(const Register& rd,
-                      const Register& rn,
-                      unsigned lsb,
-                      unsigned width)
+    void ubfiz(const Register& rd,
+               const Register& rn,
+               unsigned lsb,
+               unsigned width)
 
 
 ### ubfm ###
@@ -1211,10 +1241,10 @@
 
 Unsigned bitfield extract.
 
-    inline void ubfx(const Register& rd,
-                     const Register& rn,
-                     unsigned lsb,
-                     unsigned width)
+    void ubfx(const Register& rd,
+              const Register& rn,
+              unsigned lsb,
+              unsigned width)
 
 
 ### ucvtf ###
@@ -1255,21 +1285,21 @@
 
 Unsigned extend byte.
 
-    inline void uxtb(const Register& rd, const Register& rn)
+    void uxtb(const Register& rd, const Register& rn)
 
 
 ### uxth ###
 
 Unsigned extend halfword.
 
-    inline void uxth(const Register& rd, const Register& rn)
+    void uxth(const Register& rd, const Register& rn)
 
 
 ### uxtw ###
 
 Unsigned extend word.
 
-    inline void uxtw(const Register& rd, const Register& rn)
+    void uxtw(const Register& rd, const Register& rn)
 
 
 
@@ -1518,6 +1548,13 @@
     void frinta(const FPRegister& fd, const FPRegister& fn)
 
 
+### frinti ###
+
+FP round to integer (implicit rounding).
+
+    void frinti(const FPRegister& fd, const FPRegister& fn)
+
+
 ### frintm ###
 
 FP round to integer (toward minus infinity).
@@ -1532,6 +1569,20 @@
     void frintn(const FPRegister& fd, const FPRegister& fn)
 
 
+### frintp ###
+
+FP round to integer (toward plus infinity).
+
+    void frintp(const FPRegister& fd, const FPRegister& fn)
+
+
+### frintx ###
+
+FP round to integer (exact, implicit rounding).
+
+    void frintx(const FPRegister& fd, const FPRegister& fn)
+
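
As a small sketch contrasting the rounding variants (`frinti`, `frintp` and `frintx` are new in this release; `d1` is assumed to hold the input value):

    #include "a64/macro-assembler-a64.h"

    using namespace vixl;

    void GenerateRoundings(MacroAssembler* masm) {
      InstructionAccurateScope scope(masm, 5);
      masm->frinta(d0, d1);  // To nearest, ties away from zero.
      masm->frintm(d2, d1);  // Toward minus infinity.
      masm->frintp(d3, d1);  // Toward plus infinity.
      masm->frinti(d4, d1);  // Using the rounding mode from FPCR.
      masm->frintx(d5, d1);  // As frinti, but signals if the result is inexact.
    }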
+
 ### frintz ###
 
 FP round to integer (towards zero).
@@ -1568,21 +1619,21 @@
 
 Emit 32 bits of data into the instruction stream.
 
-    inline void dc32(uint32_t data)
+    void dc32(uint32_t data)
 
 
 ### dc64 ###
 
 Emit 64 bits of data into the instruction stream.
 
-    inline void dc64(uint64_t data)
+    void dc64(uint64_t data)
 
 
 ### dci ###
 
 Emit raw instructions into the instruction stream.
 
-    inline void dci(Instr raw_inst)
+    void dci(Instr raw_inst)
 
 
 ### place ###
diff --git a/doc/topics/extending-the-disassembler.md b/doc/topics/extending-the-disassembler.md
index d30770d..fe9da19 100644
--- a/doc/topics/extending-the-disassembler.md
+++ b/doc/topics/extending-the-disassembler.md
@@ -2,29 +2,25 @@
 ==========================
 
 The output of the disassembler can be extended and customized. This may be
-useful for example to add comments and annotations to the disassembly or print
-aliases for register names.
+useful for example to add comments and annotations to the disassembly, print
+aliases for register names, or add an offset to disassembled addresses.
 
 The general procedure to achieve this is to create a sub-class of
 `Disassembler` and override the appropriate virtual functions.
 
 The `Disassembler` class provides virtual methods that implement how specific
 disassembly elements are printed. See
-[src/a64/disasm-a64.h](/src/a64/disasm-a64.h) for details.  At the time of
-writing, these are
+[src/a64/disasm-a64.h](/src/a64/disasm-a64.h) for details. These include
+functions like:
 
     virtual void AppendRegisterNameToOutput(const Instruction* instr,
-                                            CPURegister::RegisterType reg_type,
-                                            unsigned reg_code,
-                                            unsigned reg_size);
+                                            const CPURegister& reg);
     virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
                                                 int64_t offset);
-    virtual void AppendAddressToOutput(const Instruction* instr,
-                                       const void* addr);
-    virtual void AppendCodeAddressToOutput(const Instruction* instr,
-                                           const void* addr);
-    virtual void AppendDataAddressToOutput(const Instruction* instr,
-                                           const void* addr);
+    virtual void AppendCodeRelativeAddressToOutput(const Instruction* instr,
+                                                   const void* addr);
+    virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr,
+                                                       const void* addr);
 
 They can be overridden for example to use different register names and annotate
 code addresses.
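
For instance, a minimal sketch of such a sub-class, using the new `AppendRegisterNameToOutput` signature (the alias chosen is purely illustrative):

    #include "a64/disasm-a64.h"

    using namespace vixl;

    class AliasingDisassembler : public Disassembler {
     protected:
      // Print an alias for x16/w16; defer to the base class for everything else.
      virtual void AppendRegisterNameToOutput(const Instruction* instr,
                                              const CPURegister& reg) {
        if (reg.IsRegister() && (reg.code() == 16)) {
          AppendToOutput(reg.Is64Bits() ? "x_ip0" : "w_ip0");
          return;
        }
        Disassembler::AppendRegisterNameToOutput(instr, reg);
      }
    };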
@@ -39,13 +35,22 @@
 The [/examples/custom-disassembler.h](/examples/custom-disassembler.h) and
 [/examples/custom-disassembler.cc](/examples/custom-disassembler.cc) example
 files show how the methods can be overridden to use different register names,
-annotate code addresses, and add comments:
+map code addresses, annotate code addresses, and add comments:
 
-    VIXL disasm:   add x10, x16, x17
-    custom disasm: add x10, ip0, ip1 // add/sub to x10
+    VIXL disasm  0x7fff04cb05e0:  add x10, x16, x17
+    custom disasm -0x8: add x10, ip0, ip1 // add/sub to x10
 
-    VIXL disasm:   cbz x10, #+0x28 (addr 0x7fff8843bf6c)
-    custom disasm: cbz x10, #+0x28 (addr 0x7fff8843bf6c) (function: foo)
+    VIXL disasm  0x7fff04cb05e4:  cbz x10, #+0x28 (addr  0x7fff04cb060c)
+    custom disasm -0x4: cbz x10, #+0x28 (addr  0x24 ; label: somewhere)
+
+    VIXL disasm  0x7fff04cb05e8:  add x11, x16, x17
+    custom disasm  0x0: add x11, ip0, ip1
+
+    VIXL disasm  0x7fff04cb05ec:  add w5, w6, w30
+    custom disasm  0x4: add w5, w6, w30
+
+    VIXL disasm  0x7fff04cb05f0:  tbz w10, #2, #-0x10 (addr  0x7fff04cb05e0)
+    custom disasm  0x8: tbz w10, #2, #-0x10 (addr -0x8)
 
 
 One can refer to the implementation of visitor functions for the `Disassembler`
diff --git a/doc/topics/index.md b/doc/topics/index.md
index d41074d..9186e3b 100644
--- a/doc/topics/index.md
+++ b/doc/topics/index.md
@@ -2,7 +2,7 @@
 you think of any topic that may be useful and is not listed here, please contact
 us at <vixl@arm.com>.
 
-You can also have a look at the ['getting started' page](doc/getting-started).
+You can also have a look at the ['getting started' page](../doc/getting-started.md).
 
 * [Extending and customizing the disassembler](extending-the-disassembler.md)
 * [Using VIM YouCompleteMe with VIXL](ycm.md)
diff --git a/examples/custom-disassembler.cc b/examples/custom-disassembler.cc
index ce381af..32e448f 100644
--- a/examples/custom-disassembler.cc
+++ b/examples/custom-disassembler.cc
@@ -32,6 +32,7 @@
 #define __ masm->
 
 
+// We override this method to specify how register names should be disassembled.
 void CustomDisassembler::AppendRegisterNameToOutput(
     const Instruction* instr,
     const CPURegister& reg) {
@@ -51,7 +52,7 @@
         AppendToOutput(reg.Is64Bits() ? "x_stack_pointer" : "w_stack_pointer");
         return;
       case 31:
-        AppendToOutput(reg.Is64Bits() ? "x_zero_reg" : "w_zero-reg");
+        AppendToOutput(reg.Is64Bits() ? "x_zero_reg" : "w_zero_reg");
         return;
       default:
         // Fall through.
@@ -63,28 +64,37 @@
 }
 
 
-static const char* FakeLookupAddressDescription(const void* address) {
+static const char* FakeLookupTargetDescription(const void* address) {
   USE(address);
-  // We fake looking up the address in a table. We behave as if the first and
-  // third address we are asked about were function entries.
+  // We fake looking up the address.
   static int i = 0;
   const char* desc = NULL;
   if (i == 0) {
-    desc = "function: foo";
+    desc = "label: somewhere";
   } else if (i == 2) {
-    desc = "function: bar";
+    desc = "label: somewhere else";
   }
   i++;
   return desc;
 }
 
 
-void CustomDisassembler::AppendCodeAddressToOutput(
+// We override this method to add a description to addresses that we know about.
+// In this example we fake looking up a description, but in practice one could
+// for example use a table mapping addresses to function names.
+void CustomDisassembler::AppendCodeRelativeCodeAddressToOutput(
     const Instruction* instr, const void* addr) {
   USE(instr);
-  const char* address_desc = FakeLookupAddressDescription(addr);
-  // Print the raw address and - if available - its description.
-  AppendToOutput("(addr %p", addr);
+  // Print the address.
+  int64_t rel_addr = CodeRelativeAddress(addr);
+  if (rel_addr >= 0) {
+    AppendToOutput("(addr 0x%" PRIx64, rel_addr);
+  } else {
+    AppendToOutput("(addr -0x%" PRIx64, -rel_addr);
+  }
+
+  // If available, print a description of the address.
+  const char* address_desc = FakeLookupTargetDescription(addr);
   if (address_desc != NULL) {
     Disassembler::AppendToOutput(" ; %s", address_desc);
   }
@@ -92,6 +102,9 @@
 }
 
 
+// We override this method to add a comment to this type of instruction. Helpers
+// from the vixl::Instruction class can be used to analyse the instruction being
+// disassembled.
 void CustomDisassembler::VisitAddSubShifted(const Instruction* instr) {
   vixl::Disassembler::VisitAddSubShifted(instr);
   if (instr->Rd() == vixl::x10.code()) {
@@ -143,13 +156,29 @@
   decoder.AppendVisitor(&disasm);
   decoder.AppendVisitor(&custom_disasm);
 
+  // In our custom disassembler, disassemble as if the base address was -0x8.
+  // Note that this can also be achieved with
+  //   custom_disasm.MapCodeAddress(0x0, instr_start + 2 * kInstructionSize);
+  // Users may generally want to map the start address to 0x0. Mapping to a
+  // negative offset can be used to focus on the section of the
+  // disassembly at address 0x0.
+  custom_disasm.MapCodeAddress(-0x8, instr_start);
+
   // Iterate through the instructions to show the difference in the disassembly.
   Instruction* instr;
   for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
     decoder.Decode(instr);
     printf("\n");
-    printf("VIXL disasm:   %s\n", disasm.GetOutput());
-    printf("custom disasm: %s\n", custom_disasm.GetOutput());
+    printf("VIXL disasm\t %p:\t%s\n",
+           reinterpret_cast<void*>(instr), disasm.GetOutput());
+    int64_t rel_addr =
+        custom_disasm.CodeRelativeAddress(reinterpret_cast<void*>(instr));
+    char rel_addr_sign_char = rel_addr < 0 ? '-' : ' ';
+    rel_addr = labs(rel_addr);
+    printf("custom disasm\t%c0x%" PRIx64 ":\t%s\n",
+           rel_addr_sign_char,
+           rel_addr,
+           custom_disasm.GetOutput());
   }
 }
 
diff --git a/examples/custom-disassembler.h b/examples/custom-disassembler.h
index 12d1a7f..382a55d 100644
--- a/examples/custom-disassembler.h
+++ b/examples/custom-disassembler.h
@@ -33,6 +33,10 @@
 
 void TestCustomDisassembler();
 
+// We want to change three things in the disassembly:
+// - Add comments to some add/sub instructions.
+// - Use aliases for register names.
+// - Add descriptions for code addresses.
 class CustomDisassembler: public Disassembler {
  public:
   CustomDisassembler() : Disassembler() { }
@@ -41,13 +45,11 @@
   virtual void VisitAddSubShifted(const Instruction* instr);
 
  protected:
-  // We print custom register names.
   virtual void AppendRegisterNameToOutput(const Instruction* instr,
                                           const CPURegister& reg);
 
-  // We fake looking up addresses in a table and printing useful names.
-  virtual void AppendCodeAddressToOutput(const Instruction* instr,
-                                         const void* addr);
+  virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr,
+                                                     const void* addr);
 };
 
 
diff --git a/examples/non-const-visitor.cc b/examples/non-const-visitor.cc
index 9e07cc1..bd02239 100644
--- a/examples/non-const-visitor.cc
+++ b/examples/non-const-visitor.cc
@@ -53,7 +53,7 @@
   simulator.set_xreg(1, b);
   simulator.RunFrom(start_instr);
   int64_t res = simulator.xreg(0);
-  printf("foo(%ld, %ld) = %ld\n", a, b, res);
+  printf("foo(%" PRId64", %" PRId64") = %" PRId64"\n", a, b, res);
 
   return res;
 #else
diff --git a/src/a64/assembler-a64.cc b/src/a64/assembler-a64.cc
index ae137dc..9e0bffc 100644
--- a/src/a64/assembler-a64.cc
+++ b/src/a64/assembler-a64.cc
@@ -88,6 +88,37 @@
 }
 
 
+CPURegList CPURegList::Union(const CPURegList& list_1,
+                             const CPURegList& list_2,
+                             const CPURegList& list_3) {
+  return Union(list_1, Union(list_2, list_3));
+}
+
+
+CPURegList CPURegList::Union(const CPURegList& list_1,
+                             const CPURegList& list_2,
+                             const CPURegList& list_3,
+                             const CPURegList& list_4) {
+  return Union(Union(list_1, list_2), Union(list_3, list_4));
+}
+
+
+CPURegList CPURegList::Intersection(const CPURegList& list_1,
+                                    const CPURegList& list_2,
+                                    const CPURegList& list_3) {
+  return Intersection(list_1, Intersection(list_2, list_3));
+}
+
+
+CPURegList CPURegList::Intersection(const CPURegList& list_1,
+                                    const CPURegList& list_2,
+                                    const CPURegList& list_3,
+                                    const CPURegList& list_4) {
+  return Intersection(Intersection(list_1, list_2),
+                      Intersection(list_3, list_4));
+}
+
+
 CPURegList CPURegList::GetCalleeSaved(unsigned size) {
   return CPURegList(CPURegister::kRegister, size, 19, 29);
 }
@@ -363,7 +394,7 @@
 Assembler::Assembler(byte* buffer, size_t capacity,
                      PositionIndependentCodeOption pic)
     : pic_(pic) {
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   buffer_monitor_ = 0;
 #endif
   buffer_ = new CodeBuffer(buffer, capacity);
@@ -372,7 +403,7 @@
 
 Assembler::Assembler(size_t capacity, PositionIndependentCodeOption pic)
     : pic_(pic) {
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   buffer_monitor_ = 0;
 #endif
   buffer_ = new CodeBuffer(capacity);
@@ -456,12 +487,18 @@
   if (literal->IsUsed()) {
     Instruction* target = GetCursorAddress<Instruction*>();
     ptrdiff_t offset = literal->last_use();
-
-    while (offset != 0) {
+    bool done;
+    do {
       Instruction* ldr = GetOffsetAddress<Instruction*>(offset);
-      offset = ldr->ImmLLiteral();
+      VIXL_ASSERT(ldr->IsLoadLiteral());
+
+      ptrdiff_t imm19 = ldr->ImmLLiteral();
+      VIXL_ASSERT(imm19 <= 0);
+      done = (imm19 == 0);
+      offset += imm19 * kLiteralEntrySize;
+
       ldr->SetImmLLiteral(target);
-    }
+    } while (!done);
   }
 
   // "bind" the literal.
@@ -1353,6 +1390,11 @@
 }
 
 
+void Assembler::prfm(PrefetchOperation op, int imm19) {
+  Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19));
+}
+
+
 // Exclusive-access instructions.
 void Assembler::stxrb(const Register& rs,
                       const Register& rt,
@@ -1533,6 +1575,26 @@
   Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
 }
 
+void Assembler::prfm(PrefetchOperation op, const MemOperand& address,
+                     LoadStoreScalingOption option) {
+  VIXL_ASSERT(option != RequireUnscaledOffset);
+  VIXL_ASSERT(option != PreferUnscaledOffset);
+  Prefetch(op, address, option);
+}
+
+
+void Assembler::prfum(PrefetchOperation op, const MemOperand& address,
+                      LoadStoreScalingOption option) {
+  VIXL_ASSERT(option != RequireScaledOffset);
+  VIXL_ASSERT(option != PreferScaledOffset);
+  Prefetch(op, address, option);
+}
+
+
+void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) {
+  prfm(op, LinkAndGetWordOffsetTo(literal));
+}
+
 
 void Assembler::mov(const Register& rd, const Register& rm) {
   // Moves involving the stack pointer are encoded as add immediate with
@@ -1738,6 +1800,13 @@
 }
 
 
+void Assembler::frinti(const FPRegister& fd,
+                       const FPRegister& fn) {
+  VIXL_ASSERT(fd.size() == fn.size());
+  FPDataProcessing1Source(fd, fn, FRINTI);
+}
+
+
 void Assembler::frintm(const FPRegister& fd,
                        const FPRegister& fn) {
   VIXL_ASSERT(fd.size() == fn.size());
@@ -1752,6 +1821,20 @@
 }
 
 
+void Assembler::frintp(const FPRegister& fd,
+                       const FPRegister& fn) {
+  VIXL_ASSERT(fd.size() == fn.size());
+  FPDataProcessing1Source(fd, fn, FRINTP);
+}
+
+
+void Assembler::frintx(const FPRegister& fd,
+                       const FPRegister& fn) {
+  VIXL_ASSERT(fd.size() == fn.size());
+  FPDataProcessing1Source(fd, fn, FRINTX);
+}
+
+
 void Assembler::frintz(const FPRegister& fd,
                        const FPRegister& fn) {
   VIXL_ASSERT(fd.size() == fn.size());
@@ -2214,39 +2297,29 @@
 }
 
 
-bool Assembler::IsImmAddSub(int64_t immediate) {
-  return is_uint12(immediate) ||
-         (is_uint12(immediate >> 12) && ((immediate & 0xfff) == 0));
-}
-
-void Assembler::LoadStore(const CPURegister& rt,
-                          const MemOperand& addr,
-                          LoadStoreOp op,
-                          LoadStoreScalingOption option) {
-  Instr memop = op | Rt(rt) | RnSP(addr.base());
+Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
+                                     LSDataSize size,
+                                     LoadStoreScalingOption option) {
+  Instr base = RnSP(addr.base());
   int64_t offset = addr.offset();
-  LSDataSize size = CalcLSDataSize(op);
 
   if (addr.IsImmediateOffset()) {
     bool prefer_unscaled = (option == PreferUnscaledOffset) ||
                            (option == RequireUnscaledOffset);
     if (prefer_unscaled && IsImmLSUnscaled(offset)) {
       // Use the unscaled addressing mode.
-      Emit(LoadStoreUnscaledOffsetFixed | memop | ImmLS(offset));
-      return;
+      return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset);
     }
 
     if ((option != RequireUnscaledOffset) && IsImmLSScaled(offset, size)) {
       // Use the scaled addressing mode.
-      Emit(LoadStoreUnsignedOffsetFixed | memop |
-           ImmLSUnsigned(offset >> size));
-      return;
+      return base | LoadStoreUnsignedOffsetFixed |
+          ImmLSUnsigned(offset >> size);
     }
 
     if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) {
       // Use the unscaled addressing mode.
-      Emit(LoadStoreUnscaledOffsetFixed | memop | ImmLS(offset));
-      return;
+      return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset);
     }
   }
 
@@ -2268,29 +2341,106 @@
     // Shifts are encoded in one bit, indicating a left shift by the memory
     // access size.
     VIXL_ASSERT((shift_amount == 0) ||
-                (shift_amount == static_cast<unsigned>(CalcLSDataSize(op))));
-    Emit(LoadStoreRegisterOffsetFixed | memop | Rm(addr.regoffset()) |
-         ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0));
-    return;
+                (shift_amount == static_cast<unsigned>(size)));
+    return base | LoadStoreRegisterOffsetFixed | Rm(addr.regoffset()) |
+        ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0);
   }
 
   if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) {
-    Emit(LoadStorePreIndexFixed | memop | ImmLS(offset));
-    return;
+    return base | LoadStorePreIndexFixed | ImmLS(offset);
   }
 
   if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) {
-    Emit(LoadStorePostIndexFixed | memop | ImmLS(offset));
-    return;
+    return base | LoadStorePostIndexFixed | ImmLS(offset);
   }
 
   // If this point is reached, the MemOperand (addr) cannot be encoded.
   VIXL_UNREACHABLE();
+  return 0;
 }
 
 
-bool Assembler::IsImmLSUnscaled(int64_t offset) {
-  return is_int9(offset);
+void Assembler::LoadStore(const CPURegister& rt,
+                          const MemOperand& addr,
+                          LoadStoreOp op,
+                          LoadStoreScalingOption option) {
+  Emit(op | Rt(rt) | LoadStoreMemOperand(addr, CalcLSDataSize(op), option));
+}
+
+
+void Assembler::Prefetch(PrefetchOperation op,
+                         const MemOperand& addr,
+                         LoadStoreScalingOption option) {
+  VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset());
+
+  Instr prfop = ImmPrefetchOperation(op);
+  Emit(PRFM | prfop | LoadStoreMemOperand(addr, LSDoubleWord, option));
+}
+
+
+bool Assembler::IsImmAddSub(int64_t immediate) {
+  return is_uint12(immediate) ||
+         (is_uint12(immediate >> 12) && ((immediate & 0xfff) == 0));
+}
+
+
+bool Assembler::IsImmConditionalCompare(int64_t immediate) {
+  return is_uint5(immediate);
+}
+
+
+bool Assembler::IsImmFP32(float imm) {
+  // Valid values will have the form:
+  // aBbb.bbbc.defg.h000.0000.0000.0000.0000
+  uint32_t bits = float_to_rawbits(imm);
+  // bits[19..0] are cleared.
+  if ((bits & 0x7ffff) != 0) {
+    return false;
+  }
+
+  // bits[29..25] are all set or all cleared.
+  uint32_t b_pattern = (bits >> 16) & 0x3e00;
+  if (b_pattern != 0 && b_pattern != 0x3e00) {
+    return false;
+  }
+
+  // bit[30] and bit[29] are opposite.
+  if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
+    return false;
+  }
+
+  return true;
+}
+
+
+bool Assembler::IsImmFP64(double imm) {
+  // Valid values will have the form:
+  // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+  // 0000.0000.0000.0000.0000.0000.0000.0000
+  uint64_t bits = double_to_rawbits(imm);
+  // bits[47..0] are cleared.
+  if ((bits & 0x0000ffffffffffff) != 0) {
+    return false;
+  }
+
+  // bits[61..54] are all set or all cleared.
+  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+  if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
+    return false;
+  }
+
+  // bit[62] and bit[61] are opposite.
+  if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
+    return false;
+  }
+
+  return true;
+}
+
+
+bool Assembler::IsImmLSPair(int64_t offset, LSDataSize size) {
+  bool offset_is_size_multiple = (((offset >> size) << size) == offset);
+  return offset_is_size_multiple && is_int7(offset >> size);
 }
 
 
@@ -2300,9 +2450,23 @@
 }
 
 
-bool Assembler::IsImmLSPair(int64_t offset, LSDataSize size) {
-  bool offset_is_size_multiple = (((offset >> size) << size) == offset);
-  return offset_is_size_multiple && is_int7(offset >> size);
+bool Assembler::IsImmLSUnscaled(int64_t offset) {
+  return is_int9(offset);
+}
+
+
+// The movn instruction can generate immediates containing an arbitrary 16-bit
+// value, with remaining bits set, eg. 0xffff1234, 0xffff1234ffffffff.
+bool Assembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
+  return IsImmMovz(~imm, reg_size);
+}
+
+
+// The movz instruction can generate immediates containing an arbitrary 16-bit
+// value, with remaining bits clear, eg. 0x00001234, 0x0000123400000000.
+bool Assembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
+  VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
+  return CountClearHalfWords(imm, reg_size) >= ((reg_size / 16) - 1);
 }
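
A minimal sketch of how these helpers can be used to choose a materialisation strategy (assuming they are exposed as static members of `Assembler`, as the changelog's "static helpers for immediate generation" entry suggests):

    #include "a64/assembler-a64.h"

    using namespace vixl;

    // Describe how a 64-bit immediate could be materialised.
    const char* DescribeImmediate(uint64_t imm) {
      if (Assembler::IsImmMovz(imm, kXRegSize)) return "single movz";
      if (Assembler::IsImmMovn(imm, kXRegSize)) return "single movn";
      if (Assembler::IsImmAddSub(imm)) return "add/sub immediate";
      return "longer sequence (e.g. movz followed by movk)";
    }

    // For example:
    //   DescribeImmediate(0x0000123400000000)  ->  "single movz"
    //   DescribeImmediate(0xffffffffffff1234)  ->  "single movn"
    //   DescribeImmediate(0x0000000000fff000)  ->  "add/sub immediate"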
 
 
@@ -2512,60 +2676,6 @@
 }
 
 
-bool Assembler::IsImmConditionalCompare(int64_t immediate) {
-  return is_uint5(immediate);
-}
-
-
-bool Assembler::IsImmFP32(float imm) {
-  // Valid values will have the form:
-  // aBbb.bbbc.defg.h000.0000.0000.0000.0000
-  uint32_t bits = float_to_rawbits(imm);
-  // bits[19..0] are cleared.
-  if ((bits & 0x7ffff) != 0) {
-    return false;
-  }
-
-  // bits[29..25] are all set or all cleared.
-  uint32_t b_pattern = (bits >> 16) & 0x3e00;
-  if (b_pattern != 0 && b_pattern != 0x3e00) {
-    return false;
-  }
-
-  // bit[30] and bit[29] are opposite.
-  if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
-    return false;
-  }
-
-  return true;
-}
-
-
-bool Assembler::IsImmFP64(double imm) {
-  // Valid values will have the form:
-  // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
-  // 0000.0000.0000.0000.0000.0000.0000.0000
-  uint64_t bits = double_to_rawbits(imm);
-  // bits[47..0] are cleared.
-  if ((bits & 0x0000ffffffffffff) != 0) {
-    return false;
-  }
-
-  // bits[61..54] are all set or all cleared.
-  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
-  if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
-    return false;
-  }
-
-  // bit[62] and bit[61] are opposite.
-  if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
-    return false;
-  }
-
-  return true;
-}
-
-
 LoadStoreOp Assembler::LoadOpFor(const CPURegister& rt) {
   VIXL_ASSERT(rt.IsValid());
   if (rt.IsRegister()) {
diff --git a/src/a64/assembler-a64.h b/src/a64/assembler-a64.h
index 16a704b..35aaf20 100644
--- a/src/a64/assembler-a64.h
+++ b/src/a64/assembler-a64.h
@@ -151,21 +151,21 @@
     return Aliases(other) && (size_ == other.size_);
   }
 
-  inline bool IsZero() const {
+  bool IsZero() const {
     VIXL_ASSERT(IsValid());
     return IsRegister() && (code_ == kZeroRegCode);
   }
 
-  inline bool IsSP() const {
+  bool IsSP() const {
     VIXL_ASSERT(IsValid());
     return IsRegister() && (code_ == kSPRegInternalCode);
   }
 
-  inline bool IsRegister() const {
+  bool IsRegister() const {
     return type_ == kRegister;
   }
 
-  inline bool IsFPRegister() const {
+  bool IsFPRegister() const {
     return type_ == kFPRegister;
   }
 
@@ -179,7 +179,7 @@
   const FPRegister& S() const;
   const FPRegister& D() const;
 
-  inline bool IsSameSizeAndType(const CPURegister& other) const {
+  bool IsSameSizeAndType(const CPURegister& other) const {
     return (size_ == other.size_) && (type_ == other.type_);
   }
 
@@ -198,7 +198,7 @@
 class Register : public CPURegister {
  public:
   Register() : CPURegister() {}
-  inline explicit Register(const CPURegister& other)
+  explicit Register(const CPURegister& other)
       : CPURegister(other.code(), other.size(), other.type()) {
     VIXL_ASSERT(IsValidRegister());
   }
@@ -213,10 +213,6 @@
   static const Register& WRegFromCode(unsigned code);
   static const Register& XRegFromCode(unsigned code);
 
-  // V8 compatibility.
-  static const int kNumRegisters = kNumberOfRegisters;
-  static const int kNumAllocatableRegisters = kNumberOfRegisters - 1;
-
  private:
   static const Register wregisters[];
   static const Register xregisters[];
@@ -225,12 +221,12 @@
 
 class FPRegister : public CPURegister {
  public:
-  inline FPRegister() : CPURegister() {}
-  inline explicit FPRegister(const CPURegister& other)
+  FPRegister() : CPURegister() {}
+  explicit FPRegister(const CPURegister& other)
       : CPURegister(other.code(), other.size(), other.type()) {
     VIXL_ASSERT(IsValidFPRegister());
   }
-  inline FPRegister(unsigned code, unsigned size)
+  FPRegister(unsigned code, unsigned size)
       : CPURegister(code, size, kFPRegister) {}
 
   bool IsValid() const {
@@ -241,10 +237,6 @@
   static const FPRegister& SRegFromCode(unsigned code);
   static const FPRegister& DRegFromCode(unsigned code);
 
-  // V8 compatibility.
-  static const int kNumRegisters = kNumberOfFPRegisters;
-  static const int kNumAllocatableRegisters = kNumberOfFPRegisters - 1;
-
  private:
   static const FPRegister sregisters[];
   static const FPRegister dregisters[];
@@ -312,23 +304,23 @@
 // Lists of registers.
 class CPURegList {
  public:
-  inline explicit CPURegList(CPURegister reg1,
-                             CPURegister reg2 = NoCPUReg,
-                             CPURegister reg3 = NoCPUReg,
-                             CPURegister reg4 = NoCPUReg)
+  explicit CPURegList(CPURegister reg1,
+                      CPURegister reg2 = NoCPUReg,
+                      CPURegister reg3 = NoCPUReg,
+                      CPURegister reg4 = NoCPUReg)
       : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
         size_(reg1.size()), type_(reg1.type()) {
     VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
     VIXL_ASSERT(IsValid());
   }
 
-  inline CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
+  CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
       : list_(list), size_(size), type_(type) {
     VIXL_ASSERT(IsValid());
   }
 
-  inline CPURegList(CPURegister::RegisterType type, unsigned size,
-                    unsigned first_reg, unsigned last_reg)
+  CPURegList(CPURegister::RegisterType type, unsigned size,
+             unsigned first_reg, unsigned last_reg)
       : size_(size), type_(type) {
     VIXL_ASSERT(((type == CPURegister::kRegister) &&
                  (last_reg < kNumberOfRegisters)) ||
@@ -340,7 +332,7 @@
     VIXL_ASSERT(IsValid());
   }
 
-  inline CPURegister::RegisterType type() const {
+  CPURegister::RegisterType type() const {
     VIXL_ASSERT(IsValid());
     return type_;
   }
@@ -366,13 +358,13 @@
   }
 
   // Variants of Combine and Remove which take a single register.
-  inline void Combine(const CPURegister& other) {
+  void Combine(const CPURegister& other) {
     VIXL_ASSERT(other.type() == type_);
     VIXL_ASSERT(other.size() == size_);
     Combine(other.code());
   }
 
-  inline void Remove(const CPURegister& other) {
+  void Remove(const CPURegister& other) {
     VIXL_ASSERT(other.type() == type_);
     VIXL_ASSERT(other.size() == size_);
     Remove(other.code());
@@ -380,24 +372,51 @@
 
   // Variants of Combine and Remove which take a single register by its code;
   // the type and size of the register is inferred from this list.
-  inline void Combine(int code) {
+  void Combine(int code) {
     VIXL_ASSERT(IsValid());
     VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
     list_ |= (UINT64_C(1) << code);
   }
 
-  inline void Remove(int code) {
+  void Remove(int code) {
     VIXL_ASSERT(IsValid());
     VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
     list_ &= ~(UINT64_C(1) << code);
   }
 
-  inline RegList list() const {
+  static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
+    VIXL_ASSERT(list_1.type_ == list_2.type_);
+    VIXL_ASSERT(list_1.size_ == list_2.size_);
+    return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
+  }
+  static CPURegList Union(const CPURegList& list_1,
+                          const CPURegList& list_2,
+                          const CPURegList& list_3);
+  static CPURegList Union(const CPURegList& list_1,
+                          const CPURegList& list_2,
+                          const CPURegList& list_3,
+                          const CPURegList& list_4);
+
+  static CPURegList Intersection(const CPURegList& list_1,
+                                 const CPURegList& list_2) {
+    VIXL_ASSERT(list_1.type_ == list_2.type_);
+    VIXL_ASSERT(list_1.size_ == list_2.size_);
+    return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
+  }
+  static CPURegList Intersection(const CPURegList& list_1,
+                                 const CPURegList& list_2,
+                                 const CPURegList& list_3);
+  static CPURegList Intersection(const CPURegList& list_1,
+                                 const CPURegList& list_2,
+                                 const CPURegList& list_3,
+                                 const CPURegList& list_4);
+
+  RegList list() const {
     VIXL_ASSERT(IsValid());
     return list_;
   }
 
-  inline void set_list(RegList new_list) {
+  void set_list(RegList new_list) {
     VIXL_ASSERT(IsValid());
     list_ = new_list;
   }
@@ -417,38 +436,38 @@
   static CPURegList GetCallerSaved(unsigned size = kXRegSize);
   static CPURegList GetCallerSavedFP(unsigned size = kDRegSize);
 
-  inline bool IsEmpty() const {
+  bool IsEmpty() const {
     VIXL_ASSERT(IsValid());
     return list_ == 0;
   }
 
-  inline bool IncludesAliasOf(const CPURegister& other) const {
+  bool IncludesAliasOf(const CPURegister& other) const {
     VIXL_ASSERT(IsValid());
     return (type_ == other.type()) && ((other.Bit() & list_) != 0);
   }
 
-  inline bool IncludesAliasOf(int code) const {
+  bool IncludesAliasOf(int code) const {
     VIXL_ASSERT(IsValid());
     return ((code & list_) != 0);
   }
 
-  inline int Count() const {
+  int Count() const {
     VIXL_ASSERT(IsValid());
     return CountSetBits(list_, kRegListSizeInBits);
   }
 
-  inline unsigned RegisterSizeInBits() const {
+  unsigned RegisterSizeInBits() const {
     VIXL_ASSERT(IsValid());
     return size_;
   }
 
-  inline unsigned RegisterSizeInBytes() const {
+  unsigned RegisterSizeInBytes() const {
     int size_in_bits = RegisterSizeInBits();
     VIXL_ASSERT((size_in_bits % 8) == 0);
     return size_in_bits / 8;
   }
 
-  inline unsigned TotalSizeInBytes() const {
+  unsigned TotalSizeInBytes() const {
     VIXL_ASSERT(IsValid());
     return RegisterSizeInBytes() * Count();
   }
@@ -587,8 +606,10 @@
     VIXL_ASSERT(!IsLinked() || IsBound());
   }
 
-  inline bool IsBound() const { return location_ >= 0; }
-  inline bool IsLinked() const { return !links_.empty(); }
+  bool IsBound() const { return location_ >= 0; }
+  bool IsLinked() const { return !links_.empty(); }
+
+  ptrdiff_t location() const { return location_; }
 
  private:
   // The list of linked instructions is stored in a stack-like structure. We
@@ -647,22 +668,20 @@
     std::stack<ptrdiff_t> * links_extended_;
   };
 
-  inline ptrdiff_t location() const { return location_; }
-
-  inline void Bind(ptrdiff_t location) {
+  void Bind(ptrdiff_t location) {
     // Labels can only be bound once.
     VIXL_ASSERT(!IsBound());
     location_ = location;
   }
 
-  inline void AddLink(ptrdiff_t instruction) {
+  void AddLink(ptrdiff_t instruction) {
     // If a label is bound, the assembler already has the information it needs
     // to write the instruction, so there is no need to add it to links_.
     VIXL_ASSERT(!IsBound());
     links_.push(instruction);
   }
 
-  inline ptrdiff_t GetAndRemoveNextLink() {
+  ptrdiff_t GetAndRemoveNextLink() {
     VIXL_ASSERT(IsLinked());
     ptrdiff_t link = links_.top();
     links_.pop();
@@ -845,14 +864,14 @@
 
   // Return the address of an offset in the buffer.
   template <typename T>
-  inline T GetOffsetAddress(ptrdiff_t offset) {
+  T GetOffsetAddress(ptrdiff_t offset) {
     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
     return buffer_->GetOffsetAddress<T>(offset);
   }
 
   // Return the address of a bound label.
   template <typename T>
-  inline T GetLabelAddress(const Label * label) {
+  T GetLabelAddress(const Label * label) {
     VIXL_ASSERT(label->IsBound());
     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
     return GetOffsetAddress<T>(label->location());
@@ -860,14 +879,14 @@
 
   // Return the address of the cursor.
   template <typename T>
-  inline T GetCursorAddress() {
+  T GetCursorAddress() {
     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
     return GetOffsetAddress<T>(CursorOffset());
   }
 
   // Return the address of the start of the buffer.
   template <typename T>
-  inline T GetStartAddress() {
+  T GetStartAddress() {
     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
     return GetOffsetAddress<T>(0);
   }
@@ -1074,20 +1093,20 @@
 
   // Bfm aliases.
   // Bitfield insert.
-  inline void bfi(const Register& rd,
-                  const Register& rn,
-                  unsigned lsb,
-                  unsigned width) {
+  void bfi(const Register& rd,
+           const Register& rn,
+           unsigned lsb,
+           unsigned width) {
     VIXL_ASSERT(width >= 1);
     VIXL_ASSERT(lsb + width <= rn.size());
     bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
   }
 
   // Bitfield extract and insert low.
-  inline void bfxil(const Register& rd,
-                    const Register& rn,
-                    unsigned lsb,
-                    unsigned width) {
+  void bfxil(const Register& rd,
+             const Register& rn,
+             unsigned lsb,
+             unsigned width) {
     VIXL_ASSERT(width >= 1);
     VIXL_ASSERT(lsb + width <= rn.size());
     bfm(rd, rn, lsb, lsb + width - 1);
@@ -1095,92 +1114,92 @@
 
   // Sbfm aliases.
   // Arithmetic shift right.
-  inline void asr(const Register& rd, const Register& rn, unsigned shift) {
+  void asr(const Register& rd, const Register& rn, unsigned shift) {
     VIXL_ASSERT(shift < rd.size());
     sbfm(rd, rn, shift, rd.size() - 1);
   }
 
   // Signed bitfield insert with zero at right.
-  inline void sbfiz(const Register& rd,
-                    const Register& rn,
-                    unsigned lsb,
-                    unsigned width) {
+  void sbfiz(const Register& rd,
+             const Register& rn,
+             unsigned lsb,
+             unsigned width) {
     VIXL_ASSERT(width >= 1);
     VIXL_ASSERT(lsb + width <= rn.size());
     sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
   }
 
   // Signed bitfield extract.
-  inline void sbfx(const Register& rd,
-                   const Register& rn,
-                   unsigned lsb,
-                   unsigned width) {
+  void sbfx(const Register& rd,
+            const Register& rn,
+            unsigned lsb,
+            unsigned width) {
     VIXL_ASSERT(width >= 1);
     VIXL_ASSERT(lsb + width <= rn.size());
     sbfm(rd, rn, lsb, lsb + width - 1);
   }
 
   // Signed extend byte.
-  inline void sxtb(const Register& rd, const Register& rn) {
+  void sxtb(const Register& rd, const Register& rn) {
     sbfm(rd, rn, 0, 7);
   }
 
   // Signed extend halfword.
-  inline void sxth(const Register& rd, const Register& rn) {
+  void sxth(const Register& rd, const Register& rn) {
     sbfm(rd, rn, 0, 15);
   }
 
   // Signed extend word.
-  inline void sxtw(const Register& rd, const Register& rn) {
+  void sxtw(const Register& rd, const Register& rn) {
     sbfm(rd, rn, 0, 31);
   }
 
   // Ubfm aliases.
   // Logical shift left.
-  inline void lsl(const Register& rd, const Register& rn, unsigned shift) {
+  void lsl(const Register& rd, const Register& rn, unsigned shift) {
     unsigned reg_size = rd.size();
     VIXL_ASSERT(shift < reg_size);
     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
   }
 
   // Logical shift right.
-  inline void lsr(const Register& rd, const Register& rn, unsigned shift) {
+  void lsr(const Register& rd, const Register& rn, unsigned shift) {
     VIXL_ASSERT(shift < rd.size());
     ubfm(rd, rn, shift, rd.size() - 1);
   }
 
   // Unsigned bitfield insert with zero at right.
-  inline void ubfiz(const Register& rd,
-                    const Register& rn,
-                    unsigned lsb,
-                    unsigned width) {
+  void ubfiz(const Register& rd,
+             const Register& rn,
+             unsigned lsb,
+             unsigned width) {
     VIXL_ASSERT(width >= 1);
     VIXL_ASSERT(lsb + width <= rn.size());
     ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
   }
 
   // Unsigned bitfield extract.
-  inline void ubfx(const Register& rd,
-                   const Register& rn,
-                   unsigned lsb,
-                   unsigned width) {
+  void ubfx(const Register& rd,
+            const Register& rn,
+            unsigned lsb,
+            unsigned width) {
     VIXL_ASSERT(width >= 1);
     VIXL_ASSERT(lsb + width <= rn.size());
     ubfm(rd, rn, lsb, lsb + width - 1);
   }
 
   // Unsigned extend byte.
-  inline void uxtb(const Register& rd, const Register& rn) {
+  void uxtb(const Register& rd, const Register& rn) {
     ubfm(rd, rn, 0, 7);
   }
 
   // Unsigned extend halfword.
-  inline void uxth(const Register& rd, const Register& rn) {
+  void uxth(const Register& rd, const Register& rn) {
     ubfm(rd, rn, 0, 15);
   }
 
   // Unsigned extend word.
-  inline void uxtw(const Register& rd, const Register& rn) {
+  void uxtw(const Register& rd, const Register& rn) {
     ubfm(rd, rn, 0, 31);
   }
 
@@ -1230,7 +1249,7 @@
   void cneg(const Register& rd, const Register& rn, Condition cond);
 
   // Rotate right.
-  inline void ror(const Register& rd, const Register& rs, unsigned shift) {
+  void ror(const Register& rd, const Register& rs, unsigned shift) {
     extr(rd, rs, rs, shift);
   }
 
@@ -1495,6 +1514,19 @@
   // Load-acquire register.
   void ldar(const Register& rt, const MemOperand& src);
 
+  // Prefetch memory.
+  void prfm(PrefetchOperation op, const MemOperand& addr,
+            LoadStoreScalingOption option = PreferScaledOffset);
+
+  // Prefetch memory (with unscaled offset).
+  void prfum(PrefetchOperation op, const MemOperand& addr,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Prefetch memory in the literal pool.
+  void prfm(PrefetchOperation op, RawLiteral* literal);
+
+  // Prefetch from pc + imm19 << 2.
+  void prfm(PrefetchOperation op, int imm19);
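A minimal sketch of how generated code could use these new entry points, via the `MacroAssembler::Prfm` wrapper added later in this patch; the function name, the `masm` pointer and the addressing choices are illustrative only:

```c++
#include "a64/macro-assembler-a64.h"

using namespace vixl;

// Hint the memory system about upcoming accesses relative to x2.
void EmitPrefetches(MacroAssembler* masm) {
  // Data we expect to load soon; keep it resident in L1.
  masm->Prfm(PLDL1KEEP, MemOperand(x2, 64));
  // Data we will store once and not reuse; use the streaming hint.
  masm->Prfm(PSTL1STRM, MemOperand(x2, 128));
}
```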
 
   // Move instructions. The default shift of -1 indicates that the move
   // instruction will calculate an appropriate 16-bit immediate and left shift
@@ -1638,12 +1670,21 @@
   // FP round to integer (nearest with ties to away).
   void frinta(const FPRegister& fd, const FPRegister& fn);
 
+  // FP round to integer (implicit rounding).
+  void frinti(const FPRegister& fd, const FPRegister& fn);
+
   // FP round to integer (toward minus infinity).
   void frintm(const FPRegister& fd, const FPRegister& fn);
 
   // FP round to integer (nearest with ties to even).
   void frintn(const FPRegister& fd, const FPRegister& fn);
 
+  // FP round to integer (toward plus infinity).
+  void frintp(const FPRegister& fd, const FPRegister& fn);
+
+  // FP round to integer (exact, implicit rounding).
+  void frintx(const FPRegister& fd, const FPRegister& fn);
+
   // FP round to integer (towards zero).
   void frintz(const FPRegister& fd, const FPRegister& fn);
 
@@ -1705,16 +1746,16 @@
 
   // Emit generic instructions.
   // Emit raw instructions into the instruction stream.
-  inline void dci(Instr raw_inst) { Emit(raw_inst); }
+  void dci(Instr raw_inst) { Emit(raw_inst); }
 
   // Emit 32 bits of data into the instruction stream.
-  inline void dc32(uint32_t data) {
+  void dc32(uint32_t data) {
     VIXL_ASSERT(buffer_monitor_ > 0);
     buffer_->Emit32(data);
   }
 
   // Emit 64 bits of data into the instruction stream.
-  inline void dc64(uint64_t data) {
+  void dc64(uint64_t data) {
     VIXL_ASSERT(buffer_monitor_ > 0);
     buffer_->Emit64(data);
   }
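Because `dci`, `dc32` and `dc64` assert that the buffer has been acquired, raw emission in a debug build is expected to happen inside a `CodeBufferCheckScope` (or a similar scope). A hedged sketch, with the helper name and data values chosen for illustration:

```c++
#include "a64/assembler-a64.h"

using namespace vixl;

// Emit two raw 32-bit words. The scope reserves the space and, in debug
// builds, acquires the buffer so the dc32 assertions are satisfied.
void EmitRawData(Assembler* assm) {
  CodeBufferCheckScope scope(assm, 2 * sizeof(uint32_t));
  assm->dc32(0xdeadbeef);
  assm->dc32(0x00000000);
}
```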
@@ -1849,14 +1890,14 @@
     }
   }
 
-  static inline Instr ImmS(unsigned imms, unsigned reg_size) {
+  static Instr ImmS(unsigned imms, unsigned reg_size) {
     VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(imms)) ||
            ((reg_size == kWRegSize) && is_uint5(imms)));
     USE(reg_size);
     return imms << ImmS_offset;
   }
 
-  static inline Instr ImmR(unsigned immr, unsigned reg_size) {
+  static Instr ImmR(unsigned immr, unsigned reg_size) {
     VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
            ((reg_size == kWRegSize) && is_uint5(immr)));
     USE(reg_size);
@@ -1864,7 +1905,7 @@
     return immr << ImmR_offset;
   }
 
-  static inline Instr ImmSetBits(unsigned imms, unsigned reg_size) {
+  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
     VIXL_ASSERT(is_uint6(imms));
     VIXL_ASSERT((reg_size == kXRegSize) || is_uint6(imms + 3));
@@ -1872,7 +1913,7 @@
     return imms << ImmSetBits_offset;
   }
 
-  static inline Instr ImmRotate(unsigned immr, unsigned reg_size) {
+  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
     VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
            ((reg_size == kWRegSize) && is_uint5(immr)));
@@ -1880,12 +1921,12 @@
     return immr << ImmRotate_offset;
   }
 
-  static inline Instr ImmLLiteral(int imm19) {
+  static Instr ImmLLiteral(int imm19) {
     VIXL_ASSERT(is_int19(imm19));
     return truncate_to_int19(imm19) << ImmLLiteral_offset;
   }
 
-  static inline Instr BitN(unsigned bitn, unsigned reg_size) {
+  static Instr BitN(unsigned bitn, unsigned reg_size) {
     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
     VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
     USE(reg_size);
@@ -1943,6 +1984,11 @@
     return shift_amount << ImmShiftLS_offset;
   }
 
+  static Instr ImmPrefetchOperation(int imm5) {
+    VIXL_ASSERT(is_uint5(imm5));
+    return imm5 << ImmPrefetchOperation_offset;
+  }
+
   static Instr ImmException(int imm16) {
     VIXL_ASSERT(is_uint16(imm16));
     return imm16 << ImmException_offset;
@@ -2003,12 +2049,32 @@
     return scale << FPScale_offset;
   }
 
+  // Immediate field checking helpers.
+  static bool IsImmAddSub(int64_t immediate);
+  static bool IsImmConditionalCompare(int64_t immediate);
+  static bool IsImmFP32(float imm);
+  static bool IsImmFP64(double imm);
+  static bool IsImmLogical(uint64_t value,
+                           unsigned width,
+                           unsigned* n = NULL,
+                           unsigned* imm_s = NULL,
+                           unsigned* imm_r = NULL);
+  static bool IsImmLSPair(int64_t offset, LSDataSize size);
+  static bool IsImmLSScaled(int64_t offset, LSDataSize size);
+  static bool IsImmLSUnscaled(int64_t offset);
+  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
+  static bool IsImmMovz(uint64_t imm, unsigned reg_size);
+
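With these checks exposed as static members of `Assembler`, callers can test encodability up front when choosing a code sequence (assuming, as the new placement suggests, that they are publicly accessible). A small sketch under that assumption; the function, the `offset` parameter and the register choices are illustrative:

```c++
#include "a64/macro-assembler-a64.h"

using namespace vixl;

// Compute x0 = x1 + offset, using a single add when the constant fits the
// 12-bit (optionally shifted) add/sub immediate form.
void AddOffset(MacroAssembler* masm, int64_t offset) {
  if (Assembler::IsImmAddSub(offset)) {
    masm->Add(x0, x1, offset);
  } else {
    masm->Mov(x2, offset);   // Materialise the constant first.
    masm->Add(x0, x1, x2);
  }
}
```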
   // Size of the code generated since label to the current position.
   size_t SizeOfCodeGeneratedSince(Label* label) const {
     VIXL_ASSERT(label->IsBound());
     return buffer_->OffsetFrom(label->location());
   }
 
+  size_t SizeOfCodeGenerated() const {
+    return buffer_->CursorOffset();
+  }
+
   size_t BufferCapacity() const { return buffer_->capacity(); }
 
   size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); }
@@ -2025,7 +2091,7 @@
     }
   }
 
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   void AcquireBuffer() {
     VIXL_ASSERT(buffer_monitor_ >= 0);
     buffer_monitor_++;
@@ -2037,16 +2103,16 @@
   }
 #endif
 
-  inline PositionIndependentCodeOption pic() {
+  PositionIndependentCodeOption pic() const {
     return pic_;
   }
 
-  inline bool AllowPageOffsetDependentCode() {
+  bool AllowPageOffsetDependentCode() const {
     return (pic() == PageOffsetDependentCode) ||
            (pic() == PositionDependentCode);
   }
 
-  static inline const Register& AppropriateZeroRegFor(const CPURegister& reg) {
+  static const Register& AppropriateZeroRegFor(const CPURegister& reg) {
     return reg.Is64Bits() ? xzr : wzr;
   }
 
@@ -2056,14 +2122,15 @@
                  const MemOperand& addr,
                  LoadStoreOp op,
                  LoadStoreScalingOption option = PreferScaledOffset);
-  static bool IsImmLSUnscaled(int64_t offset);
-  static bool IsImmLSScaled(int64_t offset, LSDataSize size);
 
   void LoadStorePair(const CPURegister& rt,
                      const CPURegister& rt2,
                      const MemOperand& addr,
                      LoadStorePairOp op);
-  static bool IsImmLSPair(int64_t offset, LSDataSize size);
+
+  void Prefetch(PrefetchOperation op,
+                const MemOperand& addr,
+                LoadStoreScalingOption option = PreferScaledOffset);
 
   // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
   // reports a bogus uninitialised warning then.
@@ -2077,18 +2144,12 @@
                         unsigned imm_s,
                         unsigned imm_r,
                         LogicalOp op);
-  static bool IsImmLogical(uint64_t value,
-                           unsigned width,
-                           unsigned* n = NULL,
-                           unsigned* imm_s = NULL,
-                           unsigned* imm_r = NULL);
 
   void ConditionalCompare(const Register& rn,
                           const Operand& operand,
                           StatusFlags nzcv,
                           Condition cond,
                           ConditionalCompareOp op);
-  static bool IsImmConditionalCompare(int64_t immediate);
 
   void AddSubWithCarry(const Register& rd,
                        const Register& rn,
@@ -2096,8 +2157,6 @@
                        FlagsUpdate S,
                        AddSubWithCarryOp op);
 
-  static bool IsImmFP32(float imm);
-  static bool IsImmFP64(double imm);
 
   // Functions for emulating operands not directly supported by the instruction
   // set.
@@ -2115,7 +2174,6 @@
               const Operand& operand,
               FlagsUpdate S,
               AddSubOp op);
-  static bool IsImmAddSub(int64_t immediate);
 
   // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
   // registers. Only simple loads are supported; sign- and zero-extension (such
@@ -2180,6 +2238,12 @@
                                const FPRegister& fa,
                                FPDataProcessing3SourceOp op);
 
+  // Encode the specified MemOperand for the specified access size and scaling
+  // preference.
+  Instr LoadStoreMemOperand(const MemOperand& addr,
+                            LSDataSize size,
+                            LoadStoreScalingOption option);
+
   // Link the current (not-yet-emitted) instruction to the specified label, then
   // return an offset to be encoded in the instruction. If the label is not yet
   // bound, an offset of 0 is returned.
@@ -2205,7 +2269,7 @@
   CodeBuffer* buffer_;
   PositionIndependentCodeOption pic_;
 
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   int64_t buffer_monitor_;
 #endif
 };
@@ -2239,7 +2303,7 @@
                        AssertPolicy assert_policy = kMaximumSize)
       : assm_(assm) {
     if (check_policy == kCheck) assm->EnsureSpaceFor(size);
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
     assm->bind(&start_);
     size_ = size;
     assert_policy_ = assert_policy;
@@ -2251,7 +2315,7 @@
 
   // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
   explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) {
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
     size_ = 0;
     assert_policy_ = kNoAssert;
     assm->AcquireBuffer();
@@ -2259,7 +2323,7 @@
   }
 
   ~CodeBufferCheckScope() {
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
     assm_->ReleaseBuffer();
     switch (assert_policy_) {
       case kNoAssert: break;
@@ -2277,7 +2341,7 @@
 
  protected:
   Assembler* assm_;
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   Label start_;
   size_t size_;
   AssertPolicy assert_policy_;
diff --git a/src/a64/constants-a64.h b/src/a64/constants-a64.h
index 7a14f85..bc1a2c4 100644
--- a/src/a64/constants-a64.h
+++ b/src/a64/constants-a64.h
@@ -31,12 +31,6 @@
 
 const unsigned kNumberOfRegisters = 32;
 const unsigned kNumberOfFPRegisters = 32;
-// Callee saved registers are x21-x30(lr).
-const int kNumberOfCalleeSavedRegisters = 10;
-const int kFirstCalleeSavedRegisterIndex = 21;
-// Callee saved FP registers are d8-d15.
-const int kNumberOfCalleeSavedFPRegisters = 8;
-const int kFirstCalleeSavedFPRegisterIndex = 8;
 
 #define REGISTER_CODE_LIST(R)                                                  \
 R(0)  R(1)  R(2)  R(3)  R(4)  R(5)  R(6)  R(7)                                 \
@@ -53,7 +47,6 @@
 V_(Rt, 4, 0, Bits)                        /* Load/store register.         */   \
 V_(Rt2, 14, 10, Bits)                     /* Load/store second register.  */   \
 V_(Rs, 20, 16, Bits)                      /* Exclusive access status.     */   \
-V_(PrefetchMode, 4, 0, Bits)                                                   \
                                                                                \
 /* Common bits */                                                              \
 V_(SixtyFourBits, 31, 31, Bits)                                                \
@@ -109,6 +102,10 @@
 V_(ImmLSPair, 21, 15, SignedBits)                                              \
 V_(SizeLS, 31, 30, Bits)                                                       \
 V_(ImmShiftLS, 12, 12, Bits)                                                   \
+V_(ImmPrefetchOperation, 4, 0, Bits)                                           \
+V_(PrefetchHint, 4, 3, Bits)                                                   \
+V_(PrefetchTarget, 2, 1, Bits)                                                 \
+V_(PrefetchStream, 0, 0, Bits)                                                 \
                                                                                \
 /* Other immediates */                                                         \
 V_(ImmUncondBranch, 25, 0, SignedBits)                                         \
@@ -269,6 +266,29 @@
   BarrierAll    = 3
 };
 
+enum PrefetchOperation {
+  PLDL1KEEP = 0x00,
+  PLDL1STRM = 0x01,
+  PLDL2KEEP = 0x02,
+  PLDL2STRM = 0x03,
+  PLDL3KEEP = 0x04,
+  PLDL3STRM = 0x05,
+
+  PLIL1KEEP = 0x08,
+  PLIL1STRM = 0x09,
+  PLIL2KEEP = 0x0a,
+  PLIL2STRM = 0x0b,
+  PLIL3KEEP = 0x0c,
+  PLIL3STRM = 0x0d,
+
+  PSTL1KEEP = 0x10,
+  PSTL1STRM = 0x11,
+  PSTL2KEEP = 0x12,
+  PSTL2STRM = 0x13,
+  PSTL3KEEP = 0x14,
+  PSTL3STRM = 0x15
+};
+
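The five-bit operation value is simply the concatenation of the `PrefetchHint`, `PrefetchTarget` and `PrefetchStream` fields declared above, which is how the disassembler changes later in this patch pick it apart. A small sketch of the decomposition (the helper name is illustrative):

```c++
#include "a64/constants-a64.h"

namespace vixl {

// Split a prefetch operation into its hint, cache level and policy bits.
// For PSTL2STRM (0x13 == 0b10011): hint == 2 (PST), level == 2, stream == 1.
void DecomposePrefetchOperation(PrefetchOperation op) {
  unsigned hint   = (op >> 3) & 0x3;        // 0 = PLD, 1 = PLI, 2 = PST.
  unsigned level  = ((op >> 1) & 0x3) + 1;  // Target cache level (L1-L3).
  unsigned stream = op & 0x1;               // 0 = KEEP, 1 = STRM.
  (void)hint; (void)level; (void)stream;
}

}  // namespace vixl
```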
 // System/special register names.
 // This information is not encoded as one field but as the concatenation of
 // multiple fields (Op0<0>, Op1, Crn, Crm, Op2).
@@ -605,6 +625,12 @@
   LoadStoreAnyFixed = 0x08000000
 };
 
+// Any load pair or store pair.
+enum LoadStorePairAnyOp {
+  LoadStorePairAnyFMask = 0x3a000000,
+  LoadStorePairAnyFixed = 0x28000000
+};
+
 #define LOAD_STORE_PAIR_OP_LIST(V)  \
   V(STP, w,   0x00000000),          \
   V(LDP, w,   0x00400000),          \
@@ -703,17 +729,6 @@
   V(LD, R, d,   0xC4400000)
 
 
-// Load/store unscaled offset.
-enum LoadStoreUnscaledOffsetOp {
-  LoadStoreUnscaledOffsetFixed = 0x38000000,
-  LoadStoreUnscaledOffsetFMask = 0x3B200C00,
-  LoadStoreUnscaledOffsetMask  = 0xFFE00C00,
-  #define LOAD_STORE_UNSCALED(A, B, C, D)  \
-  A##U##B##_##C = LoadStoreUnscaledOffsetFixed | D
-  LOAD_STORE_OP_LIST(LOAD_STORE_UNSCALED)
-  #undef LOAD_STORE_UNSCALED
-};
-
 // Load/store (post, pre, offset and unsigned.)
 enum LoadStoreOp {
   LoadStoreOpMask = 0xC4C00000,
@@ -724,6 +739,18 @@
   PRFM = 0xC0800000
 };
 
+// Load/store unscaled offset.
+enum LoadStoreUnscaledOffsetOp {
+  LoadStoreUnscaledOffsetFixed = 0x38000000,
+  LoadStoreUnscaledOffsetFMask = 0x3B200C00,
+  LoadStoreUnscaledOffsetMask  = 0xFFE00C00,
+  PRFUM                        = LoadStoreUnscaledOffsetFixed | PRFM,
+  #define LOAD_STORE_UNSCALED(A, B, C, D)  \
+  A##U##B##_##C = LoadStoreUnscaledOffsetFixed | D
+  LOAD_STORE_OP_LIST(LOAD_STORE_UNSCALED)
+  #undef LOAD_STORE_UNSCALED
+};
+
 // Load/store post index.
 enum LoadStorePostIndex {
   LoadStorePostIndexFixed = 0x38000400,
diff --git a/src/a64/debugger-a64.cc b/src/a64/debugger-a64.cc
index ac116da..b8fab31 100644
--- a/src/a64/debugger-a64.cc
+++ b/src/a64/debugger-a64.cc
@@ -524,8 +524,7 @@
 
 Debugger::Debugger(Decoder* decoder, FILE* stream)
     : Simulator(decoder, stream),
-      log_parameters_(0),
-      debug_parameters_(0),
+      debug_parameters_(DBG_INACTIVE),
       pending_request_(false),
       steps_(0),
       last_command_(NULL) {
@@ -538,11 +537,7 @@
 void Debugger::Run() {
   pc_modified_ = false;
   while (pc_ != kEndOfSimAddress) {
-    if (pending_request()) {
-      LogProcessorState();
-      RunDebuggerShell();
-    }
-
+    if (pending_request()) RunDebuggerShell();
     ExecuteInstruction();
   }
 }
@@ -603,8 +598,7 @@
   const uint64_t format_size = format->SizeOf() * 8;
   const uint64_t count = reg_size / format_size;
   const uint64_t mask = 0xffffffffffffffff >> (64 - format_size);
-  const uint64_t reg_value = reg<uint64_t>(reg_size,
-                                           target_reg.code(),
+  const uint64_t reg_value = reg<uint64_t>(target_reg.code(),
                                            Reg31IsStackPointer);
   VIXL_ASSERT(count > 0);
 
@@ -649,46 +643,12 @@
     case BRK:
       DoBreakpoint(instr);
       return;
-    case HLT:
-      switch (instr->ImmException()) {
-        case kUnreachableOpcode:
-          DoUnreachable(instr);
-          return;
-        case kTraceOpcode:
-          DoTrace(instr);
-          return;
-        case kLogOpcode:
-          DoLog(instr);
-          return;
-      }
-      // Fall through
+    case HLT:   // Fall through.
     default: Simulator::VisitException(instr);
   }
 }
 
 
-void Debugger::LogSystemRegisters() {
-  if (log_parameters_ & LOG_SYS_REGS) PrintSystemRegisters();
-}
-
-
-void Debugger::LogRegisters() {
-  if (log_parameters_ & LOG_REGS) PrintRegisters();
-}
-
-
-void Debugger::LogFPRegisters() {
-  if (log_parameters_ & LOG_FP_REGS) PrintFPRegisters();
-}
-
-
-void Debugger::LogProcessorState() {
-  LogSystemRegisters();
-  LogRegisters();
-  LogFPRegisters();
-}
-
-
 // Read a command. A command will be at most kMaxDebugShellLine char long and
 // ends with '\n\0'.
 // TODO: Should this be a utility function?
@@ -771,64 +731,6 @@
 }
 
 
-void Debugger::DoUnreachable(const Instruction* instr) {
-  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
-              (instr->ImmException() == kUnreachableOpcode));
-
-  fprintf(stream_, "Hit UNREACHABLE marker at pc=%p.\n",
-          reinterpret_cast<const void*>(instr));
-  abort();
-}
-
-
-void Debugger::DoTrace(const Instruction* instr) {
-  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
-              (instr->ImmException() == kTraceOpcode));
-
-  // Read the arguments encoded inline in the instruction stream.
-  uint32_t parameters;
-  uint32_t command;
-
-  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
-  memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
-
-  switch (command) {
-    case TRACE_ENABLE:
-      set_log_parameters(log_parameters() | parameters);
-      break;
-    case TRACE_DISABLE:
-      set_log_parameters(log_parameters() & ~parameters);
-      break;
-    default:
-      VIXL_UNREACHABLE();
-  }
-
-  set_pc(instr->InstructionAtOffset(kTraceLength));
-}
-
-
-void Debugger::DoLog(const Instruction* instr) {
-  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
-              (instr->ImmException() == kLogOpcode));
-
-  // Read the arguments encoded inline in the instruction stream.
-  uint32_t parameters;
-
-  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
-
-  // We don't support a one-shot LOG_DISASM.
-  VIXL_ASSERT((parameters & LOG_DISASM) == 0);
-  // Print the requested information.
-  if (parameters & LOG_SYS_REGS) PrintSystemRegisters(true);
-  if (parameters & LOG_REGS) PrintRegisters(true);
-  if (parameters & LOG_FP_REGS) PrintFPRegisters(true);
-
-  set_pc(instr->InstructionAtOffset(kLogLength));
-}
-
-
 static bool StringToUInt64(uint64_t* value, const char* line, int base = 10) {
   char* endptr = NULL;
   errno = 0;  // Reset errors.
@@ -1364,11 +1266,11 @@
   if (tok->IsIdentifier()) {
     char* identifier = IdentifierToken::Cast(tok)->value();
     if (strcmp(identifier, "regs") == 0) {
-      debugger->PrintRegisters(true);
+      debugger->PrintRegisters();
     } else if (strcmp(identifier, "fpregs") == 0) {
-      debugger->PrintFPRegisters(true);
+      debugger->PrintFPRegisters();
     } else if (strcmp(identifier, "sysregs") == 0) {
-      debugger->PrintSystemRegisters(true);
+      debugger->PrintSystemRegisters();
     } else if (strcmp(identifier, "pc") == 0) {
       printf("pc = %16p\n", reinterpret_cast<const void*>(debugger->pc()));
     } else {
diff --git a/src/a64/debugger-a64.h b/src/a64/debugger-a64.h
index 7af9ce7..60cd225 100644
--- a/src/a64/debugger-a64.h
+++ b/src/a64/debugger-a64.h
@@ -39,62 +39,9 @@
 
 namespace vixl {
 
-// Debug instructions.
-//
-// VIXL's macro-assembler and debugger support a few pseudo instructions to
-// make debugging easier. These pseudo instructions do not exist on real
-// hardware.
-//
-// Each debug pseudo instruction is represented by a HLT instruction. The HLT
-// immediate field is used to identify the type of debug pseudo isntruction.
-// Each pseudo instruction use a custom encoding for additional arguments, as
-// described below.
-
-// Unreachable
-//
-// Instruction which should never be executed. This is used as a guard in parts
-// of the code that should not be reachable, such as in data encoded inline in
-// the instructions.
-const Instr kUnreachableOpcode = 0xdeb0;
-
-// Trace
-//  - parameter: TraceParameter stored as a uint32_t
-//  - command: TraceCommand stored as a uint32_t
-//
-// Allow for trace management in the generated code. See the corresponding
-// enums for more information on permitted actions.
-const Instr kTraceOpcode = 0xdeb2;
-const unsigned kTraceParamsOffset = 1 * kInstructionSize;
-const unsigned kTraceCommandOffset = 2 * kInstructionSize;
-const unsigned kTraceLength = 3 * kInstructionSize;
-
-// Log
-//  - parameter: TraceParameter stored as a uint32_t
-//
-// Output the requested information.
-const Instr kLogOpcode = 0xdeb3;
-const unsigned kLogParamsOffset = 1 * kInstructionSize;
-const unsigned kLogLength = 2 * kInstructionSize;
-
-// Trace commands.
-enum TraceCommand {
-  TRACE_ENABLE   = 1,
-  TRACE_DISABLE  = 2
-};
-
-// Trace parameters.
-enum TraceParameters {
-  LOG_DISASM     = 1 << 0,  // Log disassembly.
-  LOG_REGS       = 1 << 1,  // Log general purpose registers.
-  LOG_FP_REGS    = 1 << 2,  // Log floating-point registers.
-  LOG_SYS_REGS   = 1 << 3,  // Log the flags and system registers.
-
-  LOG_STATE      = LOG_REGS | LOG_FP_REGS | LOG_SYS_REGS,
-  LOG_ALL        = LOG_DISASM | LOG_REGS | LOG_FP_REGS | LOG_SYS_REGS
-};
-
-// Debugger parameters
+// Flags that represent the debugger state.
 enum DebugParameters {
+  DBG_INACTIVE = 0,
   DBG_ACTIVE = 1 << 0,  // The debugger is active.
   DBG_BREAK  = 1 << 1   // The debugger is at a breakpoint.
 };
@@ -109,25 +56,10 @@
   explicit Debugger(Decoder* decoder, FILE* stream = stdout);
 
   virtual void Run();
-  void VisitException(const Instruction* instr);
+  virtual void VisitException(const Instruction* instr);
 
-  inline int log_parameters() {
-    // The simulator can control disassembly, so make sure that the Debugger's
-    // log parameters agree with it.
-    if (disasm_trace()) {
-      log_parameters_ |= LOG_DISASM;
-    }
-    return log_parameters_;
-  }
-  inline void set_log_parameters(int parameters) {
-    set_disasm_trace((parameters & LOG_DISASM) != 0);
-    log_parameters_ = parameters;
-
-    update_pending_request();
-  }
-
-  inline int debug_parameters() { return debug_parameters_; }
-  inline void set_debug_parameters(int parameters) {
+  int debug_parameters() const { return debug_parameters_; }
+  void set_debug_parameters(int parameters) {
     debug_parameters_ = parameters;
 
     update_pending_request();
@@ -135,23 +67,19 @@
 
   // Numbers of instructions to execute before the debugger shell is given
   // back control.
-  inline int steps() { return steps_; }
-  inline void set_steps(int value) {
+  int steps() const { return steps_; }
+  void set_steps(int value) {
     VIXL_ASSERT(value > 1);
     steps_ = value;
   }
 
-  inline bool IsDebuggerRunning() {
+  bool IsDebuggerRunning() const {
     return (debug_parameters_ & DBG_ACTIVE) != 0;
   }
 
-  inline bool pending_request() { return pending_request_; }
-  inline void update_pending_request() {
-    const int kLoggingMask = LOG_STATE;
-    const bool logging = (log_parameters_ & kLoggingMask) != 0;
-    const bool debugging = IsDebuggerRunning();
-
-    pending_request_ = logging || debugging;
+  bool pending_request() const { return pending_request_; }
+  void update_pending_request() {
+    pending_request_ = IsDebuggerRunning();
   }
 
   void PrintInstructions(const void* address, int64_t count = 1);
@@ -165,19 +93,11 @@
                        const FormatToken* format);
 
  private:
-  void LogSystemRegisters();
-  void LogRegisters();
-  void LogFPRegisters();
-  void LogProcessorState();
   char* ReadCommandLine(const char* prompt, char* buffer, int length);
   void RunDebuggerShell();
   void DoBreakpoint(const Instruction* instr);
-  void DoUnreachable(const Instruction* instr);
-  void DoTrace(const Instruction* instr);
-  void DoLog(const Instruction* instr);
 
-  int  log_parameters_;
-  int  debug_parameters_;
+  int debug_parameters_;
   bool pending_request_;
   int steps_;
   DebugCommand* last_command_;
diff --git a/src/a64/decoder-a64.h b/src/a64/decoder-a64.h
index 172594c..fd08d6c 100644
--- a/src/a64/decoder-a64.h
+++ b/src/a64/decoder-a64.h
@@ -108,7 +108,7 @@
   }
 
  private:
-  VisitorConstness constness_;
+  const VisitorConstness constness_;
 };
 
 
diff --git a/src/a64/disasm-a64.cc b/src/a64/disasm-a64.cc
index e4a74aa..f7bc246 100644
--- a/src/a64/disasm-a64.cc
+++ b/src/a64/disasm-a64.cc
@@ -34,6 +34,7 @@
   buffer_ = reinterpret_cast<char*>(malloc(buffer_size_));
   buffer_pos_ = 0;
   own_buffer_ = true;
+  code_address_offset_ = 0;
 }
 
 
@@ -42,6 +43,7 @@
   buffer_ = text_buffer;
   buffer_pos_ = 0;
   own_buffer_ = false;
+  code_address_offset_ = 0;
 }
 
 
@@ -739,9 +741,25 @@
   // shift calculation.
   switch (instr->Mask(MoveWideImmediateMask)) {
     case MOVN_w:
-    case MOVN_x: mnemonic = "movn"; break;
+    case MOVN_x:
+      if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0)) {
+        if ((instr->SixtyFourBits() == 0) && (instr->ImmMoveWide() == 0xffff)) {
+          mnemonic = "movn";
+        } else {
+          mnemonic = "mov";
+          form = "'Rd, 'IMoveNeg";
+        }
+      } else {
+        mnemonic = "movn";
+      }
+      break;
     case MOVZ_w:
-    case MOVZ_x: mnemonic = "movz"; break;
+    case MOVZ_x:
+      if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0))
+        mnemonic = "mov";
+      else
+        mnemonic = "movz";
+      break;
     case MOVK_w:
     case MOVK_x: mnemonic = "movk"; form = "'Rd, 'IMoveLSL"; break;
     default: VIXL_UNREACHABLE();
@@ -806,7 +824,7 @@
     case A##_unsigned: mnemonic = B; form = C ", ['Xns'ILU]"; break;
     LOAD_STORE_LIST(LS_UNSIGNEDOFFSET)
     #undef LS_UNSIGNEDOFFSET
-    case PRFM_unsigned: mnemonic = "prfm"; form = "'PrefOp, ['Xn'ILU]";
+    case PRFM_unsigned: mnemonic = "prfm"; form = "'PrefOp, ['Xns'ILU]";
   }
   Format(instr, mnemonic, form);
 }
@@ -833,6 +851,7 @@
   const char *form_x = "'Xt, ['Xns'ILS]";
   const char *form_s = "'St, ['Xns'ILS]";
   const char *form_d = "'Dt, ['Xns'ILS]";
+  const char *form_prefetch = "'PrefOp, ['Xns'ILS]";
 
   switch (instr->Mask(LoadStoreUnscaledOffsetMask)) {
     case STURB_w:  mnemonic = "sturb"; break;
@@ -852,6 +871,7 @@
     case LDURSH_x: form = form_x;  // Fall through.
     case LDURSH_w: mnemonic = "ldursh"; break;
     case LDURSW_x: mnemonic = "ldursw"; form = form_x; break;
+    case PRFUM:    mnemonic = "prfum"; form = form_prefetch; break;
     default: form = "(LoadStoreUnscaledOffset)";
   }
   Format(instr, mnemonic, form);
@@ -872,6 +892,11 @@
       form = "'Xt, 'ILLiteral 'LValue";
       break;
     }
+    case PRFM_lit: {
+      mnemonic = "prfm";
+      form = "'PrefOp, 'ILLiteral 'LValue";
+      break;
+    }
     default: mnemonic = "unimplemented";
   }
   Format(instr, mnemonic, form);
@@ -1344,7 +1369,7 @@
 void Disassembler::AppendAddressToOutput(const Instruction* instr,
                                          const void* addr) {
   USE(instr);
-  AppendToOutput("(addr %p)", addr);
+  AppendToOutput("(addr 0x%" PRIxPTR ")", reinterpret_cast<uintptr_t>(addr));
 }
 
 
@@ -1360,6 +1385,40 @@
 }
 
 
+void Disassembler::AppendCodeRelativeAddressToOutput(const Instruction* instr,
+                                                     const void* addr) {
+  USE(instr);
+  int64_t rel_addr = CodeRelativeAddress(addr);
+  if (rel_addr >= 0) {
+    AppendToOutput("(addr 0x%" PRIx64 ")", rel_addr);
+  } else {
+    AppendToOutput("(addr -0x%" PRIx64 ")", -rel_addr);
+  }
+}
+
+
+void Disassembler::AppendCodeRelativeCodeAddressToOutput(
+    const Instruction* instr, const void* addr) {
+  AppendCodeRelativeAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendCodeRelativeDataAddressToOutput(
+    const Instruction* instr, const void* addr) {
+  AppendCodeRelativeAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::MapCodeAddress(int64_t base_address,
+                                  const Instruction* instr_address) {
+  set_code_address_offset(
+      base_address - reinterpret_cast<intptr_t>(instr_address));
+}
+int64_t Disassembler::CodeRelativeAddress(const void* addr) {
+  return reinterpret_cast<intptr_t>(addr) + code_address_offset();
+}
+
+
 void Disassembler::Format(const Instruction* instr, const char* mnemonic,
                           const char* format) {
   VIXL_ASSERT(mnemonic != NULL);
@@ -1486,16 +1545,20 @@
   VIXL_ASSERT(format[0] == 'I');
 
   switch (format[1]) {
-    case 'M': {  // IMoveImm or IMoveLSL.
-      if (format[5] == 'I') {
-        uint64_t imm = instr->ImmMoveWide() << (16 * instr->ShiftMoveWide());
-        AppendToOutput("#0x%" PRIx64, imm);
-      } else {
-        VIXL_ASSERT(format[5] == 'L');
+    case 'M': {  // IMoveImm, IMoveNeg or IMoveLSL.
+      if (format[5] == 'L') {
         AppendToOutput("#0x%" PRIx64, instr->ImmMoveWide());
         if (instr->ShiftMoveWide() > 0) {
           AppendToOutput(", lsl #%" PRId64, 16 * instr->ShiftMoveWide());
         }
+      } else {
+        VIXL_ASSERT((format[5] == 'I') || (format[5] == 'N'));
+        uint64_t imm = instr->ImmMoveWide() << (16 * instr->ShiftMoveWide());
+        if (format[5] == 'N')
+          imm = ~imm;
+        if (!instr->SixtyFourBits())
+          imm &= UINT64_C(0xffffffff);
+        AppendToOutput("#0x%" PRIx64, imm);
       }
       return 8;
     }
@@ -1634,14 +1697,31 @@
   VIXL_ASSERT(strncmp(format, "LValue", 6) == 0);
   USE(format);
 
+  const void * address = instr->LiteralAddress<const void *>();
   switch (instr->Mask(LoadLiteralMask)) {
     case LDR_w_lit:
     case LDR_x_lit:
     case LDRSW_x_lit:
     case LDR_s_lit:
     case LDR_d_lit:
-      AppendDataAddressToOutput(instr, instr->LiteralAddress());
+      AppendCodeRelativeDataAddressToOutput(instr, address);
       break;
+    case PRFM_lit: {
+      // Use the prefetch hint to decide how to print the address.
+      switch (instr->PrefetchHint()) {
+        case 0x0:     // PLD: prefetch for load.
+        case 0x2:     // PST: prepare for store.
+          AppendCodeRelativeDataAddressToOutput(instr, address);
+          break;
+        case 0x1:     // PLI: preload instructions.
+          AppendCodeRelativeCodeAddressToOutput(instr, address);
+          break;
+        case 0x3:     // Unallocated hint.
+          AppendCodeRelativeAddressToOutput(instr, address);
+          break;
+      }
+      break;
+    }
     default:
       VIXL_UNREACHABLE();
   }
@@ -1701,17 +1781,22 @@
               (strcmp(format, "AddrPCRelPage") == 0));    // Used by `adrp`.
 
   int64_t offset = instr->ImmPCRel();
-  const Instruction * base = instr;
 
+  // Compute the target address based on the effective address (after applying
+  // code_address_offset). This is required for correct behaviour of adrp.
+  const Instruction* base = instr + code_address_offset();
   if (format[9] == 'P') {
     offset *= kPageSize;
     base = AlignDown(base, kPageSize);
   }
+  // Strip code_address_offset before printing, so we can use the
+  // semantically-correct AppendCodeRelativeAddressToOutput.
+  const void* target =
+      reinterpret_cast<const void*>(base + offset - code_address_offset());
 
-  const void* target = reinterpret_cast<const void*>(base + offset);
   AppendPCRelativeOffsetToOutput(instr, offset);
   AppendToOutput(" ");
-  AppendAddressToOutput(instr, target);
+  AppendCodeRelativeAddressToOutput(instr, target);
   return 13;
 }
 
@@ -1738,7 +1823,7 @@
 
   AppendPCRelativeOffsetToOutput(instr, offset);
   AppendToOutput(" ");
-  AppendCodeAddressToOutput(instr, target_address);
+  AppendCodeRelativeCodeAddressToOutput(instr, target_address);
 
   return 8;
 }
@@ -1805,13 +1890,26 @@
   VIXL_ASSERT(format[0] == 'P');
   USE(format);
 
-  int prefetch_mode = instr->PrefetchMode();
+  static const char* hints[] = {"ld", "li", "st"};
+  static const char* stream_options[] = {"keep", "strm"};
 
-  const char* ls = (prefetch_mode & 0x10) ? "st" : "ld";
-  int level = (prefetch_mode >> 1) + 1;
-  const char* ks = (prefetch_mode & 1) ? "strm" : "keep";
+  unsigned hint = instr->PrefetchHint();
+  unsigned target = instr->PrefetchTarget() + 1;
+  unsigned stream = instr->PrefetchStream();
 
-  AppendToOutput("p%sl%d%s", ls, level, ks);
+  if ((hint >= (sizeof(hints) / sizeof(hints[0]))) || (target > 3)) {
+    // Unallocated prefetch operations.
+    int prefetch_mode = instr->ImmPrefetchOperation();
+    AppendToOutput("#0b%c%c%c%c%c",
+                   (prefetch_mode & (1 << 4)) ? '1' : '0',
+                   (prefetch_mode & (1 << 3)) ? '1' : '0',
+                   (prefetch_mode & (1 << 2)) ? '1' : '0',
+                   (prefetch_mode & (1 << 1)) ? '1' : '0',
+                   (prefetch_mode & (1 << 0)) ? '1' : '0');
+  } else {
+    VIXL_ASSERT(stream < (sizeof(stream_options) / sizeof(stream_options[0])));
+    AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]);
+  }
   return 6;
 }
 
diff --git a/src/a64/disasm-a64.h b/src/a64/disasm-a64.h
index db04337..ddfe98b 100644
--- a/src/a64/disasm-a64.h
+++ b/src/a64/disasm-a64.h
@@ -43,7 +43,7 @@
   char* GetOutput();
 
   // Declare all Visitor functions.
-  #define DECLARE(A)  void Visit##A(const Instruction* instr);
+  #define DECLARE(A) virtual void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
   #undef DECLARE
 
@@ -65,23 +65,45 @@
 
   // Prints an address, in the general case. It can be code or data. This is
   // used for example to print the target address of an ADR instruction.
-  virtual void AppendAddressToOutput(const Instruction* instr,
-                                     const void* addr);
+  virtual void AppendCodeRelativeAddressToOutput(const Instruction* instr,
+                                                 const void* addr);
 
   // Prints the address of some code.
   // This is used for example to print the target address of a branch to an
   // immediate offset.
   // A sub-class can for example override this method to lookup the address and
   // print an appropriate name.
-  virtual void AppendCodeAddressToOutput(const Instruction* instr,
-                                         const void* addr);
+  virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr,
+                                                     const void* addr);
 
   // Prints the address of some data.
   // This is used for example to print the source address of a load literal
   // instruction.
+  virtual void AppendCodeRelativeDataAddressToOutput(const Instruction* instr,
+                                                     const void* addr);
+
+  // Same as the above, but for addresses that are not relative to the code
+  // buffer. They are currently not used by VIXL.
+  virtual void AppendAddressToOutput(const Instruction* instr,
+                                     const void* addr);
+  virtual void AppendCodeAddressToOutput(const Instruction* instr,
+                                         const void* addr);
   virtual void AppendDataAddressToOutput(const Instruction* instr,
                                          const void* addr);
 
+ public:
+  // Get/Set the offset that should be added to code addresses when printing
+  // code-relative addresses in the AppendCodeRelative<Type>AddressToOutput()
+  // helpers.
+  // Below is an example of how a branch immediate instruction in memory at
+  // address 0xb010200 would disassemble with different offsets.
+  // Base address | Disassembly
+  //          0x0 | 0xb010200:  b #+0xcc  (addr 0xb0102cc)
+  //      0x10000 | 0xb000200:  b #+0xcc  (addr 0xb0002cc)
+  //    0xb010200 |       0x0:  b #+0xcc  (addr 0xcc)
+  void MapCodeAddress(int64_t base_address, const Instruction* instr_address);
+  int64_t CodeRelativeAddress(const void* instr);
+
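A sketch of how a tool could use the new mapping, assuming the existing `Decoder` and `PrintDisassembler` interfaces; the helper name and its parameters are illustrative:

```c++
#include "a64/decoder-a64.h"
#include "a64/disasm-a64.h"

using namespace vixl;

// Print `count` instructions with addresses shown as offsets from
// `code_start` rather than as absolute host addresses.
void PrintBufferRelative(const Instruction* code_start, int count) {
  Decoder decoder;
  PrintDisassembler disasm(stdout);
  decoder.AppendVisitor(&disasm);
  disasm.MapCodeAddress(0, code_start);  // Base address 0 => buffer offsets.
  const Instruction* instr = code_start;
  for (int i = 0; i < count; i++) {
    decoder.Decode(instr);
    instr = instr->NextInstruction();
  }
}
```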
  private:
   void Format(
       const Instruction* instr, const char* mnemonic, const char* format);
@@ -101,32 +123,40 @@
   int SubstitutePrefetchField(const Instruction* instr, const char* format);
   int SubstituteBarrierField(const Instruction* instr, const char* format);
 
-  inline bool RdIsZROrSP(const Instruction* instr) const {
+  bool RdIsZROrSP(const Instruction* instr) const {
     return (instr->Rd() == kZeroRegCode);
   }
 
-  inline bool RnIsZROrSP(const Instruction* instr) const {
+  bool RnIsZROrSP(const Instruction* instr) const {
     return (instr->Rn() == kZeroRegCode);
   }
 
-  inline bool RmIsZROrSP(const Instruction* instr) const {
+  bool RmIsZROrSP(const Instruction* instr) const {
     return (instr->Rm() == kZeroRegCode);
   }
 
-  inline bool RaIsZROrSP(const Instruction* instr) const {
+  bool RaIsZROrSP(const Instruction* instr) const {
     return (instr->Ra() == kZeroRegCode);
   }
 
   bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
 
+  int64_t code_address_offset() const { return code_address_offset_; }
+
  protected:
   void ResetOutput();
   void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);
 
+  void set_code_address_offset(int64_t code_address_offset) {
+    code_address_offset_ = code_address_offset;
+  }
+
   char* buffer_;
   uint32_t buffer_pos_;
   uint32_t buffer_size_;
   bool own_buffer_;
+
+  int64_t code_address_offset_;
 };
 
 
diff --git a/src/a64/instructions-a64.cc b/src/a64/instructions-a64.cc
index 1f08c78..b091886 100644
--- a/src/a64/instructions-a64.cc
+++ b/src/a64/instructions-a64.cc
@@ -30,6 +30,20 @@
 namespace vixl {
 
 
+// Floating-point infinity values.
+const float kFP32PositiveInfinity = rawbits_to_float(0x7f800000);
+const float kFP32NegativeInfinity = rawbits_to_float(0xff800000);
+const double kFP64PositiveInfinity =
+    rawbits_to_double(UINT64_C(0x7ff0000000000000));
+const double kFP64NegativeInfinity =
+    rawbits_to_double(UINT64_C(0xfff0000000000000));
+
+
+// The default NaN values (for FPCR.DN=1).
+const double kFP64DefaultNaN = rawbits_to_double(UINT64_C(0x7ff8000000000000));
+const float kFP32DefaultNaN = rawbits_to_float(0x7fc00000);
+
+
 static uint64_t RotateRight(uint64_t value,
                             unsigned int rotate,
                             unsigned int width) {
@@ -54,6 +68,55 @@
 }
 
 
+bool Instruction::IsLoad() const {
+  if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
+    return false;
+  }
+
+  if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) {
+    return Mask(LoadStorePairLBit) != 0;
+  } else {
+    LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreOpMask));
+    switch (op) {
+      case LDRB_w:
+      case LDRH_w:
+      case LDR_w:
+      case LDR_x:
+      case LDRSB_w:
+      case LDRSB_x:
+      case LDRSH_w:
+      case LDRSH_x:
+      case LDRSW_x:
+      case LDR_s:
+      case LDR_d: return true;
+      default: return false;
+    }
+  }
+}
+
+
+bool Instruction::IsStore() const {
+  if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
+    return false;
+  }
+
+  if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) {
+    return Mask(LoadStorePairLBit) == 0;
+  } else {
+    LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreOpMask));
+    switch (op) {
+      case STRB_w:
+      case STRH_w:
+      case STR_w:
+      case STR_x:
+      case STR_s:
+      case STR_d: return true;
+      default: return false;
+    }
+  }
+}
+
+
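These predicates make simple scans over generated code possible. A minimal sketch that tallies memory accesses in a range (the function and parameter names are illustrative):

```c++
#include "a64/instructions-a64.h"

namespace vixl {

// Count load and store instructions in [start, end).
void CountMemoryAccesses(const Instruction* start,
                         const Instruction* end,
                         int* loads,
                         int* stores) {
  *loads = 0;
  *stores = 0;
  for (const Instruction* instr = start;
       instr < end;
       instr = instr->NextInstruction()) {
    if (instr->IsLoad()) (*loads)++;
    if (instr->IsStore()) (*stores)++;
  }
}

}  // namespace vixl
```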
 // Logical immediates can't encode zero, so a return value of zero is used to
 // indicate a failure case. Specifically, where the constraints on imm_s are
 // not met.
diff --git a/src/a64/instructions-a64.h b/src/a64/instructions-a64.h
index 38f079f..f1d883c 100644
--- a/src/a64/instructions-a64.h
+++ b/src/a64/instructions-a64.h
@@ -96,28 +96,15 @@
 const unsigned kFloatMantissaBits = 23;
 const unsigned kFloatExponentBits = 8;
 
-const float kFP32PositiveInfinity = rawbits_to_float(0x7f800000);
-const float kFP32NegativeInfinity = rawbits_to_float(0xff800000);
-const double kFP64PositiveInfinity =
-    rawbits_to_double(UINT64_C(0x7ff0000000000000));
-const double kFP64NegativeInfinity =
-    rawbits_to_double(UINT64_C(0xfff0000000000000));
-
-// This value is a signalling NaN as both a double and as a float (taking the
-// least-significant word).
-static const double kFP64SignallingNaN =
-    rawbits_to_double(UINT64_C(0x7ff000007f800001));
-static const float kFP32SignallingNaN = rawbits_to_float(0x7f800001);
-
-// A similar value, but as a quiet NaN.
-static const double kFP64QuietNaN =
-    rawbits_to_double(UINT64_C(0x7ff800007fc00001));
-static const float kFP32QuietNaN = rawbits_to_float(0x7fc00001);
+// Floating-point infinity values.
+extern const float kFP32PositiveInfinity;
+extern const float kFP32NegativeInfinity;
+extern const double kFP64PositiveInfinity;
+extern const double kFP64NegativeInfinity;
 
 // The default NaN values (for FPCR.DN=1).
-static const double kFP64DefaultNaN =
-    rawbits_to_double(UINT64_C(0x7ff8000000000000));
-static const float kFP32DefaultNaN = rawbits_to_float(0x7fc00000);
+extern const double kFP64DefaultNaN;
+extern const float kFP32DefaultNaN;
 
 
 enum LSDataSize {
@@ -164,33 +151,33 @@
 
 class Instruction {
  public:
-  inline Instr InstructionBits() const {
+  Instr InstructionBits() const {
     return *(reinterpret_cast<const Instr*>(this));
   }
 
-  inline void SetInstructionBits(Instr new_instr) {
+  void SetInstructionBits(Instr new_instr) {
     *(reinterpret_cast<Instr*>(this)) = new_instr;
   }
 
-  inline int Bit(int pos) const {
+  int Bit(int pos) const {
     return (InstructionBits() >> pos) & 1;
   }
 
-  inline uint32_t Bits(int msb, int lsb) const {
+  uint32_t Bits(int msb, int lsb) const {
     return unsigned_bitextract_32(msb, lsb, InstructionBits());
   }
 
-  inline int32_t SignedBits(int msb, int lsb) const {
+  int32_t SignedBits(int msb, int lsb) const {
     int32_t bits = *(reinterpret_cast<const int32_t*>(this));
     return signed_bitextract_32(msb, lsb, bits);
   }
 
-  inline Instr Mask(uint32_t mask) const {
+  Instr Mask(uint32_t mask) const {
     return InstructionBits() & mask;
   }
 
   #define DEFINE_GETTER(Name, HighBit, LowBit, Func)             \
-  inline int64_t Name() const { return Func(HighBit, LowBit); }
+  int64_t Name() const { return Func(HighBit, LowBit); }
   INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
   #undef DEFINE_GETTER
 
@@ -206,56 +193,64 @@
   float ImmFP32() const;
   double ImmFP64() const;
 
-  inline LSDataSize SizeLSPair() const {
+  LSDataSize SizeLSPair() const {
     return CalcLSPairDataSize(
              static_cast<LoadStorePairOp>(Mask(LoadStorePairMask)));
   }
 
   // Helpers.
-  inline bool IsCondBranchImm() const {
+  bool IsCondBranchImm() const {
     return Mask(ConditionalBranchFMask) == ConditionalBranchFixed;
   }
 
-  inline bool IsUncondBranchImm() const {
+  bool IsUncondBranchImm() const {
     return Mask(UnconditionalBranchFMask) == UnconditionalBranchFixed;
   }
 
-  inline bool IsCompareBranch() const {
+  bool IsCompareBranch() const {
     return Mask(CompareBranchFMask) == CompareBranchFixed;
   }
 
-  inline bool IsTestBranch() const {
+  bool IsTestBranch() const {
     return Mask(TestBranchFMask) == TestBranchFixed;
   }
 
-  inline bool IsPCRelAddressing() const {
+  bool IsPCRelAddressing() const {
     return Mask(PCRelAddressingFMask) == PCRelAddressingFixed;
   }
 
-  inline bool IsLogicalImmediate() const {
+  bool IsLogicalImmediate() const {
     return Mask(LogicalImmediateFMask) == LogicalImmediateFixed;
   }
 
-  inline bool IsAddSubImmediate() const {
+  bool IsAddSubImmediate() const {
     return Mask(AddSubImmediateFMask) == AddSubImmediateFixed;
   }
 
-  inline bool IsAddSubExtended() const {
+  bool IsAddSubExtended() const {
     return Mask(AddSubExtendedFMask) == AddSubExtendedFixed;
   }
 
-  inline bool IsLoadOrStore() const {
+  bool IsLoadOrStore() const {
     return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
   }
 
-  inline bool IsMovn() const {
+  bool IsLoad() const;
+  bool IsStore() const;
+
+  bool IsLoadLiteral() const {
+    // This includes PRFM_lit.
+    return Mask(LoadLiteralFMask) == LoadLiteralFixed;
+  }
+
+  bool IsMovn() const {
     return (Mask(MoveWideImmediateMask) == MOVN_x) ||
            (Mask(MoveWideImmediateMask) == MOVN_w);
   }
 
   // Indicate whether Rd can be the stack pointer or the zero register. This
   // does not check that the instruction actually has an Rd field.
-  inline Reg31Mode RdMode() const {
+  Reg31Mode RdMode() const {
     // The following instructions use sp or wsp as Rd:
     //  Add/sub (immediate) when not setting the flags.
     //  Add/sub (extended) when not setting the flags.
@@ -284,7 +279,7 @@
 
   // Indicate whether Rn can be the stack pointer or the zero register. This
   // does not check that the instruction actually has an Rn field.
-  inline Reg31Mode RnMode() const {
+  Reg31Mode RnMode() const {
     // The following instructions use sp or wsp as Rn:
     //  All loads and stores.
     //  Add/sub (immediate).
@@ -296,7 +291,7 @@
     return Reg31IsZeroRegister;
   }
 
-  inline ImmBranchType BranchType() const {
+  ImmBranchType BranchType() const {
     if (IsCondBranchImm()) {
       return CondBranchType;
     } else if (IsUncondBranchImm()) {
@@ -320,55 +315,66 @@
   // Patch a literal load instruction to load from 'source'.
   void SetImmLLiteral(const Instruction* source);
 
-  inline uint8_t* LiteralAddress() const {
-    int offset = ImmLLiteral() << kLiteralEntrySizeLog2;
-    const uint8_t* address = reinterpret_cast<const uint8_t*>(this) + offset;
-    // Note that the result is safely mutable only if the backing buffer is
-    // safely mutable.
-    return const_cast<uint8_t*>(address);
+  // Calculate the address of a literal referred to by a load-literal
+  // instruction, and return it as the specified type.
+  //
+  // The literal itself is safely mutable only if the backing buffer is safely
+  // mutable.
+  template <typename T>
+  T LiteralAddress() const {
+    uint64_t base_raw = reinterpret_cast<uintptr_t>(this);
+    ptrdiff_t offset = ImmLLiteral() << kLiteralEntrySizeLog2;
+    uint64_t address_raw = base_raw + offset;
+
+    // Cast the address using a C-style cast. A reinterpret_cast would be
+    // appropriate, but it can't cast one integral type to another.
+    T address = (T)(address_raw);
+
+    // Assert that the address can be represented by the specified type.
+    VIXL_ASSERT((uint64_t)(address) == address_raw);
+
+    return address;
   }
 
-  inline uint32_t Literal32() const {
+  uint32_t Literal32() const {
     uint32_t literal;
-    memcpy(&literal, LiteralAddress(), sizeof(literal));
-
+    memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal));
     return literal;
   }
 
-  inline uint64_t Literal64() const {
+  uint64_t Literal64() const {
     uint64_t literal;
-    memcpy(&literal, LiteralAddress(), sizeof(literal));
-
+    memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal));
     return literal;
   }
 
-  inline float LiteralFP32() const {
+  float LiteralFP32() const {
     return rawbits_to_float(Literal32());
   }
 
-  inline double LiteralFP64() const {
+  double LiteralFP64() const {
     return rawbits_to_double(Literal64());
   }
 
-  inline const Instruction* NextInstruction() const {
+  const Instruction* NextInstruction() const {
     return this + kInstructionSize;
   }
 
-  inline const Instruction* InstructionAtOffset(int64_t offset) const {
+  const Instruction* InstructionAtOffset(int64_t offset) const {
     VIXL_ASSERT(IsWordAligned(this + offset));
     return this + offset;
   }
 
-  template<typename T> static inline Instruction* Cast(T src) {
+  template<typename T> static Instruction* Cast(T src) {
     return reinterpret_cast<Instruction*>(src);
   }
 
-  template<typename T> static inline const Instruction* CastConst(T src) {
+  template<typename T> static const Instruction* CastConst(T src) {
     return reinterpret_cast<const Instruction*>(src);
   }
 
  private:
-  inline int ImmBranch() const;
+  int ImmBranch() const;
 
   void SetPCRelImmTarget(const Instruction* target);
   void SetBranchImmTarget(const Instruction* target);
diff --git a/src/a64/instrument-a64.cc b/src/a64/instrument-a64.cc
index 3e3c388..c842cfd 100644
--- a/src/a64/instrument-a64.cc
+++ b/src/a64/instrument-a64.cc
@@ -639,4 +639,4 @@
 }
 
 
-}  // namespace v8::internal
+}  // namespace vixl
diff --git a/src/a64/macro-assembler-a64.cc b/src/a64/macro-assembler-a64.cc
index dcf06c6..7daa6fc 100644
--- a/src/a64/macro-assembler-a64.cc
+++ b/src/a64/macro-assembler-a64.cc
@@ -129,20 +129,24 @@
 
 
 EmissionCheckScope::EmissionCheckScope(MacroAssembler* masm, size_t size) {
-  masm->EnsureEmitFor(size);
-#ifdef DEBUG
-  masm_ = masm;
-  masm->Bind(&start_);
-  size_ = size;
-  masm->AcquireBuffer();
+  if (masm) {
+    masm->EnsureEmitFor(size);
+#ifdef VIXL_DEBUG
+    masm_ = masm;
+    masm->Bind(&start_);
+    size_ = size;
+    masm->AcquireBuffer();
 #endif
+  }
 }
 
 
 EmissionCheckScope::~EmissionCheckScope() {
-#ifdef DEBUG
-  masm_->ReleaseBuffer();
-  VIXL_ASSERT(masm_->SizeOfCodeGeneratedSince(&start_) <= size_);
+#ifdef VIXL_DEBUG
+  if (masm_) {
+    masm_->ReleaseBuffer();
+    VIXL_ASSERT(masm_->SizeOfCodeGeneratedSince(&start_) <= size_);
+  }
 #endif
 }
 
@@ -150,7 +154,7 @@
 MacroAssembler::MacroAssembler(size_t capacity,
                                PositionIndependentCodeOption pic)
     : Assembler(capacity, pic),
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
       allow_macro_instructions_(true),
 #endif
       sp_(sp),
@@ -165,7 +169,7 @@
                                size_t capacity,
                                PositionIndependentCodeOption pic)
     : Assembler(buffer, capacity, pic),
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
       allow_macro_instructions_(true),
 #endif
       sp_(sp),
@@ -197,6 +201,134 @@
 }
 
 
+int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
+                                        const Register &rd,
+                                        uint64_t imm) {
+  bool emit_code = (masm != NULL);
+  VIXL_ASSERT(is_uint32(imm) || is_int32(imm) || rd.Is64Bits());
+  // The worst case for size is mov 64-bit immediate to sp:
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction to move to sp
+  MacroEmissionCheckScope guard(masm);
+
+  // Immediates on AArch64 can be produced using an initial value, and zero to
+  // three move-keep operations.
+  //
+  // Initial values can be generated with:
+  //  1. 64-bit move zero (movz).
+  //  2. 32-bit move inverted (movn).
+  //  3. 64-bit move inverted.
+  //  4. 32-bit orr immediate.
+  //  5. 64-bit orr immediate.
+  // Move-keep may then be used to modify each of the 16-bit half words.
+  //
+  // The code below supports all five initial value generators, and
+  // applying move-keep operations to move-zero and move-inverted initial
+  // values.
+
+  // Try to move the immediate in one instruction, and if that fails, switch to
+  // using multiple instructions.
+  if (OneInstrMoveImmediateHelper(masm, rd, imm)) {
+    return 1;
+  } else {
+    int instruction_count = 0;
+    unsigned reg_size = rd.size();
+
+    // Generic immediate case. Imm will be represented by
+    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
+    // A move-zero or move-inverted is generated for the first non-zero or
+    // non-0xffff immX, and a move-keep for subsequent non-zero immX.
+
+    uint64_t ignored_halfword = 0;
+    bool invert_move = false;
+    // If the number of 0xffff halfwords is greater than the number of 0x0000
+    // halfwords, it's more efficient to use move-inverted.
+    if (CountClearHalfWords(~imm, reg_size) >
+        CountClearHalfWords(imm, reg_size)) {
+      ignored_halfword = 0xffff;
+      invert_move = true;
+    }
+
+    // Mov instructions can't move values into the stack pointer, so set up a
+    // temporary register, if needed.
+    UseScratchRegisterScope temps;
+    Register temp;
+    if (emit_code) {
+      temps.Open(masm);
+      temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
+    }
+
+    // Iterate through the halfwords. Use movn/movz for the first non-ignored
+    // halfword, and movk for subsequent halfwords.
+    VIXL_ASSERT((reg_size % 16) == 0);
+    bool first_mov_done = false;
+    for (unsigned i = 0; i < (temp.size() / 16); i++) {
+      uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
+      if (imm16 != ignored_halfword) {
+        if (!first_mov_done) {
+          if (invert_move) {
+            if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i);
+            instruction_count++;
+          } else {
+            if (emit_code) masm->movz(temp, imm16, 16 * i);
+            instruction_count++;
+          }
+          first_mov_done = true;
+        } else {
+          // Construct a wider constant.
+          if (emit_code) masm->movk(temp, imm16, 16 * i);
+          instruction_count++;
+        }
+      }
+    }
+
+    VIXL_ASSERT(first_mov_done);
+
+    // Move the temporary if the original destination register was the stack
+    // pointer.
+    if (rd.IsSP()) {
+      if (emit_code) masm->mov(rd, temp);
+      instruction_count++;
+    }
+    return instruction_count;
+  }
+}
+
+
+bool MacroAssembler::OneInstrMoveImmediateHelper(MacroAssembler* masm,
+                                                 const Register& dst,
+                                                 int64_t imm) {
+  bool emit_code = masm != NULL;
+  unsigned n, imm_s, imm_r;
+  int reg_size = dst.size();
+
+  if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
+    // Immediate can be represented in a move zero instruction. Movz can't write
+    // to the stack pointer.
+    if (emit_code) {
+      masm->movz(dst, imm);
+    }
+    return true;
+  } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
+    // Immediate can be represented in a move negative instruction. Movn can't
+    // write to the stack pointer.
+    if (emit_code) {
+      masm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
+    }
+    return true;
+  } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
+    // Immediate can be represented in a logical orr instruction.
+    VIXL_ASSERT(!dst.IsZero());
+    if (emit_code) {
+      masm->LogicalImmediate(
+          dst, AppropriateZeroRegFor(dst), n, imm_s, imm_r, ORR);
+    }
+    return true;
+  }
+  return false;
+}
+
+
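The helper either emits code (when `masm` is non-NULL) or only counts instructions, so `Mov` and any size estimation stay in sync. A sketch of the generic path for a constant that favours the inverted form; the destination register and the constant are chosen for illustration:

```c++
#include "a64/macro-assembler-a64.h"

using namespace vixl;

void EmitConstant(MacroAssembler* masm) {
  // 0xffffffff12345678 has two 0xffff halfwords and no 0x0000 halfwords, so
  // the helper chooses move-inverted: a movn for the first non-0xffff
  // halfword, then a movk for the next one -- two instructions, roughly:
  //   movn x0, #0xa987            // x0 = ~0x000000000000a987
  //   movk x0, #0x1234, lsl #16   // x0 = 0xffffffff12345678
  masm->Mov(x0, UINT64_C(0xffffffff12345678));
}
```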
 void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
   VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
               ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
@@ -459,109 +591,7 @@
 
 void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
   VIXL_ASSERT(allow_macro_instructions_);
-  VIXL_ASSERT(is_uint32(imm) || is_int32(imm) || rd.Is64Bits());
-  // The worst case for size is mov 64-bit immediate to sp:
-  //  * up to 4 instructions to materialise the constant
-  //  * 1 instruction to move to sp
-  MacroEmissionCheckScope guard(this);
-
-  // Immediates on Aarch64 can be produced using an initial value, and zero to
-  // three move keep operations.
-  //
-  // Initial values can be generated with:
-  //  1. 64-bit move zero (movz).
-  //  2. 32-bit move inverted (movn).
-  //  3. 64-bit move inverted.
-  //  4. 32-bit orr immediate.
-  //  5. 64-bit orr immediate.
-  // Move-keep may then be used to modify each of the 16-bit half words.
-  //
-  // The code below supports all five initial value generators, and
-  // applying move-keep operations to move-zero and move-inverted initial
-  // values.
-
-  // Try to move the immediate in one instruction, and if that fails, switch to
-  // using multiple instructions.
-  if (!TryOneInstrMoveImmediate(rd, imm)) {
-    unsigned reg_size = rd.size();
-
-    // Generic immediate case. Imm will be represented by
-    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
-    // A move-zero or move-inverted is generated for the first non-zero or
-    // non-0xffff immX, and a move-keep for subsequent non-zero immX.
-
-    uint64_t ignored_halfword = 0;
-    bool invert_move = false;
-    // If the number of 0xffff halfwords is greater than the number of 0x0000
-    // halfwords, it's more efficient to use move-inverted.
-    if (CountClearHalfWords(~imm, reg_size) >
-        CountClearHalfWords(imm, reg_size)) {
-      ignored_halfword = 0xffff;
-      invert_move = true;
-    }
-
-    // Mov instructions can't move values into the stack pointer, so set up a
-    // temporary register, if needed.
-    UseScratchRegisterScope temps(this);
-    Register temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
-
-    // Iterate through the halfwords. Use movn/movz for the first non-ignored
-    // halfword, and movk for subsequent halfwords.
-    VIXL_ASSERT((reg_size % 16) == 0);
-    bool first_mov_done = false;
-    for (unsigned i = 0; i < (temp.size() / 16); i++) {
-      uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
-      if (imm16 != ignored_halfword) {
-        if (!first_mov_done) {
-          if (invert_move) {
-            movn(temp, ~imm16 & 0xffff, 16 * i);
-          } else {
-            movz(temp, imm16, 16 * i);
-          }
-          first_mov_done = true;
-        } else {
-          // Construct a wider constant.
-          movk(temp, imm16, 16 * i);
-        }
-      }
-    }
-
-    VIXL_ASSERT(first_mov_done);
-
-    // Move the temporary if the original destination register was the stack
-    // pointer.
-    if (rd.IsSP()) {
-      mov(rd, temp);
-    }
-  }
-}
-
-
-unsigned MacroAssembler::CountClearHalfWords(uint64_t imm, unsigned reg_size) {
-  VIXL_ASSERT((reg_size % 8) == 0);
-  int count = 0;
-  for (unsigned i = 0; i < (reg_size / 16); i++) {
-    if ((imm & 0xffff) == 0) {
-      count++;
-    }
-    imm >>= 16;
-  }
-  return count;
-}
-
-
-// The movz instruction can generate immediates containing an arbitrary 16-bit
-// value, with remaining bits clear, eg. 0x00001234, 0x0000123400000000.
-bool MacroAssembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
-  VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
-  return CountClearHalfWords(imm, reg_size) >= ((reg_size / 16) - 1);
-}
-
-
-// The movn instruction can generate immediates containing an arbitrary 16-bit
-// value, with remaining bits set, eg. 0xffff1234, 0xffff1234ffffffff.
-bool MacroAssembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
-  return IsImmMovz(~imm, reg_size);
+  MoveImmediateHelper(this, rd, imm);
 }
 
 
@@ -807,26 +837,7 @@
 
 bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
                                               int64_t imm) {
-  unsigned n, imm_s, imm_r;
-  int reg_size = dst.size();
-
-  if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
-    // Immediate can be represented in a move zero instruction. Movz can't write
-    // to the stack pointer.
-    movz(dst, imm);
-    return true;
-  } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
-    // Immediate can be represented in a move negative instruction. Movn can't
-    // write to the stack pointer.
-    movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
-    return true;
-  } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
-    // Immediate can be represented in a logical orr instruction.
-    VIXL_ASSERT(!dst.IsZero());
-    LogicalImmediate(dst, AppropriateZeroRegFor(dst), n, imm_s, imm_r, ORR);
-    return true;
-  }
-  return false;
+  return OneInstrMoveImmediateHelper(this, dst, imm);
 }
 
 
@@ -1002,6 +1013,7 @@
 LS_MACRO_LIST(DEFINE_FUNCTION)
 #undef DEFINE_FUNCTION
 
+
 void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
                                     const MemOperand& addr,
                                     LoadStoreOp op) {
@@ -1088,6 +1100,34 @@
   }
 }
 
+
+void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) {
+  MacroEmissionCheckScope guard(this);
+
+  // There are no pre- or post-index modes for prfm.
+  VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset());
+
+  // The access size is implicitly 8 bytes for all prefetch operations.
+  LSDataSize size = LSDoubleWord;
+
+  // Check if an immediate offset fits in the immediate field of the
+  // appropriate instruction. If not, emit two instructions to perform
+  // the operation.
+  if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.offset(), size) &&
+      !IsImmLSUnscaled(addr.offset())) {
+    // Immediate offset that can't be encoded using unsigned or unscaled
+    // addressing modes.
+    UseScratchRegisterScope temps(this);
+    Register temp = temps.AcquireSameSizeAs(addr.base());
+    Mov(temp, addr.offset());
+    Prefetch(op, MemOperand(addr.base(), temp));
+  } else {
+    // Simple immediate or register offsets are encodable in one instruction.
+    Prefetch(op, addr);
+  }
+}
+
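+// Usage sketch for the macro above (illustrative; `masm` is assumed, and
+// PLDL1KEEP is one of the architectural prefetch operations). The first two
+// forms are encoded directly; the third offset fits neither the scaled nor
+// the unscaled immediate field, so it goes through a scratch register.
+//
+//   masm.Prfm(PLDL1KEEP, MemOperand(x0, 8));         // Scaled immediate.
+//   masm.Prfm(PLDL1KEEP, MemOperand(x0, x1));        // Register offset.
+//   masm.Prfm(PLDL1KEEP, MemOperand(x0, 0x123457));  // Mov to temp, then prfm.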
+
 void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1,
                           const CPURegister& src2, const CPURegister& src3) {
   VIXL_ASSERT(allow_macro_instructions_);
@@ -1689,7 +1729,7 @@
   Label start;
   bind(&start);
 
-  // Refer to instructions-a64.h for a description of the marker and its
+  // Refer to simulator-a64.h for a description of the marker and its
   // arguments.
   hlt(kTraceOpcode);
 
@@ -1717,7 +1757,7 @@
   Label start;
   bind(&start);
 
-  // Refer to instructions-a64.h for a description of the marker and its
+  // Refer to simulator-a64.h for a description of the marker and its
   // arguments.
   hlt(kLogOpcode);
 
@@ -1756,9 +1796,53 @@
 }
 
 
+void UseScratchRegisterScope::Open(MacroAssembler* masm) {
+  VIXL_ASSERT(!initialised_);
+  available_ = masm->TmpList();
+  availablefp_ = masm->FPTmpList();
+  old_available_ = available_->list();
+  old_availablefp_ = availablefp_->list();
+  VIXL_ASSERT(available_->type() == CPURegister::kRegister);
+  VIXL_ASSERT(availablefp_->type() == CPURegister::kFPRegister);
+#ifdef VIXL_DEBUG
+  initialised_ = true;
+#endif
+}
+
+
+void UseScratchRegisterScope::Close() {
+  if (available_) {
+    available_->set_list(old_available_);
+    available_ = NULL;
+  }
+  if (availablefp_) {
+    availablefp_->set_list(old_availablefp_);
+    availablefp_ = NULL;
+  }
+#ifdef VIXL_DEBUG
+  initialised_ = false;
+#endif
+}
+
+
+UseScratchRegisterScope::UseScratchRegisterScope(MacroAssembler* masm) {
+#ifdef VIXL_DEBUG
+  initialised_ = false;
+#endif
+  Open(masm);
+}
+
+// This allows deferred (and optional) initialisation of the scope.
+UseScratchRegisterScope::UseScratchRegisterScope()
+    : available_(NULL), availablefp_(NULL),
+      old_available_(0), old_availablefp_(0) {
+#ifdef VIXL_DEBUG
+  initialised_ = false;
+#endif
+}
+
 UseScratchRegisterScope::~UseScratchRegisterScope() {
-  available_->set_list(old_available_);
-  availablefp_->set_list(old_availablefp_);
+  Close();
 }
 
 
@@ -1780,6 +1864,7 @@
 
 
 void UseScratchRegisterScope::Release(const CPURegister& reg) {
+  VIXL_ASSERT(initialised_);
   if (reg.IsRegister()) {
     ReleaseByCode(available_, reg.code());
   } else if (reg.IsFPRegister()) {
@@ -1791,6 +1876,7 @@
 
 
 void UseScratchRegisterScope::Include(const CPURegList& list) {
+  VIXL_ASSERT(initialised_);
   if (list.type() == CPURegister::kRegister) {
     // Make sure that neither sp nor xzr are included in the list.
     IncludeByRegList(available_, list.list() & ~(xzr.Bit() | sp.Bit()));
@@ -1805,6 +1891,7 @@
                                       const Register& reg2,
                                       const Register& reg3,
                                       const Register& reg4) {
+  VIXL_ASSERT(initialised_);
   RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
   // Make sure that neither sp nor xzr are included in the list.
   include &= ~(xzr.Bit() | sp.Bit());
diff --git a/src/a64/macro-assembler-a64.h b/src/a64/macro-assembler-a64.h
index d589628..f1ef48a 100644
--- a/src/a64/macro-assembler-a64.h
+++ b/src/a64/macro-assembler-a64.h
@@ -118,7 +118,7 @@
   ~EmissionCheckScope();
 
  protected:
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   MacroAssembler* masm_;
   Label start_;
   size_t size_;
@@ -208,6 +208,25 @@
   // called before executing or copying code from the buffer.
   void FinalizeCode();
 
+
+  // Constant generation helpers.
+  // These functions return the number of instructions required to move the
+  // immediate into the destination register. In addition, if the masm pointer
+  // is non-null, they generate the code to do so.
+  // The two features are implemented using one function to avoid duplicating
+  // the logic.
+  // The helpers can be used to evaluate the cost of synthesizing an immediate
+  // with 'mov immediate' instructions. A user might prefer loading a constant
+  // from the literal pool instead of using multiple 'mov immediate'
+  // instructions.
+  static int MoveImmediateHelper(MacroAssembler* masm,
+                                 const Register &rd,
+                                 uint64_t imm);
+  static bool OneInstrMoveImmediateHelper(MacroAssembler* masm,
+                                          const Register& dst,
+                                          int64_t imm);
+
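+  // A brief usage sketch of the helpers above (illustrative; `masm`, the
+  // constant and the cost threshold are assumptions, not VIXL policy). A
+  // NULL masm queries the cost without emitting any code.
+  //
+  //   const uint64_t imm = UINT64_C(0x1234567890abcdef);
+  //   if (MacroAssembler::MoveImmediateHelper(NULL, x0, imm) > 2) {
+  //     masm.Ldr(x0, imm);  // Assumed literal-pool form of Ldr.
+  //   } else {
+  //     masm.Mov(x0, imm);  // Synthesised with movz/movn/movk or orr.
+  //   }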
+
   // Logical macros.
   void And(const Register& rd,
            const Register& rn,
@@ -297,9 +316,6 @@
     Mov(rd, (rd.size() == kXRegSize) ? ~imm : (~imm & kWRegMask));
   }
   void Mvn(const Register& rd, const Operand& operand);
-  bool IsImmMovz(uint64_t imm, unsigned reg_size);
-  bool IsImmMovn(uint64_t imm, unsigned reg_size);
-  unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);
 
   // Try to move an immediate into the destination register in a single
   // instruction. Returns true for success, and updates the contents of dst.
@@ -352,6 +368,8 @@
                           const MemOperand& addr,
                           LoadStorePairOp op);
 
+  void Prfm(PrefetchOperation op, const MemOperand& addr);
+
   // Push or pop up to 4 registers of the same width to or from the stack,
   // using the current stack pointer as set by SetStackPointer.
   //
@@ -416,16 +434,16 @@
   void PopWRegList(RegList regs) {
     PopSizeRegList(regs, kWRegSize);
   }
-  inline void PushDRegList(RegList regs) {
+  void PushDRegList(RegList regs) {
     PushSizeRegList(regs, kDRegSize, CPURegister::kFPRegister);
   }
-  inline void PopDRegList(RegList regs) {
+  void PopDRegList(RegList regs) {
     PopSizeRegList(regs, kDRegSize, CPURegister::kFPRegister);
   }
-  inline void PushSRegList(RegList regs) {
+  void PushSRegList(RegList regs) {
     PushSizeRegList(regs, kSRegSize, CPURegister::kFPRegister);
   }
-  inline void PopSRegList(RegList regs) {
+  void PopSRegList(RegList regs) {
     PopSizeRegList(regs, kSRegSize, CPURegister::kFPRegister);
   }
 
@@ -476,16 +494,16 @@
   void PokeWRegList(RegList regs, int offset) {
     PokeSizeRegList(regs, offset, kWRegSize);
   }
-  inline void PeekDRegList(RegList regs, int offset) {
+  void PeekDRegList(RegList regs, int offset) {
     PeekSizeRegList(regs, offset, kDRegSize, CPURegister::kFPRegister);
   }
-  inline void PokeDRegList(RegList regs, int offset) {
+  void PokeDRegList(RegList regs, int offset) {
     PokeSizeRegList(regs, offset, kDRegSize, CPURegister::kFPRegister);
   }
-  inline void PeekSRegList(RegList regs, int offset) {
+  void PeekSRegList(RegList regs, int offset) {
     PeekSizeRegList(regs, offset, kSRegSize, CPURegister::kFPRegister);
   }
-  inline void PokeSRegList(RegList regs, int offset) {
+  void PokeSRegList(RegList regs, int offset) {
     PokeSizeRegList(regs, offset, kSRegSize, CPURegister::kFPRegister);
   }
 
@@ -948,6 +966,11 @@
     SingleEmissionCheckScope guard(this);
     frinta(fd, fn);
   }
+  void Frinti(const FPRegister& fd, const FPRegister& fn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frinti(fd, fn);
+  }
   void Frintm(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
@@ -958,6 +981,16 @@
     SingleEmissionCheckScope guard(this);
     frintn(fd, fn);
   }
+  void Frintp(const FPRegister& fd, const FPRegister& fn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintp(fd, fn);
+  }
+  void Frintx(const FPRegister& fd, const FPRegister& fn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintx(fd, fn);
+  }
   void Frintz(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
@@ -1545,7 +1578,7 @@
   // one instruction. Refer to the implementation for details.
   void BumpSystemStackPointer(const Operand& space);
 
-#if DEBUG
+#if VIXL_DEBUG
   void SetAllowMacroInstructions(bool value) {
     allow_macro_instructions_ = value;
   }
@@ -1688,7 +1721,7 @@
   void PrepareForPush(int count, int size);
   void PrepareForPop(int count, int size);
 
-#if DEBUG
+#if VIXL_DEBUG
   // Tell whether any of the macro instructions can be used. When false the
   // MacroAssembler will assert if a method which can emit a variable number
   // of instructions is called.
@@ -1721,21 +1754,21 @@
                              kCheck,
                              policy) {
     VIXL_ASSERT(policy != kNoAssert);
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
     old_allow_macro_instructions_ = masm->AllowMacroInstructions();
     masm->SetAllowMacroInstructions(false);
 #endif
   }
 
   ~InstructionAccurateScope() {
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
     MacroAssembler* masm = reinterpret_cast<MacroAssembler*>(assm_);
     masm->SetAllowMacroInstructions(old_allow_macro_instructions_);
 #endif
   }
 
  private:
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   bool old_allow_macro_instructions_;
 #endif
 };
@@ -1765,17 +1798,21 @@
 // original state, even if the lists were modified by some other means.
 class UseScratchRegisterScope {
  public:
-  explicit UseScratchRegisterScope(MacroAssembler* masm)
-      : available_(masm->TmpList()),
-        availablefp_(masm->FPTmpList()),
-        old_available_(available_->list()),
-        old_availablefp_(availablefp_->list()) {
-    VIXL_ASSERT(available_->type() == CPURegister::kRegister);
-    VIXL_ASSERT(availablefp_->type() == CPURegister::kFPRegister);
-  }
+  // This constructor implicitly calls the `Open` function to initialise the
+  // scope, so it is ready to use immediately after it has been constructed.
+  explicit UseScratchRegisterScope(MacroAssembler* masm);
+  // This constructor allows deferred and optional initialisation of the scope.
+  // The user is required to explicitly call the `Open` function before using
+  // the scope.
+  UseScratchRegisterScope();
+  // This function performs the actual initialisation work.
+  void Open(MacroAssembler* masm);
 
-
+  // The destructor always implicitly calls the `Close` function.
   ~UseScratchRegisterScope();
+  // This function performs the clean-up work. It must succeed even if the
+  // scope has not been opened. It is safe to call multiple times.
+  void Close();
 
 
   bool IsAvailable(const CPURegister& reg) const;
@@ -1854,6 +1891,17 @@
   // The state of the available lists at the start of this scope.
   RegList old_available_;     // kRegister
   RegList old_availablefp_;   // kFPRegister
+#ifdef VIXL_DEBUG
+  bool initialised_;
+#endif
+
+  // Disallow copy constructor and operator=.
+  UseScratchRegisterScope(const UseScratchRegisterScope&) {
+    VIXL_UNREACHABLE();
+  }
+  void operator=(const UseScratchRegisterScope&) {
+    VIXL_UNREACHABLE();
+  }
 };
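// A usage sketch of the two initialisation styles (illustrative; `masm` and
// the Acquire helpers are assumed):
//
//   {
//     UseScratchRegisterScope temps(&masm);   // Opened by the constructor.
//     Register scratch = temps.AcquireX();
//     masm.Mov(scratch, 0xfedcba98);
//     masm.Add(x0, x0, scratch);
//   }                                         // Closed by the destructor.
//
//   UseScratchRegisterScope deferred_temps;   // Not usable until Open().
//   deferred_temps.Open(&masm);
//   FPRegister fp_scratch = deferred_temps.AcquireD();
//   masm.Fmov(fp_scratch, 1.0);
//   deferred_temps.Close();                   // Safe to call more than once.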
 
 
diff --git a/src/a64/simulator-a64.cc b/src/a64/simulator-a64.cc
index e01a140..df41415 100644
--- a/src/a64/simulator-a64.cc
+++ b/src/a64/simulator-a64.cc
@@ -68,6 +68,11 @@
   decoder_ = decoder;
   decoder_->AppendVisitor(this);
 
+  stream_ = stream;
+  print_disasm_ = new PrintDisassembler(stream_);
+  set_coloured_trace(false);
+  trace_parameters_ = LOG_NONE;
+
   ResetState();
 
   // Allocate and set up the simulator stack.
@@ -82,11 +87,6 @@
   tos = AlignDown(tos, 16);
   set_sp(tos);
 
-  stream_ = stream;
-  print_disasm_ = new PrintDisassembler(stream_);
-  set_coloured_trace(false);
-  disasm_trace_ = false;
-
   // Set the sample period to 10, as the VIXL examples and tests are short.
   instrumentation_ = new Instrument("vixl_stats.csv", 10);
 
@@ -108,9 +108,12 @@
   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
     set_xreg(i, 0xbadbeef);
   }
+  // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
+  uint64_t nan_bits = UINT64_C(0x7ff0dead7f8beef1);
+  VIXL_ASSERT(IsSignallingNaN(rawbits_to_double(nan_bits & kDRegMask)));
+  VIXL_ASSERT(IsSignallingNaN(rawbits_to_float(nan_bits & kSRegMask)));
   for (unsigned i = 0; i < kNumberOfFPRegisters; i++) {
-    // Set FP registers to a value that is NaN in both 32-bit and 64-bit FP.
-    set_dreg(i, kFP64SignallingNaN);
+    set_dreg_bits(i, nan_bits);
   }
   // Returning to address 0 exits the Simulator.
   set_lr(kEndOfSimAddress);
@@ -213,7 +216,7 @@
 }
 
 
-#define COLOUR(colour_code)  "\033[0;" colour_code "m"
+#define COLOUR(colour_code)       "\033[0;" colour_code "m"
 #define COLOUR_BOLD(colour_code)  "\033[1;" colour_code "m"
 #define NORMAL  ""
 #define GREY    "30"
@@ -228,22 +231,45 @@
   coloured_trace_ = value;
 
   clr_normal          = value ? COLOUR(NORMAL)        : "";
-  clr_flag_name       = value ? COLOUR_BOLD(GREY)     : "";
-  clr_flag_value      = value ? COLOUR_BOLD(WHITE)    : "";
-  clr_reg_name        = value ? COLOUR_BOLD(BLUE)     : "";
-  clr_reg_value       = value ? COLOUR_BOLD(CYAN)     : "";
-  clr_fpreg_name      = value ? COLOUR_BOLD(YELLOW)   : "";
-  clr_fpreg_value     = value ? COLOUR_BOLD(MAGENTA)  : "";
-  clr_memory_value    = value ? COLOUR_BOLD(GREEN)    : "";
-  clr_memory_address  = value ? COLOUR(GREEN)         : "";
-  clr_debug_number    = value ? COLOUR_BOLD(YELLOW)   : "";
-  clr_debug_message   = value ? COLOUR(YELLOW)        : "";
-  clr_warning         = value ? COLOUR_BOLD(RED)      : "";
-  clr_warning_message = value ? COLOUR(RED)           : "";
+  clr_flag_name       = value ? COLOUR_BOLD(WHITE)    : "";
+  clr_flag_value      = value ? COLOUR(NORMAL)        : "";
+  clr_reg_name        = value ? COLOUR_BOLD(CYAN)     : "";
+  clr_reg_value       = value ? COLOUR(CYAN)          : "";
+  clr_fpreg_name      = value ? COLOUR_BOLD(MAGENTA)  : "";
+  clr_fpreg_value     = value ? COLOUR(MAGENTA)       : "";
+  clr_memory_address  = value ? COLOUR_BOLD(BLUE)     : "";
+  clr_warning         = value ? COLOUR_BOLD(YELLOW)   : "";
+  clr_warning_message = value ? COLOUR(YELLOW)        : "";
   clr_printf          = value ? COLOUR(GREEN)         : "";
 }
 
 
+void Simulator::set_trace_parameters(int parameters) {
+  bool disasm_before = trace_parameters_ & LOG_DISASM;
+  trace_parameters_ = parameters;
+  bool disasm_after = trace_parameters_ & LOG_DISASM;
+
+  if (disasm_before != disasm_after) {
+    if (disasm_after) {
+      decoder_->InsertVisitorBefore(print_disasm_, this);
+    } else {
+      decoder_->RemoveVisitor(print_disasm_);
+    }
+  }
+}
+
+
+void Simulator::set_instruction_stats(bool value) {
+  if (value != instruction_stats_) {
+    if (value) {
+      decoder_->AppendVisitor(instrumentation_);
+    } else {
+      decoder_->RemoveVisitor(instrumentation_);
+    }
+    instruction_stats_ = value;
+  }
+}
+
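+// Host-side usage sketch (illustrative; only the setters below are defined by
+// this patch, the rest is the usual decoder/simulator wiring): switch on
+// disassembly and register tracing, plus instruction statistics, before
+// running generated code.
+//
+//   Decoder decoder;
+//   Simulator simulator(&decoder);
+//   simulator.set_coloured_trace(true);
+//   simulator.set_trace_parameters(LOG_DISASM | LOG_REGS | LOG_SYS_REGS);
+//   simulator.set_instruction_stats(true);
+//   ... then execute the generated code, e.g. with simulator.RunFrom(...).
+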
 // Helpers ---------------------------------------------------------------------
 int64_t Simulator::AddWithCarry(unsigned reg_size,
                                 bool set_flags,
@@ -295,6 +321,7 @@
     nzcv().SetZ(Z);
     nzcv().SetC(C);
     nzcv().SetV(V);
+    LogSystemRegister(NZCV);
   }
   return result;
 }
@@ -395,102 +422,216 @@
   } else {
     VIXL_UNREACHABLE();
   }
+  LogSystemRegister(NZCV);
 }
 
 
-void Simulator::PrintSystemRegisters(bool print_all) {
-  static bool first_run = true;
-
-  static SimSystemRegister last_nzcv;
-  if (print_all || first_run || (last_nzcv.RawValue() != nzcv().RawValue())) {
-    fprintf(stream_, "# %sFLAGS: %sN:%d Z:%d C:%d V:%d%s\n",
-            clr_flag_name,
-            clr_flag_value,
-            N(), Z(), C(), V(),
-            clr_normal);
-  }
-  last_nzcv = nzcv();
-
-  static SimSystemRegister last_fpcr;
-  if (print_all || first_run || (last_fpcr.RawValue() != fpcr().RawValue())) {
-    static const char * rmode[] = {
-      "0b00 (Round to Nearest)",
-      "0b01 (Round towards Plus Infinity)",
-      "0b10 (Round towards Minus Infinity)",
-      "0b11 (Round towards Zero)"
-    };
-    VIXL_ASSERT(fpcr().RMode() < (sizeof(rmode) / sizeof(rmode[0])));
-    fprintf(stream_, "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
-            clr_flag_name,
-            clr_flag_value,
-            fpcr().AHP(), fpcr().DN(), fpcr().FZ(), rmode[fpcr().RMode()],
-            clr_normal);
-  }
-  last_fpcr = fpcr();
-
-  first_run = false;
+void Simulator::PrintSystemRegisters() {
+  PrintSystemRegister(NZCV);
+  PrintSystemRegister(FPCR);
 }
 
 
-void Simulator::PrintRegisters(bool print_all_regs) {
-  static bool first_run = true;
-  static int64_t last_regs[kNumberOfRegisters];
-
+void Simulator::PrintRegisters() {
   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
-    if (print_all_regs || first_run ||
-        (last_regs[i] != xreg(i, Reg31IsStackPointer))) {
-      fprintf(stream_,
-              "# %s%4s:%s 0x%016" PRIx64 "%s\n",
-              clr_reg_name,
-              XRegNameForCode(i, Reg31IsStackPointer),
-              clr_reg_value,
-              xreg(i, Reg31IsStackPointer),
-              clr_normal);
-    }
-    // Cache the new register value so the next run can detect any changes.
-    last_regs[i] = xreg(i, Reg31IsStackPointer);
+    PrintRegister(i);
   }
-  first_run = false;
 }
 
 
-void Simulator::PrintFPRegisters(bool print_all_regs) {
-  static bool first_run = true;
-  static uint64_t last_regs[kNumberOfFPRegisters];
-
-  // Print as many rows of registers as necessary, keeping each individual
-  // register in the same column each time (to make it easy to visually scan
-  // for changes).
+void Simulator::PrintFPRegisters() {
   for (unsigned i = 0; i < kNumberOfFPRegisters; i++) {
-    if (print_all_regs || first_run || (last_regs[i] != dreg_bits(i))) {
-      fprintf(stream_,
-              "# %s%4s:%s 0x%016" PRIx64 "%s (%s%s:%s %g%s %s:%s %g%s)\n",
-              clr_fpreg_name,
-              VRegNameForCode(i),
-              clr_fpreg_value,
-              dreg_bits(i),
-              clr_normal,
-              clr_fpreg_name,
-              DRegNameForCode(i),
-              clr_fpreg_value,
-              dreg(i),
-              clr_fpreg_name,
-              SRegNameForCode(i),
-              clr_fpreg_value,
-              sreg(i),
-              clr_normal);
-    }
-    // Cache the new register value so the next run can detect any changes.
-    last_regs[i] = dreg_bits(i);
+    PrintFPRegister(i);
   }
-  first_run = false;
 }
 
 
-void Simulator::PrintProcessorState() {
-  PrintSystemRegisters();
-  PrintRegisters();
-  PrintFPRegisters();
+void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) {
+  // Don't print writes into xzr.
+  if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) {
+    return;
+  }
+
+  // The template is "# x<code>:value".
+  fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s\n",
+          clr_reg_name, XRegNameForCode(code, r31mode),
+          clr_reg_value, reg<uint64_t>(code, r31mode), clr_normal);
+}
+
+
+void Simulator::PrintFPRegister(unsigned code, PrintFPRegisterSizes sizes) {
+  // The template is "# v<code>:bits (d<code>:value, ...)".
+
+  VIXL_ASSERT(sizes != 0);
+  VIXL_ASSERT((sizes & kPrintAllFPRegValues) == sizes);
+
+  // Print the raw bits.
+  fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s (",
+          clr_fpreg_name, VRegNameForCode(code),
+          clr_fpreg_value, fpreg<uint64_t>(code), clr_normal);
+
+  // Print all requested value interpretations.
+  bool need_separator = false;
+  if (sizes & kPrintDRegValue) {
+    fprintf(stream_, "%s%s%s: %s%g%s",
+            need_separator ? ", " : "",
+            clr_fpreg_name, DRegNameForCode(code),
+            clr_fpreg_value, fpreg<double>(code), clr_normal);
+    need_separator = true;
+  }
+
+  if (sizes & kPrintSRegValue) {
+    fprintf(stream_, "%s%s%s: %s%g%s",
+            need_separator ? ", " : "",
+            clr_fpreg_name, SRegNameForCode(code),
+            clr_fpreg_value, fpreg<float>(code), clr_normal);
+    need_separator = true;
+  }
+
+  // End the value list.
+  fprintf(stream_, ")\n");
+}
+
+
+void Simulator::PrintSystemRegister(SystemRegister id) {
+  switch (id) {
+    case NZCV:
+      fprintf(stream_, "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
+              clr_flag_name, clr_flag_value,
+              nzcv().N(), nzcv().Z(), nzcv().C(), nzcv().V(),
+              clr_normal);
+      break;
+    case FPCR: {
+      static const char * rmode[] = {
+        "0b00 (Round to Nearest)",
+        "0b01 (Round towards Plus Infinity)",
+        "0b10 (Round towards Minus Infinity)",
+        "0b11 (Round towards Zero)"
+      };
+      VIXL_ASSERT(fpcr().RMode() < (sizeof(rmode) / sizeof(rmode[0])));
+      fprintf(stream_,
+              "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
+              clr_flag_name, clr_flag_value,
+              fpcr().AHP(), fpcr().DN(), fpcr().FZ(), rmode[fpcr().RMode()],
+              clr_normal);
+      break;
+    }
+    default:
+      VIXL_UNREACHABLE();
+  }
+}
+
+
+void Simulator::PrintRead(uintptr_t address,
+                          size_t size,
+                          unsigned reg_code) {
+  USE(size);  // Size is unused here.
+
+  // The template is "# x<code>:value <- address".
+  fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s",
+          clr_reg_name, XRegNameForCode(reg_code),
+          clr_reg_value, reg<uint64_t>(reg_code), clr_normal);
+
+  fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
+          clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintReadFP(uintptr_t address,
+                            size_t size,
+                            unsigned reg_code) {
+  // The template is "# reg:bits (reg:value) <- address".
+  switch (size) {
+    case kSRegSizeInBytes:
+      VIXL_STATIC_ASSERT(kSRegSizeInBytes == 4);
+      fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s (%s%s: %s%gf%s)",
+              clr_fpreg_name, VRegNameForCode(reg_code),
+              clr_fpreg_value, fpreg<uint64_t>(reg_code), clr_normal,
+              clr_fpreg_name, SRegNameForCode(reg_code),
+              clr_fpreg_value, fpreg<float>(reg_code), clr_normal);
+      break;
+    case kDRegSizeInBytes:
+      VIXL_STATIC_ASSERT(kDRegSizeInBytes == 8);
+      fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s (%s%s: %s%g%s)",
+              clr_fpreg_name, VRegNameForCode(reg_code),
+              clr_fpreg_value, fpreg<uint64_t>(reg_code), clr_normal,
+              clr_fpreg_name, DRegNameForCode(reg_code),
+              clr_fpreg_value, fpreg<double>(reg_code), clr_normal);
+      break;
+    default:
+      VIXL_UNREACHABLE();
+  }
+
+  fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
+          clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintWrite(uintptr_t address,
+                           size_t size,
+                           unsigned reg_code) {
+  // The template is "# reg:value -> address". To keep the trace tidy and
+  // readable, the value is aligned with the values in the register trace.
+  switch (size) {
+    case 1:
+      fprintf(stream_, "# %s%5s<7:0>:          %s0x%02" PRIx8 "%s",
+              clr_reg_name, WRegNameForCode(reg_code),
+              clr_reg_value, reg<uint8_t>(reg_code), clr_normal);
+      break;
+    case 2:
+      fprintf(stream_, "# %s%5s<15:0>:       %s0x%04" PRIx16 "%s",
+              clr_reg_name, WRegNameForCode(reg_code),
+              clr_reg_value, reg<uint16_t>(reg_code), clr_normal);
+      break;
+    case kWRegSizeInBytes:
+      VIXL_STATIC_ASSERT(kWRegSizeInBytes == 4);
+      fprintf(stream_, "# %s%5s:         %s0x%08" PRIx32 "%s",
+              clr_reg_name, WRegNameForCode(reg_code),
+              clr_reg_value, reg<uint32_t>(reg_code), clr_normal);
+      break;
+    case kXRegSizeInBytes:
+      VIXL_STATIC_ASSERT(kXRegSizeInBytes == 8);
+      fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s",
+              clr_reg_name, XRegNameForCode(reg_code),
+              clr_reg_value, reg<uint64_t>(reg_code), clr_normal);
+      break;
+    default:
+      VIXL_UNREACHABLE();
+  }
+
+  fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
+          clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintWriteFP(uintptr_t address,
+                             size_t size,
+                             unsigned reg_code) {
+  // The template is "# reg:bits (reg:value) -> address". To keep the trace tidy
+  // and readable, the value is aligned with the values in the register trace.
+  switch (size) {
+    case kSRegSizeInBytes:
+      VIXL_STATIC_ASSERT(kSRegSize == 32);
+      fprintf(stream_, "# %s%5s<31:0>:   %s0x%08" PRIx32 "%s (%s%s: %s%gf%s)",
+              clr_fpreg_name, VRegNameForCode(reg_code),
+              clr_fpreg_value, fpreg<uint32_t>(reg_code), clr_normal,
+              clr_fpreg_name, SRegNameForCode(reg_code),
+              clr_fpreg_value, fpreg<float>(reg_code), clr_normal);
+      break;
+    case kDRegSizeInBytes:
+      VIXL_STATIC_ASSERT(kDRegSize == 64);
+      fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s (%s%s: %s%g%s)",
+              clr_fpreg_name, VRegNameForCode(reg_code),
+              clr_fpreg_value, fpreg<uint64_t>(reg_code), clr_normal,
+              clr_fpreg_name, DRegNameForCode(reg_code),
+              clr_fpreg_value, fpreg<double>(reg_code), clr_normal);
+      break;
+    default:
+      VIXL_UNREACHABLE();
+  }
+
+  fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
+          clr_memory_address, address, clr_normal);
 }
 
 
@@ -612,7 +753,7 @@
     default: VIXL_UNREACHABLE();
   }
 
-  set_reg(reg_size, instr->Rd(), new_val, instr->RdMode());
+  set_reg(reg_size, instr->Rd(), new_val, LogRegWrites, instr->RdMode());
 }
 
 
@@ -701,9 +842,10 @@
     nzcv().SetZ(CalcZFlag(result));
     nzcv().SetC(0);
     nzcv().SetV(0);
+    LogSystemRegister(NZCV);
   }
 
-  set_reg(reg_size, instr->Rd(), result, instr->RdMode());
+  set_reg(reg_size, instr->Rd(), result, LogRegWrites, instr->RdMode());
 }
 
 
@@ -735,6 +877,7 @@
   } else {
     // If the condition fails, set the status flags to the nzcv immediate.
     nzcv().SetFlags(instr->Nzcv());
+    LogSystemRegister(NZCV);
   }
 }
 
@@ -775,21 +918,45 @@
                                 int64_t offset,
                                 AddrMode addrmode) {
   unsigned srcdst = instr->Rt();
-  uint8_t* address = AddressModeHelper(instr->Rn(), offset, addrmode);
+  uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
 
   LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreOpMask));
   switch (op) {
-    case LDRB_w:  set_wreg(srcdst, MemoryRead<uint8_t>(address)); break;
-    case LDRH_w:  set_wreg(srcdst, MemoryRead<uint16_t>(address)); break;
-    case LDR_w:   set_wreg(srcdst, MemoryRead<uint32_t>(address)); break;
-    case LDR_x:   set_xreg(srcdst, MemoryRead<uint64_t>(address)); break;
-    case LDRSB_w: set_wreg(srcdst, MemoryRead<int8_t>(address)); break;
-    case LDRSH_w: set_wreg(srcdst, MemoryRead<int16_t>(address)); break;
-    case LDRSB_x: set_xreg(srcdst, MemoryRead<int8_t>(address)); break;
-    case LDRSH_x: set_xreg(srcdst, MemoryRead<int16_t>(address)); break;
-    case LDRSW_x: set_xreg(srcdst, MemoryRead<int32_t>(address)); break;
-    case LDR_s:   set_sreg(srcdst, MemoryRead<float>(address)); break;
-    case LDR_d:   set_dreg(srcdst, MemoryRead<double>(address)); break;
+    // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
+    // will print a more detailed log.
+    case LDRB_w:
+      set_wreg(srcdst, MemoryRead<uint8_t>(address), NoRegLog);
+      break;
+    case LDRH_w:
+      set_wreg(srcdst, MemoryRead<uint16_t>(address), NoRegLog);
+      break;
+    case LDR_w:
+      set_wreg(srcdst, MemoryRead<uint32_t>(address), NoRegLog);
+      break;
+    case LDR_x:
+      set_xreg(srcdst, MemoryRead<uint64_t>(address), NoRegLog);
+      break;
+    case LDRSB_w:
+      set_wreg(srcdst, MemoryRead<int8_t>(address), NoRegLog);
+      break;
+    case LDRSH_w:
+      set_wreg(srcdst, MemoryRead<int16_t>(address), NoRegLog);
+      break;
+    case LDRSB_x:
+      set_xreg(srcdst, MemoryRead<int8_t>(address), NoRegLog);
+      break;
+    case LDRSH_x:
+      set_xreg(srcdst, MemoryRead<int16_t>(address), NoRegLog);
+      break;
+    case LDRSW_x:
+      set_xreg(srcdst, MemoryRead<int32_t>(address), NoRegLog);
+      break;
+    case LDR_s:
+      set_sreg(srcdst, MemoryRead<float>(address), NoRegLog);
+      break;
+    case LDR_d:
+      set_dreg(srcdst, MemoryRead<double>(address), NoRegLog);
+      break;
 
     case STRB_w:  MemoryWrite<uint8_t>(address, wreg(srcdst)); break;
     case STRH_w:  MemoryWrite<uint16_t>(address, wreg(srcdst)); break;
@@ -798,9 +965,27 @@
     case STR_s:   MemoryWrite<float>(address, sreg(srcdst)); break;
     case STR_d:   MemoryWrite<double>(address, dreg(srcdst)); break;
 
+    // Ignore prfm hint instructions.
+    case PRFM: break;
+
     default: VIXL_UNIMPLEMENTED();
   }
 
+  size_t access_size = 1 << instr->SizeLS();
+  if (instr->IsLoad()) {
+    if ((op == LDR_s) || (op == LDR_d)) {
+      LogReadFP(address, access_size, srcdst);
+    } else {
+      LogRead(address, access_size, srcdst);
+    }
+  } else {
+    if ((op == STR_s) || (op == STR_d)) {
+      LogWriteFP(address, access_size, srcdst);
+    } else {
+      LogWrite(address, access_size, srcdst);
+    }
+  }
+
   local_monitor_.MaybeClear();
 }
 
@@ -829,8 +1014,10 @@
                                     AddrMode addrmode) {
   unsigned rt = instr->Rt();
   unsigned rt2 = instr->Rt2();
-  int offset = instr->ImmLSPair() << instr->SizeLSPair();
-  uint8_t* address = AddressModeHelper(instr->Rn(), offset, addrmode);
+  size_t access_size = 1 << instr->SizeLSPair();
+  int64_t offset = instr->ImmLSPair() * access_size;
+  uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
+  uintptr_t address2 = address + access_size;
 
   LoadStorePairOp op =
     static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));
@@ -839,54 +1026,85 @@
   VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
 
   switch (op) {
+    // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
+    // will print a more detailed log.
     case LDP_w: {
-      set_wreg(rt, MemoryRead<uint32_t>(address));
-      set_wreg(rt2, MemoryRead<uint32_t>(address + kWRegSizeInBytes));
+      VIXL_ASSERT(access_size == kWRegSizeInBytes);
+      set_wreg(rt, MemoryRead<uint32_t>(address), NoRegLog);
+      set_wreg(rt2, MemoryRead<uint32_t>(address2), NoRegLog);
       break;
     }
     case LDP_s: {
-      set_sreg(rt, MemoryRead<float>(address));
-      set_sreg(rt2, MemoryRead<float>(address + kSRegSizeInBytes));
+      VIXL_ASSERT(access_size == kSRegSizeInBytes);
+      set_sreg(rt, MemoryRead<float>(address), NoRegLog);
+      set_sreg(rt2, MemoryRead<float>(address2), NoRegLog);
       break;
     }
     case LDP_x: {
-      set_xreg(rt, MemoryRead<uint64_t>(address));
-      set_xreg(rt2, MemoryRead<uint64_t>(address + kXRegSizeInBytes));
+      VIXL_ASSERT(access_size == kXRegSizeInBytes);
+      set_xreg(rt, MemoryRead<uint64_t>(address), NoRegLog);
+      set_xreg(rt2, MemoryRead<uint64_t>(address2), NoRegLog);
       break;
     }
     case LDP_d: {
-      set_dreg(rt, MemoryRead<double>(address));
-      set_dreg(rt2, MemoryRead<double>(address + kDRegSizeInBytes));
+      VIXL_ASSERT(access_size == kDRegSizeInBytes);
+      set_dreg(rt, MemoryRead<double>(address), NoRegLog);
+      set_dreg(rt2, MemoryRead<double>(address2), NoRegLog);
       break;
     }
     case LDPSW_x: {
-      set_xreg(rt, MemoryRead<int32_t>(address));
-      set_xreg(rt2, MemoryRead<int32_t>(address + kWRegSizeInBytes));
+      VIXL_ASSERT(access_size == kWRegSizeInBytes);
+      set_xreg(rt, MemoryRead<int32_t>(address), NoRegLog);
+      set_xreg(rt2, MemoryRead<int32_t>(address2), NoRegLog);
       break;
     }
     case STP_w: {
+      VIXL_ASSERT(access_size == kWRegSizeInBytes);
       MemoryWrite<uint32_t>(address, wreg(rt));
-      MemoryWrite<uint32_t>(address + kWRegSizeInBytes, wreg(rt2));
+      MemoryWrite<uint32_t>(address2, wreg(rt2));
       break;
     }
     case STP_s: {
+      VIXL_ASSERT(access_size == kSRegSizeInBytes);
       MemoryWrite<float>(address, sreg(rt));
-      MemoryWrite<float>(address + kSRegSizeInBytes, sreg(rt2));
+      MemoryWrite<float>(address2, sreg(rt2));
       break;
     }
     case STP_x: {
+      VIXL_ASSERT(access_size == kXRegSizeInBytes);
       MemoryWrite<uint64_t>(address, xreg(rt));
-      MemoryWrite<uint64_t>(address + kXRegSizeInBytes, xreg(rt2));
+      MemoryWrite<uint64_t>(address2, xreg(rt2));
       break;
     }
     case STP_d: {
+      VIXL_ASSERT(access_size == kDRegSizeInBytes);
       MemoryWrite<double>(address, dreg(rt));
-      MemoryWrite<double>(address + kDRegSizeInBytes, dreg(rt2));
+      MemoryWrite<double>(address2, dreg(rt2));
       break;
     }
     default: VIXL_UNREACHABLE();
   }
 
+  // Print a detailed trace (including the memory address) instead of the basic
+  // register:value trace generated by set_*reg().
+  if (instr->IsLoad()) {
+    if ((op == LDP_s) || (op == LDP_d)) {
+      LogReadFP(address, access_size, rt);
+      LogReadFP(address2, access_size, rt2);
+    } else {
+      LogRead(address, access_size, rt);
+      LogRead(address2, access_size, rt2);
+    }
+  } else {
+    if ((op == STP_s) || (op == STP_d)) {
+      LogWriteFP(address, access_size, rt);
+      LogWriteFP(address2, access_size, rt2);
+    } else {
+      LogWrite(address, access_size, rt);
+      LogWrite(address2, access_size, rt2);
+    }
+  }
+
   local_monitor_.MaybeClear();
 }
 
@@ -919,9 +1137,12 @@
   bool is_load = instr->LdStXLoad();
   bool is_pair = instr->LdStXPair();
 
-  uint8_t * address = reg<uint8_t *>(rn, Reg31IsStackPointer);
   size_t element_size = 1 << instr->LdStXSizeLog2();
   size_t access_size = is_pair ? element_size * 2 : element_size;
+  uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+  // Verify that the address is available to the host.
+  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
 
   // Check the alignment of `address`.
   if (AlignDown(address, access_size) != address) {
@@ -942,36 +1163,38 @@
       local_monitor_.Clear();
     }
 
+    // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
+    // will print a more detailed log.
     switch (op) {
       case LDXRB_w:
       case LDAXRB_w:
       case LDARB_w:
-        set_wreg(rt, MemoryRead<uint8_t>(address));
+        set_wreg(rt, MemoryRead<uint8_t>(address), NoRegLog);
         break;
       case LDXRH_w:
       case LDAXRH_w:
       case LDARH_w:
-        set_wreg(rt, MemoryRead<uint16_t>(address));
+        set_wreg(rt, MemoryRead<uint16_t>(address), NoRegLog);
         break;
       case LDXR_w:
       case LDAXR_w:
       case LDAR_w:
-        set_wreg(rt, MemoryRead<uint32_t>(address));
+        set_wreg(rt, MemoryRead<uint32_t>(address), NoRegLog);
         break;
       case LDXR_x:
       case LDAXR_x:
       case LDAR_x:
-        set_xreg(rt, MemoryRead<uint64_t>(address));
+        set_xreg(rt, MemoryRead<uint64_t>(address), NoRegLog);
         break;
       case LDXP_w:
       case LDAXP_w:
-        set_wreg(rt, MemoryRead<uint32_t>(address));
-        set_wreg(rt2, MemoryRead<uint32_t>(address + element_size));
+        set_wreg(rt, MemoryRead<uint32_t>(address), NoRegLog);
+        set_wreg(rt2, MemoryRead<uint32_t>(address + element_size), NoRegLog);
         break;
       case LDXP_x:
       case LDAXP_x:
-        set_xreg(rt, MemoryRead<uint64_t>(address));
-        set_xreg(rt2, MemoryRead<uint64_t>(address + element_size));
+        set_xreg(rt, MemoryRead<uint64_t>(address), NoRegLog);
+        set_xreg(rt2, MemoryRead<uint64_t>(address + element_size), NoRegLog);
         break;
       default:
         VIXL_UNREACHABLE();
@@ -981,6 +1204,11 @@
       // Approximate load-acquire by issuing a full barrier after the load.
       __sync_synchronize();
     }
+
+    LogRead(address, access_size, rt);
+    if (is_pair) {
+      LogRead(address + element_size, access_size, rt2);
+    }
   } else {
     if (is_acquire_release) {
       // Approximate store-release by issuing a full barrier before the store.
@@ -1035,21 +1263,50 @@
         default:
           VIXL_UNREACHABLE();
       }
+
+      LogWrite(address, element_size, rt);
+      if (is_pair) {
+        LogWrite(address + element_size, element_size, rt2);
+      }
     }
   }
 }
 
 
 void Simulator::VisitLoadLiteral(const Instruction* instr) {
-  uint8_t* address = instr->LiteralAddress();
   unsigned rt = instr->Rt();
+  uint64_t address = instr->LiteralAddress<uint64_t>();
+
+  // Verify that the calculated address is available to the host.
+  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
 
   switch (instr->Mask(LoadLiteralMask)) {
-    case LDR_w_lit: set_wreg(rt, MemoryRead<uint32_t>(address)); break;
-    case LDR_x_lit: set_xreg(rt, MemoryRead<uint64_t>(address)); break;
-    case LDR_s_lit: set_sreg(rt, MemoryRead<float>(address)); break;
-    case LDR_d_lit: set_dreg(rt, MemoryRead<double>(address)); break;
-    case LDRSW_x_lit: set_xreg(rt, MemoryRead<int32_t>(address)); break;
+    // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS), then
+    // print a more detailed log.
+    case LDR_w_lit:
+      set_wreg(rt, MemoryRead<uint32_t>(address), NoRegLog);
+      LogRead(address, kWRegSizeInBytes, rt);
+      break;
+    case LDR_x_lit:
+      set_xreg(rt, MemoryRead<uint64_t>(address), NoRegLog);
+      LogRead(address, kXRegSizeInBytes, rt);
+      break;
+    case LDR_s_lit:
+      set_sreg(rt, MemoryRead<float>(address), NoRegLog);
+      LogReadFP(address, kSRegSizeInBytes, rt);
+      break;
+    case LDR_d_lit:
+      set_dreg(rt, MemoryRead<double>(address), NoRegLog);
+      LogReadFP(address, kDRegSizeInBytes, rt);
+      break;
+    case LDRSW_x_lit:
+      set_xreg(rt, MemoryRead<int32_t>(address), NoRegLog);
+      LogRead(address, kWRegSizeInBytes, rt);
+      break;
+
+    // Ignore prfm hint instructions.
+    case PRFM_lit: break;
+
     default: VIXL_UNREACHABLE();
   }
 
@@ -1057,9 +1314,9 @@
 }
 
 
-uint8_t* Simulator::AddressModeHelper(unsigned addr_reg,
-                                      int64_t offset,
-                                      AddrMode addrmode) {
+uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
+                                       int64_t offset,
+                                       AddrMode addrmode) {
   uint64_t address = xreg(addr_reg, Reg31IsStackPointer);
 
   if ((addr_reg == 31) && ((address % 16) != 0)) {
@@ -1071,7 +1328,7 @@
 
   if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
     VIXL_ASSERT(offset != 0);
-    set_xreg(addr_reg, address + offset, Reg31IsStackPointer);
+    set_xreg(addr_reg, address + offset, LogRegWrites, Reg31IsStackPointer);
   }
 
   if ((addrmode == Offset) || (addrmode == PreIndex)) {
@@ -1081,7 +1338,7 @@
   // Verify that the calculated address is available to the host.
   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
 
-  return reinterpret_cast<uint8_t*>(address);
+  return static_cast<uintptr_t>(address);
 }
 
 
@@ -1616,6 +1873,7 @@
         FPCompare(sreg(instr->Rn()), sreg(instr->Rm()));
       } else {
         nzcv().SetFlags(instr->Nzcv());
+        LogSystemRegister(NZCV);
       }
       break;
     case FCCMP_d:
@@ -1623,6 +1881,7 @@
         FPCompare(dreg(instr->Rn()), dreg(instr->Rm()));
       } else {
         nzcv().SetFlags(instr->Nzcv());
+        LogSystemRegister(NZCV);
       }
       break;
     default: VIXL_UNIMPLEMENTED();
@@ -1650,6 +1909,7 @@
 
 void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
   AssertSupportedFPCR();
+  const FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
 
   unsigned fd = instr->Rd();
   unsigned fn = instr->Rn();
@@ -1665,12 +1925,32 @@
     case FSQRT_d: set_dreg(fd, FPSqrt(dreg(fn))); break;
     case FRINTA_s: set_sreg(fd, FPRoundInt(sreg(fn), FPTieAway)); break;
     case FRINTA_d: set_dreg(fd, FPRoundInt(dreg(fn), FPTieAway)); break;
+    case FRINTI_s: set_sreg(fd, FPRoundInt(sreg(fn), fpcr_rounding)); break;
+    case FRINTI_d: set_dreg(fd, FPRoundInt(dreg(fn), fpcr_rounding)); break;
     case FRINTM_s:
         set_sreg(fd, FPRoundInt(sreg(fn), FPNegativeInfinity)); break;
     case FRINTM_d:
         set_dreg(fd, FPRoundInt(dreg(fn), FPNegativeInfinity)); break;
     case FRINTN_s: set_sreg(fd, FPRoundInt(sreg(fn), FPTieEven)); break;
     case FRINTN_d: set_dreg(fd, FPRoundInt(dreg(fn), FPTieEven)); break;
+    case FRINTP_s:
+        set_sreg(fd, FPRoundInt(sreg(fn), FPPositiveInfinity)); break;
+    case FRINTP_d:
+        set_dreg(fd, FPRoundInt(dreg(fn), FPPositiveInfinity)); break;
+    case FRINTX_s: {
+      float input = sreg(fn);
+      float rounded = FPRoundInt(input, fpcr_rounding);
+      set_sreg(fd, rounded);
+      if (!isnan(input) && (input != rounded)) FPProcessException();
+      break;
+    }
+    case FRINTX_d: {
+      double input = dreg(fn);
+      double rounded = FPRoundInt(input, fpcr_rounding);
+      set_dreg(fd, rounded);
+      if (!isnan(input) && (input != rounded)) FPProcessException();
+      break;
+    }
     case FRINTZ_s: set_sreg(fd, FPRoundInt(sreg(fn), FPZero)); break;
     case FRINTZ_d: set_dreg(fd, FPRoundInt(dreg(fn), FPZero)); break;
     case FCVT_ds: set_dreg(fd, FPToDouble(sreg(fn))); break;
@@ -1973,6 +2253,18 @@
       // We always use floor(value).
       break;
     }
+    case FPPositiveInfinity: {
+      // Take care of correctly handling the range ]-1.0, -0.0], which must
+      // yield -0.0.
+      if ((-1.0 < value) && (value < 0.0)) {
+        int_result = -0.0;
+
+      // If the error is non-zero, round up.
+      } else if (error > 0.0) {
+        int_result++;
+      }
+      break;
+    }
     default: VIXL_UNIMPLEMENTED();
   }
   return int_result;
@@ -2426,8 +2718,14 @@
       }
       case MSR: {
         switch (instr->ImmSystemRegister()) {
-          case NZCV: nzcv().SetRawValue(xreg(instr->Rt())); break;
-          case FPCR: fpcr().SetRawValue(xreg(instr->Rt())); break;
+          case NZCV:
+            nzcv().SetRawValue(xreg(instr->Rt()));
+            LogSystemRegister(NZCV);
+            break;
+          case FPCR:
+            fpcr().SetRawValue(xreg(instr->Rt()));
+            LogSystemRegister(FPCR);
+            break;
           default: VIXL_UNIMPLEMENTED();
         }
         break;
@@ -2449,22 +2747,91 @@
 
 void Simulator::VisitException(const Instruction* instr) {
   switch (instr->Mask(ExceptionMask)) {
-    case BRK: HostBreakpoint(); break;
     case HLT:
-      // The Printf pseudo instruction is so useful, we include it in the
-      // default simulator.
-      if (instr->ImmException() == kPrintfOpcode) {
-        DoPrintf(instr);
-      } else {
-        HostBreakpoint();
+      switch (instr->ImmException()) {
+        case kUnreachableOpcode:
+          DoUnreachable(instr);
+          return;
+        case kTraceOpcode:
+          DoTrace(instr);
+          return;
+        case kLogOpcode:
+          DoLog(instr);
+          return;
+        case kPrintfOpcode:
+          DoPrintf(instr);
+          return;
+        default:
+          HostBreakpoint();
+          return;
       }
-      break;
+    case BRK:
+      HostBreakpoint();
+      return;
     default:
       VIXL_UNIMPLEMENTED();
   }
 }
 
 
+void Simulator::DoUnreachable(const Instruction* instr) {
+  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+              (instr->ImmException() == kUnreachableOpcode));
+
+  fprintf(stream_, "Hit UNREACHABLE marker at pc=%p.\n",
+          reinterpret_cast<const void*>(instr));
+  abort();
+}
+
+
+void Simulator::DoTrace(const Instruction* instr) {
+  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+              (instr->ImmException() == kTraceOpcode));
+
+  // Read the arguments encoded inline in the instruction stream.
+  uint32_t parameters;
+  uint32_t command;
+
+  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+  memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
+  memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
+
+  switch (command) {
+    case TRACE_ENABLE:
+      set_trace_parameters(trace_parameters() | parameters);
+      break;
+    case TRACE_DISABLE:
+      set_trace_parameters(trace_parameters() & ~parameters);
+      break;
+    default:
+      VIXL_UNREACHABLE();
+  }
+
+  set_pc(instr->InstructionAtOffset(kTraceLength));
+}
+
+
+void Simulator::DoLog(const Instruction* instr) {
+  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+              (instr->ImmException() == kLogOpcode));
+
+  // Read the arguments encoded inline in the instruction stream.
+  uint32_t parameters;
+
+  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+  memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
+
+  // We don't support a one-shot LOG_DISASM.
+  VIXL_ASSERT((parameters & LOG_DISASM) == 0);
+  // Print the requested information.
+  if (parameters & LOG_SYS_REGS) PrintSystemRegisters();
+  if (parameters & LOG_REGS) PrintRegisters();
+  if (parameters & LOG_FP_REGS) PrintFPRegisters();
+
+  set_pc(instr->InstructionAtOffset(kLogLength));
+}
+
+
 void Simulator::DoPrintf(const Instruction* instr) {
   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
               (instr->ImmException() == kPrintfOpcode));
diff --git a/src/a64/simulator-a64.h b/src/a64/simulator-a64.h
index 6cf0c7d..05e3494 100644
--- a/src/a64/simulator-a64.h
+++ b/src/a64/simulator-a64.h
@@ -36,13 +36,34 @@
 
 namespace vixl {
 
-enum ReverseByteMode {
-  Reverse16 = 0,
-  Reverse32 = 1,
-  Reverse64 = 2
-};
+// Debug instructions.
+//
+// VIXL's macro-assembler and simulator support a few pseudo instructions to
+// make debugging easier. These pseudo instructions do not exist on real
+// hardware.
+//
+// TODO: Provide controls to prevent the macro assembler from emitting
+// pseudo-instructions. This is important for ahead-of-time compilers, where the
+// macro assembler is built with USE_SIMULATOR but the code will eventually be
+// run on real hardware.
+//
+// TODO: Also consider allowing these pseudo-instructions to be disabled in the
+// simulator, so that users can check that the input is valid native code.
+// (This isn't possible in all cases. Printf won't work, for example.)
+//
+// Each debug pseudo instruction is represented by a HLT instruction. The HLT
+// immediate field is used to identify the type of debug pseudo instruction.
+// Each pseudo instruction uses a custom encoding for additional arguments, as
+// described below.
 
-// Printf. See debugger-a64.h for more information on pseudo instructions.
+// Unreachable
+//
+// Instruction which should never be executed. This is used as a guard in parts
+// of the code that should not be reachable, such as in data encoded inline in
+// the instructions.
+const Instr kUnreachableOpcode = 0xdeb0;
+
+// Printf
 //  - arg_count: The number of arguments.
 //  - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields.
 //
@@ -55,7 +76,7 @@
 // but the format string is not trivial to parse so we encode the relevant
 // information with the HLT instruction.
 //
-// The interface is as follows:
+// Also, the following registers are populated (as if for a native A64 call):
 //    x0: The format string
 // x1-x7: Optional arguments, if type == CPURegister::kRegister
 // d0-d7: Optional arguments, if type == CPURegister::kFPRegister
@@ -77,6 +98,49 @@
 };
 static const unsigned kPrintfArgPatternBits = 2;
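// A minimal usage sketch of the Printf pseudo instruction (illustrative;
// `masm` is assumed, and the generated code must be run on the simulator):
//
//   masm.Mov(w1, 42);
//   masm.Printf("w1 holds %d\n", w1);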
 
+// Trace
+//  - parameter: TraceParameter stored as a uint32_t
+//  - command: TraceCommand stored as a uint32_t
+//
+// Allow for trace management in the generated code. This enables or disables
+// automatic tracing of the specified information for every simulated
+// instruction.
+const Instr kTraceOpcode = 0xdeb2;
+const unsigned kTraceParamsOffset = 1 * kInstructionSize;
+const unsigned kTraceCommandOffset = 2 * kInstructionSize;
+const unsigned kTraceLength = 3 * kInstructionSize;
+
+// Trace parameters.
+enum TraceParameters {
+  LOG_DISASM     = 1 << 0,  // Log disassembly.
+  LOG_REGS       = 1 << 1,  // Log general purpose registers.
+  LOG_FP_REGS    = 1 << 2,  // Log floating-point registers.
+  LOG_SYS_REGS   = 1 << 3,  // Log the flags and system registers.
+  LOG_WRITE      = 1 << 4,  // Log writes to memory.
+
+  LOG_NONE       = 0,
+  LOG_STATE      = LOG_REGS | LOG_FP_REGS | LOG_SYS_REGS,
+  LOG_ALL        = LOG_DISASM | LOG_STATE | LOG_WRITE
+};
+
+// Trace commands.
+enum TraceCommand {
+  TRACE_ENABLE   = 1,
+  TRACE_DISABLE  = 2
+};
+
+// Log
+//  - parameter: TraceParameter stored as a uint32_t
+//
+// Print the specified information once. This mechanism is separate from Trace.
+// In particular, _all_ of the specified registers are printed, rather than just
+// the registers that the instruction writes.
+//
+// Any combination of the TraceParameters values can be used, except that
+// LOG_DISASM is not supported for Log.
+const Instr kLogOpcode = 0xdeb3;
+const unsigned kLogParamsOffset = 1 * kInstructionSize;
+const unsigned kLogLength = 2 * kInstructionSize;
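+// Usage sketch from generated code (illustrative; `masm` and the Trace/Log
+// macro-assembler helpers that emit these markers are assumed). The markers
+// are HLT-based, so the resulting code can only run on the simulator.
+//
+//   masm.Trace(LOG_ALL, TRACE_ENABLE);      // Trace everything from here on.
+//   masm.Add(x0, x0, Operand(1));
+//   masm.Trace(LOG_DISASM, TRACE_DISABLE);  // Stop tracing disassembly only.
+//   masm.Log(LOG_STATE);                    // One-shot dump of current state.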
 
 // The proper way to initialize a simulated system register (such as NZCV) is as
 // follows:
@@ -87,19 +151,19 @@
   // It is not possible to set its value to anything other than 0.
   SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) { }
 
-  inline uint32_t RawValue() const {
+  uint32_t RawValue() const {
     return value_;
   }
 
-  inline void SetRawValue(uint32_t new_value) {
+  void SetRawValue(uint32_t new_value) {
     value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_);
   }
 
-  inline uint32_t Bits(int msb, int lsb) const {
+  uint32_t Bits(int msb, int lsb) const {
     return unsigned_bitextract_32(msb, lsb, value_);
   }
 
-  inline int32_t SignedBits(int msb, int lsb) const {
+  int32_t SignedBits(int msb, int lsb) const {
     return signed_bitextract_32(msb, lsb, value_);
   }
 
@@ -109,8 +173,8 @@
   static SimSystemRegister DefaultValueFor(SystemRegister id);
 
 #define DEFINE_GETTER(Name, HighBit, LowBit, Func)                            \
-  inline uint32_t Name() const { return Func(HighBit, LowBit); }              \
-  inline void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
+  uint32_t Name() const { return Func(HighBit, LowBit); }              \
+  void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
 #define DEFINE_WRITE_IGNORE_MASK(Name, Mask)                                  \
   static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask);
 
@@ -185,26 +249,21 @@
   }
 
   // Mark the address range for exclusive access (like load-exclusive).
-  template <typename T>
-  void MarkExclusive(T address, size_t size) {
-    VIXL_STATIC_ASSERT(sizeof(address) == sizeof(address_));
-    address_ = reinterpret_cast<uintptr_t>(address);
+  void MarkExclusive(uint64_t address, size_t size) {
+    address_ = address;
     size_ = size;
   }
 
   // Return true if the address range is marked (like store-exclusive).
   // This helper doesn't implicitly clear the monitor.
-  template <typename T>
-  bool IsExclusive(T address, size_t size) {
-    VIXL_STATIC_ASSERT(sizeof(address) == sizeof(address_));
+  bool IsExclusive(uint64_t address, size_t size) {
     VIXL_ASSERT(size > 0);
     // Be pedantic: Require both the address and the size to match.
-    return (size == size_) &&
-           (reinterpret_cast<uintptr_t>(address) == address_);
+    return (size == size_) && (address == address_);
   }
 
  private:
-  uintptr_t address_;
+  uint64_t address_;
   size_t size_;
 
   const int kSkipClearProbability;
@@ -219,8 +278,7 @@
  public:
   SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {}
 
-  template <typename T>
-  bool IsExclusive(T address, size_t size) {
+  bool IsExclusive(uint64_t address, size_t size) {
     USE(address);
     USE(size);
 
@@ -248,13 +306,13 @@
   void RunFrom(const Instruction* first);
 
   // Simulation helpers.
-  inline const Instruction* pc() const { return pc_; }
-  inline void set_pc(const Instruction* new_pc) {
+  const Instruction* pc() const { return pc_; }
+  void set_pc(const Instruction* new_pc) {
     pc_ = AddressUntag(new_pc);
     pc_modified_ = true;
   }
 
-  inline void increment_pc() {
+  void increment_pc() {
     if (!pc_modified_) {
       pc_ = pc_->NextInstruction();
     }
@@ -262,7 +320,7 @@
     pc_modified_ = false;
   }
 
-  inline void ExecuteInstruction() {
+  void ExecuteInstruction() {
     // The program counter should always be aligned.
     VIXL_ASSERT(IsWordAligned(pc_));
     decoder_->Decode(pc_);
@@ -270,7 +328,7 @@
   }
 
   // Declare all Visitor functions.
-  #define DECLARE(A)  void Visit##A(const Instruction* instr);
+  #define DECLARE(A) virtual void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
   #undef DECLARE
 
@@ -278,36 +336,32 @@
 
   // Basic accessor: Read the register as the specified type.
   template<typename T>
-  inline T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
-    VIXL_STATIC_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
-                       (sizeof(T) == kXRegSizeInBytes));
+  T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
     VIXL_ASSERT(code < kNumberOfRegisters);
-
     if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
       T result;
       memset(&result, 0, sizeof(result));
       return result;
     }
-
     return registers_[code].Get<T>();
   }
 
   // Common specialized accessors for the reg() template.
-  inline int32_t wreg(unsigned code,
-                      Reg31Mode r31mode = Reg31IsZeroRegister) const {
+  int32_t wreg(unsigned code,
+               Reg31Mode r31mode = Reg31IsZeroRegister) const {
     return reg<int32_t>(code, r31mode);
   }
 
-  inline int64_t xreg(unsigned code,
-                      Reg31Mode r31mode = Reg31IsZeroRegister) const {
+  int64_t xreg(unsigned code,
+               Reg31Mode r31mode = Reg31IsZeroRegister) const {
     return reg<int64_t>(code, r31mode);
   }
 
   // As above, with parameterized size and return type. The value is
   // either zero-extended or truncated to fit, as required.
   template<typename T>
-  inline T reg(unsigned size, unsigned code,
-               Reg31Mode r31mode = Reg31IsZeroRegister) const {
+  T reg(unsigned size, unsigned code,
+        Reg31Mode r31mode = Reg31IsZeroRegister) const {
     uint64_t raw;
     switch (size) {
       case kWRegSize: raw = reg<uint32_t>(code, r31mode); break;
@@ -325,15 +379,22 @@
   }
 
   // Use int64_t by default if T is not specified.
-  inline int64_t reg(unsigned size, unsigned code,
-                     Reg31Mode r31mode = Reg31IsZeroRegister) const {
+  int64_t reg(unsigned size, unsigned code,
+              Reg31Mode r31mode = Reg31IsZeroRegister) const {
     return reg<int64_t>(size, code, r31mode);
   }
 
-  // Basic accessor: Write the specified value.
+  enum RegLogMode {
+    LogRegWrites,
+    NoRegLog
+  };
+
+  // Write 'value' into an integer register. The value is zero-extended. This
+  // behaviour matches AArch64 register writes.
   template<typename T>
-  inline void set_reg(unsigned code, T value,
-                      Reg31Mode r31mode = Reg31IsZeroRegister) {
+  void set_reg(unsigned code, T value,
+               RegLogMode log_mode = LogRegWrites,
+               Reg31Mode r31mode = Reg31IsZeroRegister) {
     VIXL_STATIC_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
                        (sizeof(T) == kXRegSizeInBytes));
     VIXL_ASSERT(code < kNumberOfRegisters);
@@ -343,24 +404,29 @@
     }
 
     registers_[code].Set(value);
+
+    if (log_mode == LogRegWrites) LogRegister(code, r31mode);
   }
 
   // Common specialized accessors for the set_reg() template.
-  inline void set_wreg(unsigned code, int32_t value,
-                       Reg31Mode r31mode = Reg31IsZeroRegister) {
-    set_reg(code, value, r31mode);
+  void set_wreg(unsigned code, int32_t value,
+                RegLogMode log_mode = LogRegWrites,
+                Reg31Mode r31mode = Reg31IsZeroRegister) {
+    set_reg(code, value, log_mode, r31mode);
   }
 
-  inline void set_xreg(unsigned code, int64_t value,
-                       Reg31Mode r31mode = Reg31IsZeroRegister) {
-    set_reg(code, value, r31mode);
+  void set_xreg(unsigned code, int64_t value,
+                RegLogMode log_mode = LogRegWrites,
+                Reg31Mode r31mode = Reg31IsZeroRegister) {
+    set_reg(code, value, log_mode, r31mode);
   }
 
   // As above, with parameterized size and type. The value is either
   // zero-extended or truncated to fit, as required.
   template<typename T>
-  inline void set_reg(unsigned size, unsigned code, T value,
-                      Reg31Mode r31mode = Reg31IsZeroRegister) {
+  void set_reg(unsigned size, unsigned code, T value,
+               RegLogMode log_mode = LogRegWrites,
+               Reg31Mode r31mode = Reg31IsZeroRegister) {
     // Zero-extend the input.
     uint64_t raw = 0;
     VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw));
@@ -368,8 +434,8 @@
 
     // Write (and possibly truncate) the value.
     switch (size) {
-      case kWRegSize: set_reg<uint32_t>(code, raw, r31mode); break;
-      case kXRegSize: set_reg<uint64_t>(code, raw, r31mode); break;
+      case kWRegSize: set_reg<uint32_t>(code, raw, log_mode, r31mode); break;
+      case kXRegSize: set_reg<uint64_t>(code, raw, log_mode, r31mode); break;
       default:
         VIXL_UNREACHABLE();
         return;
@@ -380,13 +446,13 @@
 
   // Commonly-used special cases.
   template<typename T>
-  inline void set_lr(T value) {
+  void set_lr(T value) {
     set_reg(kLinkRegCode, value);
   }
 
   template<typename T>
-  inline void set_sp(T value) {
-    set_reg(31, value, Reg31IsStackPointer);
+  void set_sp(T value) {
+    set_reg(31, value, LogRegWrites, Reg31IsStackPointer);
   }
 
   // FP register accessors.
@@ -395,7 +461,7 @@
 
   // Basic accessor: Read the register as the specified type.
   template<typename T>
-  inline T fpreg(unsigned code) const {
+  T fpreg(unsigned code) const {
     VIXL_STATIC_ASSERT((sizeof(T) == kSRegSizeInBytes) ||
                        (sizeof(T) == kDRegSizeInBytes));
     VIXL_ASSERT(code < kNumberOfFPRegisters);
@@ -404,26 +470,26 @@
   }
 
   // Common specialized accessors for the fpreg() template.
-  inline float sreg(unsigned code) const {
+  float sreg(unsigned code) const {
     return fpreg<float>(code);
   }
 
-  inline uint32_t sreg_bits(unsigned code) const {
+  uint32_t sreg_bits(unsigned code) const {
     return fpreg<uint32_t>(code);
   }
 
-  inline double dreg(unsigned code) const {
+  double dreg(unsigned code) const {
     return fpreg<double>(code);
   }
 
-  inline uint64_t dreg_bits(unsigned code) const {
+  uint64_t dreg_bits(unsigned code) const {
     return fpreg<uint64_t>(code);
   }
 
   // As above, with parameterized size and return type. The value is
   // either zero-extended or truncated to fit, as required.
   template<typename T>
-  inline T fpreg(unsigned size, unsigned code) const {
+  T fpreg(unsigned size, unsigned code) const {
     uint64_t raw;
     switch (size) {
       case kSRegSize: raw = fpreg<uint32_t>(code); break;
@@ -443,34 +509,47 @@
 
   // Basic accessor: Write the specified value.
   template<typename T>
-  inline void set_fpreg(unsigned code, T value) {
+  void set_fpreg(unsigned code, T value,
+                 RegLogMode log_mode = LogRegWrites) {
     VIXL_STATIC_ASSERT((sizeof(value) == kSRegSizeInBytes) ||
                        (sizeof(value) == kDRegSizeInBytes));
     VIXL_ASSERT(code < kNumberOfFPRegisters);
     fpregisters_[code].Set(value);
+
+    if (log_mode == LogRegWrites) {
+      if (sizeof(value) <= kSRegSizeInBytes) {
+        LogFPRegister(code, kPrintSRegValue);
+      } else {
+        LogFPRegister(code, kPrintDRegValue);
+      }
+    }
   }
 
   // Common specialized accessors for the set_fpreg() template.
-  inline void set_sreg(unsigned code, float value) {
-    set_fpreg(code, value);
+  void set_sreg(unsigned code, float value,
+                RegLogMode log_mode = LogRegWrites) {
+    set_fpreg(code, value, log_mode);
   }
 
-  inline void set_sreg_bits(unsigned code, uint32_t value) {
-    set_fpreg(code, value);
+  void set_sreg_bits(unsigned code, uint32_t value,
+                     RegLogMode log_mode = LogRegWrites) {
+    set_fpreg(code, value, log_mode);
   }
 
-  inline void set_dreg(unsigned code, double value) {
-    set_fpreg(code, value);
+  void set_dreg(unsigned code, double value,
+                RegLogMode log_mode = LogRegWrites) {
+    set_fpreg(code, value, log_mode);
   }
 
-  inline void set_dreg_bits(unsigned code, uint64_t value) {
-    set_fpreg(code, value);
+  void set_dreg_bits(unsigned code, uint64_t value,
+                     RegLogMode log_mode = LogRegWrites) {
+    set_fpreg(code, value, log_mode);
   }
 
-  bool N() { return nzcv_.N() != 0; }
-  bool Z() { return nzcv_.Z() != 0; }
-  bool C() { return nzcv_.C() != 0; }
-  bool V() { return nzcv_.V() != 0; }
+  bool N() const { return nzcv_.N() != 0; }
+  bool Z() const { return nzcv_.Z() != 0; }
+  bool C() const { return nzcv_.C() != 0; }
+  bool V() const { return nzcv_.V() != 0; }
   SimSystemRegister& nzcv() { return nzcv_; }
 
   // TODO(jbramley): Find a way to make the fpcr_ members return the proper
@@ -479,11 +558,73 @@
   bool DN() { return fpcr_.DN() != 0; }
   SimSystemRegister& fpcr() { return fpcr_; }
 
-  // Debug helpers
-  void PrintSystemRegisters(bool print_all = false);
-  void PrintRegisters(bool print_all_regs = false);
-  void PrintFPRegisters(bool print_all_regs = false);
-  void PrintProcessorState();
+  // Print all registers of the specified types.
+  void PrintRegisters();
+  void PrintFPRegisters();
+  void PrintSystemRegisters();
+
+  // Like Print* (above), but respect trace_parameters().
+  void LogSystemRegisters() {
+    if (trace_parameters() & LOG_SYS_REGS) PrintSystemRegisters();
+  }
+  void LogRegisters() {
+    if (trace_parameters() & LOG_REGS) PrintRegisters();
+  }
+  void LogFPRegisters() {
+    if (trace_parameters() & LOG_FP_REGS) PrintFPRegisters();
+  }
+
+  // Specify relevant register sizes, for PrintFPRegister.
+  //
+  // These values are bit masks; they can be combined in case multiple views of
+  // a machine register are interesting.
+  enum PrintFPRegisterSizes {
+    kPrintDRegValue = 1 << kDRegSizeInBytes,
+    kPrintSRegValue = 1 << kSRegSizeInBytes,
+    kPrintAllFPRegValues = kPrintDRegValue | kPrintSRegValue
+  };
+
+  // Print individual register values (after update).
+  void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer);
+  void PrintFPRegister(unsigned code,
+                       PrintFPRegisterSizes sizes = kPrintAllFPRegValues);
+  void PrintSystemRegister(SystemRegister id);
+
+  // Like Print* (above), but respect trace_parameters().
+  void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) {
+    if (trace_parameters() & LOG_REGS) PrintRegister(code, r31mode);
+  }
+  void LogFPRegister(unsigned code,
+                     PrintFPRegisterSizes sizes = kPrintAllFPRegValues) {
+    if (trace_parameters() & LOG_FP_REGS) PrintFPRegister(code, sizes);
+  }
+  void LogSystemRegister(SystemRegister id) {
+    if (trace_parameters() & LOG_SYS_REGS) PrintSystemRegister(id);
+  }
+
+  // Print memory accesses.
+  void PrintRead(uintptr_t address, size_t size, unsigned reg_code);
+  void PrintReadFP(uintptr_t address, size_t size, unsigned reg_code);
+  void PrintWrite(uintptr_t address, size_t size, unsigned reg_code);
+  void PrintWriteFP(uintptr_t address, size_t size, unsigned reg_code);
+
+  // Like Print* (above), but respect trace_parameters().
+  void LogRead(uintptr_t address, size_t size, unsigned reg_code) {
+    if (trace_parameters() & LOG_REGS) PrintRead(address, size, reg_code);
+  }
+  void LogReadFP(uintptr_t address, size_t size, unsigned reg_code) {
+    if (trace_parameters() & LOG_FP_REGS) PrintReadFP(address, size, reg_code);
+  }
+  void LogWrite(uintptr_t address, size_t size, unsigned reg_code) {
+    if (trace_parameters() & LOG_WRITE) PrintWrite(address, size, reg_code);
+  }
+  void LogWriteFP(uintptr_t address, size_t size, unsigned reg_code) {
+    if (trace_parameters() & LOG_WRITE) PrintWriteFP(address, size, reg_code);
+  }
+
+  void DoUnreachable(const Instruction* instr);
+  void DoTrace(const Instruction* instr);
+  void DoLog(const Instruction* instr);
 
   static const char* WRegNameForCode(unsigned code,
                                      Reg31Mode mode = Reg31IsZeroRegister);
@@ -493,38 +634,21 @@
   static const char* DRegNameForCode(unsigned code);
   static const char* VRegNameForCode(unsigned code);
 
-  inline bool coloured_trace() { return coloured_trace_; }
+  bool coloured_trace() const { return coloured_trace_; }
   void set_coloured_trace(bool value);
 
-  inline bool disasm_trace() { return disasm_trace_; }
-  inline void set_disasm_trace(bool value) {
-    if (value != disasm_trace_) {
-      if (value) {
-        decoder_->InsertVisitorBefore(print_disasm_, this);
-      } else {
-        decoder_->RemoveVisitor(print_disasm_);
-      }
-      disasm_trace_ = value;
-    }
-  }
-  inline void set_instruction_stats(bool value) {
-    if (value != instruction_stats_) {
-      if (value) {
-        decoder_->AppendVisitor(instrumentation_);
-      } else {
-        decoder_->RemoveVisitor(instrumentation_);
-      }
-      instruction_stats_ = value;
-    }
-  }
+  int trace_parameters() const { return trace_parameters_; }
+  void set_trace_parameters(int parameters);
+
+  void set_instruction_stats(bool value);
 
   // Clear the simulated local monitor to force the next store-exclusive
   // instruction to fail.
-  inline void ClearLocalMonitor() {
+  void ClearLocalMonitor() {
     local_monitor_.Clear();
   }
 
-  inline void SilenceExclusiveAccessWarning() {
+  void SilenceExclusiveAccessWarning() {
     print_exclusive_access_warning_ = false;
   }
 
@@ -536,10 +660,7 @@
   const char* clr_reg_value;
   const char* clr_fpreg_name;
   const char* clr_fpreg_value;
-  const char* clr_memory_value;
   const char* clr_memory_address;
-  const char* clr_debug_number;
-  const char* clr_debug_message;
   const char* clr_warning;
   const char* clr_warning_message;
   const char* clr_printf;
@@ -604,14 +725,18 @@
                        int64_t offset,
                        AddrMode addrmode);
   void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
-  uint8_t* AddressModeHelper(unsigned addr_reg,
-                             int64_t offset,
-                             AddrMode addrmode);
+  uintptr_t AddressModeHelper(unsigned addr_reg,
+                              int64_t offset,
+                              AddrMode addrmode);
+
+  uint64_t AddressUntag(uint64_t address) {
+    return address & ~kAddressTagMask;
+  }
 
   template <typename T>
-  T AddressUntag(T address) {
-    uint64_t bits = reinterpret_cast<uint64_t>(address);
-    return reinterpret_cast<T>(bits & ~kAddressTagMask);
+  T* AddressUntag(T* address) {
+    uintptr_t address_raw = reinterpret_cast<uintptr_t>(address);
+    return reinterpret_cast<T*>(AddressUntag(address_raw));
   }
 
   template <typename T, typename A>
@@ -645,8 +770,13 @@
                       Extend extend_type,
                       unsigned left_shift = 0);
 
-  uint64_t ReverseBits(uint64_t value, unsigned num_bits);
+  enum ReverseByteMode {
+    Reverse16 = 0,
+    Reverse32 = 1,
+    Reverse64 = 2
+  };
   uint64_t ReverseBytes(uint64_t value, ReverseByteMode mode);
+  uint64_t ReverseBits(uint64_t value, unsigned num_bits);
 
   template <typename T>
   T FPDefaultNaN() const;
@@ -755,11 +885,11 @@
     // is irrelevant, and is not checked here.
   }
 
-  static inline int CalcNFlag(uint64_t result, unsigned reg_size) {
+  static int CalcNFlag(uint64_t result, unsigned reg_size) {
     return (result >> (reg_size - 1)) & 1;
   }
 
-  static inline int CalcZFlag(uint64_t result) {
+  static int CalcZFlag(uint64_t result) {
     return result == 0;
   }
 
@@ -789,8 +919,8 @@
  private:
   bool coloured_trace_;
 
-  // Indicates whether the disassembly trace is active.
-  bool disasm_trace_;
+  // A set of TraceParameters flags.
+  int trace_parameters_;
 
   // Indicates whether the instruction instrumentation is active.
   bool instruction_stats_;
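
The simulator changes above replace the single disassembly-trace flag with a
`trace_parameters_` bit mask and the `Print*`/`Log*` helpers that consume it.
A minimal sketch of driving the new interface from client code follows; it is
not an excerpt from the VIXL sources, the header paths and `using` directive
are assumptions, and only `Decoder`, `Simulator`, `set_trace_parameters()`,
`RunFrom()` and the `LOG_*` flags are taken from the patch itself.

```cpp
// Hedged sketch: enable several trace streams at once on a simulator.
// Assumes the usual Decoder/Simulator pairing used by the tests in this patch.
#include "a64/decoder-a64.h"
#include "a64/simulator-a64.h"

using namespace vixl;

void RunWithTracing(const Instruction* first) {
  Decoder decoder;
  Simulator simulator(&decoder);
  // TraceParameters values are bit flags, so several streams can be enabled
  // at once: here integer registers, FP registers and memory writes.
  simulator.set_trace_parameters(LOG_REGS | LOG_FP_REGS | LOG_WRITE);
  simulator.RunFrom(first);
}
```
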
diff --git a/src/code-buffer.cc b/src/code-buffer.cc
index 70db860..bc86e75 100644
--- a/src/code-buffer.cc
+++ b/src/code-buffer.cc
@@ -84,7 +84,7 @@
 
 
 void CodeBuffer::Reset() {
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   if (managed_) {
     // TODO(all): Consider allowing for custom default values, e.g. HLT.
     memset(buffer_, 0, capacity_);
diff --git a/src/globals.h b/src/globals.h
index e28dc66..0c24931 100644
--- a/src/globals.h
+++ b/src/globals.h
@@ -58,7 +58,7 @@
 const int MBytes = 1024 * KBytes;
 
 #define VIXL_ABORT() printf("in %s, line %i", __FILE__, __LINE__); abort()
-#ifdef DEBUG
+#ifdef VIXL_DEBUG
   #define VIXL_ASSERT(condition) assert(condition)
   #define VIXL_CHECK(condition) VIXL_ASSERT(condition)
   #define VIXL_UNIMPLEMENTED() printf("UNIMPLEMENTED\t"); VIXL_ABORT()
diff --git a/src/utils.cc b/src/utils.cc
index 21965d7..80b132a 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -135,4 +135,17 @@
   return (value != 0) && ((value & (value - 1)) == 0);
 }
 
+
+unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
+  VIXL_ASSERT((reg_size % 8) == 0);
+  int count = 0;
+  for (unsigned i = 0; i < (reg_size / 16); i++) {
+    if ((imm & 0xffff) == 0) {
+      count++;
+    }
+    imm >>= 16;
+  }
+  return count;
+}
+
 }  // namespace vixl
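
The new `CountClearHalfWords` helper counts how many 16-bit half-words of an
immediate are zero, which is typically used when deciding how many move-wide
instructions an immediate needs. A small illustrative check, assuming only the
function shown above; the include path and checking function are hypothetical.

```cpp
#include <cassert>
#include <cstdint>

#include "utils.h"  // Assumed path for the declaration added by this patch.

void CheckCountClearHalfWords() {
  // 0x0000ffff00001234 splits into the half-words 0x1234, 0x0000, 0xffff and
  // 0x0000 (low to high); two of them are zero.
  assert(vixl::CountClearHalfWords(UINT64_C(0x0000ffff00001234), 64) == 2);
  // With a 32-bit register size only the low two half-words are considered.
  assert(vixl::CountClearHalfWords(UINT64_C(0x0000ffff00001234), 32) == 1);
}
```
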
diff --git a/src/utils.h b/src/utils.h
index 1540c30..b440626 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -166,6 +166,8 @@
 uint64_t LowestSetBit(uint64_t value);
 bool IsPowerOf2(int64_t value);
 
+unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);
+
 // Pointer alignment
 // TODO: rename/refactor to make it specific to instructions.
 template<typename T>
@@ -174,14 +176,14 @@
   return ((intptr_t)(pointer) & 3) == 0;
 }
 
-// Increment a pointer until it has the specified alignment.
+// Increment a pointer (up to 64 bits) until it has the specified alignment.
 template<class T>
 T AlignUp(T pointer, size_t alignment) {
   // Use C-style casts to get static_cast behaviour for integral types (T), and
   // reinterpret_cast behaviour for other types.
 
-  uintptr_t pointer_raw = (uintptr_t)pointer;
-  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(pointer_raw));
+  uint64_t pointer_raw = (uint64_t)pointer;
+  VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
 
   size_t align_step = (alignment - pointer_raw) % alignment;
   VIXL_ASSERT((pointer_raw + align_step) % alignment == 0);
@@ -189,14 +191,14 @@
   return (T)(pointer_raw + align_step);
 }
 
-// Decrement a pointer until it has the specified alignment.
+// Decrement a pointer (up to 64 bits) until it has the specified alignment.
 template<class T>
 T AlignDown(T pointer, size_t alignment) {
   // Use C-style casts to get static_cast behaviour for integral types (T), and
   // reinterpret_cast behaviour for other types.
 
-  uintptr_t pointer_raw = (uintptr_t)pointer;
-  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(pointer_raw));
+  uint64_t pointer_raw = (uint64_t)pointer;
+  VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
 
   size_t align_step = pointer_raw % alignment;
   VIXL_ASSERT((pointer_raw - align_step) % alignment == 0);
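
`AlignUp` and `AlignDown` now go through a `uint64_t` intermediate, so 64-bit
values can be aligned even where `uintptr_t` is only 32 bits wide. A quick
worked example, assuming only the templates shown above; the include path and
checking function are hypothetical.

```cpp
#include <cassert>
#include <cstdint>

#include "utils.h"  // Assumed path for the templates shown above.

void CheckAlignmentHelpers() {
  // With an 8-byte alignment, 13 rounds up to 16 and down to 8. The C-style
  // casts noted in the templates let the same helpers accept pointer types.
  assert(vixl::AlignUp(UINT64_C(13), 8) == 16);
  assert(vixl::AlignDown(UINT64_C(13), 8) == 8);
}
```
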
diff --git a/test/cctest.cc b/test/cctest.cc
deleted file mode 100644
index 1bb9507..0000000
--- a/test/cctest.cc
+++ /dev/null
@@ -1,175 +0,0 @@
-// Copyright 2013, ARM Limited
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//   * Redistributions of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//   * Redistributions in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//   * Neither the name of ARM Limited nor the names of its contributors may be
-//     used to endorse or promote products derived from this software without
-//     specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "cctest.h"
-
-// Initialize the list as empty.
-vixl::Cctest* vixl::Cctest::first_ = NULL;
-vixl::Cctest* vixl::Cctest::last_ = NULL;
-
-// No debugger to start with.
-bool vixl::Cctest::debug_ = false;
-
-// No tracing to start with.
-bool vixl::Cctest::trace_sim_ = false;
-bool vixl::Cctest::trace_reg_ = false;
-
-// No colour highlight by default.
-bool vixl::Cctest::coloured_trace_ = false;
-
-// No instruction statistics by default.
-bool vixl::Cctest::instruction_stats_ = false;
-
-// Don't generate simulator test traces by default.
-bool vixl::Cctest::sim_test_trace_ = false;
-
-// Instantiate a Cctest and append it to the linked list.
-vixl::Cctest::Cctest(const char* name, CctestFunction* callback)
-  : name_(name), callback_(callback), next_(NULL) {
-  // Append this cctest to the linked list.
-  if (first_ == NULL) {
-    VIXL_ASSERT(last_ == NULL);
-    first_ = this;
-  } else {
-    last_->next_ = this;
-  }
-  last_ = this;
-}
-
-
-// Look for 'search' in the arguments.
-bool IsInArgs(const char* search, int argc, char* argv[]) {
-  for (int i = 1; i < argc; i++) {
-    if (strcmp(search, argv[i]) == 0) {
-      return true;
-    }
-  }
-  return false;
-}
-
-
-// Special keywords used as arguments must be registered here.
-bool IsSpecialArgument(const char* arg) {
-  return (strcmp(arg, "--help") == 0) ||
-         (strcmp(arg, "--list") == 0) ||
-         (strcmp(arg, "--run_all") == 0) ||
-         (strcmp(arg, "--debugger") == 0) ||
-         (strcmp(arg, "--trace_sim") == 0) ||
-         (strcmp(arg, "--trace_reg") == 0) ||
-         (strcmp(arg, "--coloured_trace") == 0) ||
-         (strcmp(arg, "--instruction_stats") == 0) ||
-         (strcmp(arg, "--sim_test_trace") == 0);
-}
-
-
-void PrintHelpMessage() {
-  printf("Usage:  ./cctest [options] [test names]\n"
-         "Run all tests specified on the command line.\n"
-         "--help              print this help message.\n"
-         "--list              list all available tests.\n"
-         "--run_all           run all available tests.\n"
-         "--debugger          run in the debugger.\n"
-         "--trace_sim         generate a trace of simulated instructions.\n"
-         "--trace_reg         generate a trace of simulated registers. "
-                             "Implies --debugger.\n"
-         "--coloured_trace    generate coloured trace.\n"
-         "--instruction_stats log instruction statistics to vixl_stats.csv.\n"
-         "--sim_test_trace    Print result traces for SIM_* tests.\n");
-}
-
-int main(int argc, char* argv[]) {
-  // Parse the arguments. Option flags must appear first, followed by an
-  // optional list of tests to run.
-
-  if (IsInArgs("--coloured_trace", argc, argv)) {
-    vixl::Cctest::set_coloured_trace(true);
-  }
-
-  if (IsInArgs("--debugger", argc, argv)) {
-    vixl::Cctest::set_debug(true);
-  }
-
-  if (IsInArgs("--trace_reg", argc, argv)) {
-    vixl::Cctest::set_trace_reg(true);
-  }
-
-  if (IsInArgs("--trace_sim", argc, argv)) {
-    vixl::Cctest::set_trace_sim(true);
-  }
-
-  if (IsInArgs("--instruction_stats", argc, argv)) {
-    vixl::Cctest::set_instruction_stats(true);
-  }
-
-  if (IsInArgs("--sim_test_trace", argc, argv)) {
-    vixl::Cctest::set_sim_test_trace(true);
-  }
-
-  if (IsInArgs("--help", argc, argv)) {
-    PrintHelpMessage();
-
-  } else if (IsInArgs("--list", argc, argv)) {
-    // List all registered cctests.
-    for (vixl::Cctest* c = vixl::Cctest::first(); c != NULL; c = c->next()) {
-      printf("%s\n", c->name());
-    }
-
-  } else if (IsInArgs("--run_all", argc, argv)) {
-    // Run all registered cctests.
-    for (vixl::Cctest* c = vixl::Cctest::first(); c != NULL; c = c->next()) {
-      printf("Running %s\n", c->name());
-      c->callback()();
-    }
-
-  } else {
-    if (argc <= 1)
-      PrintHelpMessage();
-    // Other arguments must be tests to run.
-    int i = 1;
-    for (i = 1; i < argc; i++) {
-      if (!IsSpecialArgument(argv[i])) {
-        vixl::Cctest* c;
-        for (c = vixl::Cctest::first(); c != NULL; c = c->next()) {
-          if (strcmp(c->name(), argv[i]) == 0) {
-            c->callback()();
-            break;
-          }
-        }
-        // Fail if we have not found a matching test to run.
-        if (c == NULL) {
-          printf("Test '%s' does not exist. Aborting...\n", argv[i]);
-          abort();
-        }
-      }
-    }
-  }
-
-  return EXIT_SUCCESS;
-}
-
diff --git a/test/examples/test-examples.cc b/test/examples/test-examples.cc
index a4a1feb..099135f 100644
--- a/test/examples/test-examples.cc
+++ b/test/examples/test-examples.cc
@@ -32,7 +32,7 @@
 #include "custom-disassembler.h"
 #include "../test-utils-a64.h"
 
-#include "../cctest.h"
+#include "../test-runner.h"
 
 #define TEST(name) TEST_(EXAMPLE_##name)
 
@@ -149,14 +149,14 @@
   MacroAssembler masm(BUF_SIZE);                            \
   Decoder decoder;                                          \
   Debugger simulator(&decoder);                             \
-  simulator.set_coloured_trace(Cctest::coloured_trace());   \
+  simulator.set_coloured_trace(Test::coloured_trace());     \
   PrintDisassembler* pdis = NULL;                           \
   Instrument* inst = NULL;                                  \
-  if (Cctest::trace_sim()) {                                \
+  if (Test::trace_sim()) {                                  \
     pdis = new PrintDisassembler(stdout);                   \
     decoder.PrependVisitor(pdis);                           \
   }                                                         \
-  if (Cctest::instruction_stats()) {                        \
+  if (Test::instruction_stats()) {                          \
     inst = new Instrument("vixl_stats.csv", 10);            \
     inst->Enable();                                         \
     decoder.AppendVisitor(inst);                            \
diff --git a/test/test-assembler-a64.cc b/test/test-assembler-a64.cc
index e2b02b8..156e1cb 100644
--- a/test/test-assembler-a64.cc
+++ b/test/test-assembler-a64.cc
@@ -30,7 +30,7 @@
 #include <math.h>
 #include <float.h>
 
-#include "cctest.h"
+#include "test-runner.h"
 #include "test-utils-a64.h"
 #include "a64/macro-assembler-a64.h"
 #include "a64/simulator-a64.h"
@@ -108,15 +108,10 @@
 
 #define SETUP_COMMON()                                                         \
   Decoder decoder;                                                             \
-  Simulator* simulator = NULL;                                                 \
-  if (Cctest::run_debugger()) {                                                \
-    simulator = new Debugger(&decoder);                                        \
-  } else {                                                                     \
-    simulator = new Simulator(&decoder);                                       \
-    simulator->set_disasm_trace(Cctest::trace_sim());                          \
-  }                                                                            \
-  simulator->set_coloured_trace(Cctest::coloured_trace());                     \
-  simulator->set_instruction_stats(Cctest::instruction_stats());               \
+  Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder)         \
+                                              : new Simulator(&decoder);       \
+  simulator->set_coloured_trace(Test::coloured_trace());                       \
+  simulator->set_instruction_stats(Test::instruction_stats());                 \
   RegisterDump core
 
 // This is a convenience macro to avoid creating a scope for every assembler
@@ -128,25 +123,24 @@
   masm.Reset();                                                                \
   simulator->ResetState();                                                     \
   __ PushCalleeSavedRegisters();                                               \
-  if (Cctest::run_debugger()) {                                                \
-    if (Cctest::trace_reg()) {                                                 \
-      __ Trace(LOG_STATE, TRACE_ENABLE);                                       \
-    }                                                                          \
-    if (Cctest::trace_sim()) {                                                 \
-      __ Trace(LOG_DISASM, TRACE_ENABLE);                                      \
-    }                                                                          \
+  if (Test::trace_reg()) {                                                     \
+    __ Trace(LOG_STATE, TRACE_ENABLE);                                         \
   }                                                                            \
-  if (Cctest::instruction_stats()) {                                           \
+  if (Test::trace_write()) {                                                   \
+    __ Trace(LOG_WRITE, TRACE_ENABLE);                                         \
+  }                                                                            \
+  if (Test::trace_sim()) {                                                     \
+    __ Trace(LOG_DISASM, TRACE_ENABLE);                                        \
+  }                                                                            \
+  if (Test::instruction_stats()) {                                             \
     __ EnableInstrumentation();                                                \
   }
 
 #define END()                                                                  \
-  if (Cctest::instruction_stats()) {                                           \
+  if (Test::instruction_stats()) {                                             \
     __ DisableInstrumentation();                                               \
   }                                                                            \
-  if (Cctest::run_debugger()) {                                                \
-    __ Trace(LOG_ALL, TRACE_DISABLE);                                          \
-  }                                                                            \
+  __ Trace(LOG_ALL, TRACE_DISABLE);                                            \
   core.Dump(&masm);                                                            \
   __ PopCalleeSavedRegisters();                                                \
   __ Ret();                                                                    \
@@ -3193,8 +3187,8 @@
 
   // Emit more code than the maximum literal load range to ensure the pool
   // should be emitted.
-  const ptrdiff_t offset = masm.CursorOffset();
-  while ((masm.CursorOffset() - offset) < (2 * kMaxLoadLiteralRange)) {
+  const ptrdiff_t end = masm.CursorOffset() + 2 * kMaxLoadLiteralRange;
+  while (masm.CursorOffset() < end) {
     __ Nop();
   }
 
@@ -3330,34 +3324,54 @@
 
 
 TEST(ldr_literal_custom) {
-  // The macro assembler always emit pools after the instruction using them,
-  // this test emit a pool then use it.
   SETUP();
   ALLOW_ASM();
 
-  Label end_of_pool;
-  Literal<uint64_t> literal_x(0x1234567890abcdef);
-  Literal<uint32_t> literal_w(0xfedcba09);
-  Literal<uint32_t> literal_sx(0x80000000);
-  Literal<double> literal_d(1.234);
-  Literal<float> literal_s(2.5);
+  Label end_of_pool_before;
+  Label end_of_pool_after;
+  Literal<uint64_t> before_x(0x1234567890abcdef);
+  Literal<uint32_t> before_w(0xfedcba09);
+  Literal<uint32_t> before_sx(0x80000000);
+  Literal<double> before_d(1.234);
+  Literal<float> before_s(2.5);
+  Literal<uint64_t> after_x(0x1234567890abcdef);
+  Literal<uint32_t> after_w(0xfedcba09);
+  Literal<uint32_t> after_sx(0x80000000);
+  Literal<double> after_d(1.234);
+  Literal<float> after_s(2.5);
 
   START();
-  // "Manually generate a pool.
-  __ B(&end_of_pool);
-  __ place(&literal_x);
-  __ place(&literal_w);
-  __ place(&literal_sx);
-  __ place(&literal_d);
-  __ place(&literal_s);
-  __ Bind(&end_of_pool);
 
-  // now load the entries.
-  __ ldr(x2, &literal_x);
-  __ ldr(w3, &literal_w);
-  __ ldrsw(x5, &literal_sx);
-  __ ldr(d13, &literal_d);
-  __ ldr(s25, &literal_s);
+  // Manually generate a pool.
+  __ B(&end_of_pool_before);
+  __ place(&before_x);
+  __ place(&before_w);
+  __ place(&before_sx);
+  __ place(&before_d);
+  __ place(&before_s);
+  __ Bind(&end_of_pool_before);
+
+  __ ldr(x2, &before_x);
+  __ ldr(w3, &before_w);
+  __ ldrsw(x5, &before_sx);
+  __ ldr(d13, &before_d);
+  __ ldr(s25, &before_s);
+
+  __ ldr(x6, &after_x);
+  __ ldr(w7, &after_w);
+  __ ldrsw(x8, &after_sx);
+  __ ldr(d14, &after_d);
+  __ ldr(s26, &after_s);
+
+  // Manually generate a pool.
+  __ B(&end_of_pool_after);
+  __ place(&after_x);
+  __ place(&after_w);
+  __ place(&after_sx);
+  __ place(&after_d);
+  __ place(&after_s);
+  __ Bind(&end_of_pool_after);
+
   END();
 
   RUN();
@@ -3368,6 +3382,328 @@
   ASSERT_EQUAL_FP64(1.234, d13);
   ASSERT_EQUAL_FP32(2.5, s25);
 
+  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
+  ASSERT_EQUAL_64(0xfedcba09, x7);
+  ASSERT_EQUAL_64(0xffffffff80000000, x8);
+  ASSERT_EQUAL_FP64(1.234, d14);
+  ASSERT_EQUAL_FP32(2.5, s26);
+
+  TEARDOWN();
+}
+
+
+TEST(ldr_literal_custom_shared) {
+  SETUP();
+  ALLOW_ASM();
+
+  Label end_of_pool_before;
+  Label end_of_pool_after;
+  Literal<uint64_t> before_x(0x1234567890abcdef);
+  Literal<uint32_t> before_w(0xfedcba09);
+  Literal<double> before_d(1.234);
+  Literal<float> before_s(2.5);
+  Literal<uint64_t> after_x(0x1234567890abcdef);
+  Literal<uint32_t> after_w(0xfedcba09);
+  Literal<double> after_d(1.234);
+  Literal<float> after_s(2.5);
+
+  START();
+
+  // Manually generate a pool.
+  __ B(&end_of_pool_before);
+  __ place(&before_x);
+  __ place(&before_w);
+  __ place(&before_d);
+  __ place(&before_s);
+  __ Bind(&end_of_pool_before);
+
+  // Load the entries several times to test that literals can be shared.
+  for (int i = 0; i < 50; i++) {
+    __ ldr(x2, &before_x);
+    __ ldr(w3, &before_w);
+    __ ldrsw(x5, &before_w);    // Re-use before_w.
+    __ ldr(d13, &before_d);
+    __ ldr(s25, &before_s);
+
+    __ ldr(x6, &after_x);
+    __ ldr(w7, &after_w);
+    __ ldrsw(x8, &after_w);     // Re-use after_w.
+    __ ldr(d14, &after_d);
+    __ ldr(s26, &after_s);
+  }
+
+  // Manually generate a pool.
+  __ B(&end_of_pool_after);
+  __ place(&after_x);
+  __ place(&after_w);
+  __ place(&after_d);
+  __ place(&after_s);
+  __ Bind(&end_of_pool_after);
+
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
+  ASSERT_EQUAL_64(0xfedcba09, x3);
+  ASSERT_EQUAL_64(0xfffffffffedcba09, x5);
+  ASSERT_EQUAL_FP64(1.234, d13);
+  ASSERT_EQUAL_FP32(2.5, s25);
+
+  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
+  ASSERT_EQUAL_64(0xfedcba09, x7);
+  ASSERT_EQUAL_64(0xfffffffffedcba09, x8);
+  ASSERT_EQUAL_FP64(1.234, d14);
+  ASSERT_EQUAL_FP32(2.5, s26);
+
+  TEARDOWN();
+}
+
+
+TEST(prfm_offset) {
+  SETUP();
+
+  START();
+  // The address used in prfm doesn't have to be valid.
+  __ Mov(x0, 0x0123456789abcdef);
+
+  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
+    // Unallocated prefetch operations are ignored, so test all of them.
+    PrefetchOperation op = static_cast<PrefetchOperation>(i);
+
+    __ Prfm(op, MemOperand(x0));
+    __ Prfm(op, MemOperand(x0, 8));
+    __ Prfm(op, MemOperand(x0, 32760));
+    __ Prfm(op, MemOperand(x0, 32768));
+
+    __ Prfm(op, MemOperand(x0, 1));
+    __ Prfm(op, MemOperand(x0, 9));
+    __ Prfm(op, MemOperand(x0, 255));
+    __ Prfm(op, MemOperand(x0, 257));
+    __ Prfm(op, MemOperand(x0, -1));
+    __ Prfm(op, MemOperand(x0, -9));
+    __ Prfm(op, MemOperand(x0, -255));
+    __ Prfm(op, MemOperand(x0, -257));
+
+    __ Prfm(op, MemOperand(x0, 0xfedcba9876543210));
+  }
+
+  END();
+  RUN();
+  TEARDOWN();
+}
+
+
+TEST(prfm_regoffset) {
+  SETUP();
+
+  START();
+  // The address used in prfm doesn't have to be valid.
+  __ Mov(x0, 0x0123456789abcdef);
+
+  CPURegList inputs(CPURegister::kRegister, kXRegSize, 10, 18);
+  __ Mov(x10, 0);
+  __ Mov(x11, 1);
+  __ Mov(x12, 8);
+  __ Mov(x13, 255);
+  __ Mov(x14, -0);
+  __ Mov(x15, -1);
+  __ Mov(x16, -8);
+  __ Mov(x17, -255);
+  __ Mov(x18, 0xfedcba9876543210);
+
+  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
+    // Unallocated prefetch operations are ignored, so test all of them.
+    PrefetchOperation op = static_cast<PrefetchOperation>(i);
+
+    CPURegList loop = inputs;
+    while (!loop.IsEmpty()) {
+      Register input(loop.PopLowestIndex());
+      __ Prfm(op, MemOperand(x0, input));
+      __ Prfm(op, MemOperand(x0, input, UXTW));
+      __ Prfm(op, MemOperand(x0, input, UXTW, 3));
+      __ Prfm(op, MemOperand(x0, input, LSL));
+      __ Prfm(op, MemOperand(x0, input, LSL, 3));
+      __ Prfm(op, MemOperand(x0, input, SXTW));
+      __ Prfm(op, MemOperand(x0, input, SXTW, 3));
+      __ Prfm(op, MemOperand(x0, input, SXTX));
+      __ Prfm(op, MemOperand(x0, input, SXTX, 3));
+    }
+  }
+
+  END();
+  RUN();
+  TEARDOWN();
+}
+
+
+TEST(prfm_literal_imm19) {
+  SETUP();
+  ALLOW_ASM();
+  START();
+
+  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
+    // Unallocated prefetch operations are ignored, so test all of them.
+    PrefetchOperation op = static_cast<PrefetchOperation>(i);
+
+    // The address used in prfm doesn't have to be valid.
+    __ prfm(op, 0);
+    __ prfm(op, 1);
+    __ prfm(op, -1);
+    __ prfm(op, 1000);
+    __ prfm(op, -1000);
+    __ prfm(op, 0x3ffff);
+    __ prfm(op, -0x40000);
+  }
+
+  END();
+  RUN();
+  TEARDOWN();
+}
+
+
+TEST(prfm_literal) {
+  SETUP();
+  ALLOW_ASM();
+
+  Label end_of_pool_before;
+  Label end_of_pool_after;
+  Literal<uint64_t> before(0);
+  Literal<uint64_t> after(0);
+
+  START();
+
+  // Manually generate a pool.
+  __ B(&end_of_pool_before);
+  __ place(&before);
+  __ Bind(&end_of_pool_before);
+
+  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
+    // Unallocated prefetch operations are ignored, so test all of them.
+    PrefetchOperation op = static_cast<PrefetchOperation>(i);
+
+    CodeBufferCheckScope guard(&masm, 2 * kInstructionSize);
+    __ prfm(op, &before);
+    __ prfm(op, &after);
+  }
+
+  // Manually generate a pool.
+  __ B(&end_of_pool_after);
+  __ place(&after);
+  __ Bind(&end_of_pool_after);
+
+  END();
+  RUN();
+  TEARDOWN();
+}
+
+
+TEST(prfm_wide) {
+  SETUP();
+
+  START();
+  // The address used in prfm doesn't have to be valid.
+  __ Mov(x0, 0x0123456789abcdef);
+
+  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
+    // Unallocated prefetch operations are ignored, so test all of them.
+    PrefetchOperation op = static_cast<PrefetchOperation>(i);
+
+    __ Prfm(op, MemOperand(x0, 0x40000));
+    __ Prfm(op, MemOperand(x0, -0x40001));
+    __ Prfm(op, MemOperand(x0, UINT64_C(0x5555555555555555)));
+    __ Prfm(op, MemOperand(x0, UINT64_C(0xfedcba9876543210)));
+  }
+
+  END();
+  RUN();
+  TEARDOWN();
+}
+
+
+TEST(load_prfm_literal) {
+  // Test literals shared between both prfm and ldr.
+  SETUP();
+  ALLOW_ASM();
+
+  Label end_of_pool_before;
+  Label end_of_pool_after;
+  Literal<uint64_t> before_x(0x1234567890abcdef);
+  Literal<uint32_t> before_w(0xfedcba09);
+  Literal<uint32_t> before_sx(0x80000000);
+  Literal<double> before_d(1.234);
+  Literal<float> before_s(2.5);
+  Literal<uint64_t> after_x(0x1234567890abcdef);
+  Literal<uint32_t> after_w(0xfedcba09);
+  Literal<uint32_t> after_sx(0x80000000);
+  Literal<double> after_d(1.234);
+  Literal<float> after_s(2.5);
+
+  START();
+
+  // Manually generate a pool.
+  __ B(&end_of_pool_before);
+  __ place(&before_x);
+  __ place(&before_w);
+  __ place(&before_sx);
+  __ place(&before_d);
+  __ place(&before_s);
+  __ Bind(&end_of_pool_before);
+
+  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
+    // Unallocated prefetch operations are ignored, so test all of them.
+    PrefetchOperation op = static_cast<PrefetchOperation>(i);
+
+    __ prfm(op, &before_x);
+    __ prfm(op, &before_w);
+    __ prfm(op, &before_sx);
+    __ prfm(op, &before_d);
+    __ prfm(op, &before_s);
+
+    __ prfm(op, &after_x);
+    __ prfm(op, &after_w);
+    __ prfm(op, &after_sx);
+    __ prfm(op, &after_d);
+    __ prfm(op, &after_s);
+  }
+
+  __ ldr(x2, &before_x);
+  __ ldr(w3, &before_w);
+  __ ldrsw(x5, &before_sx);
+  __ ldr(d13, &before_d);
+  __ ldr(s25, &before_s);
+
+  __ ldr(x6, &after_x);
+  __ ldr(w7, &after_w);
+  __ ldrsw(x8, &after_sx);
+  __ ldr(d14, &after_d);
+  __ ldr(s26, &after_s);
+
+  // Manually generate a pool.
+  __ B(&end_of_pool_after);
+  __ place(&after_x);
+  __ place(&after_w);
+  __ place(&after_sx);
+  __ place(&after_d);
+  __ place(&after_s);
+  __ Bind(&end_of_pool_after);
+
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
+  ASSERT_EQUAL_64(0xfedcba09, x3);
+  ASSERT_EQUAL_64(0xffffffff80000000, x5);
+  ASSERT_EQUAL_FP64(1.234, d13);
+  ASSERT_EQUAL_FP32(2.5, s25);
+
+  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
+  ASSERT_EQUAL_64(0xfedcba09, x7);
+  ASSERT_EQUAL_64(0xffffffff80000000, x8);
+  ASSERT_EQUAL_FP64(1.234, d14);
+  ASSERT_EQUAL_FP32(2.5, s26);
+
   TEARDOWN();
 }
 
@@ -6217,6 +6553,96 @@
 }
 
 
+TEST(frinti) {
+  // VIXL only supports the round-to-nearest FPCR mode, so this test has the
+  // same results as frintn.
+  SETUP();
+
+  START();
+  __ Fmov(s16, 1.0);
+  __ Fmov(s17, 1.1);
+  __ Fmov(s18, 1.5);
+  __ Fmov(s19, 1.9);
+  __ Fmov(s20, 2.5);
+  __ Fmov(s21, -1.5);
+  __ Fmov(s22, -2.5);
+  __ Fmov(s23, kFP32PositiveInfinity);
+  __ Fmov(s24, kFP32NegativeInfinity);
+  __ Fmov(s25, 0.0);
+  __ Fmov(s26, -0.0);
+  __ Fmov(s27, -0.2);
+
+  __ Frinti(s0, s16);
+  __ Frinti(s1, s17);
+  __ Frinti(s2, s18);
+  __ Frinti(s3, s19);
+  __ Frinti(s4, s20);
+  __ Frinti(s5, s21);
+  __ Frinti(s6, s22);
+  __ Frinti(s7, s23);
+  __ Frinti(s8, s24);
+  __ Frinti(s9, s25);
+  __ Frinti(s10, s26);
+  __ Frinti(s11, s27);
+
+  __ Fmov(d16, 1.0);
+  __ Fmov(d17, 1.1);
+  __ Fmov(d18, 1.5);
+  __ Fmov(d19, 1.9);
+  __ Fmov(d20, 2.5);
+  __ Fmov(d21, -1.5);
+  __ Fmov(d22, -2.5);
+  __ Fmov(d23, kFP32PositiveInfinity);
+  __ Fmov(d24, kFP32NegativeInfinity);
+  __ Fmov(d25, 0.0);
+  __ Fmov(d26, -0.0);
+  __ Fmov(d27, -0.2);
+
+  __ Frinti(d12, d16);
+  __ Frinti(d13, d17);
+  __ Frinti(d14, d18);
+  __ Frinti(d15, d19);
+  __ Frinti(d16, d20);
+  __ Frinti(d17, d21);
+  __ Frinti(d18, d22);
+  __ Frinti(d19, d23);
+  __ Frinti(d20, d24);
+  __ Frinti(d21, d25);
+  __ Frinti(d22, d26);
+  __ Frinti(d23, d27);
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_FP32(1.0, s0);
+  ASSERT_EQUAL_FP32(1.0, s1);
+  ASSERT_EQUAL_FP32(2.0, s2);
+  ASSERT_EQUAL_FP32(2.0, s3);
+  ASSERT_EQUAL_FP32(2.0, s4);
+  ASSERT_EQUAL_FP32(-2.0, s5);
+  ASSERT_EQUAL_FP32(-2.0, s6);
+  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
+  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
+  ASSERT_EQUAL_FP32(0.0, s9);
+  ASSERT_EQUAL_FP32(-0.0, s10);
+  ASSERT_EQUAL_FP32(-0.0, s11);
+  ASSERT_EQUAL_FP64(1.0, d12);
+  ASSERT_EQUAL_FP64(1.0, d13);
+  ASSERT_EQUAL_FP64(2.0, d14);
+  ASSERT_EQUAL_FP64(2.0, d15);
+  ASSERT_EQUAL_FP64(2.0, d16);
+  ASSERT_EQUAL_FP64(-2.0, d17);
+  ASSERT_EQUAL_FP64(-2.0, d18);
+  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
+  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
+  ASSERT_EQUAL_FP64(0.0, d21);
+  ASSERT_EQUAL_FP64(-0.0, d22);
+  ASSERT_EQUAL_FP64(-0.0, d23);
+
+  TEARDOWN();
+}
+
+
 TEST(frintm) {
   SETUP();
 
@@ -6393,6 +6819,184 @@
 }
 
 
+TEST(frintp) {
+  SETUP();
+
+  START();
+  __ Fmov(s16, 1.0);
+  __ Fmov(s17, 1.1);
+  __ Fmov(s18, 1.5);
+  __ Fmov(s19, 1.9);
+  __ Fmov(s20, 2.5);
+  __ Fmov(s21, -1.5);
+  __ Fmov(s22, -2.5);
+  __ Fmov(s23, kFP32PositiveInfinity);
+  __ Fmov(s24, kFP32NegativeInfinity);
+  __ Fmov(s25, 0.0);
+  __ Fmov(s26, -0.0);
+  __ Fmov(s27, -0.2);
+
+  __ Frintp(s0, s16);
+  __ Frintp(s1, s17);
+  __ Frintp(s2, s18);
+  __ Frintp(s3, s19);
+  __ Frintp(s4, s20);
+  __ Frintp(s5, s21);
+  __ Frintp(s6, s22);
+  __ Frintp(s7, s23);
+  __ Frintp(s8, s24);
+  __ Frintp(s9, s25);
+  __ Frintp(s10, s26);
+  __ Frintp(s11, s27);
+
+  __ Fmov(d16, 1.0);
+  __ Fmov(d17, 1.1);
+  __ Fmov(d18, 1.5);
+  __ Fmov(d19, 1.9);
+  __ Fmov(d20, 2.5);
+  __ Fmov(d21, -1.5);
+  __ Fmov(d22, -2.5);
+  __ Fmov(d23, kFP32PositiveInfinity);
+  __ Fmov(d24, kFP32NegativeInfinity);
+  __ Fmov(d25, 0.0);
+  __ Fmov(d26, -0.0);
+  __ Fmov(d27, -0.2);
+
+  __ Frintp(d12, d16);
+  __ Frintp(d13, d17);
+  __ Frintp(d14, d18);
+  __ Frintp(d15, d19);
+  __ Frintp(d16, d20);
+  __ Frintp(d17, d21);
+  __ Frintp(d18, d22);
+  __ Frintp(d19, d23);
+  __ Frintp(d20, d24);
+  __ Frintp(d21, d25);
+  __ Frintp(d22, d26);
+  __ Frintp(d23, d27);
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_FP32(1.0, s0);
+  ASSERT_EQUAL_FP32(2.0, s1);
+  ASSERT_EQUAL_FP32(2.0, s2);
+  ASSERT_EQUAL_FP32(2.0, s3);
+  ASSERT_EQUAL_FP32(3.0, s4);
+  ASSERT_EQUAL_FP32(-1.0, s5);
+  ASSERT_EQUAL_FP32(-2.0, s6);
+  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
+  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
+  ASSERT_EQUAL_FP32(0.0, s9);
+  ASSERT_EQUAL_FP32(-0.0, s10);
+  ASSERT_EQUAL_FP32(-0.0, s11);
+  ASSERT_EQUAL_FP64(1.0, d12);
+  ASSERT_EQUAL_FP64(2.0, d13);
+  ASSERT_EQUAL_FP64(2.0, d14);
+  ASSERT_EQUAL_FP64(2.0, d15);
+  ASSERT_EQUAL_FP64(3.0, d16);
+  ASSERT_EQUAL_FP64(-1.0, d17);
+  ASSERT_EQUAL_FP64(-2.0, d18);
+  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
+  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
+  ASSERT_EQUAL_FP64(0.0, d21);
+  ASSERT_EQUAL_FP64(-0.0, d22);
+  ASSERT_EQUAL_FP64(-0.0, d23);
+
+  TEARDOWN();
+}
+
+
+TEST(frintx) {
+  // VIXL only supports the round-to-nearest FPCR mode, and it doesn't support
+  // FP exceptions, so this test has the same results as frintn (and frinti).
+  SETUP();
+
+  START();
+  __ Fmov(s16, 1.0);
+  __ Fmov(s17, 1.1);
+  __ Fmov(s18, 1.5);
+  __ Fmov(s19, 1.9);
+  __ Fmov(s20, 2.5);
+  __ Fmov(s21, -1.5);
+  __ Fmov(s22, -2.5);
+  __ Fmov(s23, kFP32PositiveInfinity);
+  __ Fmov(s24, kFP32NegativeInfinity);
+  __ Fmov(s25, 0.0);
+  __ Fmov(s26, -0.0);
+  __ Fmov(s27, -0.2);
+
+  __ Frintx(s0, s16);
+  __ Frintx(s1, s17);
+  __ Frintx(s2, s18);
+  __ Frintx(s3, s19);
+  __ Frintx(s4, s20);
+  __ Frintx(s5, s21);
+  __ Frintx(s6, s22);
+  __ Frintx(s7, s23);
+  __ Frintx(s8, s24);
+  __ Frintx(s9, s25);
+  __ Frintx(s10, s26);
+  __ Frintx(s11, s27);
+
+  __ Fmov(d16, 1.0);
+  __ Fmov(d17, 1.1);
+  __ Fmov(d18, 1.5);
+  __ Fmov(d19, 1.9);
+  __ Fmov(d20, 2.5);
+  __ Fmov(d21, -1.5);
+  __ Fmov(d22, -2.5);
+  __ Fmov(d23, kFP32PositiveInfinity);
+  __ Fmov(d24, kFP32NegativeInfinity);
+  __ Fmov(d25, 0.0);
+  __ Fmov(d26, -0.0);
+  __ Fmov(d27, -0.2);
+
+  __ Frintx(d12, d16);
+  __ Frintx(d13, d17);
+  __ Frintx(d14, d18);
+  __ Frintx(d15, d19);
+  __ Frintx(d16, d20);
+  __ Frintx(d17, d21);
+  __ Frintx(d18, d22);
+  __ Frintx(d19, d23);
+  __ Frintx(d20, d24);
+  __ Frintx(d21, d25);
+  __ Frintx(d22, d26);
+  __ Frintx(d23, d27);
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_FP32(1.0, s0);
+  ASSERT_EQUAL_FP32(1.0, s1);
+  ASSERT_EQUAL_FP32(2.0, s2);
+  ASSERT_EQUAL_FP32(2.0, s3);
+  ASSERT_EQUAL_FP32(2.0, s4);
+  ASSERT_EQUAL_FP32(-2.0, s5);
+  ASSERT_EQUAL_FP32(-2.0, s6);
+  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
+  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
+  ASSERT_EQUAL_FP32(0.0, s9);
+  ASSERT_EQUAL_FP32(-0.0, s10);
+  ASSERT_EQUAL_FP32(-0.0, s11);
+  ASSERT_EQUAL_FP64(1.0, d12);
+  ASSERT_EQUAL_FP64(1.0, d13);
+  ASSERT_EQUAL_FP64(2.0, d14);
+  ASSERT_EQUAL_FP64(2.0, d15);
+  ASSERT_EQUAL_FP64(2.0, d16);
+  ASSERT_EQUAL_FP64(-2.0, d17);
+  ASSERT_EQUAL_FP64(-2.0, d18);
+  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
+  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
+  ASSERT_EQUAL_FP64(0.0, d21);
+  ASSERT_EQUAL_FP64(-0.0, d22);
+  ASSERT_EQUAL_FP64(-0.0, d23);
+
+  TEARDOWN();
+}
+
+
 TEST(frintz) {
   SETUP();
 
diff --git a/test/test-disasm-a64.cc b/test/test-disasm-a64.cc
index 60cc630..1528fbb 100644
--- a/test/test-disasm-a64.cc
+++ b/test/test-disasm-a64.cc
@@ -26,7 +26,7 @@
 
 #include <stdio.h>
 #include <cstring>
-#include "cctest.h"
+#include "test-runner.h"
 
 #include "a64/macro-assembler-a64.h"
 #include "a64/disasm-a64.h"
@@ -57,9 +57,12 @@
   decoder->Decode(reinterpret_cast<Instruction*>(buf));                        \
   encoding = *reinterpret_cast<uint32_t*>(buf);                                \
   if (strcmp(disasm->GetOutput(), EXP) != 0) {                                 \
-    printf("Encoding: %08" PRIx32 "\nExpected: %s\nFound:    %s\n",            \
+    printf("\nEncoding: %08" PRIx32 "\nExpected: %s\nFound:    %s\n",          \
            encoding, EXP, disasm->GetOutput());                                \
     abort();                                                                   \
+  }                                                                            \
+  if (Test::trace_sim()) {                                                     \
+    printf("%08" PRIx32 "\t%s\n", encoding, disasm->GetOutput());              \
   }
 
 #define COMPARE_PREFIX(ASM, EXP)                                               \
@@ -72,9 +75,12 @@
   decoder->Decode(reinterpret_cast<Instruction*>(buf));                        \
   encoding = *reinterpret_cast<uint32_t*>(buf);                                \
   if (strncmp(disasm->GetOutput(), EXP, strlen(EXP)) != 0) {                   \
-    printf("Encoding: %08" PRIx32 "\nExpected: %s\nFound:    %s\n",            \
+    printf("\nEncoding: %08" PRIx32 "\nExpected: %s\nFound:    %s\n",          \
            encoding, EXP, disasm->GetOutput());                                \
     abort();                                                                   \
+  }                                                                            \
+  if (Test::trace_sim()) {                                                     \
+    printf("%08" PRIx32 "\t%s\n", encoding, disasm->GetOutput());              \
   }
 
 #define COMPARE_MACRO(ASM, EXP)                                                \
@@ -84,9 +90,12 @@
   decoder->Decode(reinterpret_cast<Instruction*>(buf));                        \
   encoding = *reinterpret_cast<uint32_t*>(buf);                                \
   if (strncmp(disasm->GetOutput(), EXP, strlen(EXP)) != 0) {                   \
-    printf("Encoding: %08" PRIx32 "\nExpected: %s\nFound:    %s\n",            \
+    printf("\nEncoding: %08" PRIx32 "\nExpected: %s\nFound:    %s\n",          \
            encoding, EXP, disasm->GetOutput());                                \
     abort();                                                                   \
+  }                                                                            \
+  if (Test::trace_sim()) {                                                     \
+    printf("%08" PRIx32 "\t%s\n", encoding, disasm->GetOutput());              \
   }
 
 #define CLEANUP()                                                              \
@@ -106,7 +115,7 @@
   COMPARE(dci(0x910003fd), "mov x29, sp");
   COMPARE(dci(0x9100e3a0), "add x0, x29, #0x38 (56)");
   COMPARE(dci(0xb900001f), "str wzr, [x0]");
-  COMPARE(dci(0x528000e1), "movz w1, #0x7");
+  COMPARE(dci(0x528000e1), "mov w1, #0x7");
   COMPARE(dci(0xb9001c01), "str w1, [x0, #28]");
   COMPARE(dci(0x390043a0), "strb w0, [x29, #16]");
   COMPARE(dci(0x790027a0), "strh w0, [x29, #18]");
@@ -130,8 +139,8 @@
 TEST(mov_mvn) {
   SETUP_CLASS(MacroAssembler);
 
-  COMPARE(Mov(w0, Operand(0x1234)), "movz w0, #0x1234");
-  COMPARE(Mov(x1, Operand(0x1234)), "movz x1, #0x1234");
+  COMPARE(Mov(w0, Operand(0x1234)), "mov w0, #0x1234");
+  COMPARE(Mov(x1, Operand(0x1234)), "mov x1, #0x1234");
   COMPARE(Mov(w2, Operand(w3)), "mov w2, w3");
   COMPARE(Mov(x4, Operand(x5)), "mov x4, x5");
   COMPARE(Mov(w6, Operand(w7, LSL, 5)), "lsl w6, w7, #5");
@@ -141,8 +150,8 @@
   COMPARE(Mov(w14, Operand(w15, SXTH, 2)), "sbfiz w14, w15, #2, #16");
   COMPARE(Mov(x16, Operand(x17, SXTW, 3)), "sbfiz x16, x17, #3, #32");
 
-  COMPARE(Mvn(w0, Operand(0x101)), "movn w0, #0x101");
-  COMPARE(Mvn(x1, Operand(0xfff1)), "movn x1, #0xfff1");
+  COMPARE(Mvn(w0, Operand(0x101)), "mov w0, #0xfffffefe");
+  COMPARE(Mvn(x1, Operand(0xfff1)), "mov x1, #0xffffffffffff000e");
   COMPARE(Mvn(w2, Operand(w3)), "mvn w2, w3");
   COMPARE(Mvn(x4, Operand(x5)), "mvn x4, x5");
   COMPARE(Mvn(w6, Operand(w7, LSL, 12)), "mvn w6, w7, lsl #12");
@@ -155,13 +164,13 @@
 TEST(move_immediate) {
   SETUP();
 
-  COMPARE(movz(w0, 0x1234), "movz w0, #0x1234");
-  COMPARE(movz(x1, 0xabcd0000), "movz x1, #0xabcd0000");
-  COMPARE(movz(x2, 0x555500000000), "movz x2, #0x555500000000");
-  COMPARE(movz(x3, 0xaaaa000000000000), "movz x3, #0xaaaa000000000000");
-  COMPARE(movz(x4, 0xabcd, 16), "movz x4, #0xabcd0000");
-  COMPARE(movz(x5, 0x5555, 32), "movz x5, #0x555500000000");
-  COMPARE(movz(x6, 0xaaaa, 48), "movz x6, #0xaaaa000000000000");
+  COMPARE(movz(w0, 0x1234), "mov w0, #0x1234");
+  COMPARE(movz(x1, 0xabcd0000), "mov x1, #0xabcd0000");
+  COMPARE(movz(x2, 0x555500000000), "mov x2, #0x555500000000");
+  COMPARE(movz(x3, 0xaaaa000000000000), "mov x3, #0xaaaa000000000000");
+  COMPARE(movz(x4, 0xabcd, 16), "mov x4, #0xabcd0000");
+  COMPARE(movz(x5, 0x5555, 32), "mov x5, #0x555500000000");
+  COMPARE(movz(x6, 0xaaaa, 48), "mov x6, #0xaaaa000000000000");
 
   COMPARE(movk(w7, 0x1234), "movk w7, #0x1234");
   COMPARE(movk(x8, 0xabcd0000), "movk x8, #0xabcd, lsl #16");
@@ -171,13 +180,13 @@
   COMPARE(movk(x12, 0x5555, 32), "movk x12, #0x5555, lsl #32");
   COMPARE(movk(x13, 0xaaaa, 48), "movk x13, #0xaaaa, lsl #48");
 
-  COMPARE(movn(w14, 0x1234), "movn w14, #0x1234");
-  COMPARE(movn(x15, 0xabcd0000), "movn x15, #0xabcd0000");
-  COMPARE(movn(x16, 0x555500000000), "movn x16, #0x555500000000");
-  COMPARE(movn(x17, 0xaaaa000000000000), "movn x17, #0xaaaa000000000000");
-  COMPARE(movn(w18, 0xabcd, 16), "movn w18, #0xabcd0000");
-  COMPARE(movn(x19, 0x5555, 32), "movn x19, #0x555500000000");
-  COMPARE(movn(x20, 0xaaaa, 48), "movn x20, #0xaaaa000000000000");
+  COMPARE(movn(w14, 0x1234), "mov w14, #0xffffedcb");
+  COMPARE(movn(x15, 0xabcd0000), "mov x15, #0xffffffff5432ffff");
+  COMPARE(movn(x16, 0x555500000000), "mov x16, #0xffffaaaaffffffff");
+  COMPARE(movn(x17, 0xaaaa000000000000), "mov x17, #0x5555ffffffffffff");
+  COMPARE(movn(w18, 0xabcd, 16), "mov w18, #0x5432ffff");
+  COMPARE(movn(x19, 0x5555, 32), "mov x19, #0xffffaaaaffffffff");
+  COMPARE(movn(x20, 0xaaaa, 48), "mov x20, #0x5555ffffffffffff");
 
   COMPARE(movk(w21, 0), "movk w21, #0x0");
   COMPARE(movk(x22, 0, 0), "movk x22, #0x0");
@@ -185,6 +194,10 @@
   COMPARE(movk(x24, 0, 32), "movk x24, #0x0, lsl #32");
   COMPARE(movk(x25, 0, 48), "movk x25, #0x0, lsl #48");
 
+  COMPARE(movz(x26, 0, 48), "movz x26, #0x0");
+  COMPARE(movn(x27, 0, 48), "movn x27, #0x0");
+  COMPARE(movn(w28, 0xffff), "movn w28, #0xffff");
+
   CLEANUP();
 }
 
@@ -194,45 +207,45 @@
 
   // Move instructions expected for certain immediates. This is really a macro
   // assembler test, to ensure it generates immediates efficiently.
-  COMPARE(Mov(w0, 0), "movz w0, #0x0");
-  COMPARE(Mov(w0, 0x0000ffff), "movz w0, #0xffff");
-  COMPARE(Mov(w0, 0x00010000), "movz w0, #0x10000");
-  COMPARE(Mov(w0, 0xffff0000), "movz w0, #0xffff0000");
-  COMPARE(Mov(w0, 0x0001ffff), "movn w0, #0xfffe0000");
-  COMPARE(Mov(w0, 0xffff8000), "movn w0, #0x7fff");
-  COMPARE(Mov(w0, 0xfffffffe), "movn w0, #0x1");
-  COMPARE(Mov(w0, 0xffffffff), "movn w0, #0x0");
+  COMPARE(Mov(w0, 0), "mov w0, #0x0");
+  COMPARE(Mov(w0, 0x0000ffff), "mov w0, #0xffff");
+  COMPARE(Mov(w0, 0x00010000), "mov w0, #0x10000");
+  COMPARE(Mov(w0, 0xffff0000), "mov w0, #0xffff0000");
+  COMPARE(Mov(w0, 0x0001ffff), "mov w0, #0x1ffff");
+  COMPARE(Mov(w0, 0xffff8000), "mov w0, #0xffff8000");
+  COMPARE(Mov(w0, 0xfffffffe), "mov w0, #0xfffffffe");
+  COMPARE(Mov(w0, 0xffffffff), "mov w0, #0xffffffff");
   COMPARE(Mov(w0, 0x00ffff00), "mov w0, #0xffff00");
   COMPARE(Mov(w0, 0xfffe7fff), "mov w0, #0xfffe7fff");
-  COMPARE(Mov(w0, 0xfffeffff), "movn w0, #0x10000");
-  COMPARE(Mov(w0, 0xffff7fff), "movn w0, #0x8000");
+  COMPARE(Mov(w0, 0xfffeffff), "mov w0, #0xfffeffff");
+  COMPARE(Mov(w0, 0xffff7fff), "mov w0, #0xffff7fff");
 
-  COMPARE(Mov(x0, 0), "movz x0, #0x0");
-  COMPARE(Mov(x0, 0x0000ffff), "movz x0, #0xffff");
-  COMPARE(Mov(x0, 0x00010000), "movz x0, #0x10000");
-  COMPARE(Mov(x0, 0xffff0000), "movz x0, #0xffff0000");
+  COMPARE(Mov(x0, 0), "mov x0, #0x0");
+  COMPARE(Mov(x0, 0x0000ffff), "mov x0, #0xffff");
+  COMPARE(Mov(x0, 0x00010000), "mov x0, #0x10000");
+  COMPARE(Mov(x0, 0xffff0000), "mov x0, #0xffff0000");
   COMPARE(Mov(x0, 0x0001ffff), "mov x0, #0x1ffff");
   COMPARE(Mov(x0, 0xffff8000), "mov x0, #0xffff8000");
   COMPARE(Mov(x0, 0xfffffffe), "mov x0, #0xfffffffe");
   COMPARE(Mov(x0, 0xffffffff), "mov x0, #0xffffffff");
   COMPARE(Mov(x0, 0x00ffff00), "mov x0, #0xffff00");
-  COMPARE(Mov(x0, 0xffff000000000000), "movz x0, #0xffff000000000000");
-  COMPARE(Mov(x0, 0x0000ffff00000000), "movz x0, #0xffff00000000");
-  COMPARE(Mov(x0, 0x00000000ffff0000), "movz x0, #0xffff0000");
-  COMPARE(Mov(x0, 0xffffffffffff0000), "movn x0, #0xffff");
-  COMPARE(Mov(x0, 0xffffffff0000ffff), "movn x0, #0xffff0000");
-  COMPARE(Mov(x0, 0xffff0000ffffffff), "movn x0, #0xffff00000000");
-  COMPARE(Mov(x0, 0x0000ffffffffffff), "movn x0, #0xffff000000000000");
+  COMPARE(Mov(x0, 0xffff000000000000), "mov x0, #0xffff000000000000");
+  COMPARE(Mov(x0, 0x0000ffff00000000), "mov x0, #0xffff00000000");
+  COMPARE(Mov(x0, 0x00000000ffff0000), "mov x0, #0xffff0000");
+  COMPARE(Mov(x0, 0xffffffffffff0000), "mov x0, #0xffffffffffff0000");
+  COMPARE(Mov(x0, 0xffffffff0000ffff), "mov x0, #0xffffffff0000ffff");
+  COMPARE(Mov(x0, 0xffff0000ffffffff), "mov x0, #0xffff0000ffffffff");
+  COMPARE(Mov(x0, 0x0000ffffffffffff), "mov x0, #0xffffffffffff");
   COMPARE(Mov(x0, 0xfffe7fffffffffff), "mov x0, #0xfffe7fffffffffff");
-  COMPARE(Mov(x0, 0xfffeffffffffffff), "movn x0, #0x1000000000000");
-  COMPARE(Mov(x0, 0xffff7fffffffffff), "movn x0, #0x800000000000");
+  COMPARE(Mov(x0, 0xfffeffffffffffff), "mov x0, #0xfffeffffffffffff");
+  COMPARE(Mov(x0, 0xffff7fffffffffff), "mov x0, #0xffff7fffffffffff");
   COMPARE(Mov(x0, 0xfffffffe7fffffff), "mov x0, #0xfffffffe7fffffff");
-  COMPARE(Mov(x0, 0xfffffffeffffffff), "movn x0, #0x100000000");
-  COMPARE(Mov(x0, 0xffffffff7fffffff), "movn x0, #0x80000000");
+  COMPARE(Mov(x0, 0xfffffffeffffffff), "mov x0, #0xfffffffeffffffff");
+  COMPARE(Mov(x0, 0xffffffff7fffffff), "mov x0, #0xffffffff7fffffff");
   COMPARE(Mov(x0, 0xfffffffffffe7fff), "mov x0, #0xfffffffffffe7fff");
-  COMPARE(Mov(x0, 0xfffffffffffeffff), "movn x0, #0x10000");
-  COMPARE(Mov(x0, 0xffffffffffff7fff), "movn x0, #0x8000");
-  COMPARE(Mov(x0, 0xffffffffffffffff), "movn x0, #0x0");
+  COMPARE(Mov(x0, 0xfffffffffffeffff), "mov x0, #0xfffffffffffeffff");
+  COMPARE(Mov(x0, 0xffffffffffff7fff), "mov x0, #0xffffffffffff7fff");
+  COMPARE(Mov(x0, 0xffffffffffffffff), "mov x0, #0xffffffffffffffff");
 
   COMPARE(Movk(w0, 0x1234, 0), "movk w0, #0x1234");
   COMPARE(Movk(x1, 0x2345, 0), "movk x1, #0x2345");
@@ -795,12 +808,12 @@
   SETUP();
 
   COMPARE_PREFIX(adrp(x0, 0), "adrp x0, #+0x0");
-  COMPARE_PREFIX(adrp(x1, 1), "adrp x1, #+0x1");
-  COMPARE_PREFIX(adrp(x2, -1), "adrp x2, #-0x1");
-  COMPARE_PREFIX(adrp(x3, 4), "adrp x3, #+0x4");
-  COMPARE_PREFIX(adrp(x4, -4), "adrp x4, #-0x4");
-  COMPARE_PREFIX(adrp(x5, 0x000fffff), "adrp x5, #+0xfffff");
-  COMPARE_PREFIX(adrp(x6, -0x00100000), "adrp x6, #-0x100000");
+  COMPARE_PREFIX(adrp(x1, 1), "adrp x1, #+0x1000");
+  COMPARE_PREFIX(adrp(x2, -1), "adrp x2, #-0x1000");
+  COMPARE_PREFIX(adrp(x3, 4), "adrp x3, #+0x4000");
+  COMPARE_PREFIX(adrp(x4, -4), "adrp x4, #-0x4000");
+  COMPARE_PREFIX(adrp(x5, 0x000fffff), "adrp x5, #+0xfffff000");
+  COMPARE_PREFIX(adrp(x6, -0x00100000), "adrp x6, #-0x100000000");
   COMPARE_PREFIX(adrp(xzr, 0), "adrp xzr, #+0x0");
 
   CLEANUP();
@@ -1498,14 +1511,305 @@
 }
 
 
-TEST(load_literal) {
+TEST(load_literal_macro) {
   SETUP_CLASS(MacroAssembler);
 
-  COMPARE_PREFIX(Ldr(x10, 0x1234567890abcdef),  "ldr x10, pc+0");
-  COMPARE_PREFIX(Ldr(w20, 0xfedcba09),  "ldr w20, pc+0");
-  COMPARE_PREFIX(Ldr(d11, 1.234),  "ldr d11, pc+0");
-  COMPARE_PREFIX(Ldr(s22, 2.5f),  "ldr s22, pc+0");
-  COMPARE_PREFIX(Ldrsw(x21, 0x80000000), "ldrsw x21, pc+0");
+  // In each case, the literal will be placed at PC+8:
+  //    ldr   x10, pc+8               // Test instruction.
+  //    ldr   xzr, pc+12              // Pool marker.
+  //    .word64 #0x1234567890abcdef   // Test literal.
+
+  COMPARE_PREFIX(Ldr(x10, 0x1234567890abcdef),  "ldr x10, pc+8");
+  COMPARE_PREFIX(Ldr(w20, 0xfedcba09),  "ldr w20, pc+8");
+  COMPARE_PREFIX(Ldr(d11, 1.234),  "ldr d11, pc+8");
+  COMPARE_PREFIX(Ldr(s22, 2.5f),  "ldr s22, pc+8");
+  COMPARE_PREFIX(Ldrsw(x21, 0x80000000), "ldrsw x21, pc+8");
+
+  CLEANUP();
+}
+
+
+TEST(load_literal) {
+  SETUP();
+
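+  // The raw-assembler offset is a word (4-byte) count, so an offset of 1 is
+  // disassembled as pc+4.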
+  COMPARE_PREFIX(ldr(x20, 0), "ldr x20, pc+0");
+  COMPARE_PREFIX(ldr(x20, 1), "ldr x20, pc+4");
+  COMPARE_PREFIX(ldr(x20, -1), "ldr x20, pc-4");
+  COMPARE_PREFIX(ldr(x20, 0x3ffff), "ldr x20, pc+1048572");
+  COMPARE_PREFIX(ldr(x20, -0x40000), "ldr x20, pc-1048576");
+  COMPARE_PREFIX(ldr(w21, 0), "ldr w21, pc+0");
+  COMPARE_PREFIX(ldr(w21, 1), "ldr w21, pc+4");
+  COMPARE_PREFIX(ldr(w21, -1), "ldr w21, pc-4");
+  COMPARE_PREFIX(ldr(w21, 0x3ffff), "ldr w21, pc+1048572");
+  COMPARE_PREFIX(ldr(w21, -0x40000), "ldr w21, pc-1048576");
+  COMPARE_PREFIX(ldr(d22, 0), "ldr d22, pc+0");
+  COMPARE_PREFIX(ldr(d22, 1), "ldr d22, pc+4");
+  COMPARE_PREFIX(ldr(d22, -1), "ldr d22, pc-4");
+  COMPARE_PREFIX(ldr(d22, 0x3ffff), "ldr d22, pc+1048572");
+  COMPARE_PREFIX(ldr(d22, -0x40000), "ldr d22, pc-1048576");
+  COMPARE_PREFIX(ldr(s23, 0), "ldr s23, pc+0");
+  COMPARE_PREFIX(ldr(s23, 1), "ldr s23, pc+4");
+  COMPARE_PREFIX(ldr(s23, -1), "ldr s23, pc-4");
+  COMPARE_PREFIX(ldr(s23, 0x3ffff), "ldr s23, pc+1048572");
+  COMPARE_PREFIX(ldr(s23, -0x40000), "ldr s23, pc-1048576");
+  COMPARE_PREFIX(ldrsw(x24, 0), "ldrsw x24, pc+0");
+  COMPARE_PREFIX(ldrsw(x24, 1), "ldrsw x24, pc+4");
+  COMPARE_PREFIX(ldrsw(x24, -1), "ldrsw x24, pc-4");
+  COMPARE_PREFIX(ldrsw(x24, 0x3ffff), "ldrsw x24, pc+1048572");
+  COMPARE_PREFIX(ldrsw(x24, -0x40000), "ldrsw x24, pc-1048576");
+
+  CLEANUP();
+}
+
+
+TEST(prfm_operations) {
+  SETUP();
+
+  // Test every encodable prefetch operation.
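+  // Operations without a named alias are disassembled as raw '#0b.....'
+  // immediates.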
+  const char* expected[] = {
+    "prfm pldl1keep, ",
+    "prfm pldl1strm, ",
+    "prfm pldl2keep, ",
+    "prfm pldl2strm, ",
+    "prfm pldl3keep, ",
+    "prfm pldl3strm, ",
+    "prfm #0b00110, ",
+    "prfm #0b00111, ",
+    "prfm plil1keep, ",
+    "prfm plil1strm, ",
+    "prfm plil2keep, ",
+    "prfm plil2strm, ",
+    "prfm plil3keep, ",
+    "prfm plil3strm, ",
+    "prfm #0b01110, ",
+    "prfm #0b01111, ",
+    "prfm pstl1keep, ",
+    "prfm pstl1strm, ",
+    "prfm pstl2keep, ",
+    "prfm pstl2strm, ",
+    "prfm pstl3keep, ",
+    "prfm pstl3strm, ",
+    "prfm #0b10110, ",
+    "prfm #0b10111, ",
+    "prfm #0b11000, ",
+    "prfm #0b11001, ",
+    "prfm #0b11010, ",
+    "prfm #0b11011, ",
+    "prfm #0b11100, ",
+    "prfm #0b11101, ",
+    "prfm #0b11110, ",
+    "prfm #0b11111, ",
+  };
+  const int expected_count = sizeof(expected) / sizeof(expected[0]);
+  VIXL_STATIC_ASSERT((1 << ImmPrefetchOperation_width) == expected_count);
+
+  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
+    PrefetchOperation op = static_cast<PrefetchOperation>(i);
+    COMPARE_PREFIX(prfm(op, 0), expected[i]);
+    COMPARE_PREFIX(prfm(op, MemOperand(x0, 0)), expected[i]);
+    COMPARE_PREFIX(prfm(op, MemOperand(x0, x1)), expected[i]);
+  }
+
+  CLEANUP();
+}
+
+
+TEST(prfum_operations) {
+  SETUP();
+
+  // Test every encodable prefetch operation.
+  const char* expected[] = {
+    "prfum pldl1keep, ",
+    "prfum pldl1strm, ",
+    "prfum pldl2keep, ",
+    "prfum pldl2strm, ",
+    "prfum pldl3keep, ",
+    "prfum pldl3strm, ",
+    "prfum #0b00110, ",
+    "prfum #0b00111, ",
+    "prfum plil1keep, ",
+    "prfum plil1strm, ",
+    "prfum plil2keep, ",
+    "prfum plil2strm, ",
+    "prfum plil3keep, ",
+    "prfum plil3strm, ",
+    "prfum #0b01110, ",
+    "prfum #0b01111, ",
+    "prfum pstl1keep, ",
+    "prfum pstl1strm, ",
+    "prfum pstl2keep, ",
+    "prfum pstl2strm, ",
+    "prfum pstl3keep, ",
+    "prfum pstl3strm, ",
+    "prfum #0b10110, ",
+    "prfum #0b10111, ",
+    "prfum #0b11000, ",
+    "prfum #0b11001, ",
+    "prfum #0b11010, ",
+    "prfum #0b11011, ",
+    "prfum #0b11100, ",
+    "prfum #0b11101, ",
+    "prfum #0b11110, ",
+    "prfum #0b11111, ",
+  };
+  const int expected_count = sizeof(expected) / sizeof(expected[0]);
+  VIXL_STATIC_ASSERT((1 << ImmPrefetchOperation_width) == expected_count);
+
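+  // Unlike prfm, prfum only takes a base register with an unscaled immediate
+  // offset, so only that addressing form is tested.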
+  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
+    PrefetchOperation op = static_cast<PrefetchOperation>(i);
+    COMPARE_PREFIX(prfum(op, MemOperand(x0, 0)), expected[i]);
+  }
+
+  CLEANUP();
+}
+
+
+TEST(prfm_offset) {
+  SETUP();
+
+  COMPARE(prfm(PLDL1KEEP, MemOperand(x1)), "prfm pldl1keep, [x1]");
+  COMPARE(prfm(PLDL1STRM, MemOperand(x3, 8)), "prfm pldl1strm, [x3, #8]");
+  COMPARE(prfm(PLDL2KEEP, MemOperand(x5, 32760)),
+          "prfm pldl2keep, [x5, #32760]");
+
+  COMPARE(prfm(PLDL2STRM, MemOperand(sp)), "prfm pldl2strm, [sp]");
+  COMPARE(prfm(PLDL3KEEP, MemOperand(sp, 8)), "prfm pldl3keep, [sp, #8]");
+  COMPARE(prfm(PLDL3STRM, MemOperand(sp, 32760)),
+          "prfm pldl3strm, [sp, #32760]");
+
+  CLEANUP();
+}
+
+
+TEST(prfm_regoffset) {
+  SETUP();
+
+  COMPARE(prfm(PLIL1KEEP, MemOperand(x1, x2)), "prfm plil1keep, [x1, x2]");
+  COMPARE(prfm(PLIL1STRM, MemOperand(x3, w4, SXTW)),
+          "prfm plil1strm, [x3, w4, sxtw]");
+  COMPARE(prfm(PLIL2KEEP, MemOperand(x5, x6, LSL, 3)),
+          "prfm plil2keep, [x5, x6, lsl #3]");
+
+  COMPARE(prfm(PLIL2STRM, MemOperand(sp, xzr)), "prfm plil2strm, [sp, xzr]");
+  COMPARE(prfm(PLIL3KEEP, MemOperand(sp, wzr, SXTW)),
+          "prfm plil3keep, [sp, wzr, sxtw]");
+  COMPARE(prfm(PLIL3STRM, MemOperand(sp, xzr, LSL, 3)),
+          "prfm plil3strm, [sp, xzr, lsl #3]");
+
+  CLEANUP();
+}
+
+
+TEST(prfm_literal) {
+  SETUP();
+
+  COMPARE_PREFIX(prfm(PSTL1KEEP, 0), "prfm pstl1keep, pc+0");
+  COMPARE_PREFIX(prfm(PSTL1STRM, 1), "prfm pstl1strm, pc+4");
+  COMPARE_PREFIX(prfm(PSTL2KEEP, -1), "prfm pstl2keep, pc-4");
+  COMPARE_PREFIX(prfm(PSTL2STRM, 0x3ffff), "prfm pstl2strm, pc+1048572");
+  COMPARE_PREFIX(prfm(PSTL3KEEP, -0x3ffff), "prfm pstl3keep, pc-1048572");
+  COMPARE_PREFIX(prfm(PSTL3STRM, -0x40000), "prfm pstl3strm, pc-1048576");
+
+  CLEANUP();
+}
+
+
+TEST(prfm_unscaled) {
+  SETUP();
+
+  // If an unscaled-offset instruction is requested, it is used, even if the
+  // offset could be encoded in a scaled-offset instruction.
+  COMPARE(prfum(PLDL1KEEP, MemOperand(x1)), "prfum pldl1keep, [x1]");
+  COMPARE(prfum(PLDL1STRM, MemOperand(x1, 8)), "prfum pldl1strm, [x1, #8]");
+  COMPARE(prfum(PLDL2KEEP, MemOperand(x1, 248)), "prfum pldl2keep, [x1, #248]");
+
+  // Normal offsets are converted to unscaled offsets if necessary.
+  COMPARE(prfm(PLDL2STRM, MemOperand(x1, 1)), "prfum pldl2strm, [x1, #1]");
+  COMPARE(prfm(PLDL3KEEP, MemOperand(x1, -1)), "prfum pldl3keep, [x1, #-1]");
+  COMPARE(prfm(PLDL3STRM, MemOperand(x1, 255)), "prfum pldl3strm, [x1, #255]");
+  COMPARE(prfm(PLDL3STRM, MemOperand(x1, -256)),
+          "prfum pldl3strm, [x1, #-256]");
+
+  CLEANUP();
+}
+
+
+TEST(prfm_unscaled_option) {
+  SETUP();
+
+  // Just like prfm_unscaled, but specify the scaling option explicitly.
+
+  // Require unscaled-offset forms.
+  LoadStoreScalingOption option = RequireUnscaledOffset;
+
+  COMPARE(prfum(PLDL1KEEP, MemOperand(x1), option), "prfum pldl1keep, [x1]");
+  COMPARE(prfum(PLDL1STRM, MemOperand(x1, 8), option),
+          "prfum pldl1strm, [x1, #8]");
+  COMPARE(prfum(PLDL2KEEP, MemOperand(x1, 248), option),
+          "prfum pldl2keep, [x1, #248]");
+  COMPARE(prfum(PLDL2STRM, MemOperand(x1, 1), option),
+          "prfum pldl2strm, [x1, #1]");
+  COMPARE(prfum(PLDL3KEEP, MemOperand(x1, -1), option),
+          "prfum pldl3keep, [x1, #-1]");
+  COMPARE(prfum(PLDL3STRM, MemOperand(x1, 255), option),
+          "prfum pldl3strm, [x1, #255]");
+  COMPARE(prfum(PLIL1KEEP, MemOperand(x1, -256), option),
+          "prfum plil1keep, [x1, #-256]");
+
+  // Require scaled-offset forms.
+  option = RequireScaledOffset;
+
+  COMPARE(prfm(PLDL1KEEP, MemOperand(x1), option), "prfm pldl1keep, [x1]");
+  COMPARE(prfm(PLDL1STRM, MemOperand(x1, 8), option),
+          "prfm pldl1strm, [x1, #8]");
+  COMPARE(prfm(PLDL2KEEP, MemOperand(x1, 248), option),
+          "prfm pldl2keep, [x1, #248]");
+  COMPARE(prfm(PLIL2STRM, MemOperand(x1, 256), option),
+          "prfm plil2strm, [x1, #256]");
+  COMPARE(prfm(PLIL3KEEP, MemOperand(x1, 32760), option),
+          "prfm plil3keep, [x1, #32760]");
+
+  // Prefer unscaled-offset forms, but allow scaled-offset forms if necessary.
+  option = PreferUnscaledOffset;
+
+  COMPARE(prfum(PLDL1KEEP, MemOperand(x1), option), "prfum pldl1keep, [x1]");
+  COMPARE(prfum(PLDL1STRM, MemOperand(x1, 8), option),
+          "prfum pldl1strm, [x1, #8]");
+  COMPARE(prfum(PLDL2KEEP, MemOperand(x1, 248), option),
+          "prfum pldl2keep, [x1, #248]");
+  COMPARE(prfum(PLDL2STRM, MemOperand(x1, 1), option),
+          "prfum pldl2strm, [x1, #1]");
+  COMPARE(prfum(PLDL3KEEP, MemOperand(x1, -1), option),
+          "prfum pldl3keep, [x1, #-1]");
+  COMPARE(prfum(PLDL3STRM, MemOperand(x1, 255), option),
+          "prfum pldl3strm, [x1, #255]");
+  COMPARE(prfum(PLIL1KEEP, MemOperand(x1, -256), option),
+          "prfum plil1keep, [x1, #-256]");
+  COMPARE(prfum(PLIL1STRM, MemOperand(x1, 256), option),
+          "prfm plil1strm, [x1, #256]");
+  COMPARE(prfum(PLIL2KEEP, MemOperand(x1, 32760), option),
+          "prfm plil2keep, [x1, #32760]");
+
+  // Prefer scaled-offset forms, but allow unscaled-offset forms if necessary.
+  option = PreferScaledOffset;
+
+  COMPARE(prfm(PLDL1KEEP, MemOperand(x1), option), "prfm pldl1keep, [x1]");
+  COMPARE(prfm(PLDL1STRM, MemOperand(x1, 8), option),
+          "prfm pldl1strm, [x1, #8]");
+  COMPARE(prfm(PLDL2KEEP, MemOperand(x1, 248), option),
+          "prfm pldl2keep, [x1, #248]");
+  COMPARE(prfm(PLDL2STRM, MemOperand(x1, 1), option),
+          "prfum pldl2strm, [x1, #1]");
+  COMPARE(prfm(PLDL3KEEP, MemOperand(x1, -1), option),
+          "prfum pldl3keep, [x1, #-1]");
+  COMPARE(prfm(PLDL3STRM, MemOperand(x1, 255), option),
+          "prfum pldl3strm, [x1, #255]");
+  COMPARE(prfm(PLIL1KEEP, MemOperand(x1, -256), option),
+          "prfum plil1keep, [x1, #-256]");
+  COMPARE(prfm(PLIL1STRM, MemOperand(x1, 256), option),
+          "prfm plil1strm, [x1, #256]");
+  COMPARE(prfm(PLIL2KEEP, MemOperand(x1, 32760), option),
+          "prfm plil2keep, [x1, #32760]");
 
   CLEANUP();
 }
@@ -1635,10 +1939,22 @@
   COMPARE(frinta(s31, s30), "frinta s31, s30");
   COMPARE(frinta(d12, d13), "frinta d12, d13");
   COMPARE(frinta(d31, d30), "frinta d31, d30");
+  COMPARE(frinti(s10, s11), "frinti s10, s11");
+  COMPARE(frinti(s31, s30), "frinti s31, s30");
+  COMPARE(frinti(d12, d13), "frinti d12, d13");
+  COMPARE(frinti(d31, d30), "frinti d31, d30");
+  COMPARE(frintm(s10, s11), "frintm s10, s11");
+  COMPARE(frintm(s31, s30), "frintm s31, s30");
+  COMPARE(frintm(d12, d13), "frintm d12, d13");
+  COMPARE(frintm(d31, d30), "frintm d31, d30");
   COMPARE(frintn(s10, s11), "frintn s10, s11");
   COMPARE(frintn(s31, s30), "frintn s31, s30");
   COMPARE(frintn(d12, d13), "frintn d12, d13");
   COMPARE(frintn(d31, d30), "frintn d31, d30");
+  COMPARE(frintx(s10, s11), "frintx s10, s11");
+  COMPARE(frintx(s31, s30), "frintx s31, s30");
+  COMPARE(frintx(d12, d13), "frintx d12, d13");
+  COMPARE(frintx(d31, d30), "frintx d31, d30");
   COMPARE(frintz(s10, s11), "frintz s10, s11");
   COMPARE(frintz(s31, s30), "frintz s31, s30");
   COMPARE(frintz(d12, d13), "frintz d12, d13");
@@ -1942,31 +2258,31 @@
 TEST(logical_immediate_move) {
   SETUP_CLASS(MacroAssembler);
 
-  COMPARE(And(w0, w1, 0), "movz w0, #0x0");
-  COMPARE(And(x0, x1, 0), "movz x0, #0x0");
+  COMPARE(And(w0, w1, 0), "mov w0, #0x0");
+  COMPARE(And(x0, x1, 0), "mov x0, #0x0");
   COMPARE(Orr(w2, w3, 0), "mov w2, w3");
   COMPARE(Orr(x2, x3, 0), "mov x2, x3");
   COMPARE(Eor(w4, w5, 0), "mov w4, w5");
   COMPARE(Eor(x4, x5, 0), "mov x4, x5");
   COMPARE(Bic(w6, w7, 0), "mov w6, w7");
   COMPARE(Bic(x6, x7, 0), "mov x6, x7");
-  COMPARE(Orn(w8, w9, 0), "movn w8, #0x0");
-  COMPARE(Orn(x8, x9, 0), "movn x8, #0x0");
+  COMPARE(Orn(w8, w9, 0), "mov w8, #0xffffffff");
+  COMPARE(Orn(x8, x9, 0), "mov x8, #0xffffffffffffffff");
   COMPARE(Eon(w10, w11, 0), "mvn w10, w11");
   COMPARE(Eon(x10, x11, 0), "mvn x10, x11");
 
   COMPARE(And(w12, w13, 0xffffffff), "mov w12, w13");
   COMPARE(And(x12, x13, 0xffffffff), "and x12, x13, #0xffffffff");
   COMPARE(And(x12, x13, 0xffffffffffffffff), "mov x12, x13");
-  COMPARE(Orr(w14, w15, 0xffffffff), "movn w14, #0x0");
+  COMPARE(Orr(w14, w15, 0xffffffff), "mov w14, #0xffffffff");
   COMPARE(Orr(x14, x15, 0xffffffff), "orr x14, x15, #0xffffffff");
-  COMPARE(Orr(x14, x15, 0xffffffffffffffff), "movn x14, #0x0");
+  COMPARE(Orr(x14, x15, 0xffffffffffffffff), "mov x14, #0xffffffffffffffff");
   COMPARE(Eor(w16, w17, 0xffffffff), "mvn w16, w17");
   COMPARE(Eor(x16, x17, 0xffffffff), "eor x16, x17, #0xffffffff");
   COMPARE(Eor(x16, x17, 0xffffffffffffffff), "mvn x16, x17");
-  COMPARE(Bic(w18, w19, 0xffffffff), "movz w18, #0x0");
+  COMPARE(Bic(w18, w19, 0xffffffff), "mov w18, #0x0");
   COMPARE(Bic(x18, x19, 0xffffffff), "and x18, x19, #0xffffffff00000000");
-  COMPARE(Bic(x18, x19, 0xffffffffffffffff), "movz x18, #0x0");
+  COMPARE(Bic(x18, x19, 0xffffffffffffffff), "mov x18, #0x0");
   COMPARE(Orn(w20, w21, 0xffffffff), "mov w20, w21");
   COMPARE(Orn(x20, x21, 0xffffffff), "orr x20, x21, #0xffffffff00000000");
   COMPARE(Orn(x20, x21, 0xffffffffffffffff), "mov x20, x21");
@@ -2030,4 +2346,79 @@
 
   CLEANUP();
 }
+
+
+TEST(address_map) {
+  // Check that we can disassemble from a fake base address.
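+  // PC-relative operands are annotated with their absolute target, shown as
+  // '(addr ...)'.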
+  SETUP();
+
+  disasm->MapCodeAddress(0, reinterpret_cast<Instruction*>(buf));
+  COMPARE(ldr(x0, 0), "ldr x0, pc+0 (addr 0x0)");
+  COMPARE(ldr(x0, -1), "ldr x0, pc-4 (addr -0x4)");
+  COMPARE(ldr(x0, 1), "ldr x0, pc+4 (addr 0x4)");
+  COMPARE(prfm(PLIL1KEEP, 0), "prfm plil1keep, pc+0 (addr 0x0)");
+  COMPARE(prfm(PLIL1KEEP, -1), "prfm plil1keep, pc-4 (addr -0x4)");
+  COMPARE(prfm(PLIL1KEEP, 1), "prfm plil1keep, pc+4 (addr 0x4)");
+  COMPARE(adr(x0, 0), "adr x0, #+0x0 (addr 0x0)");
+  COMPARE(adr(x0, -1), "adr x0, #-0x1 (addr -0x1)");
+  COMPARE(adr(x0, 1), "adr x0, #+0x1 (addr 0x1)");
+  COMPARE(adrp(x0, 0), "adrp x0, #+0x0 (addr 0x0)");
+  COMPARE(adrp(x0, -1), "adrp x0, #-0x1000 (addr -0x1000)");
+  COMPARE(adrp(x0, 1), "adrp x0, #+0x1000 (addr 0x1000)");
+  COMPARE(b(0), "b #+0x0 (addr 0x0)");
+  COMPARE(b(-1), "b #-0x4 (addr -0x4)");
+  COMPARE(b(1), "b #+0x4 (addr 0x4)");
+
+  disasm->MapCodeAddress(0x1234, reinterpret_cast<Instruction*>(buf));
+  COMPARE(ldr(x0, 0), "ldr x0, pc+0 (addr 0x1234)");
+  COMPARE(ldr(x0, -1), "ldr x0, pc-4 (addr 0x1230)");
+  COMPARE(ldr(x0, 1), "ldr x0, pc+4 (addr 0x1238)");
+  COMPARE(prfm(PLIL1KEEP, 0), "prfm plil1keep, pc+0 (addr 0x1234)");
+  COMPARE(prfm(PLIL1KEEP, -1), "prfm plil1keep, pc-4 (addr 0x1230)");
+  COMPARE(prfm(PLIL1KEEP, 1), "prfm plil1keep, pc+4 (addr 0x1238)");
+  COMPARE(adr(x0, 0), "adr x0, #+0x0 (addr 0x1234)");
+  COMPARE(adr(x0, -1), "adr x0, #-0x1 (addr 0x1233)");
+  COMPARE(adr(x0, 1), "adr x0, #+0x1 (addr 0x1235)");
+  COMPARE(adrp(x0, 0), "adrp x0, #+0x0 (addr 0x1000)");
+  COMPARE(adrp(x0, -1), "adrp x0, #-0x1000 (addr 0x0)");
+  COMPARE(adrp(x0, 1), "adrp x0, #+0x1000 (addr 0x2000)");
+  COMPARE(b(0), "b #+0x0 (addr 0x1234)");
+  COMPARE(b(-1), "b #-0x4 (addr 0x1230)");
+  COMPARE(b(1), "b #+0x4 (addr 0x1238)");
+
+  // Check that 64-bit addresses work.
+  disasm->MapCodeAddress(UINT64_C(0x100000000),
+                         reinterpret_cast<Instruction*>(buf));
+  COMPARE(ldr(x0, 0), "ldr x0, pc+0 (addr 0x100000000)");
+  COMPARE(ldr(x0, -1), "ldr x0, pc-4 (addr 0xfffffffc)");
+  COMPARE(ldr(x0, 1), "ldr x0, pc+4 (addr 0x100000004)");
+  COMPARE(prfm(PLIL1KEEP, 0), "prfm plil1keep, pc+0 (addr 0x100000000)");
+  COMPARE(prfm(PLIL1KEEP, -1), "prfm plil1keep, pc-4 (addr 0xfffffffc)");
+  COMPARE(prfm(PLIL1KEEP, 1), "prfm plil1keep, pc+4 (addr 0x100000004)");
+  COMPARE(adr(x0, 0), "adr x0, #+0x0 (addr 0x100000000)");
+  COMPARE(adr(x0, -1), "adr x0, #-0x1 (addr 0xffffffff)");
+  COMPARE(adr(x0, 1), "adr x0, #+0x1 (addr 0x100000001)");
+  COMPARE(adrp(x0, 0), "adrp x0, #+0x0 (addr 0x100000000)");
+  COMPARE(adrp(x0, -1), "adrp x0, #-0x1000 (addr 0xfffff000)");
+  COMPARE(adrp(x0, 1), "adrp x0, #+0x1000 (addr 0x100001000)");
+  COMPARE(b(0), "b #+0x0 (addr 0x100000000)");
+  COMPARE(b(-1), "b #-0x4 (addr 0xfffffffc)");
+  COMPARE(b(1), "b #+0x4 (addr 0x100000004)");
+
+  disasm->MapCodeAddress(0xfffffffc, reinterpret_cast<Instruction*>(buf));
+  COMPARE(ldr(x0, 1), "ldr x0, pc+4 (addr 0x100000000)");
+  COMPARE(prfm(PLIL1KEEP, 1), "prfm plil1keep, pc+4 (addr 0x100000000)");
+  COMPARE(b(1), "b #+0x4 (addr 0x100000000)");
+  COMPARE(adr(x0, 4), "adr x0, #+0x4 (addr 0x100000000)");
+  COMPARE(adrp(x0, 1), "adrp x0, #+0x1000 (addr 0x100000000)");
+
+  // Check that very large offsets are handled properly. This detects misuse of
+  // the host's ptrdiff_t type when run on a 32-bit host. Only adrp is capable
+  // of encoding such offsets.
+  disasm->MapCodeAddress(0, reinterpret_cast<Instruction*>(buf));
+  COMPARE(adrp(x0, 0x000fffff), "adrp x0, #+0xfffff000 (addr 0xfffff000)");
+  COMPARE(adrp(x0, -0x00100000), "adrp x0, #-0x100000000 (addr -0x100000000)");
+
+  CLEANUP();
+}
 }  // namespace vixl
diff --git a/test/test-fuzz-a64.cc b/test/test-fuzz-a64.cc
index 0a35e86..e69c4a5 100644
--- a/test/test-fuzz-a64.cc
+++ b/test/test-fuzz-a64.cc
@@ -25,7 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdlib.h>
-#include "cctest.h"
+#include "test-runner.h"
 
 #include "a64/decoder-a64.h"
 #include "a64/disasm-a64.h"
@@ -76,13 +76,12 @@
 
 #if 0
 // These tests are commented out as they take a long time to run, causing the
-// test script to timeout. After enabling them, they are best run individually
-// using cctest:
+// test script to time out. After enabling them, they are best run manually:
 //
-//     cctest_sim FUZZ_decoder_pedantic
-//     cctest_sim FUZZ_disasm_pedantic
+//     test-runner_sim FUZZ_decoder_pedantic
+//     test-runner_sim FUZZ_disasm_pedantic
 //
-// or cctest_sim_g for debug builds.
+// or test-runner_sim_g for debug builds.
 
 TEST(decoder_pedantic) {
   // Test the entire instruction space.
diff --git a/test/test-runner.cc b/test/test-runner.cc
new file mode 100644
index 0000000..ba884ab
--- /dev/null
+++ b/test/test-runner.cc
@@ -0,0 +1,210 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "test-runner.h"
+
+// Initialize the list as empty.
+vixl::Test* vixl::Test::first_ = NULL;
+vixl::Test* vixl::Test::last_ = NULL;
+
+// No debugger to start with.
+bool vixl::Test::debug_ = false;
+
+// No tracing to start with.
+bool vixl::Test::trace_sim_ = false;
+bool vixl::Test::trace_reg_ = false;
+bool vixl::Test::trace_write_ = false;
+
+// No colour highlight by default.
+bool vixl::Test::coloured_trace_ = false;
+
+// No instruction statistics by default.
+bool vixl::Test::instruction_stats_ = false;
+
+// Don't generate simulator test traces by default.
+bool vixl::Test::sim_test_trace_ = false;
+
+// Instantiate a Test and append it to the linked list.
+vixl::Test::Test(const char* name, TestFunction* callback)
+  : name_(name), callback_(callback), next_(NULL) {
+  // Append this test to the linked list.
+  if (first_ == NULL) {
+    VIXL_ASSERT(last_ == NULL);
+    first_ = this;
+  } else {
+    last_->next_ = this;
+  }
+  last_ = this;
+}
+
+
+// Look for 'search' in the arguments.
+static bool IsInArgs(const char* search, int argc, char* argv[]) {
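+  // Skip argv[0], which holds the executable name.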
+  for (int i = 1; i < argc; i++) {
+    if (strcmp(search, argv[i]) == 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+
+static bool IsOption(const char* arg) {
+  // Any argument like "--option" is an option.
+  return ((arg[0] == '-') && (arg[1] == '-'));
+}
+
+
+static void NormalizeOption(char* arg) {
+  // Replace all '_' characters in options with '-'. This allows --trace_sim
+  // and --trace-sim to be handled in the same way, for example.
+  VIXL_ASSERT(IsOption(arg));
+  for (char* c = arg; *c != '\0'; c++) {
+    if (*c == '_') {
+      *c = '-';
+    }
+  }
+}
+
+
+static void PrintHelpMessage() {
+  printf("Usage:  ./test [options] [test names]\n"
+      "Run all tests specified on the command line.\n"
+      "--help              Print this help message.\n"
+      "--list              List all available tests.\n"
+      "--run_all           Run all available tests.\n"
+      "--debugger          Run in the debugger.\n"
+      "--trace_all         Enable all trace options, plus --coloured_trace.\n"
+      "--trace_sim         Generate a trace of simulated instructions, as\n"
+      "                    well as disassembly from the DISASM tests.\n"
+      "--trace_reg         Generate a trace of simulated registers.\n"
+      "--trace_write       Generate a trace of memory writes.\n"
+      "--coloured_trace    Generate coloured trace.\n"
+      "--instruction_stats Log instruction statistics to vixl_stats.csv.\n"
+      "--sim_test_trace    Print result traces for SIM_* tests.\n");
+}
+
+int main(int argc, char* argv[]) {
+  // Parse the arguments. Option flags must appear first, followed by an
+  // optional list of tests to run.
+
+  int test_specifiers = 0;
+  for (int i = 1; i < argc; i++) {
+    if (IsOption(argv[i])) {
+      NormalizeOption(argv[i]);
+    } else {
+      // Anything that isn't an option is a test specifier.
+      test_specifiers++;
+    }
+  }
+
+  // Options controlling test conditions and debug output.
+
+  if (IsInArgs("--trace-all", argc, argv)) {
+    vixl::Test::set_trace_reg(true);
+    vixl::Test::set_trace_write(true);
+    vixl::Test::set_trace_sim(true);
+    vixl::Test::set_coloured_trace(true);
+  }
+
+  if (IsInArgs("--coloured-trace", argc, argv)) {
+    vixl::Test::set_coloured_trace(true);
+  }
+
+  if (IsInArgs("--debugger", argc, argv)) {
+    vixl::Test::set_debug(true);
+  }
+
+  if (IsInArgs("--trace-write", argc, argv)) {
+    vixl::Test::set_trace_write(true);
+  }
+
+  if (IsInArgs("--trace-reg", argc, argv)) {
+    vixl::Test::set_trace_reg(true);
+  }
+
+  if (IsInArgs("--trace-sim", argc, argv)) {
+    vixl::Test::set_trace_sim(true);
+  }
+
+  if (IsInArgs("--instruction-stats", argc, argv)) {
+    vixl::Test::set_instruction_stats(true);
+  }
+
+  if (IsInArgs("--sim-test-trace", argc, argv)) {
+    vixl::Test::set_sim_test_trace(true);
+  }
+
+  // Basic (mutually-exclusive) operations.
+
+  if (IsInArgs("--help", argc, argv)) {
+    PrintHelpMessage();
+
+  } else if (IsInArgs("--list", argc, argv)) {
+    // List all registered tests, then exit.
+    for (vixl::Test* c = vixl::Test::first(); c != NULL; c = c->next()) {
+      printf("%s\n", c->name());
+    }
+
+  } else if (IsInArgs("--run-all", argc, argv)) {
+    // Run all registered tests.
+    for (vixl::Test* c = vixl::Test::first(); c != NULL; c = c->next()) {
+      printf("Running %s\n", c->name());
+      c->callback()();
+    }
+
+  } else {
+    // Run the specified tests.
+    if (test_specifiers == 0) {
+      printf("No tests specified.\n");
+      PrintHelpMessage();
+      return EXIT_FAILURE;
+    }
+
+    for (int i = 1; i < argc; i++) {
+      if (!IsOption(argv[i])) {
+        vixl::Test* c;
+        for (c = vixl::Test::first(); c != NULL; c = c->next()) {
+          if (strcmp(c->name(), argv[i]) == 0) {
+            c->callback()();
+            break;
+          }
+        }
+        // Fail if we have not found a matching test to run.
+        if (c == NULL) {
+          printf("Test '%s' does not exist. Aborting...\n", argv[i]);
+          abort();
+        }
+      }
+    }
+  }
+
+  return EXIT_SUCCESS;
+}
+
diff --git a/test/cctest.h b/test/test-runner.h
similarity index 76%
rename from test/cctest.h
rename to test/test-runner.h
index a81d51e..db38e53 100644
--- a/test/cctest.h
+++ b/test/test-runner.h
@@ -24,32 +24,34 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#ifndef TEST_CCTEST_H_
-#define TEST_CCTEST_H_
+#ifndef TEST_TEST_H_
+#define TEST_TEST_H_
 
 #include "utils.h"
 
 namespace vixl {
 
-// Each actual test is represented by a CCtest instance.
-// Cctests are appended to a static linked list upon creation.
-class Cctest {
-  typedef void (CctestFunction)();
+// Each actual test is represented by a Test instance.
+// Tests are appended to a static linked list upon creation.
+class Test {
+  typedef void (TestFunction)();
 
  public:
-  Cctest(const char* name, CctestFunction* callback);
+  Test(const char* name, TestFunction* callback);
 
   const char* name() { return name_; }
-  CctestFunction* callback() { return callback_; }
-  static Cctest* first() { return first_; }
-  static Cctest* last() { return last_; }
-  Cctest* next() { return next_; }
+  TestFunction* callback() { return callback_; }
+  static Test* first() { return first_; }
+  static Test* last() { return last_; }
+  Test* next() { return next_; }
   static bool debug() { return debug_; }
   static void set_debug(bool value) { debug_ = value; }
   static bool trace_sim() { return trace_sim_; }
   static void set_trace_sim(bool value) { trace_sim_ = value; }
   static bool trace_reg() { return trace_reg_; }
   static void set_trace_reg(bool value) { trace_reg_ = value; }
+  static bool trace_write() { return trace_write_; }
+  static void set_trace_write(bool value) { trace_write_ = value; }
   static bool coloured_trace() { return coloured_trace_; }
   static void set_coloured_trace(bool value) { coloured_trace_ = value; }
   static bool instruction_stats() { return instruction_stats_; }
@@ -58,31 +60,32 @@
   static void set_sim_test_trace(bool value) { sim_test_trace_ = value; }
 
   // The debugger is needed to trace register values.
-  static bool run_debugger() { return debug_ || trace_reg_; }
+  static bool run_debugger() { return debug_; }
 
  private:
   const char* name_;
-  CctestFunction* callback_;
+  TestFunction* callback_;
 
-  static Cctest* first_;
-  static Cctest* last_;
-  Cctest* next_;
+  static Test* first_;
+  static Test* last_;
+  Test* next_;
   static bool debug_;
   static bool trace_sim_;
   static bool trace_reg_;
+  static bool trace_write_;
   static bool coloured_trace_;
   static bool instruction_stats_;
   static bool sim_test_trace_;
 };
 
-// Define helper macros for cctest files.
+// Define helper macros for test files.
 
-// Macro to register a cctest. It instantiates a Cctest and registers its
+// Macro to register a test. It instantiates a Test and registers its
 // callback function.
 #define TEST_(Name)                                                            \
 void Test##Name();                                                             \
-Cctest cctest_##Name(#Name, &Test##Name);                                      \
+Test test_##Name(#Name, &Test##Name);                                          \
 void Test##Name()
 }  // namespace vixl
 
-#endif  // TEST_CCTEST_H_
+#endif  // TEST_TEST_H_
diff --git a/test/test-simulator-a64.cc b/test/test-simulator-a64.cc
index 9a21824..256843d 100644
--- a/test/test-simulator-a64.cc
+++ b/test/test-simulator-a64.cc
@@ -27,7 +27,7 @@
 #include <stdio.h>
 #include <float.h>
 
-#include "cctest.h"
+#include "test-runner.h"
 #include "test-utils-a64.h"
 #include "test-simulator-inputs-a64.h"
 #include "test-simulator-traces-a64.h"
@@ -56,39 +56,33 @@
 #define SETUP()                                                               \
   MacroAssembler masm(BUF_SIZE);                                              \
   Decoder decoder;                                                            \
-  Simulator* simulator = NULL;                                                \
-  if (Cctest::run_debugger()) {                                               \
-    simulator = new Debugger(&decoder);                                       \
-  } else {                                                                    \
-    simulator = new Simulator(&decoder);                                      \
-    simulator->set_disasm_trace(Cctest::trace_sim());                         \
-  }                                                                           \
-  simulator->set_coloured_trace(Cctest::coloured_trace());                    \
-  simulator->set_instruction_stats(Cctest::instruction_stats());
+  Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder)        \
+                                              : new Simulator(&decoder);      \
+  simulator->set_coloured_trace(Test::coloured_trace());                      \
+  simulator->set_instruction_stats(Test::instruction_stats());
 
 #define START()                                                               \
   masm.Reset();                                                               \
   simulator->ResetState();                                                    \
   __ PushCalleeSavedRegisters();                                              \
-  if (Cctest::run_debugger()) {                                               \
-    if (Cctest::trace_reg()) {                                                \
-      __ Trace(LOG_STATE, TRACE_ENABLE);                                      \
-    }                                                                         \
-    if (Cctest::trace_sim()) {                                                \
-      __ Trace(LOG_DISASM, TRACE_ENABLE);                                     \
-    }                                                                         \
+  if (Test::trace_reg()) {                                                    \
+    __ Trace(LOG_STATE, TRACE_ENABLE);                                        \
   }                                                                           \
-  if (Cctest::instruction_stats()) {                                          \
+  if (Test::trace_write()) {                                                  \
+    __ Trace(LOG_WRITE, TRACE_ENABLE);                                        \
+  }                                                                           \
+  if (Test::trace_sim()) {                                                    \
+    __ Trace(LOG_DISASM, TRACE_ENABLE);                                       \
+  }                                                                           \
+  if (Test::instruction_stats()) {                                            \
     __ EnableInstrumentation();                                               \
   }
 
 #define END()                                                                 \
-  if (Cctest::instruction_stats()) {                                          \
+  if (Test::instruction_stats()) {                                            \
     __ DisableInstrumentation();                                              \
   }                                                                           \
-  if (Cctest::run_debugger()) {                                               \
-    __ Trace(LOG_ALL, TRACE_DISABLE);                                         \
-  }                                                                           \
+  __ Trace(LOG_ALL, TRACE_DISABLE);                                           \
   __ PopCalleeSavedRegisters();                                               \
   __ Ret();                                                                   \
   masm.FinalizeCode()
@@ -201,7 +195,7 @@
   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
 
   {
-    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    SingleEmissionCheckScope guard(&masm);
     (masm.*helper)(fd, fn);
   }
   __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
@@ -234,7 +228,7 @@
   Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                  reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
 
-  if (Cctest::sim_test_trace()) {
+  if (Test::sim_test_trace()) {
     // Print the results.
     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
     for (unsigned d = 0; d < results_length; d++) {
@@ -312,7 +306,7 @@
   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
 
   {
-    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    SingleEmissionCheckScope guard(&masm);
     (masm.*helper)(fd, fn, fm);
   }
     __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
@@ -348,7 +342,7 @@
   Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                  reinterpret_cast<uintptr_t>(results), bits);
 
-  if (Cctest::sim_test_trace()) {
+  if (Test::sim_test_trace()) {
     // Print the results.
     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
     for (unsigned d = 0; d < results_length; d++) {
@@ -438,7 +432,7 @@
   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
 
   {
-    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    SingleEmissionCheckScope guard(&masm);
     (masm.*helper)(fd, fn, fm, fa);
   }
   __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
@@ -478,7 +472,7 @@
   Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                  reinterpret_cast<uintptr_t>(results), bits);
 
-  if (Cctest::sim_test_trace()) {
+  if (Test::sim_test_trace()) {
     // Print the results.
     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
     for (unsigned d = 0; d < results_length; d++) {
@@ -567,7 +561,7 @@
   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
 
   {
-    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    SingleEmissionCheckScope guard(&masm);
     (masm.*helper)(fn, fm);
   }
   __ Mrs(flags, NZCV);
@@ -605,7 +599,7 @@
   TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                  reinterpret_cast<uintptr_t>(results), bits);
 
-  if (Cctest::sim_test_trace()) {
+  if (Test::sim_test_trace()) {
     // Print the results.
     printf("const uint8_t kExpected_%s[] = {\n", name);
     for (unsigned d = 0; d < results_length; d++) {
@@ -690,7 +684,7 @@
   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
 
   {
-    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    SingleEmissionCheckScope guard(&masm);
     (masm.*helper)(fn, 0.0);
   }
   __ Mrs(flags, NZCV);
@@ -724,7 +718,7 @@
   TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                      reinterpret_cast<uintptr_t>(results), bits);
 
-  if (Cctest::sim_test_trace()) {
+  if (Test::sim_test_trace()) {
     // Print the results.
     printf("const uint8_t kExpected_%s[] = {\n", name);
     for (unsigned d = 0; d < results_length; d++) {
@@ -806,7 +800,7 @@
   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
 
   {
-    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    SingleEmissionCheckScope guard(&masm);
     (masm.*helper)(rd, fn);
   }
   __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
@@ -841,7 +835,7 @@
   TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                      reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
 
-  if (Cctest::sim_test_trace()) {
+  if (Test::sim_test_trace()) {
     // Print the results.
     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
     // There is no simple C++ literal for INT*_MIN that doesn't produce
@@ -918,7 +912,7 @@
   TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                      reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
 
-  if (Cctest::sim_test_trace()) {
+  if (Test::sim_test_trace()) {
     // Print the results.
     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
     for (unsigned d = 0; d < results_length; d++) {
@@ -998,7 +992,11 @@
 DEFINE_TEST_FP(fneg, 1Op, Basic)
 DEFINE_TEST_FP(fsqrt, 1Op, Basic)
 DEFINE_TEST_FP(frinta, 1Op, Conversions)
+DEFINE_TEST_FP(frinti, 1Op, Conversions)
+DEFINE_TEST_FP(frintm, 1Op, Conversions)
 DEFINE_TEST_FP(frintn, 1Op, Conversions)
+DEFINE_TEST_FP(frintp, 1Op, Conversions)
+DEFINE_TEST_FP(frintx, 1Op, Conversions)
 DEFINE_TEST_FP(frintz, 1Op, Conversions)
 
 TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
diff --git a/test/test-simulator-traces-a64.h b/test/test-simulator-traces-a64.h
index b8ee1ff..f129398 100644
--- a/test/test-simulator-traces-a64.h
+++ b/test/test-simulator-traces-a64.h
@@ -468310,6 +468310,644 @@
 };
 const unsigned kExpectedCount_frinta_s = 104;
 
+const uint64_t kExpected_frinti_d[] = {
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x4000000000000000,
+  0x4024000000000000,
+  0x7fefffffffffffff,
+  0x7ff0000000000000,
+  0x7ff923456789abcd,
+  0x7ff8000000000000,
+  0x7ff923456789abcd,
+  0x7ff0000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xc000000000000000,
+  0xc024000000000000,
+  0xffefffffffffffff,
+  0xfff0000000000000,
+  0xfff923456789abcd,
+  0xfff8000000000000,
+  0xfff923456789abcd,
+  0xfff0000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x47efffff00000000,
+  0x0000000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x47efffffefffffff,
+  0x47f0000000000000,
+  0x47effffff0000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0xc7efffff00000000,
+  0x8000000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xc7efffffefffffff,
+  0xc7f0000000000000,
+  0xc7effffff0000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x4330000000000000,
+  0x4330000000000001,
+  0x4330000000000002,
+  0x4330000000000003,
+  0x433fedcba9876543,
+  0x433ffffffffffffc,
+  0x433ffffffffffffd,
+  0x433ffffffffffffe,
+  0x433fffffffffffff,
+  0x4320000000000000,
+  0x4320000000000000,
+  0x4320000000000002,
+  0x4320000000000004,
+  0x432fedcba9876544,
+  0x432ffffffffffffc,
+  0x432ffffffffffffc,
+  0x432ffffffffffffe,
+  0x4330000000000000,
+  0x4310000000000000,
+  0x4310000000000000,
+  0x4310000000000000,
+  0x4310000000000004,
+  0x431fedcba9876544,
+  0x431ffffffffffffc,
+  0x431ffffffffffffc,
+  0x4320000000000000,
+  0x4320000000000000,
+  0xc330000000000000,
+  0xc330000000000001,
+  0xc330000000000002,
+  0xc330000000000003,
+  0xc33fedcba9876543,
+  0xc33ffffffffffffc,
+  0xc33ffffffffffffd,
+  0xc33ffffffffffffe,
+  0xc33fffffffffffff,
+  0xc320000000000000,
+  0xc320000000000000,
+  0xc320000000000002,
+  0xc320000000000004,
+  0xc32fedcba9876544,
+  0xc32ffffffffffffc,
+  0xc32ffffffffffffc,
+  0xc32ffffffffffffe,
+  0xc330000000000000,
+  0xc310000000000000,
+  0xc310000000000000,
+  0xc310000000000000,
+  0xc310000000000004,
+  0xc31fedcba9876544,
+  0xc31ffffffffffffc,
+  0xc31ffffffffffffc,
+  0xc320000000000000,
+  0xc320000000000000,
+  0xc3e0000000000001,
+  0xc3e0000000000000,
+  0xc3dfffffffffffff,
+  0x43dfffffffffffff,
+  0x43e0000000000000,
+  0x43efffffffffffff,
+  0x43f0000000000000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1dfffffffc00000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41e0000000000000,
+  0x41e0000000000000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41f0000000000000,
+  0x41f0000000000000,
+};
+const unsigned kExpectedCount_frinti_d = 207;
+
+const uint32_t kExpected_frinti_s[] = {
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x40000000,
+  0x41200000,
+  0x7fcfffff,
+  0x7f800000,
+  0x7fd23456,
+  0x7fc00000,
+  0x7fd23456,
+  0x7fc00001,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xc0000000,
+  0xc1200000,
+  0xffcfffff,
+  0xff800000,
+  0xffd23456,
+  0xffc00000,
+  0xffd23456,
+  0xffc00001,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x4b000000,
+  0x4b000001,
+  0x4b000002,
+  0x4b000003,
+  0x4b765432,
+  0x4b7ffffc,
+  0x4b7ffffd,
+  0x4b7ffffe,
+  0x4b7fffff,
+  0x4a800000,
+  0x4a800000,
+  0x4a800002,
+  0x4a800004,
+  0x4af65432,
+  0x4afffffc,
+  0x4afffffc,
+  0x4afffffe,
+  0x4b000000,
+  0x4a000000,
+  0x4a000000,
+  0x4a000000,
+  0x4a000004,
+  0x4a765430,
+  0x4a7ffffc,
+  0x4a7ffffc,
+  0x4a800000,
+  0x4a800000,
+  0xcb000000,
+  0xcb000001,
+  0xcb000002,
+  0xcb000003,
+  0xcb765432,
+  0xcb7ffffc,
+  0xcb7ffffd,
+  0xcb7ffffe,
+  0xcb7fffff,
+  0xca800000,
+  0xca800000,
+  0xca800002,
+  0xca800004,
+  0xcaf65432,
+  0xcafffffc,
+  0xcafffffc,
+  0xcafffffe,
+  0xcb000000,
+  0xca000000,
+  0xca000000,
+  0xca000000,
+  0xca000004,
+  0xca765430,
+  0xca7ffffc,
+  0xca7ffffc,
+  0xca800000,
+  0xca800000,
+  0xdf000001,
+  0xdf000000,
+  0xdeffffff,
+  0x5effffff,
+  0x5f000000,
+  0x5f7fffff,
+  0x5f800000,
+  0xcf000001,
+  0xcf000000,
+  0xceffffff,
+  0x4effffff,
+  0x4f000000,
+};
+const unsigned kExpectedCount_frinti_s = 104;
+
+const uint64_t kExpected_frintm_d[] = {
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x4024000000000000,
+  0x7fefffffffffffff,
+  0x7ff0000000000000,
+  0x7ff923456789abcd,
+  0x7ff8000000000000,
+  0x7ff923456789abcd,
+  0x7ff0000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x8000000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc024000000000000,
+  0xffefffffffffffff,
+  0xfff0000000000000,
+  0xfff923456789abcd,
+  0xfff8000000000000,
+  0xfff923456789abcd,
+  0xfff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0x47efffff00000000,
+  0x0000000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x0000000000000000,
+  0x47efffffefffffff,
+  0x47f0000000000000,
+  0x47effffff0000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0xc7efffff00000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xc000000000000000,
+  0xbff0000000000000,
+  0xc7efffffefffffff,
+  0xc7f0000000000000,
+  0xc7effffff0000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0x4330000000000000,
+  0x4330000000000001,
+  0x4330000000000002,
+  0x4330000000000003,
+  0x433fedcba9876543,
+  0x433ffffffffffffc,
+  0x433ffffffffffffd,
+  0x433ffffffffffffe,
+  0x433fffffffffffff,
+  0x4320000000000000,
+  0x4320000000000000,
+  0x4320000000000002,
+  0x4320000000000002,
+  0x432fedcba9876542,
+  0x432ffffffffffffc,
+  0x432ffffffffffffc,
+  0x432ffffffffffffe,
+  0x432ffffffffffffe,
+  0x4310000000000000,
+  0x4310000000000000,
+  0x4310000000000000,
+  0x4310000000000000,
+  0x431fedcba9876540,
+  0x431ffffffffffffc,
+  0x431ffffffffffffc,
+  0x431ffffffffffffc,
+  0x431ffffffffffffc,
+  0xc330000000000000,
+  0xc330000000000001,
+  0xc330000000000002,
+  0xc330000000000003,
+  0xc33fedcba9876543,
+  0xc33ffffffffffffc,
+  0xc33ffffffffffffd,
+  0xc33ffffffffffffe,
+  0xc33fffffffffffff,
+  0xc320000000000000,
+  0xc320000000000002,
+  0xc320000000000002,
+  0xc320000000000004,
+  0xc32fedcba9876544,
+  0xc32ffffffffffffc,
+  0xc32ffffffffffffe,
+  0xc32ffffffffffffe,
+  0xc330000000000000,
+  0xc310000000000000,
+  0xc310000000000004,
+  0xc310000000000004,
+  0xc310000000000004,
+  0xc31fedcba9876544,
+  0xc31ffffffffffffc,
+  0xc320000000000000,
+  0xc320000000000000,
+  0xc320000000000000,
+  0xc3e0000000000001,
+  0xc3e0000000000000,
+  0xc3dfffffffffffff,
+  0x43dfffffffffffff,
+  0x43e0000000000000,
+  0x43efffffffffffff,
+  0x43f0000000000000,
+  0xc1e0000000400000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0x41dfffffff400000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41efffffffa00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+};
+const unsigned kExpectedCount_frintm_d = 207;
+
+const uint32_t kExpected_frintm_s[] = {
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x41200000,
+  0x7fcfffff,
+  0x7f800000,
+  0x7fd23456,
+  0x7fc00000,
+  0x7fd23456,
+  0x7fc00001,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x80000000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xc0000000,
+  0xc0000000,
+  0xc1200000,
+  0xffcfffff,
+  0xff800000,
+  0xffd23456,
+  0xffc00000,
+  0xffd23456,
+  0xffc00001,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0x4b000000,
+  0x4b000001,
+  0x4b000002,
+  0x4b000003,
+  0x4b765432,
+  0x4b7ffffc,
+  0x4b7ffffd,
+  0x4b7ffffe,
+  0x4b7fffff,
+  0x4a800000,
+  0x4a800000,
+  0x4a800002,
+  0x4a800002,
+  0x4af65432,
+  0x4afffffc,
+  0x4afffffc,
+  0x4afffffe,
+  0x4afffffe,
+  0x4a000000,
+  0x4a000000,
+  0x4a000000,
+  0x4a000000,
+  0x4a765430,
+  0x4a7ffffc,
+  0x4a7ffffc,
+  0x4a7ffffc,
+  0x4a7ffffc,
+  0xcb000000,
+  0xcb000001,
+  0xcb000002,
+  0xcb000003,
+  0xcb765432,
+  0xcb7ffffc,
+  0xcb7ffffd,
+  0xcb7ffffe,
+  0xcb7fffff,
+  0xca800000,
+  0xca800002,
+  0xca800002,
+  0xca800004,
+  0xcaf65432,
+  0xcafffffc,
+  0xcafffffe,
+  0xcafffffe,
+  0xcb000000,
+  0xca000000,
+  0xca000004,
+  0xca000004,
+  0xca000004,
+  0xca765434,
+  0xca7ffffc,
+  0xca800000,
+  0xca800000,
+  0xca800000,
+  0xdf000001,
+  0xdf000000,
+  0xdeffffff,
+  0x5effffff,
+  0x5f000000,
+  0x5f7fffff,
+  0x5f800000,
+  0xcf000001,
+  0xcf000000,
+  0xceffffff,
+  0x4effffff,
+  0x4f000000,
+};
+const unsigned kExpectedCount_frintm_s = 104;
+
 const uint64_t kExpected_frintn_d[] = {
   0x0000000000000000,
   0x0000000000000000,
@@ -468629,6 +469267,644 @@
 };
 const unsigned kExpectedCount_frintn_s = 104;
 
+const uint64_t kExpected_frintp_d[] = {
+  0x0000000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4024000000000000,
+  0x7fefffffffffffff,
+  0x7ff0000000000000,
+  0x7ff923456789abcd,
+  0x7ff8000000000000,
+  0x7ff923456789abcd,
+  0x7ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xc024000000000000,
+  0xffefffffffffffff,
+  0xfff0000000000000,
+  0xfff923456789abcd,
+  0xfff8000000000000,
+  0xfff923456789abcd,
+  0xfff0000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x47efffff00000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x4000000000000000,
+  0x3ff0000000000000,
+  0x47efffffefffffff,
+  0x47f0000000000000,
+  0x47effffff0000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0xc7efffff00000000,
+  0x8000000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0x8000000000000000,
+  0xc7efffffefffffff,
+  0xc7f0000000000000,
+  0xc7effffff0000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x4330000000000000,
+  0x4330000000000001,
+  0x4330000000000002,
+  0x4330000000000003,
+  0x433fedcba9876543,
+  0x433ffffffffffffc,
+  0x433ffffffffffffd,
+  0x433ffffffffffffe,
+  0x433fffffffffffff,
+  0x4320000000000000,
+  0x4320000000000002,
+  0x4320000000000002,
+  0x4320000000000004,
+  0x432fedcba9876544,
+  0x432ffffffffffffc,
+  0x432ffffffffffffe,
+  0x432ffffffffffffe,
+  0x4330000000000000,
+  0x4310000000000000,
+  0x4310000000000004,
+  0x4310000000000004,
+  0x4310000000000004,
+  0x431fedcba9876544,
+  0x431ffffffffffffc,
+  0x4320000000000000,
+  0x4320000000000000,
+  0x4320000000000000,
+  0xc330000000000000,
+  0xc330000000000001,
+  0xc330000000000002,
+  0xc330000000000003,
+  0xc33fedcba9876543,
+  0xc33ffffffffffffc,
+  0xc33ffffffffffffd,
+  0xc33ffffffffffffe,
+  0xc33fffffffffffff,
+  0xc320000000000000,
+  0xc320000000000000,
+  0xc320000000000002,
+  0xc320000000000002,
+  0xc32fedcba9876542,
+  0xc32ffffffffffffc,
+  0xc32ffffffffffffc,
+  0xc32ffffffffffffe,
+  0xc32ffffffffffffe,
+  0xc310000000000000,
+  0xc310000000000000,
+  0xc310000000000000,
+  0xc310000000000000,
+  0xc31fedcba9876540,
+  0xc31ffffffffffffc,
+  0xc31ffffffffffffc,
+  0xc31ffffffffffffc,
+  0xc31ffffffffffffc,
+  0xc3e0000000000001,
+  0xc3e0000000000000,
+  0xc3dfffffffffffff,
+  0x43dfffffffffffff,
+  0x43e0000000000000,
+  0x43efffffffffffff,
+  0x43f0000000000000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1dfffffffc00000,
+  0xc1dfffffffc00000,
+  0xc1dfffffffc00000,
+  0xc1dfffffffc00000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41e0000000000000,
+  0x41e0000000000000,
+  0x41e0000000000000,
+  0x41e0000000000000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41f0000000000000,
+  0x41f0000000000000,
+  0x41f0000000000000,
+  0x41f0000000000000,
+};
+const unsigned kExpectedCount_frintp_d = 207;
+
+const uint32_t kExpected_frintp_s[] = {
+  0x00000000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x40000000,
+  0x40000000,
+  0x41200000,
+  0x7fcfffff,
+  0x7f800000,
+  0x7fd23456,
+  0x7fc00000,
+  0x7fd23456,
+  0x7fc00001,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xc1200000,
+  0xffcfffff,
+  0xff800000,
+  0xffd23456,
+  0xffc00000,
+  0xffd23456,
+  0xffc00001,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x4b000000,
+  0x4b000001,
+  0x4b000002,
+  0x4b000003,
+  0x4b765432,
+  0x4b7ffffc,
+  0x4b7ffffd,
+  0x4b7ffffe,
+  0x4b7fffff,
+  0x4a800000,
+  0x4a800002,
+  0x4a800002,
+  0x4a800004,
+  0x4af65432,
+  0x4afffffc,
+  0x4afffffe,
+  0x4afffffe,
+  0x4b000000,
+  0x4a000000,
+  0x4a000004,
+  0x4a000004,
+  0x4a000004,
+  0x4a765434,
+  0x4a7ffffc,
+  0x4a800000,
+  0x4a800000,
+  0x4a800000,
+  0xcb000000,
+  0xcb000001,
+  0xcb000002,
+  0xcb000003,
+  0xcb765432,
+  0xcb7ffffc,
+  0xcb7ffffd,
+  0xcb7ffffe,
+  0xcb7fffff,
+  0xca800000,
+  0xca800000,
+  0xca800002,
+  0xca800002,
+  0xcaf65432,
+  0xcafffffc,
+  0xcafffffc,
+  0xcafffffe,
+  0xcafffffe,
+  0xca000000,
+  0xca000000,
+  0xca000000,
+  0xca000000,
+  0xca765430,
+  0xca7ffffc,
+  0xca7ffffc,
+  0xca7ffffc,
+  0xca7ffffc,
+  0xdf000001,
+  0xdf000000,
+  0xdeffffff,
+  0x5effffff,
+  0x5f000000,
+  0x5f7fffff,
+  0x5f800000,
+  0xcf000001,
+  0xcf000000,
+  0xceffffff,
+  0x4effffff,
+  0x4f000000,
+};
+const unsigned kExpectedCount_frintp_s = 104;
+
+const uint64_t kExpected_frintx_d[] = {
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x4000000000000000,
+  0x4024000000000000,
+  0x7fefffffffffffff,
+  0x7ff0000000000000,
+  0x7ff923456789abcd,
+  0x7ff8000000000000,
+  0x7ff923456789abcd,
+  0x7ff0000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xc000000000000000,
+  0xc024000000000000,
+  0xffefffffffffffff,
+  0xfff0000000000000,
+  0xfff923456789abcd,
+  0xfff8000000000000,
+  0xfff923456789abcd,
+  0xfff0000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x47efffff00000000,
+  0x0000000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x3ff0000000000000,
+  0x47efffffefffffff,
+  0x47f0000000000000,
+  0x47effffff0000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0x0000000000000000,
+  0xc7efffff00000000,
+  0x8000000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xbff0000000000000,
+  0xc7efffffefffffff,
+  0xc7f0000000000000,
+  0xc7effffff0000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x8000000000000000,
+  0x4330000000000000,
+  0x4330000000000001,
+  0x4330000000000002,
+  0x4330000000000003,
+  0x433fedcba9876543,
+  0x433ffffffffffffc,
+  0x433ffffffffffffd,
+  0x433ffffffffffffe,
+  0x433fffffffffffff,
+  0x4320000000000000,
+  0x4320000000000000,
+  0x4320000000000002,
+  0x4320000000000004,
+  0x432fedcba9876544,
+  0x432ffffffffffffc,
+  0x432ffffffffffffc,
+  0x432ffffffffffffe,
+  0x4330000000000000,
+  0x4310000000000000,
+  0x4310000000000000,
+  0x4310000000000000,
+  0x4310000000000004,
+  0x431fedcba9876544,
+  0x431ffffffffffffc,
+  0x431ffffffffffffc,
+  0x4320000000000000,
+  0x4320000000000000,
+  0xc330000000000000,
+  0xc330000000000001,
+  0xc330000000000002,
+  0xc330000000000003,
+  0xc33fedcba9876543,
+  0xc33ffffffffffffc,
+  0xc33ffffffffffffd,
+  0xc33ffffffffffffe,
+  0xc33fffffffffffff,
+  0xc320000000000000,
+  0xc320000000000000,
+  0xc320000000000002,
+  0xc320000000000004,
+  0xc32fedcba9876544,
+  0xc32ffffffffffffc,
+  0xc32ffffffffffffc,
+  0xc32ffffffffffffe,
+  0xc330000000000000,
+  0xc310000000000000,
+  0xc310000000000000,
+  0xc310000000000000,
+  0xc310000000000004,
+  0xc31fedcba9876544,
+  0xc31ffffffffffffc,
+  0xc31ffffffffffffc,
+  0xc320000000000000,
+  0xc320000000000000,
+  0xc3e0000000000001,
+  0xc3e0000000000000,
+  0xc3dfffffffffffff,
+  0x43dfffffffffffff,
+  0x43e0000000000000,
+  0x43efffffffffffff,
+  0x43f0000000000000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000200000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1e0000000000000,
+  0xc1dfffffffc00000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffff800000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41dfffffffc00000,
+  0x41e0000000000000,
+  0x41e0000000000000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffc00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41efffffffe00000,
+  0x41f0000000000000,
+  0x41f0000000000000,
+};
+const unsigned kExpectedCount_frintx_d = 207;
+
+const uint32_t kExpected_frintx_s[] = {
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x3f800000,
+  0x40000000,
+  0x41200000,
+  0x7fcfffff,
+  0x7f800000,
+  0x7fd23456,
+  0x7fc00000,
+  0x7fd23456,
+  0x7fc00001,
+  0x00000000,
+  0x00000000,
+  0x00000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xbf800000,
+  0xc0000000,
+  0xc1200000,
+  0xffcfffff,
+  0xff800000,
+  0xffd23456,
+  0xffc00000,
+  0xffd23456,
+  0xffc00001,
+  0x80000000,
+  0x80000000,
+  0x80000000,
+  0x4b000000,
+  0x4b000001,
+  0x4b000002,
+  0x4b000003,
+  0x4b765432,
+  0x4b7ffffc,
+  0x4b7ffffd,
+  0x4b7ffffe,
+  0x4b7fffff,
+  0x4a800000,
+  0x4a800000,
+  0x4a800002,
+  0x4a800004,
+  0x4af65432,
+  0x4afffffc,
+  0x4afffffc,
+  0x4afffffe,
+  0x4b000000,
+  0x4a000000,
+  0x4a000000,
+  0x4a000000,
+  0x4a000004,
+  0x4a765430,
+  0x4a7ffffc,
+  0x4a7ffffc,
+  0x4a800000,
+  0x4a800000,
+  0xcb000000,
+  0xcb000001,
+  0xcb000002,
+  0xcb000003,
+  0xcb765432,
+  0xcb7ffffc,
+  0xcb7ffffd,
+  0xcb7ffffe,
+  0xcb7fffff,
+  0xca800000,
+  0xca800000,
+  0xca800002,
+  0xca800004,
+  0xcaf65432,
+  0xcafffffc,
+  0xcafffffc,
+  0xcafffffe,
+  0xcb000000,
+  0xca000000,
+  0xca000000,
+  0xca000000,
+  0xca000004,
+  0xca765430,
+  0xca7ffffc,
+  0xca7ffffc,
+  0xca800000,
+  0xca800000,
+  0xdf000001,
+  0xdf000000,
+  0xdeffffff,
+  0x5effffff,
+  0x5f000000,
+  0x5f7fffff,
+  0x5f800000,
+  0xcf000001,
+  0xcf000000,
+  0xceffffff,
+  0x4effffff,
+  0x4f000000,
+};
+const unsigned kExpectedCount_frintx_s = 104;
+
 const uint64_t kExpected_frintz_d[] = {
   0x0000000000000000,
   0x0000000000000000,
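(Aside, not part of the patch: the `kExpected_frint*_d` and `kExpected_frint*_s` tables added above, like the
`kExpected_frintz_*` tables that follow, record the raw IEEE-754 bit patterns the simulator is expected to
produce for each rounding instruction: `frintm` rounds toward minus infinity, `frintp` toward plus infinity,
`frintn` to nearest with ties to even, `frintz` toward zero, and `frintx` uses the current FPCR rounding mode
and signals Inexact. The sketch below is host-side only, independent of VIXL, and ignores the NaN-propagation
and FPSR-flag behaviour of the real instructions; the input value 1.5 is illustrative and is not one of the
trace inputs.)

    // Host-side sketch: round a value with the C++ standard library and print
    // its raw encoding, the form in which the kExpected_* tables store results.
    #include <cinttypes>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Return the raw IEEE-754 encoding of a double.
    static uint64_t double_to_rawbits(double value) {
      uint64_t bits;
      memcpy(&bits, &value, sizeof(bits));
      return bits;
    }

    int main() {
      double input = 1.5;  // Illustrative input; not taken from the trace inputs.
      printf("floor (frintm-like): 0x%016" PRIx64 "\n",
             double_to_rawbits(std::floor(input)));  // 0x3ff0000000000000 (1.0)
      printf("ceil  (frintp-like): 0x%016" PRIx64 "\n",
             double_to_rawbits(std::ceil(input)));   // 0x4000000000000000 (2.0)
      printf("trunc (frintz-like): 0x%016" PRIx64 "\n",
             double_to_rawbits(std::trunc(input)));  // 0x3ff0000000000000 (1.0)
      return 0;
    }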
diff --git a/test/test-utils-a64.cc b/test/test-utils-a64.cc
index bc04787..9b2c5de 100644
--- a/test/test-utils-a64.cc
+++ b/test/test-utils-a64.cc
@@ -28,7 +28,7 @@
 
 #include <math.h>   // Needed for isnan().
 
-#include "cctest.h"
+#include "test-runner.h"
 #include "a64/macro-assembler-a64.h"
 #include "a64/simulator-a64.h"
 #include "a64/disasm-a64.h"
@@ -38,6 +38,18 @@
 
 namespace vixl {
 
+
+// This value is a signalling NaN as both a double and as a float (taking the
+// least-significant word).
+const double kFP64SignallingNaN =
+    rawbits_to_double(UINT64_C(0x7ff000007f800001));
+const float kFP32SignallingNaN = rawbits_to_float(0x7f800001);
+
+// A similar value, but as a quiet NaN.
+const double kFP64QuietNaN = rawbits_to_double(UINT64_C(0x7ff800007fc00001));
+const float kFP32QuietNaN = rawbits_to_float(0x7fc00001);
+
+
 bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) {
   if (result != expected) {
     printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
diff --git a/test/test-utils-a64.h b/test/test-utils-a64.h
index 8884ffe..de9162d 100644
--- a/test/test-utils-a64.h
+++ b/test/test-utils-a64.h
@@ -27,7 +27,7 @@
 #ifndef VIXL_A64_TEST_UTILS_A64_H_
 #define VIXL_A64_TEST_UTILS_A64_H_
 
-#include "cctest.h"
+#include "test-runner.h"
 #include "a64/macro-assembler-a64.h"
 #include "a64/simulator-a64.h"
 #include "a64/disasm-a64.h"
@@ -35,6 +35,17 @@
 
 namespace vixl {
 
+// Signalling and quiet NaNs in double format, constructed such that the bottom
+// 32 bits look like a signalling or quiet NaN (as appropriate) when interpreted
+// as a float. These values are not architecturally significant, but they're
+// useful in tests for initialising registers.
+extern const double kFP64SignallingNaN;
+extern const double kFP64QuietNaN;
+
+// Signalling and quiet NaNs in float format.
+extern const float kFP32SignallingNaN;
+extern const float kFP32QuietNaN;
+
 // RegisterDump: Object allowing integer, floating point and flags registers
 // to be saved to itself for future reference.
 class RegisterDump {
diff --git a/tools/generate_simulator_traces.py b/tools/generate_simulator_traces.py
index 53faaa7..7ab9aef 100755
--- a/tools/generate_simulator_traces.py
+++ b/tools/generate_simulator_traces.py
@@ -34,8 +34,8 @@
 
 def BuildOptions(root):
   result = argparse.ArgumentParser(description = 'Simulator test generator.')
-  result.add_argument('--cctest', action='store', default=root+'/cctest',
-                      help='The cctest executable to run.')
+  result.add_argument('--runner', action='store', default=root+'/test-runner',
+                      help='The test executable to run.')
   result.add_argument('--out', action='store',
                       default='test/test-simulator-traces-a64.h')
   return result.parse_args()
@@ -81,14 +81,14 @@
     util.abort('Failed to find output section in ' + args.out + '.')
 
   # Find the simulator tests.
-  status, output = util.getstatusoutput(args.cctest + ' --list')
+  status, output = util.getstatusoutput(args.runner + ' --list')
   if status != 0: util.abort('Failed to list all tests')
   tests = filter(lambda t: 'SIM_' in t, output.split())
   tests.sort()
 
   # Run each test.
   for test in tests:
-    cmd = ' '.join([args.cctest, '--sim_test_trace', test])
+    cmd = ' '.join([args.runner, '--sim_test_trace', test])
     status, output = util.getstatusoutput(cmd)
     if status != 0: util.abort('Failed to run ' + cmd + '.')
 
diff --git a/tools/git.py b/tools/git.py
index b661d11..5cc3278 100644
--- a/tools/git.py
+++ b/tools/git.py
@@ -31,33 +31,10 @@
 def is_git_repository_root():
   return os.path.isdir('.git')
 
-def get_current_branch():
-  status, branches = util.getstatusoutput('git branch')
-  if status != 0: util.abort('Failed to run git branch.')
-  match = re.search("^\* (.*)$", branches, re.MULTILINE)
-  if not match: util.abort('Failed to find the current branch.')
-
-  branch = match.group(1);
-
-  # If we are not on a named branch, return the hash of the HEAD commit.
-  # This can occur (for example) if a specific revision is checked out by
-  # commit hash, or during a rebase.
-  if branch == '(no branch)':
-    status, commit = util.getstatusoutput('git log -1 --pretty=format:"%H"')
-    if status != 0: util.abort('Failed to run git log.')
-    match = re.search('^[0-9a-fA-F]{40}$', commit, re.MULTILINE)
-    if not match: util.abort('Failed to find the current revision.')
-    branch = match.group(0)
-
-  return branch
-
-
 def get_tracked_files():
-  command = 'git ls-tree '
-  branch = get_current_branch()
-  options = ' -r --full-tree --name-only'
+  command = 'git ls-tree HEAD -r --full-tree --name-only'
 
-  status, tracked = util.getstatusoutput(command + branch + options)
+  status, tracked = util.getstatusoutput(command)
   if status != 0: util.abort('Failed to list tracked files.')
 
   return tracked
diff --git a/tools/presubmit.py b/tools/presubmit.py
index f0cc2ba..6036986 100755
--- a/tools/presubmit.py
+++ b/tools/presubmit.py
@@ -67,26 +67,26 @@
 
 def CleanBuildSystem():
   def clean(mode):
-    if args.verbose: print('Cleaning ' + mode + ' mode cctest...')
+    if args.verbose: print('Cleaning ' + mode + ' mode test...')
     command = 'scons mode=%s simulator=%s all --clean' % \
               (mode, args.simulator)
     status, output = util.getstatusoutput(command)
     if status != 0:
       print(output)
-      util.abort('Failed cleaning cctest: ' + command)
+      util.abort('Failed cleaning test: ' + command)
   clean('debug')
   clean('release')
 
 
 def BuildEverything():
   def build(mode):
-    if args.verbose: print('Building ' + mode + ' mode cctest...')
+    if args.verbose: print('Building ' + mode + ' mode test...')
     command = 'scons mode=%s simulator=%s all -j%u' % \
               (mode, args.simulator, args.jobs)
     status, output = util.getstatusoutput(command)
     if status != 0:
       print(output)
-      util.abort('Failed building cctest: ' + command)
+      util.abort('Failed building test: ' + command)
   build('debug')
   build('release')
 
@@ -120,7 +120,7 @@
     print('Presubmit tests ' + result + '.')
 
 
-class Cctest(Test):
+class VIXLTest(Test):
   def __init__(self, mode, simulator, debugger = False, verbose = False):
     if not mode in ['release', 'debug']:
       print 'Invalid mode.'
@@ -129,19 +129,19 @@
     self.debugger = debugger
     self.verbose = verbose
 
-    name = 'cctest ' + mode
+    name = 'test ' + mode
     if simulator:
       name += ' (%s)' % ('debugger' if debugger else 'simulator')
     Test.__init__(self, name)
 
-    self.cctest = './cctest'
+    self.exe = './test-runner'
     if simulator:
-        self.cctest += '_sim'
+        self.exe += '_sim'
     if mode == 'debug':
-      self.cctest += '_g'
+      self.exe += '_g'
 
   def Run(self):
-    manifest = test.ReadManifest(self.cctest, [], self.debugger,
+    manifest = test.ReadManifest(self.exe, [], self.debugger,
                                  False, self.verbose)
     retcode = test.RunTests(manifest, jobs = args.jobs,
                             verbose = self.verbose, debugger = self.debugger,
@@ -171,6 +171,28 @@
     self.status = PASSED if n_errors == 0 else FAILED
 
 
+class BenchTest(Test):
+  def __init__(self, mode, simulator):
+    name = 'benchmarks ' + mode
+    Test.__init__(self, name)
+    self.exe_suffix = ''
+    if simulator:
+      self.exe_suffix += '_sim'
+    if mode == 'debug':
+      self.exe_suffix += '_g'
+
+  def Run(self):
+    benchmarks = ['bench-dataop', 'bench-branch', 'bench-branch-link']
+    self.status = PASSED
+    for bench in benchmarks:
+      command = './' + bench + self.exe_suffix
+      (rc, out) = util.getstatusoutput(command)
+      if rc != 0:
+        self.status = FAILED
+        print self.name_prefix() + 'Failed to run `' + command + '`'
+    print self.name_prefix() + self.status
+
+
 
 if __name__ == '__main__':
   original_dir = os.path.abspath('.')
@@ -194,14 +216,18 @@
     BuildEverything()
 
     if args.simulator == 'on':
-      #                      mode,     sim,   debugger, verbose
-      tester.AddTest(Cctest('release', True,  True,     args.verbose))
-      tester.AddTest(Cctest('debug',   True,  True,     args.verbose))
-      tester.AddTest(Cctest('release', True,  False,    args.verbose))
-      tester.AddTest(Cctest('debug',   True,  False,    args.verbose))
+      #                        mode,      sim,   debugger, verbose
+      tester.AddTest(VIXLTest('release',  True,  True,     args.verbose))
+      tester.AddTest(VIXLTest('debug',    True,  True,     args.verbose))
+      tester.AddTest(VIXLTest('release',  True,  False,    args.verbose))
+      tester.AddTest(VIXLTest('debug',    True,  False,    args.verbose))
+      tester.AddTest(BenchTest('release', True))
+      tester.AddTest(BenchTest('debug',   True))
     else:
-      tester.AddTest(Cctest('release', False, False,    args.verbose))
-      tester.AddTest(Cctest('debug',   False, False,    args.verbose))
+      tester.AddTest(VIXLTest('release',  False, False,    args.verbose))
+      tester.AddTest(VIXLTest('debug',    False, False,    args.verbose))
+      tester.AddTest(BenchTest('release', False))
+      tester.AddTest(BenchTest('debug',   False))
 
   tester.RunAll()
 
diff --git a/tools/test.py b/tools/test.py
index 4344a9d..92bef6d 100755
--- a/tools/test.py
+++ b/tools/test.py
@@ -43,21 +43,22 @@
 def BuildOptions():
   result = argparse.ArgumentParser(
       description =
-      '''This tool runs each test reported by $CCTEST --list (and filtered as
+      '''This tool runs each test reported by $TEST --list (and filtered as
          specified). A summary will be printed, and detailed test output will be
-         stored in log/$CCTEST.''',
+         stored in log/$TEST.''',
       # Print default values.
       formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   result.add_argument('filters', metavar='filter', nargs='*',
                       help='Run tests matching all of the (regexp) filters.')
-  result.add_argument('--cctest', action='store', required=True,
-                      help='The cctest executable to run.')
+  result.add_argument('--runner', action='store', required=True,
+                      help='The test executable to run.')
   result.add_argument('--coloured_trace', action='store_true',
-                      help='''Pass --coloured_trace to cctest. This will put
-                              colour codes in the log files. The coloured output
-                              can be viewed by "less -R", for example.''')
+                      help='''Pass --coloured_trace to the test runner. This
+                              will put colour codes in the log files. The
+                              coloured output can be viewed by "less -R", for
+                              example.''')
   result.add_argument('--debugger', action='store_true',
-                      help='''Pass --debugger to cctest, so that the debugger is
+                      help='''Pass --debugger to the test runner, so that the debugger is
                               used instead of the simulator. This has no effect
                               when running natively.''')
   result.add_argument('--verbose', action='store_true',
@@ -81,24 +82,26 @@
 
 # A class representing an individual test.
 class Test:
-  def __init__(self, name, cctest, debugger, coloured_trace, verbose):
+  def __init__(self, name, runner, debugger, coloured_trace, verbose):
     self.name = name
-    self.cctest = cctest
+    self.runner = runner
     self.debugger = debugger
     self.coloured_trace = coloured_trace
     self.verbose = verbose
-    self.logpath = os.path.join('log', os.path.basename(self.cctest))
+    self.logpath = os.path.join('log', os.path.basename(self.runner))
     if self.debugger:
       basename = name + '_debugger'
     else:
       basename = name
     self.logout = os.path.join(self.logpath, basename + '.stdout')
     self.logerr = os.path.join(self.logpath, basename + '.stderr')
+    if not os.path.exists(self.logpath): os.makedirs(self.logpath)
 
   # Run the test.
   # Use a thread to be able to control the test.
   def Run(self):
-    command = [self.cctest, '--trace_sim', '--trace_reg', self.name]
+    command = \
+        [self.runner, '--trace_sim', '--trace_reg', '--trace_write', self.name]
     if self.coloured_trace:
       command.append('--coloured_trace')
     if self.debugger:
@@ -115,7 +118,6 @@
     retcode = process.poll()
 
     # Write stdout and stderr to the log.
-    if not os.path.exists(self.logpath): os.makedirs(self.logpath)
     with open(self.logout, 'w') as f: f.write(stdout)
     with open(self.logerr, 'w') as f: f.write(stderr)
 
@@ -137,16 +139,17 @@
 
 
 # Scan matching tests and return a test manifest.
-def ReadManifest(cctest, filters = [],
+def ReadManifest(runner, filters = [],
                  debugger = False, coloured_trace = False, verbose = False):
-  status, output = util.getstatusoutput(cctest +  ' --list')
+  status, output = util.getstatusoutput(runner +  ' --list')
   if status != 0: util.abort('Failed to list all tests')
 
   names = output.split()
   for f in filters:
     names = filter(re.compile(f).search, names)
 
-  return map(lambda x: Test(x, cctest, debugger, coloured_trace, verbose), names)
+  return map(lambda x:
+      Test(x, runner, debugger, coloured_trace, verbose), names)
 
 
 # Shared state for multiprocessing. Ideally the context should be passed with
@@ -223,15 +226,15 @@
   # Parse the arguments.
   args = BuildOptions()
 
-  # Find a valid path to args.cctest (in case it doesn't begin with './').
-  args.cctest = os.path.join('.', args.cctest)
+  # Find a valid path to args.runner (in case it doesn't begin with './').
+  args.runner = os.path.join('.', args.runner)
 
-  if not os.access(args.cctest, os.X_OK):
-    print "'" + args.cctest + "' is not executable or does not exist."
+  if not os.access(args.runner, os.X_OK):
+    print "'" + args.test + "' is not executable or does not exist."
     sys.exit(1)
 
   # List all matching tests.
-  manifest = ReadManifest(args.cctest, args.filters,
+  manifest = ReadManifest(args.runner, args.filters,
                           args.debugger, args.coloured_trace, args.verbose)
 
   # Run the tests.