13 files changed, 491 insertions, 133 deletions
diff --git a/docs/CMake.rst b/docs/CMake.rst
index 473672b5f73..05edec64da3 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -224,6 +224,10 @@ LLVM-specific variables
   Generate build targets for the LLVM tools. Defaults to ON. You can use this
   option to disable the generation of build targets for the LLVM tools.
 
+**LLVM_INSTALL_BINUTILS_SYMLINKS**:BOOL
+  Install symlinks from the binutils tool names to the corresponding LLVM tools.
+  For example, ar will be symlinked to llvm-ar.
+
 **LLVM_BUILD_EXAMPLES**:BOOL
   Build LLVM examples. Defaults to OFF. Targets for building each example are
   generated in any case. See documentation for *LLVM_BUILD_TOOLS* above for more
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index f1f93c7a228..0f2681e0cd8 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -3,7 +3,7 @@ if (DOXYGEN_FOUND)
 if (LLVM_ENABLE_DOXYGEN)
   set(abs_top_srcdir ${CMAKE_CURRENT_SOURCE_DIR})
   set(abs_top_builddir ${CMAKE_CURRENT_BINARY_DIR})
-  
+
   if (HAVE_DOT)
     set(DOT ${LLVM_PATH_DOT})
   endif()
@@ -21,20 +21,20 @@ if (LLVM_ENABLE_DOXYGEN)
     set(enable_external_search "NO")
     set(extra_search_mappings "")
   endif()
-  
+
   # If asked, configure doxygen for the creation of a Qt Compressed Help file.
   option(LLVM_ENABLE_DOXYGEN_QT_HELP
     "Generate a Qt Compressed Help file." OFF)
   if (LLVM_ENABLE_DOXYGEN_QT_HELP)
     set(LLVM_DOXYGEN_QCH_FILENAME "org.llvm.qch" CACHE STRING
       "Filename of the Qt Compressed help file")
-    set(LLVM_DOXYGEN_QHP_NAMESPACE "org.llvm" CACHE STRING 
+    set(LLVM_DOXYGEN_QHP_NAMESPACE "org.llvm" CACHE STRING
       "Namespace under which the intermediate Qt Help Project file lives")
     set(LLVM_DOXYGEN_QHP_CUST_FILTER_NAME "${PACKAGE_STRING}" CACHE STRING
       "See http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters")
     set(LLVM_DOXYGEN_QHP_CUST_FILTER_ATTRS "${PACKAGE_NAME},${PACKAGE_VERSION}" CACHE STRING
       "See http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes")
-    find_program(LLVM_DOXYGEN_QHELPGENERATOR_PATH qhelpgenerator 
+    find_program(LLVM_DOXYGEN_QHELPGENERATOR_PATH qhelpgenerator
       DOC "Path to the qhelpgenerator binary")
     if (NOT LLVM_DOXYGEN_QHELPGENERATOR_PATH)
       message(FATAL_ERROR "Failed to find qhelpgenerator binary")
@@ -55,7 +55,7 @@ if (LLVM_ENABLE_DOXYGEN)
     set(llvm_doxygen_qhp_cust_filter_name "")
     set(llvm_doxygen_qhp_cust_filter_attrs "")
   endif()
-  
+
   option(LLVM_DOXYGEN_SVG
     "Use svg instead of png files for doxygen graphs." OFF)
   if (LLVM_DOXYGEN_SVG)
@@ -113,6 +113,7 @@ if (LLVM_ENABLE_SPHINX)
     if (${SPHINX_OUTPUT_MAN})
       add_sphinx_target(man llvm)
       add_sphinx_target(man llvm-dwarfdump)
+      add_sphinx_target(man dsymutil)
     endif()
 
   endif()
diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst
index 44cc57cebaf..9078f65e01c 100644
--- a/docs/CommandGuide/FileCheck.rst
+++ b/docs/CommandGuide/FileCheck.rst
@@ -86,6 +86,11 @@ OPTIONS
 
   All other variables get undefined after each encountered ``CHECK-LABEL``.
 
+.. option:: -D<VAR=VALUE>
+
+  Sets a filecheck variable ``VAR`` with value ``VALUE`` that can be used in
+  ``CHECK:`` lines.
+
 .. option:: -version
 
  Show the version number of this program.
diff --git a/docs/CommandGuide/dsymutil.rst b/docs/CommandGuide/dsymutil.rst
new file mode 100644
index 00000000000..a29bc3c295c
--- /dev/null
+++ b/docs/CommandGuide/dsymutil.rst
@@ -0,0 +1,89 @@
+dsymutil - manipulate archived DWARF debug symbol files
+=======================================================
+
+SYNOPSIS
+--------
+
+| :program:`dsymutil` [*options*] *executable*
+
+DESCRIPTION
+-----------
+
+:program:`dsymutil` links the DWARF debug information found in the object files
+for an executable *executable* by using the debug symbol information contained in
+its symbol table. By default, the linked debug information is placed in a
+``.dSYM`` bundle with the same name as the executable.
+
+OPTIONS
+-------
+.. option:: --arch=<arch>
+
+ Link DWARF debug information only for specified CPU architecture types.
+ Architectures may be specified by name. When using this option, an error will
+ be returned if any architectures can not be properly linked.  This option can
+ be specified multiple times, once for each desired architecture. All CPU
+ architectures will be linked by default and any architectures that can't be
+ properly linked will cause :program:`dsymutil` to return an error.
+
+.. option:: --dump-debug-map
+
+ Dump the *executable*'s debug-map (the list of the object files containing the
+ debug information) in YAML format and exit. No DWARF link will take place.
+
+.. option:: -f, --flat
+
+ Produce a flat dSYM file. A ``.dwarf`` extension will be appended to the
+ executable name unless the output file is specified using the -o option.
+
+.. option:: --no-odr
+
+ Do not use ODR (One Definition Rule) for uniquing C++ types.
+
+.. option:: --no-output
+
+ Do the link in memory, but do not emit the result file.
+
+.. option:: --no-swiftmodule-timestamp
+
+ Don't check the timestamp for swiftmodule files.
+
+.. option:: -j <n>, --num-threads=<n>
+
+ Specifies the maximum number (``n``) of simultaneous threads to use when
+ linking multiple architectures.
+
+.. option:: -o <filename>
+
+ Specifies an alternate ``path`` to place the dSYM bundle. The default dSYM
+ bundle path is created by appending ``.dSYM`` to the executable name.
+
+.. option:: --oso-prepend-path=<path>
+
+ Specifies a ``path`` to prepend to all debug symbol object file paths.
+
+.. option:: -s, --symtab
+
+ Dumps the symbol table found in *executable* or object file(s) and exits.
+
+.. option:: -v, --verbose
+
+ Display verbose information when linking.
+
+.. option:: --version
+
+ Display the version of the tool.
+
+.. option:: -y
+
+ Treat *executable* as a YAML debug-map rather than an executable.
+
+EXIT STATUS
+-----------
+
+:program:`dsymutil` returns 0 if the DWARF debug information was linked
+successfully. Otherwise, it returns 1.
+
+SEE ALSO
+--------
+
+:manpage:`llvm-dwarfdump(1)`
diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst
index 5a0a98ceb1f..805df00c173 100644
--- a/docs/CommandGuide/index.rst
+++ b/docs/CommandGuide/index.rst
@@ -30,6 +30,7 @@ Basic Commands
    llvm-stress
    llvm-symbolizer
    llvm-dwarfdump
+   dsymutil
 
 Debugging Tools
 ~~~~~~~~~~~~~~~
diff --git a/docs/CommandGuide/lli.rst b/docs/CommandGuide/lli.rst
index 9da13ee47e0..58481073d06 100644
--- a/docs/CommandGuide/lli.rst
+++ b/docs/CommandGuide/lli.rst
@@ -122,7 +122,7 @@ CODE GENERATION OPTIONS
 
  Choose the code model from:
 
- .. code-block:: perl
+ .. code-block:: text
 
       default: Target default code model
       small: Small code model
@@ -154,7 +154,7 @@ CODE GENERATION OPTIONS
 
  Instruction schedulers available (before register allocation):
 
- .. code-block:: perl
+ .. code-block:: text
 
       =default: Best scheduler for the target
       =none: No scheduling: breadth first sequencing
@@ -168,7 +168,7 @@ CODE GENERATION OPTIONS
 
  Register allocator to use (default=linearscan)
 
- .. code-block:: perl
+ .. code-block:: text
 
       =bigblock: Big-block register allocator
       =linearscan: linear scan register allocator =local -   local register allocator
@@ -178,7 +178,7 @@ CODE GENERATION OPTIONS
 
  Choose relocation model from:
 
- .. code-block:: perl
+ .. code-block:: text
 
       =default: Target default relocation model
       =static: Non-relocatable code =pic -   Fully relocatable, position independent code
@@ -188,7 +188,7 @@ CODE GENERATION OPTIONS
 
  Spiller to use (default=local)
 
- .. code-block:: perl
+ .. code-block:: text
 
       =simple: simple spiller
       =local: local spiller
@@ -197,7 +197,7 @@ CODE GENERATION OPTIONS
 
  Choose style of code to emit from X86 backend:
 
- .. code-block:: perl
+ .. code-block:: text
 
       =att: Emit AT&T-style assembly
       =intel: Emit Intel-style assembly
diff --git a/docs/CommandGuide/llvm-pdbutil.rst b/docs/CommandGuide/llvm-pdbutil.rst
index 8836f3a3eb4..29d487e0e74 100644
--- a/docs/CommandGuide/llvm-pdbutil.rst
+++ b/docs/CommandGuide/llvm-pdbutil.rst
@@ -142,7 +142,7 @@ Symbol Type Options
 
  Displays class definitions in the specified format.
 
- .. code-block:: perl
+ .. code-block:: text
 
     =all      - Display all class members including data, constants, typedefs, functions, etc (default)
     =layout   - Only display members that contribute to class size.
@@ -152,7 +152,7 @@ Symbol Type Options
 
  Displays classes in the specified order.
 
- .. code-block:: perl
+ .. code-block:: text
 
     =none            - Undefined / no particular sort order (default)
     =name            - Sort classes by name
@@ -200,7 +200,7 @@ Symbol Type Options
  Type of symbols to dump when -globals, -externals, or -module-syms is
  specified. (default all)
 
- .. code-block:: perl
+ .. code-block:: text
 
     =thunks - Display thunk symbols
     =data   - Display data symbols
@@ -212,7 +212,7 @@ Symbol Type Options
  For symbols dumped via the -module-syms, -globals, or -externals options, sort
  the results in specified order.
 
- .. code-block:: perl
+ .. code-block:: text
 
     =none - Undefined / no particular sort order
     =name - Sort symbols by name
diff --git a/docs/GetElementPtr.rst b/docs/GetElementPtr.rst
index c2da640fe06..b593871695f 100644
--- a/docs/GetElementPtr.rst
+++ b/docs/GetElementPtr.rst
@@ -196,7 +196,7 @@ illegal.
 In order to access the 18th integer in the array, you would need to do the
 following:
 
-.. code-block:: llvm
+.. code-block:: text
 
   %idx = getelementptr { [40 x i32]* }, { [40 x i32]* }* %, i64 0, i32 0
   %arr = load [40 x i32]** %idx
diff --git a/docs/HowToCrossCompileBuiltinsOnArm.rst b/docs/HowToCrossCompileBuiltinsOnArm.rst
new file mode 100644
index 00000000000..4b4d563a5a9
--- /dev/null
+++ b/docs/HowToCrossCompileBuiltinsOnArm.rst
@@ -0,0 +1,201 @@
+===================================================================
+How to Cross Compile Compiler-rt Builtins For Arm
+===================================================================
+
+Introduction
+============
+
+This document contains information about building and testing the builtins part
+of compiler-rt for an Arm target, from an x86_64 Linux machine.
+
+While this document concentrates on Arm and Linux the general principles should
+apply to other targets supported by compiler-rt. Further contributions for other
+targets are welcome.
+
+The instructions in this document depend on libraries and programs external to
+LLVM. There are many ways to install and configure these dependencies, so you
+may need to adapt the instructions here to fit your own local situation.
+
+Prerequisites
+=============
+
+In this use case we'll be using CMake on a Debian-based Linux system,
+cross-compiling from an x86_64 host to a hard-float Armv7-A target. We'll be
+using as many of the LLVM tools as we can, but it is possible to use GNU
+equivalents.
+
+ * ``A build of LLVM/clang for the llvm-tools and llvm-config``
+ * ``The qemu-arm user mode emulator``
+ * ``An arm-linux-gnueabihf sysroot``
+
+See https://compiler-rt.llvm.org/ for more information about the dependencies
+on clang and LLVM.
+
+``qemu-arm`` should be available as a package for your Linux distribution.
+
+The most complicated of the prerequisites to satisfy is the arm-linux-gnueabihf
+sysroot. The :doc:`HowToCrossCompileLLVM` has information about how to use the
+Linux distribution's multiarch support to fulfill the dependencies for building
+LLVM. Alternatively, as building and testing just the compiler-rt builtins
+requires fewer dependencies than LLVM, it is possible to use the Linaro
+arm-linux-gnueabihf gcc installation as our sysroot.
+
+Building compiler-rt builtins for Arm
+=====================================
+We will be doing a standalone build of compiler-rt using the following cmake
+options.
+
+* ``path/to/llvm/projects/compiler-rt``
+* ``-DCOMPILER_RT_BUILD_BUILTINS=ON``
+* ``-DCOMPILER_RT_BUILD_SANITIZERS=OFF``
+* ``-DCOMPILER_RT_BUILD_XRAY=OFF``
+* ``-DCOMPILER_RT_BUILD_LIBFUZZER=OFF``
+* ``-DCOMPILER_RT_BUILD_PROFILE=OFF``
+* ``-DCMAKE_C_COMPILER=/path/to/clang``
+* ``-DCMAKE_AR=/path/to/llvm-ar``
+* ``-DCMAKE_NM=/path/to/llvm-nm``
+* ``-DCMAKE_RANLIB=/path/to/llvm-ranlib``
+* ``-DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld"``
+* ``-DCMAKE_C_COMPILER_TARGET="arm-linux-gnueabihf"``
+* ``-DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON``
+* ``-DLLVM_CONFIG_PATH=/path/to/llvm-config``
+* ``-DCMAKE_C_FLAGS="build-c-flags"``
+
+The build-c-flags need to be sufficient to pass the CMake compiler check and
+to compile compiler-rt. When using a GCC 7 Linaro arm-linux-gnueabihf
+installation the following flags are needed:
+
+* ``--target=arm-linux-gnueabihf``
+* ``-march=armv7a``
+* ``--gcc-toolchain=/path/to/dir/toolchain``
+* ``--sysroot=/path/to/toolchain/arm-linux-gnueabihf/libc``
+
+Depending on how your sysroot is laid out, you may not need ``--gcc-toolchain``.
+For example if you have added armhf as an architecture using your Linux
+distribution's multiarch support then you should be able to use ``--sysroot=/``.
+
+Once cmake has completed, the builtins can be built with ``ninja builtins``.
+
+Testing compiler-rt builtins using qemu-arm
+===========================================
+To test the builtins library we need to add a few more cmake flags to enable
+testing and set up the compiler and flags for the test cases. We must also tell
+cmake that we wish to run the tests on ``qemu-arm``.
+
+* ``-DCOMPILER_RT_EMULATOR="qemu-arm -L /path/to/armhf/sysroot"``
+* ``-DCOMPILER_RT_INCLUDE_TESTS=ON``
+* ``-DCOMPILER_RT_TEST_COMPILER="/path/to/clang"``
+* ``-DCOMPILER_RT_TEST_COMPILER_CFLAGS="test-c-flags"``
+
+The ``/path/to/armhf/sysroot`` should be the same as the one passed to
+``--sysroot`` in the "build-c-flags".
+
+The "test-c-flags" can be the same as the "build-c-flags", with the addition
+of ``-fuse-ld=lld`` if you wish to use lld to link the tests.
+
+Once cmake has completed, the tests can be built and run using
+``ninja check-builtins``.
+
+Modifications for other Targets
+===============================
+
+Arm Soft-Float Target
+---------------------
+The instructions for the Arm hard-float target can be used for the soft-float
+target by substituting soft-float equivalents for the sysroot and target. The
+target to use is:
+
+* ``-DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi``
+
+Depending on whether you want to use floating point instructions or not you
+may need extra c-flags such as ``-mfloat-abi=softfp`` for use of floating-point
+instructions, and ``-mfloat-abi=soft -mfpu=none`` for software floating-point
+emulation.
+
+AArch64 Target
+--------------
+The instructions for Arm can be used for AArch64 by substituting AArch64
+equivalents for the sysroot, emulator and target.
+
+* ``-DCMAKE_C_COMPILER_TARGET=aarch64-linux-gnu``
+* ``-DCOMPILER_RT_EMULATOR="qemu-aarch64 -L /path/to/aarch64/sysroot"``
+
+The CMAKE_C_FLAGS and COMPILER_RT_TEST_COMPILER_CFLAGS may also need:
+``"--sysroot=/path/to/aarch64/sysroot --gcc-toolchain=/path/to/gcc-toolchain"``
+
+Armv6-M, Armv7-M and Armv7E-M targets
+-------------------------------------
+If you wish to build, but not test compiler-rt for Armv6-M, Armv7-M or Armv7E-M
+then the easiest way is to use the BaremetalARM.cmake recipe in
+clang/cmake/caches.
+
+You will need a bare metal sysroot such as that provided by the GNU ARM
+Embedded toolchain.
+
+The libraries can be built with the cmake options:
+
+* ``-DBAREMETAL_ARMV6M_SYSROOT=/path/to/bare/metal/sysroot``
+* ``-DBAREMETAL_ARMV7M_SYSROOT=/path/to/bare/metal/sysroot``
+* ``-DBAREMETAL_ARMV7EM_SYSROOT=/path/to/bare/metal/sysroot``
+* ``-C /path/to/llvm/source/tools/clang/cmake/caches/BaremetalARM.cmake``
+
+**Note** that for the recipe to work the compiler-rt source must be checked out
+into the directory llvm/runtimes and not llvm/projects.
+
+To build and test the libraries using a similar method to Armv7-A is possible
+but more difficult. The main problems are:
+
+* There isn't a ``qemu-arm`` user-mode emulator for bare-metal systems. The ``qemu-system-arm`` can be used but this is significantly more difficult to set up.
+* The targets used to compile compiler-rt have the suffix -none-eabi. This uses the BareMetal driver in clang and by default won't find the libraries needed to pass the cmake compiler check.
+
+As the Armv6-M, Armv7-M and Armv7E-M builds of compiler-rt only use instructions
+that are supported on Armv7-A we can still get most of the value of running the
+tests using the same ``qemu-arm`` that we used for Armv7-A by building and
+running the test cases for Armv7-A but using the builtins compiled for
+Armv6-M, Armv7-M or Armv7E-M. This will not catch instructions that are
+supported on Armv7-A but not Armv6-M, Armv7-M and Armv7E-M.
+
+To get the cmake compile test to pass the libraries needed to successfully link
+the test application will need to be manually added to ``CMAKE_C_FLAGS``.
+Alternatively if you are using version 3.6 or above of cmake you can use
+``CMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY`` to skip the link step.
+
+* ``-DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY``
+* ``-DCOMPILER_RT_OS_DIR="baremetal"``
+* ``-DCOMPILER_RT_BUILD_BUILTINS=ON``
+* ``-DCOMPILER_RT_BUILD_SANITIZERS=OFF``
+* ``-DCOMPILER_RT_BUILD_XRAY=OFF``
+* ``-DCOMPILER_RT_BUILD_LIBFUZZER=OFF``
+* ``-DCOMPILER_RT_BUILD_PROFILE=OFF``
+* ``-DCMAKE_C_COMPILER=${host_install_dir}/bin/clang``
+* ``-DCMAKE_C_COMPILER_TARGET="your *-none-eabi target"``
+* ``-DCMAKE_AR=/path/to/llvm-ar``
+* ``-DCMAKE_NM=/path/to/llvm-nm``
+* ``-DCMAKE_RANLIB=/path/to/llvm-ranlib``
+* ``-DCOMPILER_RT_BAREMETAL_BUILD=ON``
+* ``-DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON``
+* ``-DLLVM_CONFIG_PATH=/path/to/llvm-config``
+* ``-DCMAKE_C_FLAGS="build-c-flags"``
+* ``-DCMAKE_ASM_FLAGS="${arm_cflags}"``
+* ``-DCOMPILER_RT_EMULATOR="qemu-arm -L /path/to/armv7-A/sysroot"``
+* ``-DCOMPILER_RT_INCLUDE_TESTS=ON``
+* ``-DCOMPILER_RT_TEST_COMPILER="/path/to/clang"``
+* ``-DCOMPILER_RT_TEST_COMPILER_CFLAGS="test-c-flags"``
+
+The Armv6-M builtins will use the soft-float ABI. When compiling the tests for
+Armv7-A we must include ``"-mthumb -mfloat-abi=soft -mfpu=none"`` in the
+test-c-flags. We must use an Armv7-A soft-float abi sysroot for ``qemu-arm``.
+
+Unfortunately at time of writing the Armv7-M and Armv7E-M builds of
+compiler-rt will always include assembler files including floating point
+instructions. This means that building for a cpu without a floating point unit
+requires something like removing the arm_Thumb1_VFPv2_SOURCES from the
+arm_Thumb1_SOURCES in builtins/CMakeLists.txt. The float-abi of the compiler-rt
+library must be matched by the float abi of the Armv7-A sysroot used by
+qemu-arm.
+
+Depending on the linker used for the test cases you may encounter BuildAttribute
+mismatches between the M-profile objects from compiler-rt and the A-profile
+objects from the test. The lld linker does not check the BuildAttributes so it
+can be used to link the tests by adding -fuse-ld=lld to the
+``COMPILER_RT_TEST_COMPILER_CFLAGS``.
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 9d910568bd5..8cbed7d87d1 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -542,7 +542,7 @@ symbol is assumed to be ``dso_preemptable``.
 
 ``dso_local``
     The compiler may assume that a function or variable marked as ``dso_local``
-    will resolve to a symbol within the same linkage unit. Direct access will 
+    will resolve to a symbol within the same linkage unit. Direct access will
     be generated even if the definition is not within this compilation unit.
 
 .. _namedtypes:
@@ -597,9 +597,9 @@ Global variables in other translation units can also be declared, in which
 case they don't have an initializer.
 
 Either global variable definitions or declarations may have an explicit section
-to be placed in and may have an optional explicit alignment specified. If there 
-is a mismatch between the explicit or inferred section information for the 
-variable declaration and its definition the resulting behavior is undefined. 
+to be placed in and may have an optional explicit alignment specified. If there
+is a mismatch between the explicit or inferred section information for the
+variable declaration and its definition the resulting behavior is undefined.
 
 A variable may be defined as a global ``constant``, which indicates that
 the contents of the variable will **never** be modified (enabling better
@@ -642,11 +642,11 @@ target supports it, it will emit globals to the section specified.
 Additionally, the global can placed in a comdat if the target has the necessary
 support.
 
-External declarations may have an explicit section specified. Section 
-information is retained in LLVM IR for targets that make use of this 
-information. Attaching section information to an external declaration is an 
-assertion that its definition is located in the specified section. If the 
-definition is located in a different section, the behavior is undefined.   
+External declarations may have an explicit section specified. Section
+information is retained in LLVM IR for targets that make use of this
+information. Attaching section information to an external declaration is an
+assertion that its definition is located in the specified section. If the
+definition is located in a different section, the behavior is undefined.
 
 By default, global initializers are optimized by assuming that global
 variables defined within the module are not modified from their
@@ -2272,11 +2272,11 @@ seq\_cst total orderings of other operations that are not marked
 Fast-Math Flags
 ---------------
 
-LLVM IR floating-point binary ops (:ref:`fadd <i_fadd>`,
+LLVM IR floating-point operations (:ref:`fadd <i_fadd>`,
 :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
 :ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`) and :ref:`call <i_call>`
-instructions have the following flags that can be set to enable
-otherwise unsafe floating point transformations.
+may use the following flags to enable otherwise unsafe 
+floating-point transformations.
 
 ``nnan``
    No NaNs - Allow optimizations to assume the arguments and result are not
@@ -2300,10 +2300,17 @@ otherwise unsafe floating point transformations.
    Allow floating-point contraction (e.g. fusing a multiply followed by an
    addition into a fused multiply-and-add).
 
+``afn``
+   Approximate functions - Allow substitution of approximate calculations for
+   functions (sin, log, sqrt, etc). See floating-point intrinsic definitions 
+   for places where this can apply to LLVM's intrinsic math functions. 
+
+``reassoc``
+   Allow reassociation transformations for floating-point instructions. 
+   This may dramatically change results in floating point.
+
 ``fast``
-   Fast - Allow algebraically equivalent transformations that may
-   dramatically change results in floating point (e.g. reassociate). This
-   flag implies all the others.
+   This flag implies all of the others.
 
 .. _uselistorder:
 
@@ -4499,7 +4506,7 @@ source variable. DIExpressions also follow this model: A DIExpression that
 doesn't have a trailing ``DW_OP_stack_value`` will describe an *address* when
 combined with a concrete location.
 
-.. code-block:: llvm
+.. code-block:: text
 
     !0 = !DIExpression(DW_OP_deref)
     !1 = !DIExpression(DW_OP_plus_uconst, 3)
@@ -4639,13 +4646,13 @@ As a concrete example, the type descriptor graph for the following program
       int i;    // offset 0
       float f;  // offset 4
     };
-    
+
     struct Outer {
       float f;  // offset 0
       double d; // offset 4
       struct Inner inner_a;  // offset 12
     };
-    
+
     void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) {
       outer->f = 0;            // tag0: (OuterStructTy, FloatScalarTy, 0)
       outer->inner_a.i = 0;    // tag1: (OuterStructTy, IntScalarTy, 12)
@@ -5194,14 +5201,37 @@ the loop identifier metadata node directly:
    !1 = !{!1} ; an identifier for the inner loop
    !2 = !{!2} ; an identifier for the outer loop
 
+'``irr_loop``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``irr_loop`` metadata may be attached to the terminator instruction of a basic
+block that's an irreducible loop header (note that an irreducible loop has more
+than one header basic block.) If ``irr_loop`` metadata is attached to the
+terminator instruction of a basic block that is not really an irreducible loop
+header, the behavior is undefined. The intent of this metadata is to improve the
+accuracy of the block frequency propagation. For example, in the code below, the
+block ``header0`` may have a loop header weight (relative to the other headers of
+the irreducible loop) of 100:
+
+.. code-block:: llvm
+
+    header0:
+    ...
+    br i1 %cmp, label %t1, label %t2, !irr_loop !0
+
+    ...
+    !0 = !{!"loop_header_weight", i64 100}
+
+Irreducible loop header weights are typically based on profile data.
+
 '``invariant.group``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The ``invariant.group`` metadata may be attached to ``load``/``store`` instructions.
-The existence of the ``invariant.group`` metadata on the instruction tells 
-the optimizer that every ``load`` and ``store`` to the same pointer operand 
-within the same invariant group can be assumed to load or store the same  
-value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects 
+The existence of the ``invariant.group`` metadata on the instruction tells
+the optimizer that every ``load`` and ``store`` to the same pointer operand
+within the same invariant group can be assumed to load or store the same
+value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects
 when two pointers are considered the same). Pointers returned by bitcast or
 getelementptr with only zero indices are considered the same.
 
@@ -5214,26 +5244,26 @@ Examples:
    %ptr = alloca i8
    store i8 42, i8* %ptr, !invariant.group !0
    call void @foo(i8* %ptr)
-   
+
    %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
    call void @foo(i8* %ptr)
    %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed
-  
-   %newPtr = call i8* @getPointer(i8* %ptr) 
+
+   %newPtr = call i8* @getPointer(i8* %ptr)
    %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
-   
+
    %unknownValue = load i8, i8* @unknownPtr
    store i8 %unknownValue, i8* %ptr, !invariant.group !0 ; Can assume that %unknownValue == 42
-   
+
    call void @foo(i8* %ptr)
    %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
    %d = load i8, i8* %newPtr2, !invariant.group !0  ; Can't step through invariant.group.barrier to get value of %ptr
-   
+
    ...
    declare void @foo(i8*)
    declare i8* @getPointer(i8*)
    declare i8* @llvm.invariant.group.barrier(i8*)
-   
+
    !0 = !{!"magic ptr"}
    !1 = !{!"other ptr"}
 
@@ -5242,7 +5272,7 @@ another based on aliasing information. This is because invariant.group is tied
 to the SSA value of the pointer operand.
 
 .. code-block:: llvm
-  
+
   %v = load i8, i8* %x, !invariant.group !0
   ; if %x mustalias %y then we can replace the above instruction with
   %v = load i8, i8* %y
@@ -5272,7 +5302,7 @@ It does not have any effect on non-ELF targets.
 
 Example:
 
-.. code-block:: llvm
+.. code-block:: text
 
     $a = comdat any
     @a = global i32 1, comdat $a
@@ -6700,9 +6730,9 @@ remainder.
 
 Note that unsigned integer remainder and signed integer remainder are
 distinct operations; for signed integer remainder, use '``srem``'.
- 
+
 Taking the remainder of a division by zero is undefined behavior.
-For vectors, if any element of the divisor is zero, the operation has 
+For vectors, if any element of the divisor is zero, the operation has
 undefined behavior.
 
 Example:
@@ -6754,7 +6784,7 @@ Note that signed integer remainder and unsigned integer remainder are
 distinct operations; for unsigned integer remainder, use '``urem``'.
 
 Taking the remainder of a division by zero is undefined behavior.
-For vectors, if any element of the divisor is zero, the operation has 
+For vectors, if any element of the divisor is zero, the operation has
 undefined behavior.
 Overflow also leads to undefined behavior; this is a rare case, but can
 occur, for example, by taking the remainder of a 32-bit division of
@@ -7627,7 +7657,7 @@ be reused in the cache. The code generator may select special
 instructions to save cache bandwidth, such as the ``MOVNT`` instruction on
 x86.
 
-The optional ``!invariant.group`` metadata must reference a 
+The optional ``!invariant.group`` metadata must reference a
 single metadata name ``<index>``. See ``invariant.group`` metadata.
 
 Semantics:
@@ -7701,7 +7731,7 @@ A ``fence`` instruction can also take an optional
 Example:
 """"""""
 
-.. code-block:: llvm
+.. code-block:: text
 
       fence acquire                                        ; yields void
       fence syncscope("singlethread") seq_cst              ; yields void
@@ -7733,10 +7763,10 @@ There are three arguments to the '``cmpxchg``' instruction: an address
 to operate on, a value to compare to the value currently be at that
 address, and a new value to place at that address if the compared values
 are equal. The type of '<cmp>' must be an integer or pointer type whose
-bit width is a power of two greater than or equal to eight and less 
+bit width is a power of two greater than or equal to eight and less
 than or equal to a target-specific size limit. '<cmp>' and '<new>' must
-have the same type, and the type of '<pointer>' must be a pointer to 
-that type. If the ``cmpxchg`` is marked as ``volatile``, then the 
+have the same type, and the type of '<pointer>' must be a pointer to
+that type. If the ``cmpxchg`` is marked as ``volatile``, then the
 optimizer is not allowed to modify the number or order of execution of
 this ``cmpxchg`` with other :ref:`volatile operations <volatile>`.
 
@@ -9030,7 +9060,7 @@ This instruction requires several arguments:
    ``tail`` or ``musttail`` markers to the call. It is used to prevent tail
    call optimization from being performed on the call.
 
-#. The optional ``fast-math flags`` marker indicates that the call has one or more 
+#. The optional ``fast-math flags`` marker indicates that the call has one or more
    :ref:`fast-math flags <fastmath>`, which are optimization hints to enable
    otherwise unsafe floating-point optimizations. Fast-math flags are only valid
    for calls that return a floating-point scalar or vector type.
@@ -10460,7 +10490,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.sqrt`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10474,20 +10504,22 @@ all types however.
 Overview:
 """""""""
 
-The '``llvm.sqrt``' intrinsics return the square root of the specified value,
-returning the same value as the libm '``sqrt``' functions would, but without
-trapping or setting ``errno``.
+The '``llvm.sqrt``' intrinsics return the square root of the specified value.
 
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same type.
+The argument and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the square root of the operand if it is a nonnegative
-floating point number.
+Return the same value as a corresponding libm '``sqrt``' function but without
+trapping or setting ``errno``. For types specified by IEEE-754, the result 
+matches a conforming libm implementation.
+
+When specified with the fast-math-flag 'afn', the result may be approximated 
+using a less accurate calculation.
 
 '``llvm.powi.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10534,7 +10566,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.sin`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10553,14 +10585,16 @@ The '``llvm.sin.*``' intrinsics return the sine of the operand.
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same type.
+The argument and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the sine of the specified operand, returning the
-same values as the libm ``sin`` functions would, and handles error
-conditions in the same way.
+Return the same value as a corresponding libm '``sin``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.cos.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10569,7 +10603,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.cos`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10588,14 +10622,16 @@ The '``llvm.cos.*``' intrinsics return the cosine of the operand.
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same type.
+The argument and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the cosine of the specified operand, returning the
-same values as the libm ``cos`` functions would, and handles error
-conditions in the same way.
+Return the same value as a corresponding libm '``cos``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.pow.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10604,7 +10640,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.pow`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10624,15 +10660,16 @@ specified (positive or negative) power.
 Arguments:
 """"""""""
 
-The second argument is a floating point power, and the first is a value
-to raise to that power.
+The arguments and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the first value raised to the second power,
-returning the same values as the libm ``pow`` functions would, and
-handles error conditions in the same way.
+Return the same value as a corresponding libm '``pow``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.exp.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10641,7 +10678,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.exp`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10661,13 +10698,16 @@ value.
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same type.
+The argument and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the same values as the libm ``exp`` functions
-would, and handles error conditions in the same way.
+Return the same value as a corresponding libm '``exp``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.exp2.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10676,7 +10716,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.exp2`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10696,13 +10736,16 @@ specified value.
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same type.
+The argument and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the same values as the libm ``exp2`` functions
-would, and handles error conditions in the same way.
+Return the same value as a corresponding libm '``exp2``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.log.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10711,7 +10754,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.log`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10731,13 +10774,16 @@ value.
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same type.
+The argument and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the same values as the libm ``log`` functions
-would, and handles error conditions in the same way.
+Return the same value as a corresponding libm '``log``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.log10.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10746,7 +10792,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.log10`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10766,13 +10812,16 @@ specified value.
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same type.
+The argument and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the same values as the libm ``log10`` functions
-would, and handles error conditions in the same way.
+Return the same value as a corresponding libm '``log10``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.log2.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10781,7 +10830,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.log2`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10801,13 +10850,16 @@ value.
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same type.
+The argument and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the same values as the libm ``log2`` functions
-would, and handles error conditions in the same way.
+Return the same value as a corresponding libm '``log2``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.fma.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10816,7 +10868,7 @@ Syntax:
 """""""
 
 This is an overloaded intrinsic. You can use ``llvm.fma`` on any
-floating point or vector of floating point type. Not all targets support
+floating-point or vector of floating-point type. Not all targets support
 all types however.
 
 ::
@@ -10830,20 +10882,21 @@ all types however.
 Overview:
 """""""""
 
-The '``llvm.fma.*``' intrinsics perform the fused multiply-add
-operation.
+The '``llvm.fma.*``' intrinsics perform the fused multiply-add operation.
 
 Arguments:
 """"""""""
 
-The argument and return value are floating point numbers of the same
-type.
+The arguments and return value are floating-point numbers of the same type.
 
 Semantics:
 """"""""""
 
-This function returns the same values as the libm ``fma`` functions
-would, and does not set errno.
+Return the same value as a corresponding libm '``fma``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math flag ``afn``, the result may be approximated
+using a less accurate calculation.
 
 '``llvm.fabs.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -12772,7 +12825,7 @@ Syntax:
 Overview:
 """""""""
 
-The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant 
+The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant
 established by invariant.group metadata no longer holds, to obtain a new pointer
 value that does not carry the invariant information.
 
@@ -12786,7 +12839,7 @@ the pointer to the memory for which the ``invariant.group`` no longer holds.
 Semantics:
 """"""""""
 
-Returns another pointer that aliases its argument but which is considered different 
+Returns another pointer that aliases its argument but which is considered different
 for the purposes of ``load``/``store`` ``invariant.group`` metadata.
 
 Constrained Floating Point Intrinsics
@@ -12864,7 +12917,7 @@ strictly preserve the floating point exception semantics of the original code.
 Any FP exception that would have been raised by the original code must be raised
 by the transformed code, and the transformed code must not raise any FP
 exceptions that would not have been raised by the original code.  This is the
-exception behavior argument that will be used if the code being compiled reads 
+exception behavior argument that will be used if the code being compiled reads
 the FP exception status flags, but this mode can also be used with code that
 unmasks FP exceptions.
 
@@ -12882,7 +12935,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.fadd(<type> <op1>, <type> <op2>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -12919,7 +12972,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.fsub(<type> <op1>, <type> <op2>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -12956,7 +13009,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.fmul(<type> <op1>, <type> <op2>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -12993,7 +13046,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.fdiv(<type> <op1>, <type> <op2>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -13030,7 +13083,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.frem(<type> <op1>, <type> <op2>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -13059,7 +13112,7 @@ Semantics:
 
 The value produced is the floating point remainder from the division of the two
 value operands and has the same type as the operands.  The remainder has the
-same sign as the dividend. 
+same sign as the dividend.
 
 '``llvm.experimental.constrained.fma``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -13119,7 +13172,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.sqrt(<type> <op1>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -13156,7 +13209,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.pow(<type> <op1>, <type> <op2>,
                                          metadata <rounding mode>,
                                          metadata <exception behavior>)
@@ -13193,7 +13246,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.powi(<type> <op1>, i32 <op2>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -13232,7 +13285,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.sin(<type> <op1>,
                                          metadata <rounding mode>,
                                          metadata <exception behavior>)
@@ -13268,7 +13321,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.cos(<type> <op1>,
                                          metadata <rounding mode>,
                                          metadata <exception behavior>)
@@ -13304,7 +13357,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.exp(<type> <op1>,
                                          metadata <rounding mode>,
                                          metadata <exception behavior>)
@@ -13339,7 +13392,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.exp2(<type> <op1>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -13375,7 +13428,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.log(<type> <op1>,
                                          metadata <rounding mode>,
                                          metadata <exception behavior>)
@@ -13411,7 +13464,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.log10(<type> <op1>,
                                            metadata <rounding mode>,
                                            metadata <exception behavior>)
@@ -13446,7 +13499,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.log2(<type> <op1>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -13481,7 +13534,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.rint(<type> <op1>,
                                           metadata <rounding mode>,
                                           metadata <exception behavior>)
@@ -13520,7 +13573,7 @@ Syntax:
 
 ::
 
-      declare <type> 
+      declare <type>
       @llvm.experimental.constrained.nearbyint(<type> <op1>,
                                                metadata <rounding mode>,
                                                metadata <exception behavior>)
@@ -14281,7 +14334,7 @@ The '``llvm.memcpy.element.unordered.atomic.*``' intrinsic copies ``len`` bytes
 memory from the source location to the destination location. These locations are not
 allowed to overlap. The memory copy is performed as a sequence of load/store operations
 where each access is guaranteed to be a multiple of ``element_size`` bytes wide and
-aligned at an ``element_size`` boundary. 
+aligned at an ``element_size`` boundary.
 
 The order of the copy is unspecified. The same value may be read from the source
 buffer many times, but only one write is issued to the destination buffer per
@@ -14356,7 +14409,7 @@ The '``llvm.memmove.element.unordered.atomic.*``' intrinsic copies ``len`` bytes
 of memory from the source location to the destination location. These locations
 are allowed to overlap. The memory copy is performed as a sequence of load/store
 operations where each access is guaranteed to be a multiple of ``element_size``
-bytes wide and aligned at an ``element_size`` boundary. 
+bytes wide and aligned at an ``element_size`` boundary.
 
 The order of the copy is unspecified. The same value may be read from the source
 buffer many times, but only one write is issued to the destination buffer per
@@ -14431,7 +14484,7 @@ Semantics:
 The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of
 memory starting at the destination location to the given ``value``. The memory is
 set with a sequence of store operations where each access is guaranteed to be a
-multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary. 
+multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary.
 
 The order of the assignment is unspecified. Only one write is issued to the
 destination buffer per element. It is well defined to have concurrent reads and
diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst
index 491171393ab..103c6e0365b 100644
--- a/docs/SourceLevelDebugging.rst
+++ b/docs/SourceLevelDebugging.rst
@@ -188,7 +188,7 @@ the variable.  The third argument is a `complex expression
 <LangRef.html#diexpression>`_.  An `llvm.dbg.addr` intrinsic describes the
 *address* of a source variable.
 
-.. code-block:: llvm
+.. code-block:: text
 
     %i.addr = alloca i32, align 4
     call void @llvm.dbg.addr(metadata i32* %i.addr, metadata !1,
diff --git a/docs/WritingAnLLVMPass.rst b/docs/WritingAnLLVMPass.rst
index 54b3630e655..41f400740e8 100644
--- a/docs/WritingAnLLVMPass.rst
+++ b/docs/WritingAnLLVMPass.rst
@@ -1032,7 +1032,7 @@ implementation for the interface.
 Pass Statistics
 ===============
 
-The `Statistic <http://llvm.org/doxygen/Statistic_8h-source.html>`_ class is
+The `Statistic <http://llvm.org/doxygen/Statistic_8h_source.html>`_ class is
 designed to be an easy way to expose various success metrics from passes.
 These statistics are printed at the end of a run, when the :option:`-stats`
 command line option is enabled on the command line.  See the :ref:`Statistics
@@ -1043,7 +1043,7 @@ section <Statistic>` in the Programmer's Manual for details.
 What PassManager does
 ---------------------
 
-The `PassManager <http://llvm.org/doxygen/PassManager_8h-source.html>`_ `class
+The `PassManager <http://llvm.org/doxygen/PassManager_8h_source.html>`_ `class
 <http://llvm.org/doxygen/classllvm_1_1PassManager.html>`_ takes a list of
 passes, ensures their :ref:`prerequisites <writing-an-llvm-pass-interaction>`
 are set up correctly, and then schedules passes to run efficiently.  All of the
diff --git a/docs/index.rst b/docs/index.rst
index 955607a751c..47c2f047393 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -68,6 +68,7 @@ representation.
    CMakePrimer
    AdvancedBuilds
    HowToBuildOnARM
+   HowToCrossCompileBuiltinsOnArm
    HowToCrossCompileLLVM
    CommandGuide/index
    GettingStarted
@@ -105,6 +106,9 @@ representation.
 :doc:`HowToBuildOnARM`
    Notes on building and testing LLVM/Clang on ARM.
 
+:doc:`HowToCrossCompileBuiltinsOnArm`
+   Notes on cross-building and testing the compiler-rt builtins for Arm.
+
 :doc:`HowToCrossCompileLLVM`
    Notes on cross-building and testing LLVM/Clang.