aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYvan Roux <yvan.roux@linaro.org>2015-12-31 12:34:34 +0100
committerYvan Roux <yvan.roux@linaro.org>2015-12-31 12:44:03 +0100
commit07e0492a623485886dd08100f5ae8f22adb11fa2 (patch)
treeee712bf6d74d9eee5a80db91e0f0d22617a551c4
parentdad84c9a5ef6b373364319202be3f01036ea3fa3 (diff)
Backport from trunk r231678. 2015-12-16 Matthew Wahab <matthew.wahab@arm.com> * config/arm/arm-arches.def: Add "armv8.1-a" and "armv8.1-a+crc". * config/arm/arm-protos.h (FL2_ARCH8_1): New. (FL2_FOR_ARCH8_1A): New. * config/arm/arm-tables.opt: Regenerate. * config/arm/arm.c (arm_arch8_1): New. (arm_option_override): Set arm_arch8_1. * config/arm/arm.h (TARGET_NEON_RDMA): New. (arm_arch8_1): Declare. * doc/invoke.texi (ARM Options, -march): Add "armv8.1-a" and "armv8.1-a+crc". (ARM Options, -mfpu): Fix a typo. gcc/ Backport from trunk r231680. 2015-12-16 Matthew Wahab <matthew.wahab@arm.com> * config/arm/t-aprofile: Make "armv8.1-a" and "armv8.1-a+crc" matches for "armv8-a". gcc/ Backport from trunk r231681. 2015-12-16 Matthew Wahab <matthew.wahab@arm.com> * config/arm/iterators.md (VQRDMLH_AS): New. (neon_rdma_as): New. * config/arm/neon.md (neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>): New. (neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>): New. * config/arm/unspecs.md (UNSPEC_VQRDMLAH): New. (UNSPEC_VQRDMLSH): New. gcc/ Backport from trunk r231682. 2015-12-16 Matthew Wahab <matthew.wahab@arm.com> * config/arm/arm-c.c (arm_cpu_builtins): Define __ARM_FEATURE_QRDMX. Clean up some trailing whitespace. gcc/ Backport from trunk r231683. 2015-12-16 Matthew Wahab <matthew.wahab@arm.com> * doc/sourcebuild.texi (ARM-specific attributes): Add "arm_v8_1a_neon_ok" and "arm_v8_1a_neon_hw". gcc/testsuite/ Backport from trunk r231683. 2015-12-16 Matthew Wahab <matthew.wahab@arm.com> * lib/target-supports.exp (add_options_for_arm_v8_1a_neon): Update comment. Use check_effective_target_arm_v8_1a_neon_ok to select the command line options. (check_effective_target_arm_v8_1a_neon_ok_nocache): Update initial test to allow ARM targets. Select and record a working set of command line options. (check_effective_target_arm_v8_1a_neon_hw): Add tests for ARM targets. gcc/ Backport from trunk r231685. 2015-12-16 Matthew Wahab <matthew.wahab@arm.com> * config/arm/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New. (vqrdmlahq_s16, vqrdmlahq_s32): New. (vqrdmlsh_s16, vqrdmlsh_s32): New. (vqrdmlahq_s16, vqrdmlshq_s32): New. * config/arm/arm_neon_builtins.def: Add "vqrdmlah" and "vqrdmlsh". gcc/ Backport from trunk r231686. 2015-12-16 Matthew Wahab <matthew.wahab@arm.com> * config/arm/arm_neon.h (vqrdmlahq_lane_s16): New. (vqrdmlahq_lane_s32): New. (vqrdmlah_lane_s16): New. (vqrdmlah_lane_s32): New. (vqrdmlshq_lane_s16): New. (vqrdmlshq_lane_s32): New. (vqrdmlsh_lane_s16): New. (vqrdmlsh_lane_s32): New. * config/arm/arm_neon_builtins.def: Add "vqrdmlah_lane" and "vqrdmlsh_lane". Change-Id: Ie3e2a64a5efbb99cd38fa38373f907c7bdd54c21
-rw-r--r--gcc/config/arm/arm-arches.def5
-rw-r--r--gcc/config/arm/arm-c.c9
-rw-r--r--gcc/config/arm/arm-protos.h3
-rw-r--r--gcc/config/arm/arm-tables.opt10
-rw-r--r--gcc/config/arm/arm.c4
-rw-r--r--gcc/config/arm/arm.h6
-rw-r--r--gcc/config/arm/arm_neon.h100
-rw-r--r--gcc/config/arm/arm_neon_builtins.def4
-rw-r--r--gcc/config/arm/iterators.md5
-rw-r--r--gcc/config/arm/neon.md45
-rw-r--r--gcc/config/arm/t-aprofile2
-rw-r--r--gcc/config/arm/unspecs.md2
-rw-r--r--gcc/doc/invoke.texi6
-rw-r--r--gcc/doc/sourcebuild.texi9
-rw-r--r--gcc/testsuite/lib/target-supports.exp60
15 files changed, 247 insertions, 23 deletions
diff --git a/gcc/config/arm/arm-arches.def b/gcc/config/arm/arm-arches.def
index ddf6c3c330f..6c831538b2d 100644
--- a/gcc/config/arm/arm-arches.def
+++ b/gcc/config/arm/arm-arches.def
@@ -57,6 +57,11 @@ ARM_ARCH("armv7-m", cortexm3, 7M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_
ARM_ARCH("armv7e-m", cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7EM))
ARM_ARCH("armv8-a", cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH8A))
ARM_ARCH("armv8-a+crc",cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A))
+ARM_ARCH ("armv8.1-a", cortexa53, 8A,
+ ARM_FSET_MAKE (FL_CO_PROC | FL_FOR_ARCH8A, FL2_FOR_ARCH8_1A))
+ARM_ARCH ("armv8.1-a+crc",cortexa53, 8A,
+ ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A,
+ FL2_FOR_ARCH8_1A))
ARM_ARCH("iwmmxt", iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT))
ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2))
diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
index 84bb35989ab..27728f0fd77 100644
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@@ -75,19 +75,22 @@ static void
arm_cpu_builtins (struct cpp_reader* pfile)
{
def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY);
- def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT);
+ def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT);
def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT);
if (TARGET_CRYPTO)
builtin_define ("__ARM_FEATURE_CRYPTO");
if (unaligned_access)
builtin_define ("__ARM_FEATURE_UNALIGNED");
+
+ def_or_undef_macro (pfile, "__ARM_FEATURE_QRDMX", TARGET_NEON_RDMA);
+
if (TARGET_CRC32)
builtin_define ("__ARM_FEATURE_CRC32");
- def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT);
+ def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT);
if (TARGET_ARM_FEATURE_LDREX)
- builtin_define_with_int_value ("__ARM_FEATURE_LDREX",
+ builtin_define_with_int_value ("__ARM_FEATURE_LDREX",
TARGET_ARM_FEATURE_LDREX);
else
cpp_undef (pfile, "__ARM_FEATURE_LDREX");
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index d3d7216488e..177072cef8f 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -386,6 +386,8 @@ extern bool arm_is_constant_pool_ref (rtx);
#define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
#define FL_ARCH6KZ (1 << 31) /* ARMv6KZ architecture. */
+#define FL2_ARCH8_1 (1 << 0) /* Architecture 8.1. */
+
/* Flags that only effect tuning, not available instructions. */
#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
| FL_CO_PROC)
@@ -414,6 +416,7 @@ extern bool arm_is_constant_pool_ref (rtx);
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
+#define FL2_FOR_ARCH8_1A FL2_ARCH8_1
/* There are too many feature bits to fit in a single word so the set of cpu and
fpu capabilities is a structure. A feature set is created and manipulated
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index 41bf1ff250b..51fa3dff4ff 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -413,10 +413,16 @@ EnumValue
Enum(arm_arch) String(armv8-a+crc) Value(26)
EnumValue
-Enum(arm_arch) String(iwmmxt) Value(27)
+Enum(arm_arch) String(armv8.1-a) Value(27)
EnumValue
-Enum(arm_arch) String(iwmmxt2) Value(28)
+Enum(arm_arch) String(armv8.1-a+crc) Value(28)
+
+EnumValue
+Enum(arm_arch) String(iwmmxt) Value(29)
+
+EnumValue
+Enum(arm_arch) String(iwmmxt2) Value(30)
Enum
Name(arm_fpu) Type(int)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 9b848bd5ed6..9bcadba11dd 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -848,6 +848,9 @@ int arm_arch7em = 0;
/* Nonzero if instructions present in ARMv8 can be used. */
int arm_arch8 = 0;
+/* Nonzero if this chip supports the ARMv8.1 extensions. */
+int arm_arch8_1 = 0;
+
/* Nonzero if this chip can benefit from load scheduling. */
int arm_ld_sched = 0;
@@ -3099,6 +3102,7 @@ arm_option_override (void)
arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
+ arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1);
arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 0cef4474f6f..f1e85118001 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -219,6 +219,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
&& ARM_FPU_FSET_HAS (arm_fpu_desc->features, FPU_FL_NEON))
+/* FPU supports ARMv8.1 Adv.SIMD extensions. */
+#define TARGET_NEON_RDMA (TARGET_NEON && arm_arch8_1)
+
/* Q-bit is present. */
#define TARGET_ARM_QBIT \
(TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7))
@@ -430,6 +433,9 @@ extern int arm_arch7em;
/* Nonzero if this chip supports the ARM Architecture 8 extensions. */
extern int arm_arch8;
+/* Nonzero if this chip supports the ARM Architecture 8.1 extensions. */
+extern int arm_arch8_1;
+
/* Nonzero if this chip can benefit from load scheduling. */
extern int arm_ld_sched;
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 66622dfcfe2..1f00357b9a1 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -1159,6 +1159,56 @@ vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
return (int32x4_t)__builtin_neon_vqrdmulhv4si (__a, __b);
}
+#ifdef __ARM_FEATURE_QRDMX
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int16x4_t)__builtin_neon_vqrdmlahv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int32x2_t)__builtin_neon_vqrdmlahv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vqrdmlahv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vqrdmlahv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int16x4_t)__builtin_neon_vqrdmlshv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int32x2_t)__builtin_neon_vqrdmlshv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vqrdmlshv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vqrdmlshv4si (__a, __b, __c);
+}
+#endif
+
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_s8 (int8x8_t __a, int8x8_t __b)
{
@@ -7041,6 +7091,56 @@ vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
return (int32x2_t)__builtin_neon_vqrdmulh_lanev2si (__a, __b, __c);
}
+#ifdef __ARM_FEATURE_QRDMX
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+ return (int16x8_t)__builtin_neon_vqrdmlah_lanev8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+ return (int32x4_t)__builtin_neon_vqrdmlah_lanev4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return (int16x4_t)__builtin_neon_vqrdmlah_lanev4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return (int32x2_t)__builtin_neon_vqrdmlah_lanev2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+ return (int16x8_t)__builtin_neon_vqrdmlsh_lanev8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+ return (int32x4_t)__builtin_neon_vqrdmlsh_lanev4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return (int16x4_t)__builtin_neon_vqrdmlsh_lanev4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return (int32x2_t)__builtin_neon_vqrdmlsh_lanev2si (__a, __b, __c, __d);
+}
+#endif
+
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t __a, int16_t __b)
{
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 0b719df7607..1fdb2a8283a 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -45,6 +45,8 @@ VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si)
VAR4 (BINOP, vqrdmulh, v4hi, v2si, v8hi, v4si)
VAR2 (TERNOP, vqdmlal, v4hi, v2si)
VAR2 (TERNOP, vqdmlsl, v4hi, v2si)
+VAR4 (TERNOP, vqrdmlah, v4hi, v2si, v8hi, v4si)
+VAR4 (TERNOP, vqrdmlsh, v4hi, v2si, v8hi, v4si)
VAR3 (BINOP, vmullp, v8qi, v4hi, v2si)
VAR3 (BINOP, vmulls, v8qi, v4hi, v2si)
VAR3 (BINOP, vmullu, v8qi, v4hi, v2si)
@@ -58,6 +60,8 @@ VAR4 (BINOP, vqdmulh_n, v4hi, v2si, v8hi, v4si)
VAR4 (BINOP, vqrdmulh_n, v4hi, v2si, v8hi, v4si)
VAR4 (SETLANE, vqdmulh_lane, v4hi, v2si, v8hi, v4si)
VAR4 (SETLANE, vqrdmulh_lane, v4hi, v2si, v8hi, v4si)
+VAR4 (MAC_LANE, vqrdmlah_lane, v4hi, v2si, v8hi, v4si)
+VAR4 (MAC_LANE, vqrdmlsh_lane, v4hi, v2si, v8hi, v4si)
VAR2 (BINOP, vqdmull, v4hi, v2si)
VAR8 (BINOP, vshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
VAR8 (BINOP, vshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 47cc1eebecd..be62f6747d7 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -359,6 +359,8 @@
(define_int_iterator CRYPTO_SELECTING [UNSPEC_SHA1C UNSPEC_SHA1M
UNSPEC_SHA1P])
+(define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH])
+
;;----------------------------------------------------------------------------
;; Mode attributes
;;----------------------------------------------------------------------------
@@ -828,3 +830,6 @@
(simple_return " && use_simple_return_p ()")])
(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)")
(simple_return " && use_simple_return_p ()")])
+
+;; Attributes for VQRDMLAH/VQRDMLSH
+(define_int_attr neon_rdma_as [(UNSPEC_VQRDMLAH "a") (UNSPEC_VQRDMLSH "s")])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 62fb6daae99..844ef5eb2f3 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2014,6 +2014,18 @@
[(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)
+;; vqrdmlah, vqrdmlsh
+(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
+ [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
+ (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
+ (match_operand:VMDQI 2 "s_register_operand" "w")
+ (match_operand:VMDQI 3 "s_register_operand" "w")]
+ VQRDMLH_AS))]
+ "TARGET_NEON_RDMA"
+ "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+ [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
+)
+
(define_insn "neon_vqdmlal<mode>"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
@@ -3176,6 +3188,39 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
+;; vqrdmlah_lane, vqrdmlsh_lane
+(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
+ [(set (match_operand:VMQI 0 "s_register_operand" "=w")
+ (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
+ (match_operand:VMQI 2 "s_register_operand" "w")
+ (match_operand:<V_HALF> 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ VQRDMLH_AS))]
+ "TARGET_NEON_RDMA"
+{
+ return
+ "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
+}
+ [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
+)
+
+(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
+ [(set (match_operand:VMDI 0 "s_register_operand" "=w")
+ (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
+ (match_operand:VMDI 2 "s_register_operand" "w")
+ (match_operand:VMDI 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ VQRDMLH_AS))]
+ "TARGET_NEON_RDMA"
+{
+ return
+ "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
+}
+ [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
+)
+
(define_insn "neon_vmla_lane<mode>"
[(set (match_operand:VMD 0 "s_register_operand" "=w")
(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
diff --git a/gcc/config/arm/t-aprofile b/gcc/config/arm/t-aprofile
index 5d02ea5223d..94fd0ca9259 100644
--- a/gcc/config/arm/t-aprofile
+++ b/gcc/config/arm/t-aprofile
@@ -97,6 +97,8 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?xgene1
# Arch Matches
MULTILIB_MATCHES += march?armv8-a=march?armv8-a+crc
+MULTILIB_MATCHES += march?armv8-a=march?armv8.1-a
+MULTILIB_MATCHES += march?armv8-a=march?armv8.1-a+crc
# FPU matches
MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 0ec2c48abea..ad8a0041648 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -358,5 +358,7 @@
UNSPEC_NVRINTX
UNSPEC_NVRINTA
UNSPEC_NVRINTN
+ UNSPEC_VQRDMLAH
+ UNSPEC_VQRDMLSH
])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7a765318658..aa347ed4f52 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -13228,8 +13228,8 @@ of the @option{-mcpu=} option. Permissible names are: @samp{armv2},
@samp{armv6}, @samp{armv6j},
@samp{armv6t2}, @samp{armv6z}, @samp{armv6kz}, @samp{armv6-m},
@samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, @samp{armv7e-m},
-@samp{armv7ve}, @samp{armv8-a}, @samp{armv8-a+crc},
-@samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
+@samp{armv7ve}, @samp{armv8-a}, @samp{armv8-a+crc}, @samp{armv8.1-a},
+@samp{armv8.1-a+crc}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
@option{-march=armv7ve} is the armv7-a architecture with virtualization
extensions.
@@ -13332,7 +13332,7 @@ available on the target. Permissible names are: @samp{vfp}, @samp{vfpv3},
@samp{vfpv3xd-fp16}, @samp{neon}, @samp{neon-fp16}, @samp{vfpv4},
@samp{vfpv4-d16}, @samp{fpv4-sp-d16}, @samp{neon-vfpv4},
@samp{fpv5-d16}, @samp{fpv5-sp-d16},
-@samp{fp-armv8}, @samp{neon-fp-armv8}, and @samp{crypto-neon-fp-armv8}.
+@samp{fp-armv8}, @samp{neon-fp-armv8} and @samp{crypto-neon-fp-armv8}.
If @option{-msoft-float} is specified this specifies the format of
floating-point values.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 474f5fff968..5cf1efca8dd 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1579,6 +1579,15 @@ Some multilibs may be incompatible with these options.
ARM target supports @code{-mfpu=neon-fp-armv8 -mfloat-abi=softfp}.
Some multilibs may be incompatible with these options.
+@item arm_v8_1a_neon_ok
+ARM target supports options to generate ARMv8.1 Adv.SIMD instructions.
+Some multilibs may be incompatible with these options.
+
+@item arm_v8_1a_neon_hw
+ARM target supports executing ARMv8.1 Adv.SIMD instructions. Some
+multilibs may be incompatible with the options needed. Implies
+arm_v8_1a_neon_ok.
+
@item arm_prefer_ldrd_strd
ARM target prefers @code{LDRD} and @code{STRD} instructions over
@code{LDM} and @code{STM} instructions.
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 7d237a2ac01..800dcbe0bfc 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2666,14 +2666,15 @@ proc add_options_for_arm_v8_neon { flags } {
return "$flags $et_arm_v8_neon_flags -march=armv8-a"
}
-# Add the options needed for ARMv8.1 Adv.SIMD.
+# Add the options needed for ARMv8.1 Adv.SIMD. Also adds the ARMv8 NEON
+# options for AArch64 and for ARM.
proc add_options_for_arm_v8_1a_neon { flags } {
- if { [istarget aarch64*-*-*] } {
- return "$flags -march=armv8.1-a"
- } else {
+ if { ! [check_effective_target_arm_v8_1a_neon_ok] } {
return "$flags"
}
+ global et_arm_v8_1a_neon_flags
+ return "$flags $et_arm_v8_1a_neon_flags -march=armv8.1-a"
}
proc add_options_for_arm_crc { flags } {
@@ -3119,17 +3120,33 @@ proc check_effective_target_arm_neonv2_hw { } {
}
# Return 1 if the target supports the ARMv8.1 Adv.SIMD extension, 0
-# otherwise. The test is valid for AArch64.
+# otherwise. The test is valid for AArch64 and ARM. Record the command
+# line options needed.
proc check_effective_target_arm_v8_1a_neon_ok_nocache { } {
- if { ![istarget aarch64*-*-*] } {
- return 0
+ global et_arm_v8_1a_neon_flags
+ set et_arm_v8_1a_neon_flags ""
+
+ if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } {
+ return 0;
}
- return [check_no_compiler_messages_nocache arm_v8_1a_neon_ok assembly {
- #if !defined (__ARM_FEATURE_QRDMX)
- #error "__ARM_FEATURE_QRDMX not defined"
- #endif
- } [add_options_for_arm_v8_1a_neon ""]]
+
+ # Iterate through sets of options to find the compiler flags that
+ # need to be added to the -march option. Start with the empty set
+ # since AArch64 only needs the -march setting.
+ foreach flags {"" "-mfpu=neon-fp-armv8" "-mfloat-abi=softfp" \
+ "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
+ if { [check_no_compiler_messages_nocache arm_v8_1a_neon_ok object {
+ #if !defined (__ARM_FEATURE_QRDMX)
+ #error "__ARM_FEATURE_QRDMX not defined"
+ #endif
+ } "$flags -march=armv8.1-a"] } {
+ set et_arm_v8_1a_neon_flags "$flags -march=armv8.1-a"
+ return 1
+ }
+ }
+
+ return 0;
}
proc check_effective_target_arm_v8_1a_neon_ok { } {
@@ -3156,16 +3173,17 @@ proc check_effective_target_arm_v8_neon_hw { } {
}
# Return 1 if the target supports executing the ARMv8.1 Adv.SIMD extension, 0
-# otherwise. The test is valid for AArch64.
+# otherwise. The test is valid for AArch64 and ARM.
proc check_effective_target_arm_v8_1a_neon_hw { } {
if { ![check_effective_target_arm_v8_1a_neon_ok] } {
return 0;
}
- return [check_runtime_nocache arm_v8_1a_neon_hw_available {
+ return [check_runtime arm_v8_1a_neon_hw_available {
int
main (void)
{
+ #ifdef __ARM_ARCH_ISA_A64
__Int32x2_t a = {0, 1};
__Int32x2_t b = {0, 2};
__Int32x2_t result;
@@ -3175,9 +3193,21 @@ proc check_effective_target_arm_v8_1a_neon_hw { } {
: "w"(a), "w"(b)
: /* No clobbers. */);
+ #else
+
+ __simd64_int32_t a = {0, 1};
+ __simd64_int32_t b = {0, 2};
+ __simd64_int32_t result;
+
+ asm ("vqrdmlah.s32 %P0, %P1, %P2"
+ : "=w"(result)
+ : "w"(a), "w"(b)
+ : /* No clobbers. */);
+ #endif
+
return result[0];
}
- } [add_options_for_arm_v8_1a_neon ""]]
+ } [add_options_for_arm_v8_1a_neon ""]]
}
# Return 1 if this is a ARM target with NEON enabled.