diff options
author | Yvan Roux <yvan.roux@linaro.org> | 2015-12-31 12:34:34 +0100 |
---|---|---|
committer | Yvan Roux <yvan.roux@linaro.org> | 2015-12-31 12:44:03 +0100 |
commit | 07e0492a623485886dd08100f5ae8f22adb11fa2 (patch) | |
tree | ee712bf6d74d9eee5a80db91e0f0d22617a551c4 | |
parent | dad84c9a5ef6b373364319202be3f01036ea3fa3 (diff) |
Backport from trunk r231678.
2015-12-16 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/arm-arches.def: Add "armv8.1-a" and "armv8.1-a+crc".
* config/arm/arm-protos.h (FL2_ARCH8_1): New.
(FL2_FOR_ARCH8_1A): New.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm.c (arm_arch8_1): New.
(arm_option_override): Set arm_arch8_1.
* config/arm/arm.h (TARGET_NEON_RDMA): New.
(arm_arch8_1): Declare.
* doc/invoke.texi (ARM Options, -march): Add "armv8.1-a" and
"armv8.1-a+crc".
(ARM Options, -mfpu): Fix a typo.
gcc/
Backport from trunk r231680.
2015-12-16 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/t-aprofile: Make "armv8.1-a" and "armv8.1-a+crc"
matches for "armv8-a".
gcc/
Backport from trunk r231681.
2015-12-16 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/iterators.md (VQRDMLH_AS): New.
(neon_rdma_as): New.
* config/arm/neon.md
(neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>): New.
(neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>): New.
* config/arm/unspecs.md (UNSPEC_VQRDMLAH): New.
(UNSPEC_VQRDMLSH): New.
gcc/
Backport from trunk r231682.
2015-12-16 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/arm-c.c (arm_cpu_builtins): Define
__ARM_FEATURE_QRDMX. Clean up some trailing whitespace.
gcc/
Backport from trunk r231683.
2015-12-16 Matthew Wahab <matthew.wahab@arm.com>
* doc/sourcebuild.texi (ARM-specific attributes): Add
"arm_v8_1a_neon_ok" and "arm_v8_1a_neon_hw".
gcc/testsuite/
Backport from trunk r231683.
2015-12-16 Matthew Wahab <matthew.wahab@arm.com>
* lib/target-supports.exp (add_options_for_arm_v8_1a_neon): Update
comment. Use check_effective_target_arm_v8_1a_neon_ok to select
the command line options.
(check_effective_target_arm_v8_1a_neon_ok_nocache): Update initial
test to allow ARM targets. Select and record a working set of
command line options.
(check_effective_target_arm_v8_1a_neon_hw): Add tests for ARM
targets.
gcc/
Backport from trunk r231685.
2015-12-16 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New.
(vqrdmlahq_s16, vqrdmlahq_s32): New.
(vqrdmlsh_s16, vqrdmlsh_s32): New.
(vqrdmlahq_s16, vqrdmlshq_s32): New.
* config/arm/arm_neon_builtins.def: Add "vqrdmlah" and "vqrdmlsh".
gcc/
Backport from trunk r231686.
2015-12-16 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/arm_neon.h (vqrdmlahq_lane_s16): New.
(vqrdmlahq_lane_s32): New.
(vqrdmlah_lane_s16): New.
(vqrdmlah_lane_s32): New.
(vqrdmlshq_lane_s16): New.
(vqrdmlshq_lane_s32): New.
(vqrdmlsh_lane_s16): New.
(vqrdmlsh_lane_s32): New.
* config/arm/arm_neon_builtins.def: Add "vqrdmlah_lane" and
"vqrdmlsh_lane".
Change-Id: Ie3e2a64a5efbb99cd38fa38373f907c7bdd54c21
-rw-r--r-- | gcc/config/arm/arm-arches.def | 5 | ||||
-rw-r--r-- | gcc/config/arm/arm-c.c | 9 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/arm/arm-tables.opt | 10 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 4 | ||||
-rw-r--r-- | gcc/config/arm/arm.h | 6 | ||||
-rw-r--r-- | gcc/config/arm/arm_neon.h | 100 | ||||
-rw-r--r-- | gcc/config/arm/arm_neon_builtins.def | 4 | ||||
-rw-r--r-- | gcc/config/arm/iterators.md | 5 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 45 | ||||
-rw-r--r-- | gcc/config/arm/t-aprofile | 2 | ||||
-rw-r--r-- | gcc/config/arm/unspecs.md | 2 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 6 | ||||
-rw-r--r-- | gcc/doc/sourcebuild.texi | 9 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 60 |
15 files changed, 247 insertions, 23 deletions
diff --git a/gcc/config/arm/arm-arches.def b/gcc/config/arm/arm-arches.def index ddf6c3c330f..6c831538b2d 100644 --- a/gcc/config/arm/arm-arches.def +++ b/gcc/config/arm/arm-arches.def @@ -57,6 +57,11 @@ ARM_ARCH("armv7-m", cortexm3, 7M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ ARM_ARCH("armv7e-m", cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7EM)) ARM_ARCH("armv8-a", cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH8A)) ARM_ARCH("armv8-a+crc",cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A)) +ARM_ARCH ("armv8.1-a", cortexa53, 8A, + ARM_FSET_MAKE (FL_CO_PROC | FL_FOR_ARCH8A, FL2_FOR_ARCH8_1A)) +ARM_ARCH ("armv8.1-a+crc",cortexa53, 8A, + ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, + FL2_FOR_ARCH8_1A)) ARM_ARCH("iwmmxt", iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)) ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)) diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c index 84bb35989ab..27728f0fd77 100644 --- a/gcc/config/arm/arm-c.c +++ b/gcc/config/arm/arm-c.c @@ -75,19 +75,22 @@ static void arm_cpu_builtins (struct cpp_reader* pfile) { def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY); - def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT); + def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT); def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT); if (TARGET_CRYPTO) builtin_define ("__ARM_FEATURE_CRYPTO"); if (unaligned_access) builtin_define ("__ARM_FEATURE_UNALIGNED"); + + def_or_undef_macro (pfile, "__ARM_FEATURE_QRDMX", TARGET_NEON_RDMA); + if (TARGET_CRC32) builtin_define ("__ARM_FEATURE_CRC32"); - def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT); + def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT); if (TARGET_ARM_FEATURE_LDREX) - builtin_define_with_int_value ("__ARM_FEATURE_LDREX", + builtin_define_with_int_value ("__ARM_FEATURE_LDREX", TARGET_ARM_FEATURE_LDREX); else cpp_undef (pfile, "__ARM_FEATURE_LDREX"); diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index d3d7216488e..177072cef8f 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -386,6 +386,8 @@ extern bool arm_is_constant_pool_ref (rtx); #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */ #define FL_ARCH6KZ (1 << 31) /* ARMv6KZ architecture. */ +#define FL2_ARCH8_1 (1 << 0) /* Architecture 8.1. */ + /* Flags that only effect tuning, not available instructions. */ #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ | FL_CO_PROC) @@ -414,6 +416,7 @@ extern bool arm_is_constant_pool_ref (rtx); #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8) +#define FL2_FOR_ARCH8_1A FL2_ARCH8_1 /* There are too many feature bits to fit in a single word so the set of cpu and fpu capabilities is a structure. A feature set is created and manipulated diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index 41bf1ff250b..51fa3dff4ff 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -413,10 +413,16 @@ EnumValue Enum(arm_arch) String(armv8-a+crc) Value(26) EnumValue -Enum(arm_arch) String(iwmmxt) Value(27) +Enum(arm_arch) String(armv8.1-a) Value(27) EnumValue -Enum(arm_arch) String(iwmmxt2) Value(28) +Enum(arm_arch) String(armv8.1-a+crc) Value(28) + +EnumValue +Enum(arm_arch) String(iwmmxt) Value(29) + +EnumValue +Enum(arm_arch) String(iwmmxt2) Value(30) Enum Name(arm_fpu) Type(int) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 9b848bd5ed6..9bcadba11dd 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -848,6 +848,9 @@ int arm_arch7em = 0; /* Nonzero if instructions present in ARMv8 can be used. */ int arm_arch8 = 0; +/* Nonzero if this chip supports the ARMv8.1 extensions. */ +int arm_arch8_1 = 0; + /* Nonzero if this chip can benefit from load scheduling. */ int arm_ld_sched = 0; @@ -3099,6 +3102,7 @@ arm_option_override (void) arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7); arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM); arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8); + arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1); arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2); arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 0cef4474f6f..f1e85118001 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -219,6 +219,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); && ARM_FPU_FSET_HAS (arm_fpu_desc->features, FPU_FL_NEON)) +/* FPU supports ARMv8.1 Adv.SIMD extensions. */ +#define TARGET_NEON_RDMA (TARGET_NEON && arm_arch8_1) + /* Q-bit is present. */ #define TARGET_ARM_QBIT \ (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7)) @@ -430,6 +433,9 @@ extern int arm_arch7em; /* Nonzero if this chip supports the ARM Architecture 8 extensions. */ extern int arm_arch8; +/* Nonzero if this chip supports the ARM Architecture 8.1 extensions. */ +extern int arm_arch8_1; + /* Nonzero if this chip can benefit from load scheduling. */ extern int arm_ld_sched; diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 66622dfcfe2..1f00357b9a1 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -1159,6 +1159,56 @@ vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) return (int32x4_t)__builtin_neon_vqrdmulhv4si (__a, __b); } +#ifdef __ARM_FEATURE_QRDMX +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vqrdmlahv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vqrdmlahv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vqrdmlahv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqrdmlahv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vqrdmlshv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vqrdmlshv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vqrdmlshv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqrdmlshv4si (__a, __b, __c); +} +#endif + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vmull_s8 (int8x8_t __a, int8x8_t __b) { @@ -7041,6 +7091,56 @@ vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) return (int32x2_t)__builtin_neon_vqrdmulh_lanev2si (__a, __b, __c); } +#ifdef __ARM_FEATURE_QRDMX +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return (int16x8_t)__builtin_neon_vqrdmlah_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vqrdmlah_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int16x4_t)__builtin_neon_vqrdmlah_lanev4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int32x2_t)__builtin_neon_vqrdmlah_lanev2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return (int16x8_t)__builtin_neon_vqrdmlsh_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vqrdmlsh_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int16x4_t)__builtin_neon_vqrdmlsh_lanev4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int32x2_t)__builtin_neon_vqrdmlsh_lanev2si (__a, __b, __c, __d); +} +#endif + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vmul_n_s16 (int16x4_t __a, int16_t __b) { diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 0b719df7607..1fdb2a8283a 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -45,6 +45,8 @@ VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) VAR4 (BINOP, vqrdmulh, v4hi, v2si, v8hi, v4si) VAR2 (TERNOP, vqdmlal, v4hi, v2si) VAR2 (TERNOP, vqdmlsl, v4hi, v2si) +VAR4 (TERNOP, vqrdmlah, v4hi, v2si, v8hi, v4si) +VAR4 (TERNOP, vqrdmlsh, v4hi, v2si, v8hi, v4si) VAR3 (BINOP, vmullp, v8qi, v4hi, v2si) VAR3 (BINOP, vmulls, v8qi, v4hi, v2si) VAR3 (BINOP, vmullu, v8qi, v4hi, v2si) @@ -58,6 +60,8 @@ VAR4 (BINOP, vqdmulh_n, v4hi, v2si, v8hi, v4si) VAR4 (BINOP, vqrdmulh_n, v4hi, v2si, v8hi, v4si) VAR4 (SETLANE, vqdmulh_lane, v4hi, v2si, v8hi, v4si) VAR4 (SETLANE, vqrdmulh_lane, v4hi, v2si, v8hi, v4si) +VAR4 (MAC_LANE, vqrdmlah_lane, v4hi, v2si, v8hi, v4si) +VAR4 (MAC_LANE, vqrdmlsh_lane, v4hi, v2si, v8hi, v4si) VAR2 (BINOP, vqdmull, v4hi, v2si) VAR8 (BINOP, vshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (BINOP, vshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 47cc1eebecd..be62f6747d7 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -359,6 +359,8 @@ (define_int_iterator CRYPTO_SELECTING [UNSPEC_SHA1C UNSPEC_SHA1M UNSPEC_SHA1P]) +(define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH]) + ;;---------------------------------------------------------------------------- ;; Mode attributes ;;---------------------------------------------------------------------------- @@ -828,3 +830,6 @@ (simple_return " && use_simple_return_p ()")]) (define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)") (simple_return " && use_simple_return_p ()")]) + +;; Attributes for VQRDMLAH/VQRDMLSH +(define_int_attr neon_rdma_as [(UNSPEC_VQRDMLAH "a") (UNSPEC_VQRDMLSH "s")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 62fb6daae99..844ef5eb2f3 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2014,6 +2014,18 @@ [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")] ) +;; vqrdmlah, vqrdmlsh +(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>" + [(set (match_operand:VMDQI 0 "s_register_operand" "=w") + (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0") + (match_operand:VMDQI 2 "s_register_operand" "w") + (match_operand:VMDQI 3 "s_register_operand" "w")] + VQRDMLH_AS))] + "TARGET_NEON_RDMA" + "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" + [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] +) + (define_insn "neon_vqdmlal<mode>" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") @@ -3176,6 +3188,39 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")] ) +;; vqrdmlah_lane, vqrdmlsh_lane +(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" + [(set (match_operand:VMQI 0 "s_register_operand" "=w") + (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0") + (match_operand:VMQI 2 "s_register_operand" "w") + (match_operand:<V_HALF> 3 "s_register_operand" + "<scalar_mul_constraint>") + (match_operand:SI 4 "immediate_operand" "i")] + VQRDMLH_AS))] + "TARGET_NEON_RDMA" +{ + return + "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"; +} + [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")] +) + +(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" + [(set (match_operand:VMDI 0 "s_register_operand" "=w") + (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "<scalar_mul_constraint>") + (match_operand:SI 4 "immediate_operand" "i")] + VQRDMLH_AS))] + "TARGET_NEON_RDMA" +{ + return + "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")] +) + (define_insn "neon_vmla_lane<mode>" [(set (match_operand:VMD 0 "s_register_operand" "=w") (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") diff --git a/gcc/config/arm/t-aprofile b/gcc/config/arm/t-aprofile index 5d02ea5223d..94fd0ca9259 100644 --- a/gcc/config/arm/t-aprofile +++ b/gcc/config/arm/t-aprofile @@ -97,6 +97,8 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?xgene1 # Arch Matches MULTILIB_MATCHES += march?armv8-a=march?armv8-a+crc +MULTILIB_MATCHES += march?armv8-a=march?armv8.1-a +MULTILIB_MATCHES += march?armv8-a=march?armv8.1-a+crc # FPU matches MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 0ec2c48abea..ad8a0041648 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -358,5 +358,7 @@ UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN + UNSPEC_VQRDMLAH + UNSPEC_VQRDMLSH ]) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7a765318658..aa347ed4f52 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -13228,8 +13228,8 @@ of the @option{-mcpu=} option. Permissible names are: @samp{armv2}, @samp{armv6}, @samp{armv6j}, @samp{armv6t2}, @samp{armv6z}, @samp{armv6kz}, @samp{armv6-m}, @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, @samp{armv7e-m}, -@samp{armv7ve}, @samp{armv8-a}, @samp{armv8-a+crc}, -@samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. +@samp{armv7ve}, @samp{armv8-a}, @samp{armv8-a+crc}, @samp{armv8.1-a}, +@samp{armv8.1-a+crc}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. @option{-march=armv7ve} is the armv7-a architecture with virtualization extensions. @@ -13332,7 +13332,7 @@ available on the target. Permissible names are: @samp{vfp}, @samp{vfpv3}, @samp{vfpv3xd-fp16}, @samp{neon}, @samp{neon-fp16}, @samp{vfpv4}, @samp{vfpv4-d16}, @samp{fpv4-sp-d16}, @samp{neon-vfpv4}, @samp{fpv5-d16}, @samp{fpv5-sp-d16}, -@samp{fp-armv8}, @samp{neon-fp-armv8}, and @samp{crypto-neon-fp-armv8}. +@samp{fp-armv8}, @samp{neon-fp-armv8} and @samp{crypto-neon-fp-armv8}. If @option{-msoft-float} is specified this specifies the format of floating-point values. diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 474f5fff968..5cf1efca8dd 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1579,6 +1579,15 @@ Some multilibs may be incompatible with these options. ARM target supports @code{-mfpu=neon-fp-armv8 -mfloat-abi=softfp}. Some multilibs may be incompatible with these options. +@item arm_v8_1a_neon_ok +ARM target supports options to generate ARMv8.1 Adv.SIMD instructions. +Some multilibs may be incompatible with these options. + +@item arm_v8_1a_neon_hw +ARM target supports executing ARMv8.1 Adv.SIMD instructions. Some +multilibs may be incompatible with the options needed. Implies +arm_v8_1a_neon_ok. + @item arm_prefer_ldrd_strd ARM target prefers @code{LDRD} and @code{STRD} instructions over @code{LDM} and @code{STM} instructions. diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 7d237a2ac01..800dcbe0bfc 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2666,14 +2666,15 @@ proc add_options_for_arm_v8_neon { flags } { return "$flags $et_arm_v8_neon_flags -march=armv8-a" } -# Add the options needed for ARMv8.1 Adv.SIMD. +# Add the options needed for ARMv8.1 Adv.SIMD. Also adds the ARMv8 NEON +# options for AArch64 and for ARM. proc add_options_for_arm_v8_1a_neon { flags } { - if { [istarget aarch64*-*-*] } { - return "$flags -march=armv8.1-a" - } else { + if { ! [check_effective_target_arm_v8_1a_neon_ok] } { return "$flags" } + global et_arm_v8_1a_neon_flags + return "$flags $et_arm_v8_1a_neon_flags -march=armv8.1-a" } proc add_options_for_arm_crc { flags } { @@ -3119,17 +3120,33 @@ proc check_effective_target_arm_neonv2_hw { } { } # Return 1 if the target supports the ARMv8.1 Adv.SIMD extension, 0 -# otherwise. The test is valid for AArch64. +# otherwise. The test is valid for AArch64 and ARM. Record the command +# line options needed. proc check_effective_target_arm_v8_1a_neon_ok_nocache { } { - if { ![istarget aarch64*-*-*] } { - return 0 + global et_arm_v8_1a_neon_flags + set et_arm_v8_1a_neon_flags "" + + if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { + return 0; } - return [check_no_compiler_messages_nocache arm_v8_1a_neon_ok assembly { - #if !defined (__ARM_FEATURE_QRDMX) - #error "__ARM_FEATURE_QRDMX not defined" - #endif - } [add_options_for_arm_v8_1a_neon ""]] + + # Iterate through sets of options to find the compiler flags that + # need to be added to the -march option. Start with the empty set + # since AArch64 only needs the -march setting. + foreach flags {"" "-mfpu=neon-fp-armv8" "-mfloat-abi=softfp" \ + "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} { + if { [check_no_compiler_messages_nocache arm_v8_1a_neon_ok object { + #if !defined (__ARM_FEATURE_QRDMX) + #error "__ARM_FEATURE_QRDMX not defined" + #endif + } "$flags -march=armv8.1-a"] } { + set et_arm_v8_1a_neon_flags "$flags -march=armv8.1-a" + return 1 + } + } + + return 0; } proc check_effective_target_arm_v8_1a_neon_ok { } { @@ -3156,16 +3173,17 @@ proc check_effective_target_arm_v8_neon_hw { } { } # Return 1 if the target supports executing the ARMv8.1 Adv.SIMD extension, 0 -# otherwise. The test is valid for AArch64. +# otherwise. The test is valid for AArch64 and ARM. proc check_effective_target_arm_v8_1a_neon_hw { } { if { ![check_effective_target_arm_v8_1a_neon_ok] } { return 0; } - return [check_runtime_nocache arm_v8_1a_neon_hw_available { + return [check_runtime arm_v8_1a_neon_hw_available { int main (void) { + #ifdef __ARM_ARCH_ISA_A64 __Int32x2_t a = {0, 1}; __Int32x2_t b = {0, 2}; __Int32x2_t result; @@ -3175,9 +3193,21 @@ proc check_effective_target_arm_v8_1a_neon_hw { } { : "w"(a), "w"(b) : /* No clobbers. */); + #else + + __simd64_int32_t a = {0, 1}; + __simd64_int32_t b = {0, 2}; + __simd64_int32_t result; + + asm ("vqrdmlah.s32 %P0, %P1, %P2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers. */); + #endif + return result[0]; } - } [add_options_for_arm_v8_1a_neon ""]] + } [add_options_for_arm_v8_1a_neon ""]] } # Return 1 if this is a ARM target with NEON enabled. |