aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDelia Burduv <delia.burduv@arm.com>2020-03-05 11:18:04 +0000
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2020-03-05 11:19:43 +0000
commit43031fbdda7d4edbd607365a4f3bbec069fe3983 (patch)
tree8386e7f7dcba7b17dafa82d913c46b49e2b570a2
parent7d6b3a788f75faee6e885ea0db5a3bf7d1798c91 (diff)
[AArch32] ACLE intrinsics bfloat16 vmmla and vfma<b/t> for AArch32 AdvSIMD
This patch adds the ARMv8.6 ACLE intrinsics for vmmla, vfmab and vfmat as part of the BFloat16 extension. (https://developer.arm.com/docs/101028/latest.) The intrinsics are declared in arm_neon.h and the RTL patterns are defined in neon.md. Two new tests are added to check assembler output and lane indices. 2020-03-05 Delia Burduv <delia.burduv@arm.com> * config/arm/arm_neon.h (vbfmmlaq_f32): New. (vbfmlalbq_f32): New. (vbfmlaltq_f32): New. (vbfmlalbq_lane_f32): New. (vbfmlaltq_lane_f32): New. (vbfmlalbq_laneq_f32): New. (vbfmlaltq_laneq_f32): New. * config/arm/arm_neon_builtins.def (vmmla): New. (vfmab): New. (vfmat): New. (vfmab_lane): New. (vfmat_lane): New. (vfmab_laneq): New. (vfmat_laneq): New. * config/arm/iterators.md (BF_MA): New int iterator. (bt): New int attribute. (VQXBF): Copy of VQX with V8BF. * config/arm/neon.md (neon_vmmlav8bf): New insn. (neon_vfma<bt>v8bf): New insn. (neon_vfma<bt>_lanev8bf): New insn. (neon_vfma<bt>_laneqv8bf): New expand. (neon_vget_high<mode>): Changed iterator to VQXBF. * config/arm/unspecs.md (UNSPEC_BFMMLA): New UNSPEC. (UNSPEC_BFMAB): New UNSPEC. (UNSPEC_BFMAT): New UNSPEC. 2020-03-05 Delia Burduv <delia.burduv@arm.com> * gcc.target/arm/simd/bf16_ma_1.c: New test. * gcc.target/arm/simd/bf16_ma_2.c: New test. * gcc.target/arm/simd/bf16_mmla_1.c: New test.
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c79
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c35
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c19
4 files changed, 139 insertions, 0 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5b94ab519a0..7dcc80d0851 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2020-03-05 Delia Burduv <delia.burduv@arm.com>
+
+ * gcc.target/arm/simd/bf16_ma_1.c: New test.
+ * gcc.target/arm/simd/bf16_ma_2.c: New test.
+ * gcc.target/arm/simd/bf16_mmla_1.c: New test.
+
2020-03-05 Jakub Jelinek <jakub@redhat.com>
PR middle-end/93399
diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c
new file mode 100644
index 00000000000..6729af76fbe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c
@@ -0,0 +1,79 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+**test_vfmabq_f32:
+** ...
+** vfmab.bf16 q0, q1, q2
+** bx lr
+*/
+float32x4_t
+test_vfmabq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+ return vbfmlalbq_f32 (r, a, b);
+}
+
+/*
+**test_vfmatq_f32:
+** ...
+** vfmat.bf16 q0, q1, q2
+** bx lr
+*/
+float32x4_t
+test_vfmatq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+ return vbfmlaltq_f32 (r, a, b);
+}
+
+/*
+**test_vfmabq_lane_f32:
+** ...
+** vfmab.bf16 q0, q1, d4\[0\]
+** bx lr
+*/
+float32x4_t
+test_vfmabq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+ return vbfmlalbq_lane_f32 (r, a, b, 0);
+}
+
+/*
+**test_vfmatq_lane_f32:
+** ...
+** vfmat.bf16 q0, q1, d4\[2\]
+** bx lr
+*/
+float32x4_t
+test_vfmatq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+ return vbfmlaltq_lane_f32 (r, a, b, 2);
+}
+
+/*
+**test_vfmabq_laneq_f32:
+** ...
+** vfmab.bf16 q0, q1, d5\[1\]
+** bx lr
+*/
+float32x4_t
+test_vfmabq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+ return vbfmlalbq_laneq_f32 (r, a, b, 5);
+}
+
+/*
+**test_vfmatq_laneq_f32:
+** ...
+** vfmat.bf16 q0, q1, d5\[3\]
+** bx lr
+*/
+float32x4_t
+test_vfmatq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+ return vbfmlaltq_laneq_f32 (r, a, b, 7);
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c
new file mode 100644
index 00000000000..5a7a2a71791
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c
@@ -0,0 +1,35 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include "arm_neon.h"
+
+/* Test lane index limits for vfmabq_lane_f32 */
+float32x4_t
+test_vfmabq_lane_f32_low (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+ return vbfmlalbq_lane_f32 (r, a, b, -1);
+}
+
+float32x4_t
+test_vfmabq_lane_f32_high (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+ return vbfmlalbq_lane_f32 (r, a, b, 4);
+}
+
+/* Test lane index limits for vfmatq_lane_f32 */
+float32x4_t
+test_vfmatq_lane_f32_low (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+ /* { dg-error "lane -2 out of range 0 - 3" "" { target *-*-* } 0 } */
+ return vbfmlaltq_lane_f32 (r, a, b, -2);
+}
+
+float32x4_t
+test_vfmatq_lane_f32_high (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+ /* { dg-error "lane 5 out of range 0 - 3" "" { target *-*-* } 0 } */
+ return vbfmlaltq_lane_f32 (r, a, b, 5);
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c b/gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c
new file mode 100644
index 00000000000..5f9c85b41a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_vmmlaq_f32:
+** ...
+** vmmla.bf16 q0, q1, q2
+** bx lr
+*/
+float32x4_t
+test_vmmlaq_f32 (float32x4_t r, bfloat16x8_t x, bfloat16x8_t y)
+{
+ return vbfmmlaq_f32 (r, x, y);
+}