From f00d954d3a6a6c070dbd29bf37c5982ef21f0946 Mon Sep 17 00:00:00 2001 From: Yvan Roux Date: Fri, 17 Mar 2017 14:24:06 +0100 Subject: gcc/ Backport from trunk r245328. 2017-02-10 Christophe Lyon * config/aarch64/arm_neon.h (vtst_p8): Rewrite without asm. (vtst_p16): Likewise. (vtstq_p8): Likewise. (vtstq_p16): Likewise. (vtst_p64): New. (vtstq_p64): Likewise. * config/arm/arm_neon.h (vgetq_lane_p64): New. (vset_lane_p64): New. (vsetq_lane_p64): New. gcc/testsuite/ Backport from trunk r245328. 2017-02-10 Christophe Lyon * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1): New. (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2) (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0) (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0) (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3) (vtst_expected_poly64x2): Move to aarch64-only section. (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64) (vtst_p64, vtstq_p64): New tests. Change-Id: I88c50b9b0efc55fc10b287b98231dba2dcd23f3c --- gcc/config/aarch64/arm_neon.h | 47 +++--- gcc/config/arm/arm_neon.h | 27 ++++ .../aarch64/advsimd-intrinsics/p64_p128.c | 169 +++++++++++++++------ 3 files changed, 174 insertions(+), 69 deletions(-) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index b8466440d82..88f2ed10eb1 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -10862,48 +10862,47 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_p8 (poly8x8_t a, poly8x8_t b) { - uint8x8_t result; - __asm__ ("cmtst %0.8b, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b)) + != 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_p16 (poly16x4_t a, poly16x4_t b) { - uint16x4_t result; - __asm__ ("cmtst %0.4h, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b)) + != 0); +} + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vtst_p64 (poly64x1_t a, poly64x1_t b) +{ + return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0)); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_p8 (poly8x16_t a, poly8x16_t b) { - uint8x16_t result; - __asm__ ("cmtst %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b)) + != 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_p16 (poly16x8_t a, poly16x8_t b) { - uint16x8_t result; - __asm__ ("cmtst %0.8h, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; + return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b)) + != 0); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vtstq_p64 (poly64x2_t a, poly64x2_t b) +{ + return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) b)) + != __AARCH64_INT64_C (0)); } /* End of temporary inline asm implementations. */ diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 30e22484a8b..0b5ab70c7cf 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -6309,6 +6309,16 @@ vgetq_lane_s64 (int64x2_t __a, const int __b) return (int64_t)__builtin_neon_vget_lanev2di (__a, __b); } +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") +__extension__ extern __inline poly64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vgetq_lane_p64 (poly64x2_t __a, const int __b) +{ + return (poly64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b); +} + +#pragma GCC pop_options __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vgetq_lane_u64 (uint64x2_t __a, const int __b) @@ -6405,6 +6415,16 @@ vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c) return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c); } +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") +__extension__ extern __inline poly64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vset_lane_p64 (poly64_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c); +} + +#pragma GCC pop_options __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c) @@ -6496,6 +6516,13 @@ vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c) #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsetq_lane_p64 (poly64_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c); +} + __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcreate_p64 (uint64_t __a) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c index 7c5bca2ef70..a3210a94b1d 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c @@ -39,17 +39,6 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 }; VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; -/* Expected results: vmov_n. */ -VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 }; -VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0, - 0xfffffffffffffff0 }; -VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 }; -VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1, - 0xfffffffffffffff1 }; -VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 }; -VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2, - 0xfffffffffffffff2 }; - /* Expected results: vext. */ VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 }; @@ -124,6 +113,29 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x3333333333333333 }; +/* Expected results: vget_lane. */ +VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0; +VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0; + +/* Expected results: vset_lane. */ +VECT_VAR_DECL(vset_lane_expected,poly,64,1) [] = { 0x88 }; +VECT_VAR_DECL(vset_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x11 }; + +/* Expected results: vtst. */ +VECT_VAR_DECL(vtst_expected,uint,64,1) [] = { 0x0 }; + +#ifdef __aarch64__ +/* Expected results: vmov_n. */ +VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2, + 0xfffffffffffffff2 }; + /* Expected results: vldX_lane. */ VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0, @@ -153,9 +165,9 @@ VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 }; VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa }; -/* Expected results: vget_lane. */ -VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0; -VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0; +/* Expected results: vtst. */ +VECT_VAR_DECL(vtst_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff }; +#endif int main (void) { @@ -727,7 +739,105 @@ int main (void) CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, ""); CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, ""); + /* vget_lane_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VGET_LANE/VGETQ_LANE" + +#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \ + VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \ + if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \ + fprintf(stderr, \ + "ERROR in %s (%s line %d in result '%s') at type %s " \ + "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \ + TEST_MSG, __FILE__, __LINE__, \ + STR(VECT_VAR(vget_lane_expected, T1, W, N)), \ + STR(VECT_NAME(T1, W, N)), \ + VECT_VAR(vget_lane_vector, T1, W, N), \ + VECT_VAR(vget_lane_expected, T1, W, N)); \ + abort (); \ + } + + /* Initialize input values. */ + DECL_VARIABLE(vget_lane_vector1, poly, 64, 1); + DECL_VARIABLE(vget_lane_vector1, poly, 64, 2); + + VLOAD(vget_lane_vector1, buffer, , poly, p, 64, 1); + VLOAD(vget_lane_vector1, buffer, q, poly, p, 64, 2); + + VECT_VAR_DECL(vget_lane_vector, poly, 64, 1); + VECT_VAR_DECL(vget_lane_vector, poly, 64, 2); + + TEST_VGET_LANE( , poly, p, 64, 1, 0); + TEST_VGET_LANE(q, poly, p, 64, 2, 0); + + + /* vset_lane_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VSET_LANE/VSETQ_LANE" + +#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L) \ + VECT_VAR(vset_lane_vector, T1, W, N) = \ + vset##Q##_lane_##T2##W(V, \ + VECT_VAR(vset_lane_vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N)) + + /* Initialize input values. */ + DECL_VARIABLE(vset_lane_vector, poly, 64, 1); + DECL_VARIABLE(vset_lane_vector, poly, 64, 2); + + CLEAN(result, uint, 64, 1); + CLEAN(result, uint, 64, 2); + + VLOAD(vset_lane_vector, buffer, , poly, p, 64, 1); + VLOAD(vset_lane_vector, buffer, q, poly, p, 64, 2); + + /* Choose value and lane arbitrarily. */ + TEST_VSET_LANE(, poly, p, 64, 1, 0x88, 0); + TEST_VSET_LANE(q, poly, p, 64, 2, 0x11, 1); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vset_lane_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vset_lane_expected, ""); + + + /* vtst_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VTST" + +#define TEST_VTST1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vtst_vector_res, uint, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N), \ + VECT_VAR(vtst_vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \ + VECT_VAR(vtst_vector_res, uint, W, N)) + +#define TEST_VTST(INSN, Q, T1, T2, W, N) \ + TEST_VTST1(INSN, Q, T1, T2, W, N) \ + + /* Initialize input values. */ + DECL_VARIABLE(vtst_vector, poly, 64, 1); + DECL_VARIABLE(vtst_vector2, poly, 64, 1); + DECL_VARIABLE(vtst_vector_res, uint, 64, 1); + + CLEAN(result, uint, 64, 1); + + VLOAD(vtst_vector, buffer, , poly, p, 64, 1); + VDUP(vtst_vector2, , poly, p, 64, 1, 5); + + TEST_VTST(vtst, , poly, p, 64, 1); + + CHECK(TEST_MSG, uint, 64, 1, PRIx64, vtst_expected, ""); + + /* vtstq_p64 is supported by aarch64 only. */ #ifdef __aarch64__ + DECL_VARIABLE(vtst_vector, poly, 64, 2); + DECL_VARIABLE(vtst_vector2, poly, 64, 2); + DECL_VARIABLE(vtst_vector_res, uint, 64, 2); + CLEAN(result, uint, 64, 2); + VLOAD(vtst_vector, buffer, q, poly, p, 64, 2); + VDUP(vtst_vector2, q, poly, p, 64, 2, 5); + TEST_VTST(vtst, q, poly, p, 64, 2); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, vtst_expected, ""); /* vmov_n_p64 tests. */ #undef TEST_MSG @@ -767,37 +877,6 @@ int main (void) } } - /* vget_lane_p64 tests. */ -#undef TEST_MSG -#define TEST_MSG "VGET_LANE/VGETQ_LANE" - -#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \ - VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ - if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \ - fprintf(stderr, \ - "ERROR in %s (%s line %d in result '%s') at type %s " \ - "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \ - TEST_MSG, __FILE__, __LINE__, \ - STR(VECT_VAR(vget_lane_expected, T1, W, N)), \ - STR(VECT_NAME(T1, W, N)), \ - (uint##W##_t)VECT_VAR(vget_lane_vector, T1, W, N), \ - (uint##W##_t)VECT_VAR(vget_lane_expected, T1, W, N)); \ - abort (); \ - } - - /* Initialize input values. */ - DECL_VARIABLE(vector, poly, 64, 1); - DECL_VARIABLE(vector, poly, 64, 2); - - VLOAD(vector, buffer, , poly, p, 64, 1); - VLOAD(vector, buffer, q, poly, p, 64, 2); - - VECT_VAR_DECL(vget_lane_vector, poly, 64, 1); - VECT_VAR_DECL(vget_lane_vector, poly, 64, 2); - - TEST_VGET_LANE( , poly, p, 64, 1, 0); - TEST_VGET_LANE(q, poly, p, 64, 2, 0); - /* vldx_lane_p64 tests. */ #undef TEST_MSG #define TEST_MSG "VLDX_LANE/VLDXQ_LANE" -- cgit v1.2.3