diff options
Diffstat (limited to 'SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c')
-rw-r--r-- | SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c b/SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c new file mode 100644 index 00000000..b96ff8b5 --- /dev/null +++ b/SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c @@ -0,0 +1,273 @@ + +/* + * Test 128 and 256-bit two operand integer intrinsics, + * with masked and zero-masked forms, by comparing + * their output with the corresponding 512-bit intrinsic. + * Here we check for _mm512_[mask|maskz]_[and|andnot|or|xor|add|max|min|mul|sub] + * intrinsics + */ + +#include "m512_test_util.h" +#include <stdio.h> + +V512 i8_src1; +V512 i8_src2; +V512 i16_src1; +V512 i16_src2; +V512 i32_src1; +V512 i32_src2; +V512 i64_src1; +V512 i64_src2; + +void NOINLINE init() { + volatile int i; + + for (i = 0; i < 64; i++) { + i8_src1.s8[i] = i; + i8_src2.s8[i] = (i & 1) ? i : -i; + } + + for (i = 0; i < 32; i++) { + i16_src1.s16[i] = i; + i16_src2.s16[i] = (i & 1) ? i : -i; + } + + for (i = 0; i < 16; i++) { + i32_src1.s32[i] = i; + i32_src2.s32[i] = (i & 1) ? i : -i; + } + + for (i = 0; i < 8; i++) { + i64_src1.s64[i] = i; + i64_src2.s64[i] = (i & 1) ? i : -i; + } +} + +/* + * Use "soft update" between tests to make compiler think src was updated. + * Prevents PRE'ing a load of src, thus allowing ciscization. + * Also prevents PRE'ing intrinsic operations, ensuring we + * execute the intended instructions. + */ +volatile int vol0 = 0; +#define soft_v512_update(var) (var).xmmi[vol0] = (var).xmmi[vol0] + +/* + * Generate a function that tests a packed int64 intrinsic + * by implementing the XMM, YMM and ZMM versions, and comparing + * the XMM and YMM results with the low part of the ZMM result. + * + * We test regular, masked and zero masked forms. + * + * Use GEN_I64_UNIFORM when the core intrinsic name is the same + * for all vector lengths, e.g. "add_epi64". Otherwise use + * GEN_I64 to list the different names, e.g. "and_si128" and "and_si256". + */ + +#define GEN_I64_UNIFORM(oper) GEN_I64(oper, oper, oper, oper, oper) + +#define GEN_I64(test_name, oper_epi64, oper_xmm, oper_ymm, oper_zmm) \ + void NOINLINE do_##test_name() { \ + V512 xmm_res, ymm_res, zmm_res; \ + __mmask8 k8 = 0x5a; \ + \ + /* Non-masked. */ \ + \ + soft_v512_update(i64_src2); \ + zmm_res.zmmi = _mm512_##oper_zmm(i64_src1.zmmi, i64_src2.zmmi); \ + \ + /* Masked. */ \ + \ + zmm_res.zmmi = _mm512_setzero_epi32(); \ + ymm_res = zmm_res; \ + xmm_res = zmm_res; \ + \ + soft_v512_update(i64_src2); \ + zmm_res.zmmi = _mm512_mask_##oper_epi64(zmm_res.zmmi, k8, i64_src1.zmmi, \ + i64_src2.zmmi); \ + \ + /* Zero-masked. */ \ + \ + zmm_res.zmmi = _mm512_set1_epi64(1); \ + ymm_res = zmm_res; \ + xmm_res = zmm_res; \ + \ + soft_v512_update(i64_src2); \ + zmm_res.zmmi = \ + _mm512_maskz_##oper_epi64(k8, i64_src1.zmmi, i64_src2.zmmi); \ + } + +#define GEN_I32_UNIFORM(oper) GEN_I32(oper, oper, oper, oper, oper) + +#define GEN_I32(test_name, oper_epi32, oper_xmm, oper_ymm, oper_zmm) \ + void NOINLINE do_##test_name() { \ + V512 xmm_res, ymm_res, zmm_res; \ + __mmask16 k16 = 0x7feb; \ + __mmask8 k8 = (__mmask8)k16; \ + \ + /* Non-masked. */ \ + \ + soft_v512_update(i32_src2); \ + zmm_res.zmmi = _mm512_##oper_zmm(i32_src1.zmmi, i32_src2.zmmi); \ + \ + /* Masked. */ \ + \ + zmm_res.zmmi = _mm512_setzero_epi32(); \ + ymm_res = zmm_res; \ + xmm_res = zmm_res; \ + \ + soft_v512_update(i32_src2); \ + zmm_res.zmmi = _mm512_mask_##oper_epi32(zmm_res.zmmi, k16, i32_src1.zmmi, \ + i32_src2.zmmi); \ + \ + /* Zero-masked. */ \ + \ + zmm_res.zmmi = _mm512_set1_epi32(1.0); \ + ymm_res = zmm_res; \ + xmm_res = zmm_res; \ + \ + soft_v512_update(i32_src2); \ + zmm_res.zmmi = \ + _mm512_maskz_##oper_epi32(k16, i32_src1.zmmi, i32_src2.zmmi); \ + } + +#define GEN_I16_UNIFORM(oper) GEN_I16(oper, oper, oper, oper, oper) + +#define GEN_I16(test_name, oper_epi16, oper_xmm, oper_ymm, oper_zmm) \ + void NOINLINE do_##test_name() { \ + V512 xmm_res, ymm_res, zmm_res; \ + __mmask32 k32 = 0x7febeb7f; \ + __mmask16 k16 = (__mmask16)k32; \ + __mmask8 k8 = (__mmask8)k16; \ + \ + /* Non-masked. */ \ + \ + soft_v512_update(i16_src2); \ + zmm_res.zmmi = _mm512_##oper_zmm(i16_src1.zmmi, i16_src2.zmmi); \ + \ + /* Masked. */ \ + \ + zmm_res.zmmi = _mm512_setzero_epi32(); \ + ymm_res = zmm_res; \ + xmm_res = zmm_res; \ + \ + soft_v512_update(i16_src2); \ + zmm_res.zmmi = _mm512_mask_##oper_epi16(zmm_res.zmmi, k32, i16_src1.zmmi, \ + i16_src2.zmmi); \ + \ + /* Zero-masked. */ \ + \ + zmm_res.zmmi = _mm512_set1_epi32(1.0); \ + ymm_res = zmm_res; \ + xmm_res = zmm_res; \ + \ + soft_v512_update(i16_src2); \ + zmm_res.zmmi = \ + _mm512_maskz_##oper_epi16(k32, i16_src1.zmmi, i16_src2.zmmi); \ + } + +#define GEN_I8_UNIFORM(oper) GEN_I8(oper, oper, oper, oper, oper) + +#define GEN_I8(test_name, oper_epi8, oper_xmm, oper_ymm, oper_zmm) \ + void NOINLINE do_##test_name() { \ + V512 xmm_res, ymm_res, zmm_res; \ + __mmask64 k64 = 0xa55a7febeb7f5aa5U; \ + __mmask32 k32 = (__mmask32)k64; \ + __mmask16 k16 = (__mmask16)k32; \ + \ + /* Non-masked. */ \ + \ + soft_v512_update(i8_src2); \ + zmm_res.zmmi = _mm512_##oper_zmm(i8_src1.zmmi, i8_src2.zmmi); \ + \ + /* Masked. */ \ + \ + zmm_res.zmmi = _mm512_setzero_epi32(); \ + ymm_res = zmm_res; \ + xmm_res = zmm_res; \ + \ + soft_v512_update(i8_src2); \ + zmm_res.zmmi = _mm512_mask_##oper_epi8(zmm_res.zmmi, k64, i8_src1.zmmi, \ + i8_src2.zmmi); \ + \ + /* Zero-masked. */ \ + \ + zmm_res.zmmi = _mm512_set1_epi32(1.0); \ + ymm_res = zmm_res; \ + xmm_res = zmm_res; \ + \ + soft_v512_update(i8_src2); \ + zmm_res.zmmi = _mm512_maskz_##oper_epi8(k64, i8_src1.zmmi, i8_src2.zmmi); \ + } + +GEN_I32(and_si512, and_epi32, and_si128, and_si256, and_si512) +GEN_I32(andnot_si512, andnot_epi32, andnot_si128, andnot_si256, andnot_si512) +GEN_I32(or_si512, or_epi32, or_si128, or_si256, or_si512) +GEN_I32(xor_si512, xor_epi32, xor_si128, xor_si256, xor_si512) + +GEN_I64(and_epi64, and_epi64, and_si128, and_si256, and_epi64) +GEN_I64(andnot_epi64, andnot_epi64, andnot_si128, andnot_si256, andnot_epi64) +GEN_I64(or_epi64, or_epi64, or_si128, or_si256, or_epi64) +GEN_I64(xor_epi64, xor_epi64, xor_si128, xor_si256, xor_epi64) + +GEN_I64_UNIFORM(add_epi64) +GEN_I64_UNIFORM(max_epi64) +GEN_I64_UNIFORM(max_epu64) +GEN_I64_UNIFORM(min_epi64) +GEN_I64_UNIFORM(min_epu64) +GEN_I64_UNIFORM(mul_epi32) /* Yes, these are really I64 vector elements. */ +GEN_I64_UNIFORM(mul_epu32) /* Yes, these are really I64 vector elements. */ + +GEN_I32(and_epi32, and_epi32, and_si128, and_si256, and_epi32) +GEN_I32(andnot_epi32, andnot_epi32, andnot_si128, andnot_si256, andnot_epi32) +GEN_I32(or_epi32, or_epi32, or_si128, or_si256, or_epi32) +GEN_I32(xor_epi32, xor_epi32, xor_si128, xor_si256, xor_epi32) + +GEN_I32_UNIFORM(add_epi32) +GEN_I32_UNIFORM(max_epi32) +GEN_I32_UNIFORM(max_epu32) +GEN_I32_UNIFORM(min_epi32) +GEN_I32_UNIFORM(min_epu32) +GEN_I32_UNIFORM(sub_epi32) + +int main() { + init(); + + do_and_si512(); + do_andnot_si512(); + do_or_si512(); + do_xor_si512(); + + do_and_epi64(); + do_andnot_epi64(); + do_or_epi64(); + do_xor_epi64(); + + do_add_epi64(); + do_max_epi64(); + do_max_epu64(); + do_min_epi64(); + do_min_epu64(); + do_mul_epi32(); + do_mul_epu32(); + + do_and_epi32(); + do_andnot_epi32(); + do_or_epi32(); + do_xor_epi32(); + + do_add_epi32(); + do_max_epi32(); + do_max_epu32(); + do_min_epi32(); + do_min_epu32(); + do_sub_epi32(); + + if (n_errs != 0) { + printf("FAILED\n"); + return 1; + } + + printf("PASSED\n"); + return 0; +} |