Diffstat (limited to 'SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c')
-rw-r--r--  SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c  273
1 file changed, 273 insertions, 0 deletions
diff --git a/SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c b/SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c
new file mode 100644
index 00000000..b96ff8b5
--- /dev/null
+++ b/SingleSource/UnitTests/Vector/AVX512F/op2_xyz_int.c
@@ -0,0 +1,273 @@
+
+/*
+ * Test the 128- and 256-bit two-operand integer intrinsics,
+ * with masked and zero-masked forms, by comparing
+ * their output with the corresponding 512-bit intrinsic.
+ * Here we check the _mm512_[mask|maskz]_[and|andnot|or|xor|add|max|min|mul|sub]
+ * intrinsics.
+ */
+
+#include "m512_test_util.h"
+#include <stdio.h>
+
+V512 i8_src1;
+V512 i8_src2;
+V512 i16_src1;
+V512 i16_src2;
+V512 i32_src1;
+V512 i32_src2;
+V512 i64_src1;
+V512 i64_src2;
+
+void NOINLINE init() {
+ volatile int i;
+
+ for (i = 0; i < 64; i++) {
+ i8_src1.s8[i] = i;
+ i8_src2.s8[i] = (i & 1) ? i : -i;
+ }
+
+ for (i = 0; i < 32; i++) {
+ i16_src1.s16[i] = i;
+ i16_src2.s16[i] = (i & 1) ? i : -i;
+ }
+
+ for (i = 0; i < 16; i++) {
+ i32_src1.s32[i] = i;
+ i32_src2.s32[i] = (i & 1) ? i : -i;
+ }
+
+ for (i = 0; i < 8; i++) {
+ i64_src1.s64[i] = i;
+ i64_src2.s64[i] = (i & 1) ? i : -i;
+ }
+}
+
+/*
+ * Use a "soft update" between tests to make the compiler think src was
+ * updated. This prevents PRE'ing a load of src, thus allowing ciscization
+ * (folding the load into the instruction's memory operand).
+ * It also prevents PRE'ing the intrinsic operations themselves, ensuring we
+ * execute the intended instructions.
+ */
+volatile int vol0 = 0;
+#define soft_v512_update(var) (var).xmmi[vol0] = (var).xmmi[vol0]
+
+/*
+ * Generate a function that tests a packed int64 intrinsic
+ * by implementing the XMM, YMM and ZMM versions, and comparing
+ * the XMM and YMM results with the low part of the ZMM result.
+ *
+ * We test regular, masked and zero masked forms.
+ *
+ * Use GEN_I64_UNIFORM when the core intrinsic name is the same
+ * for all vector lengths, e.g. "add_epi64". Otherwise use
+ * GEN_I64 to list the different names, e.g. "and_si128" and "and_si256".
+ */
+
+#define GEN_I64_UNIFORM(oper) GEN_I64(oper, oper, oper, oper, oper)
+
+#define GEN_I64(test_name, oper_epi64, oper_xmm, oper_ymm, oper_zmm) \
+ void NOINLINE do_##test_name() { \
+ V512 xmm_res, ymm_res, zmm_res; \
+ __mmask8 k8 = 0x5a; \
+ \
+ /* Non-masked. */ \
+ \
+ soft_v512_update(i64_src2); \
+ zmm_res.zmmi = _mm512_##oper_zmm(i64_src1.zmmi, i64_src2.zmmi); \
+ \
+ /* Masked. */ \
+ \
+ zmm_res.zmmi = _mm512_setzero_epi32(); \
+ ymm_res = zmm_res; \
+ xmm_res = zmm_res; \
+ \
+ soft_v512_update(i64_src2); \
+ zmm_res.zmmi = _mm512_mask_##oper_epi64(zmm_res.zmmi, k8, i64_src1.zmmi, \
+ i64_src2.zmmi); \
+ \
+ /* Zero-masked. */ \
+ \
+ zmm_res.zmmi = _mm512_set1_epi64(1); \
+ ymm_res = zmm_res; \
+ xmm_res = zmm_res; \
+ \
+ soft_v512_update(i64_src2); \
+ zmm_res.zmmi = \
+ _mm512_maskz_##oper_epi64(k8, i64_src1.zmmi, i64_src2.zmmi); \
+ }
+
+#define GEN_I32_UNIFORM(oper) GEN_I32(oper, oper, oper, oper, oper)
+
+#define GEN_I32(test_name, oper_epi32, oper_xmm, oper_ymm, oper_zmm) \
+ void NOINLINE do_##test_name() { \
+ V512 xmm_res, ymm_res, zmm_res; \
+ __mmask16 k16 = 0x7feb; \
+ __mmask8 k8 = (__mmask8)k16; \
+ \
+ /* Non-masked. */ \
+ \
+ soft_v512_update(i32_src2); \
+ zmm_res.zmmi = _mm512_##oper_zmm(i32_src1.zmmi, i32_src2.zmmi); \
+ \
+ /* Masked. */ \
+ \
+ zmm_res.zmmi = _mm512_setzero_epi32(); \
+ ymm_res = zmm_res; \
+ xmm_res = zmm_res; \
+ \
+ soft_v512_update(i32_src2); \
+ zmm_res.zmmi = _mm512_mask_##oper_epi32(zmm_res.zmmi, k16, i32_src1.zmmi, \
+ i32_src2.zmmi); \
+ \
+ /* Zero-masked. */ \
+ \
+    zmm_res.zmmi = _mm512_set1_epi32(1);                                     \
+ ymm_res = zmm_res; \
+ xmm_res = zmm_res; \
+ \
+ soft_v512_update(i32_src2); \
+ zmm_res.zmmi = \
+ _mm512_maskz_##oper_epi32(k16, i32_src1.zmmi, i32_src2.zmmi); \
+ }
+
+#define GEN_I16_UNIFORM(oper) GEN_I16(oper, oper, oper, oper, oper)
+
+#define GEN_I16(test_name, oper_epi16, oper_xmm, oper_ymm, oper_zmm) \
+ void NOINLINE do_##test_name() { \
+ V512 xmm_res, ymm_res, zmm_res; \
+ __mmask32 k32 = 0x7febeb7f; \
+ __mmask16 k16 = (__mmask16)k32; \
+ __mmask8 k8 = (__mmask8)k16; \
+ \
+ /* Non-masked. */ \
+ \
+ soft_v512_update(i16_src2); \
+ zmm_res.zmmi = _mm512_##oper_zmm(i16_src1.zmmi, i16_src2.zmmi); \
+ \
+ /* Masked. */ \
+ \
+ zmm_res.zmmi = _mm512_setzero_epi32(); \
+ ymm_res = zmm_res; \
+ xmm_res = zmm_res; \
+ \
+ soft_v512_update(i16_src2); \
+ zmm_res.zmmi = _mm512_mask_##oper_epi16(zmm_res.zmmi, k32, i16_src1.zmmi, \
+ i16_src2.zmmi); \
+ \
+ /* Zero-masked. */ \
+ \
+    zmm_res.zmmi = _mm512_set1_epi32(1);                                     \
+ ymm_res = zmm_res; \
+ xmm_res = zmm_res; \
+ \
+ soft_v512_update(i16_src2); \
+ zmm_res.zmmi = \
+ _mm512_maskz_##oper_epi16(k32, i16_src1.zmmi, i16_src2.zmmi); \
+ }
+
+#define GEN_I8_UNIFORM(oper) GEN_I8(oper, oper, oper, oper, oper)
+
+#define GEN_I8(test_name, oper_epi8, oper_xmm, oper_ymm, oper_zmm) \
+ void NOINLINE do_##test_name() { \
+ V512 xmm_res, ymm_res, zmm_res; \
+ __mmask64 k64 = 0xa55a7febeb7f5aa5U; \
+ __mmask32 k32 = (__mmask32)k64; \
+ __mmask16 k16 = (__mmask16)k32; \
+ \
+ /* Non-masked. */ \
+ \
+ soft_v512_update(i8_src2); \
+ zmm_res.zmmi = _mm512_##oper_zmm(i8_src1.zmmi, i8_src2.zmmi); \
+ \
+ /* Masked. */ \
+ \
+ zmm_res.zmmi = _mm512_setzero_epi32(); \
+ ymm_res = zmm_res; \
+ xmm_res = zmm_res; \
+ \
+ soft_v512_update(i8_src2); \
+ zmm_res.zmmi = _mm512_mask_##oper_epi8(zmm_res.zmmi, k64, i8_src1.zmmi, \
+ i8_src2.zmmi); \
+ \
+ /* Zero-masked. */ \
+ \
+    zmm_res.zmmi = _mm512_set1_epi32(1);                                     \
+ ymm_res = zmm_res; \
+ xmm_res = zmm_res; \
+ \
+ soft_v512_update(i8_src2); \
+ zmm_res.zmmi = _mm512_maskz_##oper_epi8(k64, i8_src1.zmmi, i8_src2.zmmi); \
+ }
+
+GEN_I32(and_si512, and_epi32, and_si128, and_si256, and_si512)
+GEN_I32(andnot_si512, andnot_epi32, andnot_si128, andnot_si256, andnot_si512)
+GEN_I32(or_si512, or_epi32, or_si128, or_si256, or_si512)
+GEN_I32(xor_si512, xor_epi32, xor_si128, xor_si256, xor_si512)
+
+GEN_I64(and_epi64, and_epi64, and_si128, and_si256, and_epi64)
+GEN_I64(andnot_epi64, andnot_epi64, andnot_si128, andnot_si256, andnot_epi64)
+GEN_I64(or_epi64, or_epi64, or_si128, or_si256, or_epi64)
+GEN_I64(xor_epi64, xor_epi64, xor_si128, xor_si256, xor_epi64)
+
+GEN_I64_UNIFORM(add_epi64)
+GEN_I64_UNIFORM(max_epi64)
+GEN_I64_UNIFORM(max_epu64)
+GEN_I64_UNIFORM(min_epi64)
+GEN_I64_UNIFORM(min_epu64)
+GEN_I64_UNIFORM(mul_epi32) /* Yes, these are really I64 vector elements. */
+GEN_I64_UNIFORM(mul_epu32) /* Yes, these are really I64 vector elements. */
+
+GEN_I32(and_epi32, and_epi32, and_si128, and_si256, and_epi32)
+GEN_I32(andnot_epi32, andnot_epi32, andnot_si128, andnot_si256, andnot_epi32)
+GEN_I32(or_epi32, or_epi32, or_si128, or_si256, or_epi32)
+GEN_I32(xor_epi32, xor_epi32, xor_si128, xor_si256, xor_epi32)
+
+GEN_I32_UNIFORM(add_epi32)
+GEN_I32_UNIFORM(max_epi32)
+GEN_I32_UNIFORM(max_epu32)
+GEN_I32_UNIFORM(min_epi32)
+GEN_I32_UNIFORM(min_epu32)
+GEN_I32_UNIFORM(sub_epi32)
+
+int main() {
+ init();
+
+ do_and_si512();
+ do_andnot_si512();
+ do_or_si512();
+ do_xor_si512();
+
+ do_and_epi64();
+ do_andnot_epi64();
+ do_or_epi64();
+ do_xor_epi64();
+
+ do_add_epi64();
+ do_max_epi64();
+ do_max_epu64();
+ do_min_epi64();
+ do_min_epu64();
+ do_mul_epi32();
+ do_mul_epu32();
+
+ do_and_epi32();
+ do_andnot_epi32();
+ do_or_epi32();
+ do_xor_epi32();
+
+ do_add_epi32();
+ do_max_epi32();
+ do_max_epu32();
+ do_min_epi32();
+ do_min_epu32();
+ do_sub_epi32();
+
+ if (n_errs != 0) {
+ printf("FAILED\n");
+ return 1;
+ }
+
+ printf("PASSED\n");
+ return 0;
+}
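
A note on the "soft update" trick in the patch: soft_v512_update(var) rewrites one 128-bit lane of the operand through the volatile index vol0, so between two tests the compiler must assume the source vector may have changed. It therefore cannot hoist a single load of the source (or the intrinsic expression itself) out of the test functions, while the load can still be folded into the vector instruction. The fragment below is a minimal standalone sketch of the same idea; the V512_sketch union and the twice() function are illustrative stand-ins, not part of the patch.

#include <immintrin.h>

/* Illustrative stand-in for the V512 union from m512_test_util.h. */
typedef union {
  __m512i zmmi;
  __m128i xmmi[4];
} V512_sketch;

volatile int vol0 = 0; /* always 0, but the compiler cannot prove it */

/* Self-assignment through a volatile index: looks like a real update. */
#define soft_v512_update(var) (var).xmmi[vol0] = (var).xmmi[vol0]

V512_sketch src;

__m512i twice(__m512i a) {
  __m512i r1 = _mm512_add_epi64(a, src.zmmi);
  soft_v512_update(src);                      /* src must be treated as modified here...    */
  __m512i r2 = _mm512_add_epi64(a, src.zmmi); /* ...so this second load is not CSE'd away. */
  return _mm512_add_epi64(r1, r2);
}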
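
The block comment above GEN_I64 describes computing the XMM, YMM and ZMM forms and comparing the 128- and 256-bit results with the low part of the 512-bit result, but the macro bodies in this revision only exercise the 512-bit intrinsics; xmm_res and ymm_res are initialized and never checked. The fragment below is a minimal sketch of what such a cross-width check could look like for add_epi64. It assumes m512_test_util.h also provides a check_equal_nd(got, expected, n_dwords, banner, line) helper that bumps n_errs on mismatch and a ymmi[] member in V512, and that the 128/256-bit intrinsics are available on the build target; these are assumptions about the harness, not taken from the patch.

#include "m512_test_util.h" /* V512, NOINLINE, n_errs, check_equal_nd (assumed) */
#include <immintrin.h>

extern V512 i64_src1, i64_src2;

void NOINLINE do_add_epi64_compare() {
  V512 xmm_res, ymm_res, zmm_res;

  /* Same two-operand add at 512, 256 and 128 bits. */
  zmm_res.zmmi = _mm512_add_epi64(i64_src1.zmmi, i64_src2.zmmi);
  ymm_res.ymmi[0] = _mm256_add_epi64(i64_src1.ymmi[0], i64_src2.ymmi[0]);
  xmm_res.xmmi[0] = _mm_add_epi64(i64_src1.xmmi[0], i64_src2.xmmi[0]);

  /* The 256-bit result must match the low 8 dwords of the 512-bit result,
   * and the 128-bit result the low 4 dwords. */
  check_equal_nd(&ymm_res, &zmm_res, 8, "add_epi64 ymm", __LINE__);
  check_equal_nd(&xmm_res, &zmm_res, 4, "add_epi64 xmm", __LINE__);
}

The same comparison would apply to the masked and zero-masked cases. That is also why the macros reseed zmm_res with zeros before the merge-masked test and with set1(1) before the zero-masked test: merge masking keeps the destination lanes deselected by the mask while maskz forces them to zero, so a distinctive background value makes any masking error visible in such a check.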