aboutsummaryrefslogtreecommitdiff
path: root/SingleSource/UnitTests/Vector/AVX512F/m512_test_util.h
diff options
context:
space:
mode:
Diffstat (limited to 'SingleSource/UnitTests/Vector/AVX512F/m512_test_util.h')
-rw-r--r--SingleSource/UnitTests/Vector/AVX512F/m512_test_util.h258
1 files changed, 258 insertions, 0 deletions
diff --git a/SingleSource/UnitTests/Vector/AVX512F/m512_test_util.h b/SingleSource/UnitTests/Vector/AVX512F/m512_test_util.h
new file mode 100644
index 00000000..c98e174b
--- /dev/null
+++ b/SingleSource/UnitTests/Vector/AVX512F/m512_test_util.h
@@ -0,0 +1,258 @@
+#ifndef M512_TEST_UTIL_H_INCLUDED
+#define M512_TEST_UTIL_H_INCLUDED
+
+/*
+ * Common declarations useful for writing 512-bit unit tests.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <x86intrin.h>
+
+/*
+ * ALIGNTO(n) decorates a declaration with an n-byte alignment request,
+ * spelled with the Microsoft-style __declspec(align(n)) syntax.
+ */
+#define ALIGNTO(n) __declspec(align(n))
+
+/*
+ * For purposes of unit tests it can be beneficial to suppress inlining
+ * simply so that only a single instance of a test function is emitted.
+ * Makes it easier to diff A/B assembly output.
+ *
+ * NOINLINE is placed between the return type and the function name and
+ * expands to the Microsoft-style __declspec(noinline).
+ */
+#define NOINLINE __declspec(noinline)
+
+/*
+ * FULL_IREG(ax) expands to either eax or rax depending on the target.
+ *
+ * The register name is built by token pasting: an "r" prefix when
+ * __x86_64 or _M_X64 is defined, an "e" prefix otherwise.
+ */
+#if defined(__x86_64) || defined(_M_X64)
+#define FULL_IREG(reg) r##reg
+#else
+#define FULL_IREG(reg) e##reg
+#endif
+
+/*
+ * Number of elements in an array.
+ *
+ * Only meaningful when "a" is a real array object: for a pointer (including
+ * an array-typed function parameter) sizeof yields the pointer size and the
+ * result is not an element count.
+ */
+#define ASIZE(a) (sizeof((a)) / sizeof((a)[0]))
+
+/*
+ * Short names for the signed and unsigned 64-bit integer types, spelled with
+ * the __int64 extension keyword. They are the element types of the s64[] and
+ * u64[] views of V512.
+ */
+typedef __int64 I64;
+typedef unsigned __int64 U64;
+
+/*
+ * V512 overlays one 64-byte-aligned block of storage with every view the
+ * tests need: full 512-bit vectors, 256-bit and 128-bit sub-vectors, and
+ * plain scalar arrays. Writing through one member and reading through another
+ * reinterprets the same bytes.
+ *
+ * The scalar array lengths assume the usual x86 sizes (8-bit char, 16-bit
+ * short, 32-bit int/float, 64-bit I64/U64/double), so that each array spans
+ * the whole 64 bytes.
+ */
+typedef union ALIGNTO(64) {
+
+ /* Whole 512-bit vector: float, double and integer flavours. */
+ __m512 zmm;
+ __m512d zmmd;
+ __m512i zmmi;
+
+ /* The same storage as two 256-bit vectors. */
+ __m256 ymm[2];
+ __m256d ymmd[2];
+ __m256i ymmi[2];
+
+ /* The same storage as four 128-bit vectors. */
+ __m128 xmm[4];
+ __m128d xmmd[4];
+ __m128i xmmi[4];
+
+ /* Scalar element views, indexed from the lowest-addressed element. */
+ char c[64];
+ signed char s8[64];
+ unsigned char u8[64];
+ short s16[32];
+ unsigned short u16[32];
+ int s32[16];
+ unsigned int u32[16];
+ float f32[16];
+ I64 s64[8];
+ U64 u64[8];
+ double f64[8];
+
+} V512;
+
+/*
+ * Running count of failed comparisons; each check_equal_* helper below
+ * increments it once per failing call.
+ *
+ * NOTE(review): this is a definition with external linkage inside a header,
+ * so the header is presumably meant to be included by exactly one translation
+ * unit per test program -- confirm before including it from several.
+ */
+int n_errs = 0;
+
+/*
+ * Print the low n_elems 32-bit unsigned integers from p.
+ *
+ * Elements are written highest index first (index n_elems - 1 down to 0) as
+ * 8-digit hex values, with a line break after every element whose index is a
+ * non-zero multiple of 4, i.e. four values per full row.
+ *
+ *   p       - vector to dump; only p->u32[0 .. n_elems-1] is read.
+ *   banner  - optional label printed before the first row; continuation rows
+ *             are indented by the banner's length. May be NULL.
+ *   n_elems - number of low elements to print (at most 16, the size of
+ *             u32[]). Values <= 0 print only the banner and a newline.
+ */
+void NOINLINE display_pd(const V512 *p, const char *banner, int n_elems) {
+  int i;
+
+  if (banner) {
+    printf("%s", banner);
+  }
+
+  /*
+   * Start at n_elems - 1: index n_elems is not one of the low n_elems
+   * elements and is past the end of u32[] when n_elems == 16.
+   */
+  for (i = n_elems - 1; i >= 0; i--) {
+    printf(" %0.8x", p->u32[i]);
+    if (i > 0 && i % 4 == 0) {
+      printf("\n");
+      if (banner) {
+        /* Pad continuation rows so they line up under the first row. */
+        printf("%*s", (int)strlen(banner), "");
+      }
+    }
+  }
+  printf("\n");
+}
+
+/*
+ * Print the low n_elems 64-bit unsigned integers from p.
+ *
+ * Elements are written highest index first (index n_elems - 1 down to 0) as
+ * 16-digit hex values, with a line break after every element whose index is
+ * a non-zero multiple of 4.
+ *
+ *   p       - vector to dump; only p->u64[0 .. n_elems-1] is read.
+ *   banner  - optional label printed before the first row; continuation rows
+ *             are indented by the banner's length. May be NULL.
+ *   n_elems - number of low elements to print (at most 8, the size of
+ *             u64[]). Values <= 0 print only the banner and a newline.
+ */
+void NOINLINE display_pq(const V512 *p, const char *banner, int n_elems) {
+  int i;
+
+  if (banner) {
+    printf("%s", banner);
+  }
+
+  /*
+   * Start at n_elems - 1: index n_elems is not one of the low n_elems
+   * elements and is past the end of u64[] when n_elems == 8.
+   */
+  for (i = n_elems - 1; i >= 0; i--) {
+    printf(" %0.16llx", p->u64[i]);
+    if (i > 0 && i % 4 == 0) {
+      printf("\n");
+      if (banner) {
+        /* Pad continuation rows so they line up under the first row. */
+        printf("%*s", (int)strlen(banner), "");
+      }
+    }
+  }
+  printf("\n");
+}
+
+/*
+ * Print the low n_elems single precision floats from p.
+ *
+ * Elements are written highest index first (index n_elems - 1 down to 0)
+ * with the "%7g" format, with a line break after every element whose index
+ * is a non-zero multiple of 4, i.e. four values per full row.
+ *
+ *   p       - vector to dump; only p->f32[0 .. n_elems-1] is read.
+ *   banner  - optional label printed before the first row; continuation rows
+ *             are indented by the banner's length. May be NULL.
+ *   n_elems - number of low elements to print (at most 16, the size of
+ *             f32[]). Values <= 0 print only the banner and a newline.
+ */
+void NOINLINE display_psf(const V512 *p, const char *banner, int n_elems) {
+  int i;
+
+  if (banner) {
+    printf("%s", banner);
+  }
+
+  /*
+   * Start at n_elems - 1: index n_elems is not one of the low n_elems
+   * elements and is past the end of f32[] when n_elems == 16.
+   */
+  for (i = n_elems - 1; i >= 0; i--) {
+    printf(" %7g", p->f32[i]);
+    if (i > 0 && i % 4 == 0) {
+      printf("\n");
+      if (banner) {
+        /* Pad continuation rows so they line up under the first row. */
+        printf("%*s", (int)strlen(banner), "");
+      }
+    }
+  }
+  printf("\n");
+}
+
+/*
+ * Print the low n_elems double precision floats from p.
+ *
+ * Elements are written highest index first (index n_elems - 1 down to 0)
+ * with the "%7g" format, with a line break after every element whose index
+ * is a non-zero multiple of 4.
+ *
+ *   p       - vector to dump; only p->f64[0 .. n_elems-1] is read.
+ *   banner  - optional label printed before the first row; continuation rows
+ *             are indented by the banner's length. May be NULL.
+ *   n_elems - number of low elements to print (at most 8, the size of
+ *             f64[]). Values <= 0 print only the banner and a newline.
+ */
+void NOINLINE display_pdf(const V512 *p, const char *banner, int n_elems) {
+  int i;
+
+  if (banner) {
+    printf("%s", banner);
+  }
+
+  /*
+   * Start at n_elems - 1: index n_elems is not one of the low n_elems
+   * elements and is past the end of f64[] when n_elems == 8.
+   */
+  for (i = n_elems - 1; i >= 0; i--) {
+    printf(" %7g", p->f64[i]);
+    if (i > 0 && i % 4 == 0) {
+      printf("\n");
+      if (banner) {
+        /* Pad continuation rows so they line up under the first row. */
+        printf("%*s", (int)strlen(banner), "");
+      }
+    }
+  }
+  printf("\n");
+}
+
+/*
+ * Compare the first n_elems 32-bit lanes of "got" with those of "expected".
+ *
+ * Both pointers are treated as V512 objects. At the first differing lane an
+ * error line (tagged with "line" and the optional "banner") is printed, both
+ * vectors are dumped with display_pd, n_errs is bumped once, and scanning
+ * stops.
+ *
+ * Returns 1 if a mismatch was found, 0 if all compared lanes match.
+ */
+int NOINLINE check_equal_nd(void *got, void *expected, int n_elems,
+                            char *banner, int line) {
+  const V512 *actual = (const V512 *)got;
+  const V512 *wanted = (const V512 *)expected;
+  int idx = 0;
+
+  while (idx < n_elems) {
+    unsigned int a = actual->u32[idx];
+    unsigned int w = wanted->u32[idx];
+
+    if (a != w) {
+      const char *label = banner ? banner : "";
+
+      printf("ERROR(%d): %s failed at %d'th element: 0x%0.8x != 0x%0.8x\n",
+             line, label, idx, a, w);
+      display_pd(actual, "got:", n_elems);
+      display_pd(wanted, "exp:", n_elems);
+      n_errs++;
+      return 1; /* only the first mismatch is reported */
+    }
+    idx++;
+  }
+  return 0;
+}
+
+/*
+ * Compare the first n_elems 64-bit lanes of "got" with those of "expected".
+ *
+ * Both pointers are treated as V512 objects. At the first differing lane an
+ * error line (tagged with "line" and the optional "banner") is printed, both
+ * vectors are dumped with display_pq, n_errs is bumped once, and scanning
+ * stops.
+ *
+ * Returns 1 if a mismatch was found, 0 if all compared lanes match.
+ */
+int NOINLINE check_equal_nq(void *got, void *expected, int n_elems,
+                            char *banner, int line) {
+  const V512 *actual = (const V512 *)got;
+  const V512 *wanted = (const V512 *)expected;
+  int idx = 0;
+
+  while (idx < n_elems) {
+    U64 a = actual->u64[idx];
+    U64 w = wanted->u64[idx];
+
+    if (a != w) {
+      const char *label = banner ? banner : "";
+
+      printf(
+          "ERROR(%d): %s failed at %d'th element: 0x%0.16llx != 0x%0.16llx\n",
+          line, label, idx, a, w);
+      display_pq(actual, "got:", n_elems);
+      display_pq(wanted, "exp:", n_elems);
+      n_errs++;
+      return 1; /* only the first mismatch is reported */
+    }
+    idx++;
+  }
+  return 0;
+}
+
+/*
+ * Absolute tolerance used by EQUAL_FP. It is an ordinary global, so it can
+ * be reassigned at run time.
+ */
+double delta = 1e-4;
+
+/*
+ * EQUAL_FP(v1, v2) is non-zero when v1 and v2 compare equal or their absolute
+ * difference is below delta.
+ *
+ * The explicit equality test lets identical infinities match: inf - inf is
+ * NaN, which would otherwise fail the "< delta" comparison. A NaN operand
+ * never matches anything, including another NaN.
+ *
+ * Both arguments are evaluated more than once; pass side-effect-free
+ * expressions only.
+ */
+#define EQUAL_FP(v1, v2)                                                       \
+  ((v1) == (v2) ||                                                             \
+   ((v1) < (v2) ? ((v2) - (v1) < delta) : ((v1) - (v2) < delta)))
+
+/*
+ * Compare the first n_elems single precision lanes of "got" with those of
+ * "expected", using the EQUAL_FP tolerance test rather than exact equality.
+ *
+ * Both pointers are treated as V512 objects. At the first lane that fails
+ * EQUAL_FP an error line (tagged with "line" and the optional "banner") is
+ * printed, both vectors are dumped with display_psf, n_errs is bumped once,
+ * and scanning stops.
+ *
+ * Returns 1 if a mismatch was found, 0 if all compared lanes match.
+ */
+int NOINLINE check_equal_nsf(void *got, void *expected, int n_elems,
+                             char *banner, int line) {
+  const V512 *actual = (const V512 *)got;
+  const V512 *wanted = (const V512 *)expected;
+  int idx = 0;
+
+  while (idx < n_elems) {
+    if (EQUAL_FP(actual->f32[idx], wanted->f32[idx])) {
+      idx++;
+      continue;
+    }
+
+    /* First lane outside the tolerance: report it and stop. */
+    printf("ERROR(%d): %s failed at %d'th element: %7g != %7g \n", line,
+           banner ? banner : "", idx, actual->f32[idx], wanted->f32[idx]);
+    display_psf(actual, "got:", n_elems);
+    display_psf(wanted, "exp:", n_elems);
+    n_errs++;
+    return 1;
+  }
+  return 0;
+}
+
+/*
+ * Compare the first n_elems double precision lanes of "got" with those of
+ * "expected", using the EQUAL_FP tolerance test rather than exact equality.
+ *
+ * Both pointers are treated as V512 objects. At the first lane that fails
+ * EQUAL_FP an error line (tagged with "line" and the optional "banner") is
+ * printed, both vectors are dumped with display_pdf, n_errs is bumped once,
+ * and scanning stops.
+ *
+ * Returns 1 if a mismatch was found, 0 if all compared lanes match.
+ */
+int NOINLINE check_equal_ndf(void *got, void *expected, int n_elems,
+                             char *banner, int line) {
+  const V512 *actual = (const V512 *)got;
+  const V512 *wanted = (const V512 *)expected;
+  int idx = 0;
+
+  while (idx < n_elems) {
+    if (EQUAL_FP(actual->f64[idx], wanted->f64[idx])) {
+      idx++;
+      continue;
+    }
+
+    /* First lane outside the tolerance: report it and stop. */
+    printf("ERROR(%d): %s failed at %d'th element: %7g != %7g \n", line,
+           banner ? banner : "", idx, actual->f64[idx], wanted->f64[idx]);
+    display_pdf(actual, "got:", n_elems);
+    display_pdf(wanted, "exp:", n_elems);
+    n_errs++;
+    return 1;
+  }
+  return 0;
+}
+
+#endif /* M512_TEST_UTIL_H_INCLUDED */