aboutsummaryrefslogtreecommitdiff
path: root/final/Bitcode/simd_ops/simd_ops.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'final/Bitcode/simd_ops/simd_ops.cpp')
-rw-r--r--final/Bitcode/simd_ops/simd_ops.cpp133
1 files changed, 133 insertions, 0 deletions
diff --git a/final/Bitcode/simd_ops/simd_ops.cpp b/final/Bitcode/simd_ops/simd_ops.cpp
new file mode 100644
index 00000000..994e7027
--- /dev/null
+++ b/final/Bitcode/simd_ops/simd_ops.cpp
@@ -0,0 +1,133 @@
#include "filter_test_op.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef _MSC_VER
#include <errno.h>
#include <malloc.h>
#endif
+
// Allocate aligned memory, per recent requirement by the
// Halide tests updated upstream.
//
// mem       - receives the address of the new allocation.
// alignment - requested alignment in bytes; posix_memalign() requires a
//             power of two that is a multiple of sizeof(void *).
// size      - number of bytes to allocate.
//
// Returns 0 on success and a non-zero errno-style code on failure.
// The memory must be released with free() (or _aligned_free() when built
// with MSVC), never with delete/delete[].
int allocate_aligned(void **mem, size_t alignment, size_t size) {
#ifdef _MSC_VER
    // Note the argument order: _aligned_malloc takes (size, alignment).
    *mem = _aligned_malloc(size, alignment);
    if (*mem) {
        return 0;
    }
    // Never report success together with a NULL pointer, even if errno
    // happens not to have been set.
    return errno ? errno : ENOMEM;
#else
    return posix_memalign(mem, alignment, size);
#endif
}
+
// Multiplier that main() applies to the width of the input buffers:
// 5 when targeting ARM / AArch64, 1 on every other target.
#if !defined(__aarch64__) && !defined(__arm__)
#define FACTOR 1
#else
#define FACTOR 5
#endif
+
// Produce one pseudo-random test value of type T.
//
// Draws a number from rand(), converts it to T, divides it by 8 and
// shifts the result down by 100. The final conversion back to T happens
// on return, so narrow and unsigned types wrap the same way an explicit
// cast would.
template<typename T>
T rand_value() {
    const T sample = static_cast<T>(rand());
    const T scaled = static_cast<T>(sample / 8);
    return scaled - 100;
}
+
// Message hook with C linkage: writes msg followed by a newline to
// stdout, so that errors end up on stdout even on Android. The first
// (pointer) argument is ignored.
extern "C" void halide_print(void *, const char *msg) {
    fprintf(stdout, "%s\n", msg);
}
+
+template<typename T>
+buffer_t make_buffer(int w, int h) {
+ T *mem;
+ int err = allocate_aligned((void **)&mem, 128, w * h * sizeof(T));
+
+ buffer_t buf = {0};
+ buf.host = (uint8_t *)mem;
+ buf.extent[0] = w;
+ buf.extent[1] = h;
+ buf.elem_size = sizeof(T);
+ buf.stride[0] = 1;
+ buf.stride[1] = w;
+
+ for (int i = 0; i < w*h; i++) {
+ mem[i] = rand_value<T>();
+ }
+
+ return buf;
+}
+
+
+int main(int argc, char **argv) {
+ unsigned int err_code = 0;
+#if (!__has_builtin(__builtin_cpu_supports) && (defined(__i386__) || defined(__x86_64__)))
+ return err_code;
+#endif
+#if defined(__i386__) || defined(__x86_64__)
+ if (!__builtin_cpu_supports("avx") ||
+ !__builtin_cpu_supports("avx2")) {
+ return err_code;
+ }
+#endif
+ time_t seed;
+ if (argc > 1) {
+ seed = atoi(argv[1]);
+ }
+ else {
+ seed = time(NULL);
+ srand (seed);
+ }
+ int W = 256*FACTOR, H = 100;
+ // Make some input buffers
+ buffer_t bufs[] = {
+ make_buffer<float>(W, H),
+ make_buffer<double>(W, H),
+ make_buffer<int8_t>(W, H),
+ make_buffer<uint8_t>(W, H),
+ make_buffer<int16_t>(W, H),
+ make_buffer<uint16_t>(W, H),
+ make_buffer<int32_t>(W, H),
+ make_buffer<uint32_t>(W, H),
+ make_buffer<int64_t>(W, H),
+ make_buffer<uint64_t>(W, H)
+ };
+ W/=FACTOR;
+
+ int NO = 2;
+ buffer_t out[] = {
+ make_buffer<double>(W, H),
+ make_buffer<double>(W, H)
+ };
+ double *out_value[NO];
+
+ for (int i = 0; i < NO; i++) {
+ filter f = filters[i];
+ f.fn(bufs + 0,
+ bufs + 1,
+ bufs + 2,
+ bufs + 3,
+ bufs + 4,
+ bufs + 5,
+ bufs + 6,
+ bufs + 7,
+ bufs + 8,
+ bufs + 9,
+ &(out[i]));
+ out_value[i] = (double *)(out[i].host);
+ }
+
+ int err;
+ for (int i = 0; i < W*H; i++) {
+ if ((err = out_value[0][i] - out_value[1][i]) > 0.0001) {
+ fprintf(stderr, "Code generation error (%d): %d. Seer used %ld\n", i, err, seed);
+ err_code = 1;
+ break;
+ }
+ }
+
+ for (int i = 0; i < sizeof(bufs)/sizeof(buffer_t); i++) {
+ delete[] bufs[i].host;
+ }
+
+ for (int i = 0; i < NO; i++) {
+ delete[] out[i].host;
+ }
+
+ return err_code;
+}
+