diff options
Diffstat (limited to 'final/Bitcode/simd_ops/simd_ops.cpp')
-rw-r--r-- | final/Bitcode/simd_ops/simd_ops.cpp | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/final/Bitcode/simd_ops/simd_ops.cpp b/final/Bitcode/simd_ops/simd_ops.cpp new file mode 100644 index 00000000..994e7027 --- /dev/null +++ b/final/Bitcode/simd_ops/simd_ops.cpp @@ -0,0 +1,133 @@ +#include "filter_test_op.h" + +#ifdef _MSC_VER +#include <errno.h> +#include <malloc.h> +#endif + +// Allocate aligned memory, per recent requirement by the +// Halide tests updated upstream. +int allocate_aligned(void **mem, size_t alignment, size_t size) { +#ifdef _MSC_VER + *p = _aligned_malloc(size, alignment); + return (*p) ? 0 : errno; +#else + return posix_memalign(mem, alignment, size); +#endif +} + +#if defined(__aarch64__) || defined(__arm__) +#define FACTOR 5 +#else +#define FACTOR 1 +#endif + +template<typename T> +T rand_value() { + return (T)((T)rand() / 8) - 100; +} + +// Even on android, we want errors to stdout +extern "C" void halide_print(void *, const char *msg) { + printf("%s\n", msg); +} + +template<typename T> +buffer_t make_buffer(int w, int h) { + T *mem; + int err = allocate_aligned((void **)&mem, 128, w * h * sizeof(T)); + + buffer_t buf = {0}; + buf.host = (uint8_t *)mem; + buf.extent[0] = w; + buf.extent[1] = h; + buf.elem_size = sizeof(T); + buf.stride[0] = 1; + buf.stride[1] = w; + + for (int i = 0; i < w*h; i++) { + mem[i] = rand_value<T>(); + } + + return buf; +} + + +int main(int argc, char **argv) { + unsigned int err_code = 0; +#if (!__has_builtin(__builtin_cpu_supports) && (defined(__i386__) || defined(__x86_64__))) + return err_code; +#endif +#if defined(__i386__) || defined(__x86_64__) + if (!__builtin_cpu_supports("avx") || + !__builtin_cpu_supports("avx2")) { + return err_code; + } +#endif + time_t seed; + if (argc > 1) { + seed = atoi(argv[1]); + } + else { + seed = time(NULL); + srand (seed); + } + int W = 256*FACTOR, H = 100; + // Make some input buffers + buffer_t bufs[] = { + make_buffer<float>(W, H), + make_buffer<double>(W, H), + make_buffer<int8_t>(W, H), + make_buffer<uint8_t>(W, H), + make_buffer<int16_t>(W, H), + make_buffer<uint16_t>(W, H), + make_buffer<int32_t>(W, H), + make_buffer<uint32_t>(W, H), + make_buffer<int64_t>(W, H), + make_buffer<uint64_t>(W, H) + }; + W/=FACTOR; + + int NO = 2; + buffer_t out[] = { + make_buffer<double>(W, H), + make_buffer<double>(W, H) + }; + double *out_value[NO]; + + for (int i = 0; i < NO; i++) { + filter f = filters[i]; + f.fn(bufs + 0, + bufs + 1, + bufs + 2, + bufs + 3, + bufs + 4, + bufs + 5, + bufs + 6, + bufs + 7, + bufs + 8, + bufs + 9, + &(out[i])); + out_value[i] = (double *)(out[i].host); + } + + int err; + for (int i = 0; i < W*H; i++) { + if ((err = out_value[0][i] - out_value[1][i]) > 0.0001) { + fprintf(stderr, "Code generation error (%d): %d. Seer used %ld\n", i, err, seed); + err_code = 1; + break; + } + } + + for (int i = 0; i < sizeof(bufs)/sizeof(buffer_t); i++) { + delete[] bufs[i].host; + } + + for (int i = 0; i < NO; i++) { + delete[] out[i].host; + } + + return err_code; +} + |