aboutsummaryrefslogtreecommitdiff
path: root/final/Bitcode/simd_ops/simd_ops.cpp
blob: 994e70271815d7333e1b2835a60f9da940484ec8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#include "filter_test_op.h"

#ifdef _MSC_VER
#include <errno.h>
#include <malloc.h>
#endif

// Allocate aligned memory, per recent requirement by the
// Halide tests updated upstream.
//
// On success stores the new block in *mem and returns 0; on failure returns
// a non-zero errno-style code. The block must be released with the
// deallocator matching the platform allocator used here (_aligned_free on
// MSVC, free elsewhere), never with delete/delete[].
int allocate_aligned(void **mem, size_t alignment, size_t size) {
#ifdef _MSC_VER
  // Note the argument order: _aligned_malloc takes (size, alignment).
  *mem = _aligned_malloc(size, alignment);
  if (*mem) {
    return 0;
  }
  // Guarantee a non-zero result even if the CRT left errno untouched.
  return errno ? errno : ENOMEM;
#else
  return posix_memalign(mem, alignment, size);
#endif
}

// FACTOR scales the width of the input buffers relative to the output
// buffers: main() allocates inputs 256*FACTOR elements wide and then divides
// the width by FACTOR again before allocating the outputs. It is 5 when
// building for ARM/AArch64 and 1 everywhere else.
// NOTE(review): the reason ARM targets need wider inputs is not visible in
// this file - presumably the generated filters read past the output width;
// confirm against the filter generator.
#if defined(__aarch64__) || defined(__arm__)
#define FACTOR 5
#else
#define FACTOR 1
#endif

// Produces one pseudo-random test value of type T: a single rand() draw is
// converted to T, divided by 8, converted to T again, and then shifted down
// by 100; the result is converted to T on return.
template<typename T>
T rand_value() {
    const T sample = static_cast<T>(rand());
    const T eighth = static_cast<T>(sample / 8);
    return static_cast<T>(eighth - 100);
}

// Print hook with C linkage: writes the message followed by a newline to
// stdout (the intent, per the original note, is that errors go to stdout
// even on Android). The first argument is ignored.
extern "C" void halide_print(void * /*unused*/, const char *msg) {
    fprintf(stdout, "%s\n", msg);
}

// Builds a dense w-by-h buffer_t of element type T whose storage is
// 128-byte aligned and filled with values from rand_value<T>().
//
// Layout: extent = {w, h}, stride = {1, w}, elem_size = sizeof(T); all other
// fields are zero-initialised. The storage comes from allocate_aligned(), so
// the caller must release buf.host with the matching deallocator.
//
// If the allocation fails the function reports the error on stderr and
// terminates the program: this is a test driver and nothing useful can be
// done without its buffers.
template<typename T>
buffer_t make_buffer(int w, int h) {
    // Compute the element count in size_t so w * h cannot overflow int.
    const size_t count = (w > 0 && h > 0) ? (size_t)w * (size_t)h : 0;

    T *mem = NULL;
    const int err = allocate_aligned((void **)&mem, 128, count * sizeof(T));
    if (err != 0) {
        fprintf(stderr, "make_buffer: aligned allocation of %lu bytes failed (error %d)\n",
                (unsigned long)(count * sizeof(T)), err);
        exit(1);
    }

    buffer_t buf = {0};
    buf.host = (uint8_t *)mem;
    buf.extent[0] = w;
    buf.extent[1] = h;
    buf.elem_size = sizeof(T);
    buf.stride[0] = 1;
    buf.stride[1] = w;

    for (size_t i = 0; i < count; i++) {
        mem[i] = rand_value<T>();
    }

    return buf;
}


// Releases a block obtained from allocate_aligned(). Such memory comes from
// posix_memalign / _aligned_malloc, so it must not be passed to delete[].
static void release_aligned(void *mem) {
#ifdef _MSC_VER
    _aligned_free(mem);
#else
    free(mem);
#endif
}

// Test driver: runs the first two entries of `filters` on the same random
// inputs and checks that their double-valued outputs agree element-wise.
//
// argv[1], if present, is used as the random seed (parsed with atoi);
// otherwise the current time is used. Returns 0 when the outputs match (or
// when the test is skipped on x86 hosts without AVX/AVX2 detection) and 1 on
// the first mismatch larger than 1e-4.
int main(int argc, char **argv) {
    unsigned int err_code = 0;

#if defined(__i386__) || defined(__x86_64__)
    // On x86 the test only runs when the host supports AVX and AVX2. If the
    // compiler cannot tell us (no __has_builtin, or no
    // __builtin_cpu_supports), skip the test instead of failing to compile.
    bool can_run = false;
#ifdef __has_builtin
#if __has_builtin(__builtin_cpu_supports)
    can_run = __builtin_cpu_supports("avx") && __builtin_cpu_supports("avx2");
#endif
#endif
    if (!can_run) {
        return err_code;
    }
#endif

    time_t seed;
    if (argc > 1) {
        seed = atoi(argv[1]);
    }
    else {
        seed = time(NULL);
    }
    // Seed in both cases so a seed given on the command line actually
    // reproduces the run.
    srand((unsigned int)seed);

    int W = 256*FACTOR, H = 100;
    // Make some input buffers
    buffer_t bufs[] = {
        make_buffer<float>(W, H),
        make_buffer<double>(W, H),
        make_buffer<int8_t>(W, H),
        make_buffer<uint8_t>(W, H),
        make_buffer<int16_t>(W, H),
        make_buffer<uint16_t>(W, H),
        make_buffer<int32_t>(W, H),
        make_buffer<uint32_t>(W, H),
        make_buffer<int64_t>(W, H),
        make_buffer<uint64_t>(W, H)
    };
    const size_t num_inputs = sizeof(bufs) / sizeof(bufs[0]);
    // Outputs are FACTOR times narrower than the inputs.
    W/=FACTOR;

    // const so that out_value below is a regular array, not a VLA.
    const int NO = 2;
    buffer_t out[] = {
        make_buffer<double>(W, H),
        make_buffer<double>(W, H)
    };
    double *out_value[NO];

    for (int i = 0; i < NO; i++) {
        filter f = filters[i];
        f.fn(bufs + 0,
             bufs + 1,
             bufs + 2,
             bufs + 3,
             bufs + 4,
             bufs + 5,
             bufs + 6,
             bufs + 7,
             bufs + 8,
             bufs + 9,
             &(out[i]));
        out_value[i] = (double *)(out[i].host);
    }

    // Compare the two outputs. The difference is kept as a double and made
    // non-negative so that sub-integer and negative deviations are caught.
    for (int i = 0; i < W*H; i++) {
        double diff = out_value[0][i] - out_value[1][i];
        if (diff < 0) {
            diff = -diff;
        }
        if (diff > 0.0001) {
            fprintf(stderr, "Code generation error (%d): %g. Seed used %ld\n",
                    i, diff, (long)seed);
            err_code = 1;
            break;
        }
    }

    for (size_t i = 0; i < num_inputs; i++) {
        release_aligned(bufs[i].host);
    }

    for (int i = 0; i < NO; i++) {
        release_aligned(out[i].host);
    }

    return err_code;
}