aboutsummaryrefslogtreecommitdiff
path: root/final/Bitcode/Benchmarks/Halide
diff options
context:
space:
mode:
Diffstat (limited to 'final/Bitcode/Benchmarks/Halide')
-rw-r--r--final/Bitcode/Benchmarks/Halide/CMakeLists.txt26
-rw-r--r--final/Bitcode/Benchmarks/Halide/bilateral_grid/CMakeLists.txt10
-rw-r--r--final/Bitcode/Benchmarks/Halide/bilateral_grid/bilateral_grid.bcbin0 -> 54640 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/bilateral_grid/bilateral_grid.h42
-rw-r--r--final/Bitcode/Benchmarks/Halide/bilateral_grid/driver.cpp37
-rw-r--r--final/Bitcode/Benchmarks/Halide/bilateral_grid/output/rgb_out.bytesbin0 -> 15728652 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/bilateral_grid/output/rgba_out.bytesbin0 -> 15728652 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/blur/CMakeLists.txt7
-rw-r--r--final/Bitcode/Benchmarks/Halide/blur/driver.cpp119
-rw-r--r--final/Bitcode/Benchmarks/Halide/blur/halide_blur.bcbin0 -> 22488 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/blur/halide_blur.h42
-rw-r--r--final/Bitcode/Benchmarks/Halide/common/benchmark.h58
-rw-r--r--final/Bitcode/Benchmarks/Halide/common/halide_buffer.h27
-rw-r--r--final/Bitcode/Benchmarks/Halide/common/halide_image.h234
-rw-r--r--final/Bitcode/Benchmarks/Halide/common/halide_image_info.h314
-rw-r--r--final/Bitcode/Benchmarks/Halide/common/halide_image_io.h320
-rw-r--r--final/Bitcode/Benchmarks/Halide/common/x86_halide_runtime.bcbin0 -> 104300 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/images/rgb.bytesbin0 -> 15728652 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/images/rgba.bytesbin0 -> 15728652 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/local_laplacian/CMakeLists.txt10
-rw-r--r--final/Bitcode/Benchmarks/Halide/local_laplacian/driver.cpp33
-rw-r--r--final/Bitcode/Benchmarks/Halide/local_laplacian/local_laplacian.bcbin0 -> 266460 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/local_laplacian/local_laplacian.h42
-rw-r--r--final/Bitcode/Benchmarks/Halide/local_laplacian/output/rgb_out.bytesbin0 -> 15728652 bytes
-rw-r--r--final/Bitcode/Benchmarks/Halide/local_laplacian/output/rgba_out.bytesbin0 -> 15728652 bytes
25 files changed, 1321 insertions, 0 deletions
diff --git a/final/Bitcode/Benchmarks/Halide/CMakeLists.txt b/final/Bitcode/Benchmarks/Halide/CMakeLists.txt
new file mode 100644
index 00000000..72a94af6
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/CMakeLists.txt
@@ -0,0 +1,26 @@
+if(NOT WIN32)
+ find_package(Threads)
+ list(APPEND LDFLAGS ${CMAKE_THREAD_LIBS_INIT})
+ foreach(lib ${CMAKE_DL_LIBS})
+ list(APPEND LDFLAGS -l${lib})
+ endforeach()
+endif()
+if(NOT MSVC)
+ list(APPEND CXXFLAGS "-std=c++11")
+endif()
+
+macro(test_img_input img)
+ set(imgpath "${CMAKE_CURRENT_SOURCE_DIR}/../images/${img}")
+ llvm_test_run(${imgpath}.bytes ${ARGN}
+ ${CMAKE_CURRENT_BINARY_DIR}/${img}_out.bytes)
+ llvm_test_verify(${FPCMP}
+ ${CMAKE_CURRENT_SOURCE_DIR}/output/${img}_out.bytes
+ ${CMAKE_CURRENT_BINARY_DIR}/${img}_out.bytes
+ )
+endmacro()
+
+if(ARCH STREQUAL "x86")
+ add_subdirectory(local_laplacian)
+ add_subdirectory(bilateral_grid)
+ add_subdirectory(blur)
+endif()
diff --git a/final/Bitcode/Benchmarks/Halide/bilateral_grid/CMakeLists.txt b/final/Bitcode/Benchmarks/Halide/bilateral_grid/CMakeLists.txt
new file mode 100644
index 00000000..0014d84d
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/bilateral_grid/CMakeLists.txt
@@ -0,0 +1,10 @@
+file(GLOB bcsources ${CMAKE_CURRENT_SOURCE_DIR}/../common/x86_halide_runtime.bc ${CMAKE_CURRENT_SOURCE_DIR}/bilateral_grid.bc)
+SET_SOURCE_FILES_PROPERTIES(${bcsources} PROPERTIES LANGUAGE CXX)
+
+test_img_input(rgb 0.1 10)
+test_img_input(rgba 0.1 10)
+
+llvm_multisource(halide_bilateral_grid
+ ${CMAKE_CURRENT_SOURCE_DIR}/driver.cpp
+ ${bcsources}
+)
diff --git a/final/Bitcode/Benchmarks/Halide/bilateral_grid/bilateral_grid.bc b/final/Bitcode/Benchmarks/Halide/bilateral_grid/bilateral_grid.bc
new file mode 100644
index 00000000..04977045
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/bilateral_grid/bilateral_grid.bc
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/bilateral_grid/bilateral_grid.h b/final/Bitcode/Benchmarks/Halide/bilateral_grid/bilateral_grid.h
new file mode 100644
index 00000000..04c002ce
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/bilateral_grid/bilateral_grid.h
@@ -0,0 +1,42 @@
+#ifndef HALIDE__bilateral_grid_h
+#define HALIDE__bilateral_grid_h
+#ifndef HALIDE_ATTRIBUTE_ALIGN
+ #ifdef _MSC_VER
+ #define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
+ #else
+ #define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
+ #endif
+#endif
+#ifndef BUFFER_T_DEFINED
+#define BUFFER_T_DEFINED
+#include <stdbool.h>
+#include <stdint.h>
+typedef struct buffer_t {
+ uint64_t dev;
+ uint8_t* host;
+ int32_t extent[4];
+ int32_t stride[4];
+ int32_t min[4];
+ int32_t elem_size;
+ HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;
+ HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;
+ HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];
+} buffer_t;
+#endif
+struct halide_filter_metadata_t;
+#ifndef HALIDE_FUNCTION_ATTRS
+#define HALIDE_FUNCTION_ATTRS
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int bilateral_grid(float _r_sigma, buffer_t *_input_buffer, buffer_t *_bilateral_grid_buffer) HALIDE_FUNCTION_ATTRS;
+int bilateral_grid_argv(void **args) HALIDE_FUNCTION_ATTRS;
+// Result is never null and points to constant static data
+const struct halide_filter_metadata_t *bilateral_grid_metadata() HALIDE_FUNCTION_ATTRS;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif
diff --git a/final/Bitcode/Benchmarks/Halide/bilateral_grid/driver.cpp b/final/Bitcode/Benchmarks/Halide/bilateral_grid/driver.cpp
new file mode 100644
index 00000000..2d759b52
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/bilateral_grid/driver.cpp
@@ -0,0 +1,37 @@
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+
+#include "../common/benchmark.h"
+#include "../common/halide_image.h"
+#include "../common/halide_image_io.h"
+#include "bilateral_grid.h"
+
+using namespace Halide::Tools;
+
+int main(int argc, char **argv) {
+
+ if (argc < 5) {
+ printf("Usage: ./filter input.png range_sigma timing_iterations output.png\n"
+ "e.g. ./filter input.png 0.1 10 output.png\n");
+ return 0;
+ }
+
+ int timing_iterations = atoi(argv[3]);
+
+ Image<float> input = load_image(argv[1]);
+ Image<float> output(input.width(), input.height(), 1);
+
+ bilateral_grid(atof(argv[2]), input, output);
+
+ // Timing code. Timing doesn't include copying the input data to
+ // the gpu or copying the output back.
+ double min_t = benchmark(timing_iterations, 10, [&]() {
+ bilateral_grid(atof(argv[2]), input, output);
+ });
+ printf("%gms\n", min_t * 1e3);
+
+ save_image(output, argv[4]);
+
+ return 0;
+}
diff --git a/final/Bitcode/Benchmarks/Halide/bilateral_grid/output/rgb_out.bytes b/final/Bitcode/Benchmarks/Halide/bilateral_grid/output/rgb_out.bytes
new file mode 100644
index 00000000..76957958
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/bilateral_grid/output/rgb_out.bytes
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/bilateral_grid/output/rgba_out.bytes b/final/Bitcode/Benchmarks/Halide/bilateral_grid/output/rgba_out.bytes
new file mode 100644
index 00000000..76957958
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/bilateral_grid/output/rgba_out.bytes
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/blur/CMakeLists.txt b/final/Bitcode/Benchmarks/Halide/blur/CMakeLists.txt
new file mode 100644
index 00000000..2c2db741
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/blur/CMakeLists.txt
@@ -0,0 +1,7 @@
+file(GLOB bcsources ${CMAKE_CURRENT_SOURCE_DIR}/../common/x86_halide_runtime.bc ${CMAKE_CURRENT_SOURCE_DIR}/halide_blur.bc)
+SET_SOURCE_FILES_PROPERTIES(${bcsources} PROPERTIES LANGUAGE CXX)
+
+llvm_multisource(halide_blur
+ ${CMAKE_CURRENT_SOURCE_DIR}/driver.cpp
+ ${bcsources}
+)
diff --git a/final/Bitcode/Benchmarks/Halide/blur/driver.cpp b/final/Bitcode/Benchmarks/Halide/blur/driver.cpp
new file mode 100644
index 00000000..61a7696b
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/blur/driver.cpp
@@ -0,0 +1,119 @@
+#include <emmintrin.h>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+
+#include "../common/benchmark.h"
+#include "../common/halide_image.h"
+#include "../common/halide_image_io.h"
+#include "halide_blur.h"
+
+using namespace Halide::Tools;
+
+double t;
+
+Image<uint16_t> blur(Image<uint16_t> in) {
+ Image<uint16_t> tmp(in.width()-8, in.height());
+ Image<uint16_t> out(in.width()-8, in.height()-2);
+
+ t = benchmark(10, 1, [&]() {
+ for (int y = 0; y < tmp.height(); y++)
+ for (int x = 0; x < tmp.width(); x++)
+ tmp(x, y) = (in(x, y) + in(x+1, y) + in(x+2, y))/3;
+
+ for (int y = 0; y < out.height(); y++)
+ for (int x = 0; x < out.width(); x++)
+ out(x, y) = (tmp(x, y) + tmp(x, y+1) + tmp(x, y+2))/3;
+ });
+
+ return out;
+}
+
+
+Image<uint16_t> blur_fast(Image<uint16_t> in) {
+ Image<uint16_t> out(in.width()-8, in.height()-2);
+
+ t = benchmark(10, 1, [&]() {
+ __m128i one_third = _mm_set1_epi16(21846);
+#pragma omp parallel for
+ for (int yTile = 0; yTile < out.height(); yTile += 32) {
+ __m128i a, b, c, sum, avg;
+ __m128i tmp[(128/8) * (32 + 2)];
+ for (int xTile = 0; xTile < out.width(); xTile += 128) {
+ __m128i *tmpPtr = tmp;
+ for (int y = 0; y < 32+2; y++) {
+ const uint16_t *inPtr = &(in(xTile, yTile+y));
+ for (int x = 0; x < 128; x += 8) {
+ a = _mm_load_si128((__m128i*)(inPtr));
+ b = _mm_loadu_si128((__m128i*)(inPtr+1));
+ c = _mm_loadu_si128((__m128i*)(inPtr+2));
+ sum = _mm_add_epi16(_mm_add_epi16(a, b), c);
+ avg = _mm_mulhi_epi16(sum, one_third);
+ _mm_store_si128(tmpPtr++, avg);
+ inPtr+=8;
+ }
+ }
+ tmpPtr = tmp;
+ for (int y = 0; y < 32; y++) {
+ __m128i *outPtr = (__m128i *)(&(out(xTile, yTile+y)));
+ for (int x = 0; x < 128; x += 8) {
+ a = _mm_load_si128(tmpPtr+(2*128)/8);
+ b = _mm_load_si128(tmpPtr+128/8);
+ c = _mm_load_si128(tmpPtr++);
+ sum = _mm_add_epi16(_mm_add_epi16(a, b), c);
+ avg = _mm_mulhi_epi16(sum, one_third);
+ _mm_store_si128(outPtr++, avg);
+ }
+ }
+ }
+ }
+ });
+
+ return out;
+}
+
+Image<uint16_t> blur_halide(Image<uint16_t> in) {
+ Image<uint16_t> out(in.width()-8, in.height()-2);
+
+ // Call it once to initialize the halide runtime stuff
+ halide_blur(in, out);
+
+ t = benchmark(10, 1, [&]() {
+ // Compute the same region of the output as blur_fast (i.e., we're
+ // still being sloppy with boundary conditions)
+ halide_blur(in, out);
+ });
+
+ return out;
+}
+
+int main(int argc, char **argv) {
+
+ Image<uint16_t> input(6408, 4802);
+
+ for (int y = 0; y < input.height(); y++) {
+ for (int x = 0; x < input.width(); x++) {
+ input(x, y) = rand() & 0xfff;
+ }
+ }
+
+ Image<uint16_t> blurry = blur(input);
+ double slow_time = t;
+
+ Image<uint16_t> speedy = blur_fast(input);
+ double fast_time = t;
+
+ Image<uint16_t> halide = blur_halide(input);
+ double halide_time = t;
+
+ printf("times: %f %f %f\n", slow_time, fast_time, halide_time);
+
+ for (int y = 64; y < input.height() - 64; y++) {
+ for (int x = 64; x < input.width() - 64; x++) {
+ if (blurry(x, y) != speedy(x, y) || blurry(x, y) != halide(x, y))
+ printf("difference at (%d,%d): %d %d %d\n", x, y, blurry(x, y), speedy(x, y), halide(x, y));
+ }
+ }
+
+ return 0;
+}
diff --git a/final/Bitcode/Benchmarks/Halide/blur/halide_blur.bc b/final/Bitcode/Benchmarks/Halide/blur/halide_blur.bc
new file mode 100644
index 00000000..4623ab6e
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/blur/halide_blur.bc
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/blur/halide_blur.h b/final/Bitcode/Benchmarks/Halide/blur/halide_blur.h
new file mode 100644
index 00000000..ab098b70
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/blur/halide_blur.h
@@ -0,0 +1,42 @@
+#ifndef HALIDE__halide_blur_h
+#define HALIDE__halide_blur_h
+#ifndef HALIDE_ATTRIBUTE_ALIGN
+ #ifdef _MSC_VER
+ #define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
+ #else
+ #define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
+ #endif
+#endif
+#ifndef BUFFER_T_DEFINED
+#define BUFFER_T_DEFINED
+#include <stdbool.h>
+#include <stdint.h>
+typedef struct buffer_t {
+ uint64_t dev;
+ uint8_t* host;
+ int32_t extent[4];
+ int32_t stride[4];
+ int32_t min[4];
+ int32_t elem_size;
+ HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;
+ HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;
+ HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];
+} buffer_t;
+#endif
+struct halide_filter_metadata_t;
+#ifndef HALIDE_FUNCTION_ATTRS
+#define HALIDE_FUNCTION_ATTRS
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int halide_blur(buffer_t *_p0_buffer, buffer_t *_blur_y_buffer) HALIDE_FUNCTION_ATTRS;
+int halide_blur_argv(void **args) HALIDE_FUNCTION_ATTRS;
+// Result is never null and points to constant static data
+const struct halide_filter_metadata_t *halide_blur_metadata() HALIDE_FUNCTION_ATTRS;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif
diff --git a/final/Bitcode/Benchmarks/Halide/common/benchmark.h b/final/Bitcode/Benchmarks/Halide/common/benchmark.h
new file mode 100644
index 00000000..922ea27b
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/common/benchmark.h
@@ -0,0 +1,58 @@
+#ifndef BENCHMARK_H
+#define BENCHMARK_H
+
+#include <limits>
+
+// Benchmark the operation 'op'. The number of iterations refers to
+// how many times the operation is run for each time measurement, the
+// result is the minimum over a number of samples runs. The result is the
+// amount of time in seconds for one iteration.
+#ifdef _WIN32
+
+union _LARGE_INTEGER;
+typedef union _LARGE_INTEGER LARGE_INTEGER;
+extern "C" int __stdcall QueryPerformanceCounter(LARGE_INTEGER*);
+extern "C" int __stdcall QueryPerformanceFrequency(LARGE_INTEGER*);
+
+template <typename F>
+double benchmark(int samples, int iterations, F op) {
+ int64_t freq;
+ QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
+
+ double best = std::numeric_limits<double>::infinity();
+ for (int i = 0; i < samples; i++) {
+ int64_t t1;
+ QueryPerformanceCounter((LARGE_INTEGER*)&t1);
+ for (int j = 0; j < iterations; j++) {
+ op();
+ }
+ int64_t t2;
+ QueryPerformanceCounter((LARGE_INTEGER*)&t2);
+ double dt = (t2 - t1) / static_cast<double>(freq);
+ if (dt < best) best = dt;
+ }
+ return best / iterations;
+}
+
+#else
+
+#include <chrono>
+
+template <typename F>
+double benchmark(int samples, int iterations, F op) {
+ double best = std::numeric_limits<double>::infinity();
+ for (int i = 0; i < samples; i++) {
+ auto t1 = std::chrono::high_resolution_clock::now();
+ for (int j = 0; j < iterations; j++) {
+ op();
+ }
+ auto t2 = std::chrono::high_resolution_clock::now();
+ double dt = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() / 1e6;
+ if (dt < best) best = dt;
+ }
+ return best / iterations;
+}
+
+#endif
+
+#endif
diff --git a/final/Bitcode/Benchmarks/Halide/common/halide_buffer.h b/final/Bitcode/Benchmarks/Halide/common/halide_buffer.h
new file mode 100644
index 00000000..90410e9d
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/common/halide_buffer.h
@@ -0,0 +1,27 @@
+#ifndef HALIDE_ATTRIBUTE_ALIGN
+ #ifdef _MSC_VER
+ #define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
+ #else
+ #define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
+ #endif
+#endif
+#ifndef BUFFER_T_DEFINED
+#define BUFFER_T_DEFINED
+#include <stdbool.h>
+#include <stdint.h>
+typedef struct buffer_t {
+ uint64_t dev;
+ uint8_t* host;
+ int32_t extent[4];
+ int32_t stride[4];
+ int32_t min[4];
+ int32_t elem_size;
+ HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;
+ HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;
+ HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];
+} buffer_t;
+#endif
+struct halide_filter_metadata_t;
+#ifndef HALIDE_FUNCTION_ATTRS
+#define HALIDE_FUNCTION_ATTRS
+#endif
diff --git a/final/Bitcode/Benchmarks/Halide/common/halide_image.h b/final/Bitcode/Benchmarks/Halide/common/halide_image.h
new file mode 100644
index 00000000..1d528640
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/common/halide_image.h
@@ -0,0 +1,234 @@
+// This header defines a simple Image class which wraps a buffer_t. This is
+// useful when interacting with a statically-compiled Halide pipeline emitted by
+// Func::compile_to_file, when you do not want to link your processing program
+// against Halide.h/libHalide.a.
+
+#ifndef HALIDE_TOOLS_IMAGE_H
+#define HALIDE_TOOLS_IMAGE_H
+
+#include <cassert>
+#include <cstdlib>
+#include <limits>
+#include <memory>
+#include <stdint.h> // <cstdint> requires C++11
+
+//#include "HalideRuntime.h"
+#include "halide_buffer.h"
+
+namespace Halide {
+namespace Tools {
+
+template<typename T>
+class Image {
+ struct Contents {
+ Contents(const buffer_t &b, uint8_t *a) : buf(b), ref_count(1), alloc(a) {}
+ buffer_t buf;
+ int ref_count;
+ uint8_t *alloc;
+
+ void dev_free() {
+ //no-op, currently running on x86
+ //halide_device_free(NULL, &buf);
+ }
+
+ ~Contents() {
+ if (buf.dev) {
+ dev_free();
+ }
+ delete[] alloc;
+ }
+ };
+
+ Contents *contents;
+
+ void initialize(int x, int y, int z, int w, bool interleaved) {
+ buffer_t buf = {0};
+ buf.extent[0] = x;
+ buf.extent[1] = y;
+ buf.extent[2] = z;
+ buf.extent[3] = w;
+ if (interleaved) {
+ buf.stride[0] = z;
+ buf.stride[1] = x*z;
+ buf.stride[2] = 1;
+ buf.stride[3] = x*y*z;
+ } else {
+ buf.stride[0] = 1;
+ buf.stride[1] = x;
+ buf.stride[2] = x*y;
+ buf.stride[3] = x*y*z;
+ }
+ buf.elem_size = sizeof(T);
+
+ size_t size = 1;
+ if (x) size *= x;
+ if (y) size *= y;
+ if (z) size *= z;
+ if (w) size *= w;
+
+ uint8_t *ptr = new uint8_t[sizeof(T)*size + 40];
+ buf.host = ptr;
+ buf.host_dirty = false;
+ buf.dev_dirty = false;
+ buf.dev = 0;
+ while ((size_t)buf.host & 0x1f) buf.host++;
+ contents = new Contents(buf, ptr);
+ }
+
+public:
+ typedef T ElemType;
+
+ Image() : contents(NULL) {
+ }
+
+ Image(int x, int y = 0, int z = 0, int w = 0, bool interleaved = false) {
+ initialize(x, y, z, w, interleaved);
+ }
+
+ Image(const Image &other) : contents(other.contents) {
+ if (contents) {
+ contents->ref_count++;
+ }
+ }
+
+ ~Image() {
+ if (contents) {
+ contents->ref_count--;
+ if (contents->ref_count == 0) {
+ delete contents;
+ contents = NULL;
+ }
+ }
+ }
+
+ Image &operator=(const Image &other) {
+ Contents *p = other.contents;
+ if (p) {
+ p->ref_count++;
+ }
+ if (contents) {
+ contents->ref_count--;
+ if (contents->ref_count == 0) {
+ delete contents;
+ contents = NULL;
+ }
+ }
+ contents = p;
+ return *this;
+ }
+
+ T *data() { return (T*)contents->buf.host; }
+
+ const T *data() const { return (T*)contents->buf.host; }
+
+ void set_host_dirty(bool dirty = true) {
+ // If you use data directly, you must also call this so that
+ // gpu-side code knows that it needs to copy stuff over.
+ contents->buf.host_dirty = dirty;
+ }
+
+ void copy_to_host() {
+ if (contents->buf.dev_dirty) {
+ //halide_copy_to_host(NULL, &contents->buf);
+ contents->buf.dev_dirty = false;
+ }
+ }
+
+ void copy_to_device(const struct halide_device_interface *device_interface) {
+ if (contents->buf.host_dirty) {
+ // If host
+ //halide_copy_to_device(NULL, &contents->buf, device_interface);
+ contents->buf.host_dirty = false;
+ }
+ }
+
+ void dev_free() {
+ assert(!contents->buf.dev_dirty);
+ contents->dev_free();
+ }
+
+ Image(T vals[]) {
+ initialize(sizeof(vals)/sizeof(T));
+ for (int i = 0; i < sizeof(vals); i++) (*this)(i) = vals[i];
+ }
+
+ /** Make sure you've called copy_to_host before you start
+ * accessing pixels directly. */
+ T &operator()(int x, int y = 0, int z = 0, int w = 0) {
+ T *ptr = (T *)contents->buf.host;
+ x -= contents->buf.min[0];
+ y -= contents->buf.min[1];
+ z -= contents->buf.min[2];
+ w -= contents->buf.min[3];
+ size_t s0 = contents->buf.stride[0];
+ size_t s1 = contents->buf.stride[1];
+ size_t s2 = contents->buf.stride[2];
+ size_t s3 = contents->buf.stride[3];
+ return ptr[s0 * x + s1 * y + s2 * z + s3 * w];
+ }
+
+ /** Make sure you've called copy_to_host before you start
+ * accessing pixels directly */
+ const T &operator()(int x, int y = 0, int z = 0, int w = 0) const {
+ const T *ptr = (const T *)contents->buf.host;
+ x -= contents->buf.min[0];
+ y -= contents->buf.min[1];
+ z -= contents->buf.min[2];
+ w -= contents->buf.min[3];
+ size_t s0 = contents->buf.stride[0];
+ size_t s1 = contents->buf.stride[1];
+ size_t s2 = contents->buf.stride[2];
+ size_t s3 = contents->buf.stride[3];
+ return ptr[s0 * x + s1 * y + s2 * z + s3 * w];
+ }
+
+ operator buffer_t *() const {
+ return &(contents->buf);
+ }
+
+ int width() const {
+ return dimensions() > 0 ? contents->buf.extent[0] : 1;
+ }
+
+ int height() const {
+ return dimensions() > 1 ? contents->buf.extent[1] : 1;
+ }
+
+ int channels() const {
+ return dimensions() > 2 ? contents->buf.extent[2] : 1;
+ }
+
+ int dimensions() const {
+ for (int i = 0; i < 4; i++) {
+ if (contents->buf.extent[i] == 0) {
+ return i;
+ }
+ }
+ return 4;
+ }
+
+ int stride(int dim) const {
+ return contents->buf.stride[dim];
+ }
+
+ int min(int dim) const {
+ return contents->buf.min[dim];
+ }
+
+ int extent(int dim) const {
+ return contents->buf.extent[dim];
+ }
+
+ void set_min(int x, int y = 0, int z = 0, int w = 0) {
+ contents->buf.min[0] = x;
+ contents->buf.min[1] = y;
+ contents->buf.min[2] = z;
+ contents->buf.min[3] = w;
+ }
+};
+
+} // namespace Tools
+} // namespace Halide
+
+#include "halide_image_info.h"
+#endif // HALIDE_TOOLS_IMAGE_H
diff --git a/final/Bitcode/Benchmarks/Halide/common/halide_image_info.h b/final/Bitcode/Benchmarks/Halide/common/halide_image_info.h
new file mode 100644
index 00000000..a3539857
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/common/halide_image_info.h
@@ -0,0 +1,314 @@
+// This header defines several methods useful for debugging programs that
+// operate on the Image class supporting images with arbitrary dimensions.
+//
+// Image<uint16_t> input = load_image(argv[1]);
+//
+// info(input, "input"); // Output the Image header info
+// dump(input, "input"); // Dump the Image data
+// stats(input, "input"); // Report statistics for the Image
+//
+//
+#ifndef HALIDE_TOOLS_IMAGE_INFO_H
+#define HALIDE_TOOLS_IMAGE_INFO_H
+
+#include <cassert>
+#include <cstdlib>
+#include <limits>
+#include <memory>
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+#include <stdint.h>
+
+#include "halide_buffer.h"
+
+namespace Halide {
+namespace Tools {
+
+static inline void print_dimid(int d, int val) {
+ static const char *dimid[] = {"x", "y", "z", "w"};
+ int numdimid = 4;
+ if (d < numdimid) {
+ std::cout << " " << dimid[d] << ":" << val;
+ } else {
+ std::cout << " extent[" << d << "]:" << val;
+ }
+}
+
+static inline void print_loc(int32_t *loc, int dim, int32_t *min) {
+ for (int d = 0; d < dim; d++) {
+ if (d) {
+ std::cout << ",";
+ }
+ std::cout << loc[d] + min[d];
+ }
+}
+
+static inline void print_memalign(intptr_t val) {
+ intptr_t align_chk = 1024*1024;
+ while (align_chk > 0) {
+ if ((val & (align_chk-1)) == 0) {
+ char aunit = ' ';
+ if (align_chk >= 1024) {
+ align_chk >>= 10;
+ aunit = 'K';
+ }
+ if (align_chk >= 1024) {
+ align_chk >>= 10;
+ aunit = 'M';
+ }
+ std::cout << "align:" << align_chk;
+ if (aunit != ' ') {
+ std::cout << aunit;
+ }
+ break;
+ }
+ align_chk >>= 1;
+ }
+}
+
+template<typename T>
+void info(Image<T> &img, const char *tag = "Image") {
+ buffer_t *buf = &(*img);
+ int32_t *min = buf->min;
+ int32_t *extent = buf->extent;
+ int32_t *stride = buf->stride;
+ int dim = img.dimensions();
+ int img_bpp = buf->elem_size;
+ int img_tsize = sizeof(T);
+ int img_csize = sizeof(Image<T>);
+ int img_bsize = sizeof(buffer_t);
+ int32_t size = 1;
+ uint64_t dev = buf->dev;
+ bool host_dirty = buf->host_dirty;
+ bool dev_dirty = buf->dev_dirty;
+
+ std::cout << std::endl
+ << "-----------------------------------------------------------------------------";
+ std::cout << std::endl << "Image info: " << tag
+ << " dim:" << dim << " bpp:" << img_bpp;
+ for (int d = 0; d < dim; d++) {
+ print_dimid(d, extent[d]);
+ size *= extent[d];
+ }
+ std::cout << std::endl;
+ std::cout << tag << " class = 0x" << std::left << std::setw(10) << (void*)img
+ << std::right << " # ";
+ print_memalign((intptr_t)&img); std::cout << std::endl;
+ std::cout << tag << " class size = "<< img_csize
+ << " (0x"<< std::hex << img_csize << std::dec <<")\n";
+ std::cout << tag << "-class => [ 0x" << (void*)&img
+ << ", 0x" << (void*)(((char*)&img)+img_csize-1)
+ << " ], # size:" << img_csize << ", ";
+ print_memalign((intptr_t)&img); std::cout << std::endl;
+ std::cout << tag << " buf_t size = "<< img_bsize
+ << " (0x"<< std::hex << img_bsize << std::dec <<")\n";
+ std::cout << tag << "-buf_t => [ 0x" << (void*)&buf
+ << ", 0x" << (void*)(((char*)&buf)+img_bsize-1)
+ << " ], # size:" << img_bsize << ", ";
+ print_memalign((intptr_t)&buf); std::cout << std::endl;
+ if (img_bpp != img_tsize) {
+ std::cout << tag << " sizeof(T) = " << img_tsize << std::endl;
+ }
+ std::cout << tag << " host_dirty = " << host_dirty << std::endl;
+ std::cout << tag << " dev_dirty = " << dev_dirty << std::endl;
+ std::cout << tag << " dev handle = " << dev << std::endl;
+ std::cout << tag << " elem_size = " << img_bpp << std::endl;
+ std::cout << tag << " img_dim = " << dim << std::endl;
+ std::cout << tag << " width = " << img.width() << std::endl;
+ std::cout << tag << " height = " << img.height() << std::endl;
+ std::cout << tag << " channels = " << img.channels() << std::endl;
+ std::cout << tag << " extent[] = ";
+ for (int d = 0; d < dim; d++) {
+ std::cout << extent[d] << " ";
+ }
+ std::cout << std::endl;
+ std::cout << tag << " min[] = ";
+ for (int d = 0; d < dim; d++) {
+ std::cout << min[d] << " ";
+ }
+ std::cout << std::endl;
+ std::cout << tag << " stride[] = ";
+ for (int d = 0; d < dim; d++) {
+ std::cout << stride[d] << " ";
+ }
+ std::cout << std::endl;
+ if (img_bpp > 1) {
+ for (int d = 0; d < dim; d++) {
+ std::cout << tag << " str[" << d << "]*bpp = "
+ << std::left << std::setw(12) << stride[d] * img_bpp
+ << std::right << " # ";
+ print_memalign(stride[d] * img_bpp); std::cout << std::endl;
+ }
+ }
+
+ const T *img_data = img.data();
+ const T *img_next = img_data + size;
+ int32_t img_size = size * img_bpp;
+ int32_t data_size = (char*)img_next - (char*)img_data;
+ std::cout << tag << " size = " << size << " (0x"
+ << std::hex << size << ")" << std::dec << std::endl;
+ std::cout << tag << " img_size = " << img_size << " (0x"
+ << std::hex << img_size << ")" << std::dec << std::endl;
+ std::cout << tag << " data = 0x" << std::left << std::setw(10) << (void *)img_data
+ << std::right << " # ";
+ print_memalign((intptr_t)img_data); std::cout << std::endl;
+ std::cout << tag << " next = 0x" << std::left << std::setw(10) << (void *)img_next
+ << std::right << " # ";
+ print_memalign((intptr_t)img_next); std::cout << std::endl;
+ std::cout << tag << " data_size = " << data_size << " (0x"
+ << std::hex << data_size << ")" << std::dec << std::endl;
+ std::cout << tag << " => [ 0x" << (void *)img_data
+ << ", 0x" << (void *)(((char*)img_next)-1)
+ << "], # size:" << data_size << ", ";
+ print_memalign((intptr_t)img_data); std::cout << std::endl;
+}
+
+template<typename T>
+void dump(Image<T> &img, const char *tag = "Image") {
+ buffer_t *buf = &(*img);
+ int32_t *min = buf->min;
+ int32_t *extent = buf->extent;
+ int32_t *stride = buf->stride;
+ int dim = img.dimensions();
+ int bpp = buf->elem_size;
+ int32_t size = 1;
+
+ std::cout << std::endl << "Image dump: " << tag
+ << " dim:" << dim << " bpp:" << bpp;
+ for (int d = 0; d < dim; d++) {
+ print_dimid(d, extent[d]);
+ size *= extent[d];
+ }
+
+ // Arbitrary dimension image traversal
+ const T *ptr = img.data();
+ int32_t curloc[dim];
+ for (int d = 1; d < dim; d++) {
+ curloc[d] = -1;
+ }
+ curloc[0] = 0;
+
+ for (int32_t i = 0; i < size; i++) {
+ // Track changes in position in higher dimensions
+ for (int d = 1; d < dim; d++) {
+ if ((i % stride[d]) == 0) {
+ curloc[d]++;
+ for (int din = 0; din < d; din++) {
+ curloc[din] = 0;
+ }
+ std::cout << std::endl;
+ // Print separators for dimensions beyond (x0,y1)
+ if (d > 1) {
+ print_dimid(d, curloc[d]+min[d]);
+ std::cout << "\n==========================================";
+ }
+ }
+ }
+
+ // Check for start of row (or wrap due to width)
+ if ((curloc[0] % 16) == 0) {
+ int widx = 0;
+ std::ostringstream idx;
+ if (dim > 1) { // Multi-dim, just report (x0,y1) on each row
+ idx << "(" << curloc[0]+min[0] << "," << curloc[1]+min[1] << ")";
+ widx = 12;
+ } else { // Single-dim
+ idx << curloc[0]+min[0];
+ widx = 4;
+ }
+ std::cout << std::endl << std::setw(widx) << idx.str() << ": ";
+ }
+
+ // Display data
+ std::cout << std::setw(4) << *ptr++ + 0 << " ";
+
+ curloc[0]++; // Track position in row
+ }
+ std::cout << std::endl;
+}
+
+template<typename T>
+void stats(Image<T> &img, const char *tag = "Image") {
+ buffer_t *buf = &(*img);
+ int32_t *min = buf->min;
+ int32_t *extent = buf->extent;
+ int32_t *stride = buf->stride;
+ int dim = img.dimensions();
+ int bpp = buf->elem_size;
+ int32_t size = 1;
+ std::cout << std::endl << "Image stats: " << tag
+ << " dim:" << dim << " bpp:" << bpp;
+ for (int d = 0; d < dim; d++) {
+ print_dimid(d, extent[d]);
+ size *= extent[d];
+ }
+
+ // Arbitrary dimension image traversal
+ const T *ptr = img.data();
+ int32_t curloc[dim];
+ for (int d = 1; d < dim; d++) {
+ curloc[d] = -1;
+ }
+ curloc[0] = 0;
+
+ // Statistics
+ int32_t cnt = 0;
+ double sum = 0;
+ T minval = *ptr;
+ T maxval = *ptr;
+ int32_t minloc[dim];
+ int32_t maxloc[dim];
+ for (int d = 0; d < dim; d++) {
+ minloc[d] = 0;
+ maxloc[d] = 0;
+ }
+
+ for (int32_t i = 0; i < size; i++) {
+ // Track changes in position in higher dimensions
+ for (int d = 1; d < dim; d++) {
+ if ((i % stride[d]) == 0) {
+ curloc[d]++;
+ for (int din = 0; din < d; din++) {
+ curloc[din] = 0;
+ }
+ }
+ }
+
+ // Collect data
+ T val = *ptr++;
+ sum += val;
+ cnt++;
+ if (val < minval) {
+ minval = val;
+ for (int d = 0; d < dim; d++) {
+ minloc[d] = curloc[d];
+ }
+ }
+ if (val > maxval) {
+ maxval = val;
+ for (int d = 0; d < dim; d++) {
+ maxloc[d] = curloc[d];
+ }
+ }
+
+ curloc[0]++; // Track position in row
+ }
+
+ double avg = sum / cnt;
+ std::cout << std::endl;
+ std::cout << "min = " << minval + 0 << " @ (";
+ print_loc(minloc, dim, min);
+ std::cout << ")" << std::endl;
+ std::cout << "max = " << maxval + 0 << " @ (";
+ print_loc(maxloc, dim, min);
+ std::cout << ")" << std::endl;
+ std::cout << "mean = " << avg << std::endl;
+ std::cout << "N = " << cnt << std::endl;
+ std::cout << std::endl;
+}
+
+} // namespace Tools
+} // namespace Halide
+
+#endif // HALIDE_TOOLS_IMAGE_INFO_H
diff --git a/final/Bitcode/Benchmarks/Halide/common/halide_image_io.h b/final/Bitcode/Benchmarks/Halide/common/halide_image_io.h
new file mode 100644
index 00000000..23b8f0d7
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/common/halide_image_io.h
@@ -0,0 +1,320 @@
+// This simple PNG IO library works with *both* the Halide::Buffer<T> type *and*
+// the simple halide_image.h version. Also now includes PPM support for faster load/save.
+
+#ifndef HALIDE_IMAGE_IO_H
+#define HALIDE_IMAGE_IO_H
+
+#include <algorithm>
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <vector>
+
+namespace Halide {
+namespace Tools {
+
+namespace Internal {
+
+typedef bool (*CheckFunc)(bool condition, const char* fmt, ...);
+
+inline bool CheckFail(bool condition, const char* fmt, ...) {
+ if (!condition) {
+ char buffer[1024];
+ va_list args;
+ va_start(args, fmt);
+ vsnprintf(buffer, sizeof(buffer), fmt, args);
+ va_end(args);
+ fprintf(stderr, "%s", buffer);
+ exit(-1);
+ }
+ return condition;
+}
+
+inline bool CheckReturn(bool condition, const char* fmt, ...) {
+ return condition;
+}
+
+// Convert to u8
+inline void convert(uint8_t in, uint8_t &out) {out = in;}
+inline void convert(uint16_t in, uint8_t &out) {out = in >> 8;}
+inline void convert(uint32_t in, uint8_t &out) {out = in >> 24;}
+inline void convert(int8_t in, uint8_t &out) {out = in;}
+inline void convert(int16_t in, uint8_t &out) {out = in >> 8;}
+inline void convert(int32_t in, uint8_t &out) {out = in >> 24;}
+inline void convert(float in, uint8_t &out) {out = (uint8_t)(in*255.0f);}
+inline void convert(double in, uint8_t &out) {out = (uint8_t)(in*255.0f);}
+
+// Convert to u16
+inline void convert(uint8_t in, uint16_t &out) {out = in << 8;}
+inline void convert(uint16_t in, uint16_t &out) {out = in;}
+inline void convert(uint32_t in, uint16_t &out) {out = in >> 16;}
+inline void convert(int8_t in, uint16_t &out) {out = in << 8;}
+inline void convert(int16_t in, uint16_t &out) {out = in;}
+inline void convert(int32_t in, uint16_t &out) {out = in >> 16;}
+inline void convert(float in, uint16_t &out) {out = (uint16_t)(in*65535.0f);}
+inline void convert(double in, uint16_t &out) {out = (uint16_t)(in*65535.0f);}
+
+// Convert from u8
+inline void convert(uint8_t in, uint32_t &out) {out = in << 24;}
+inline void convert(uint8_t in, int8_t &out) {out = in;}
+inline void convert(uint8_t in, int16_t &out) {out = in << 8;}
+inline void convert(uint8_t in, int32_t &out) {out = in << 24;}
+inline void convert(uint8_t in, float &out) {out = in/255.0f;}
+inline void convert(uint8_t in, double &out) {out = in/255.0f;}
+
+// Convert from u16
+inline void convert(uint16_t in, uint32_t &out) {out = in << 16;}
+inline void convert(uint16_t in, int8_t &out) {out = in >> 8;}
+inline void convert(uint16_t in, int16_t &out) {out = in;}
+inline void convert(uint16_t in, int32_t &out) {out = in << 16;}
+inline void convert(uint16_t in, float &out) {out = in/65535.0f;}
+inline void convert(uint16_t in, double &out) {out = in/65535.0f;}
+
+
+inline bool ends_with_ignore_case(const std::string &ac, const std::string &bc) {
+ if (ac.length() < bc.length()) { return false; }
+ std::string a = ac, b = bc;
+ std::transform(a.begin(), a.end(), a.begin(), ::tolower);
+ std::transform(b.begin(), b.end(), b.begin(), ::tolower);
+ return a.compare(a.length()-b.length(), b.length(), b) == 0;
+}
+
+inline bool is_little_endian() {
+ int value = 1;
+ return ((char *) &value)[0] == 1;
+}
+
+inline void swap_endian_16(bool little_endian, uint16_t &value) {
+ if (little_endian) {
+ value = ((value & 0xff)<<8)|((value & 0xff00)>>8);
+ }
+}
+
+struct FileOpener {
+ FileOpener(const char* filename, const char* mode) : f(fopen(filename, mode)) {
+ // nothing
+ }
+ ~FileOpener() {
+ if (f != nullptr) {
+ fclose(f);
+ }
+ }
+ FILE * const f;
+};
+
+} // namespace Internal
+
+
+struct BytesImgStruct {
+ int dims[3]; //width, height, channels
+ float* ptr;
+};
+
+
+template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
+bool load_bytes(const std::string &filename, ImageType *im) {
+ Internal::FileOpener f(filename.c_str(), "rb");
+ if (!check(f.f != nullptr, "File %s could not be opened for reading\n", filename.c_str())) return false;
+
+ BytesImgStruct ptrStruct;
+ if (!check(fread(ptrStruct.dims, sizeof(int), 3, f.f) == 3,
+ "Could not read dimensions (width, height, channels) for .bytes image\n")) return false;
+ int img_size = ptrStruct.dims[0]*ptrStruct.dims[1];
+ if (!check(ptrStruct.dims[0] > 0 && ptrStruct.dims[1] > 0 && ptrStruct.dims[2] > 0,
+ "File %s does not have valid input\n", filename.c_str())) return false;
+ ptrStruct.ptr = (float*) malloc(img_size * sizeof(float));
+ if (!check(fread(ptrStruct.ptr, sizeof(float), img_size, f.f) == img_size,
+ "Could not read .bytes image\n")) return false;
+
+ if (ptrStruct.dims[2] != 1) {
+ *im = ImageType(ptrStruct.dims[0], ptrStruct.dims[1], ptrStruct.dims[2]);
+ } else {
+ *im = ImageType(ptrStruct.dims[0], ptrStruct.dims[1]);
+ }
+ typename ImageType::ElemType *ptr = (typename ImageType::ElemType*)im->data();
+ for (int i=0; i<img_size; i++)
+ ptr[i] = (typename ImageType::ElemType) ptrStruct.ptr[i];
+ free(ptrStruct.ptr);
+ return true;
+}
+
+template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
+bool save_bytes(ImageType &im, const std::string &filename) {
+ BytesImgStruct ptrStruct;
+ ptrStruct.dims[0] = im.width();
+ ptrStruct.dims[1] = im.height();
+ ptrStruct.dims[2] = im.channels();
+ int img_size = ptrStruct.dims[0]*ptrStruct.dims[1];
+ ptrStruct.ptr = (float*) malloc(img_size * sizeof(float));
+ typename ImageType::ElemType *ptr = (typename ImageType::ElemType*)im.data();
+ for (int i=0; i<img_size; i++)
+ ptrStruct.ptr[i] = (float) ptr[i];
+
+ Internal::FileOpener f(filename.c_str(), "wb");
+ if (!check(f.f != nullptr, "File %s could not be opened for writing\n", filename.c_str())) return false;
+ if (!check(fwrite (ptrStruct.dims, sizeof(int), 3, f.f),
+ "Could not write dimensions (width, height, channels) for .bytes image\n")) return false;
+ if (!check(fwrite (ptrStruct.ptr, sizeof(float), img_size, f.f),
+ "Could not write .bytes image\n")) return false;
+ return true;
+}
+
+template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
+bool load_ppm(const std::string &filename, ImageType *im) {
+
+ /* open file and test for it being a ppm */
+ Internal::FileOpener f(filename.c_str(), "rb");
+ if (!check(f.f != nullptr, "File %s could not be opened for reading\n", filename.c_str())) return false;
+
+ int width, height, maxval;
+ char header[256];
+ if (!check(fscanf(f.f, "%255s", header) == 1, "Could not read PPM header\n")) return false;
+ if (!check(fscanf(f.f, "%d %d\n", &width, &height) == 2, "Could not read PPM width and height\n")) return false;
+ if (!check(fscanf(f.f, "%d", &maxval) == 1, "Could not read PPM max value\n")) return false;
+ if (!check(fgetc(f.f) != EOF, "Could not read char from PPM\n")) return false;
+
+ int bit_depth = 0;
+ if (maxval == 255) { bit_depth = 8; }
+ else if (maxval == 65535) { bit_depth = 16; }
+ else { if (!check(false, "Invalid bit depth in PPM\n")) return false; }
+
+ if (!check(header == std::string("P6") || header == std::string("p6"), "Input is not binary PPM\n")) return false;
+
+ int channels = 3;
+ *im = ImageType(width, height, channels);
+
+ // convert the data to ImageType::ElemType
+ if (bit_depth == 8) {
+ std::vector<uint8_t> data(width*height*3);
+ if (!check(fread((void *) data.data(), sizeof(uint8_t), width*height*3, f.f) == (size_t) (width*height*3), "Could not read PPM 8-bit data\n")) return false;
+ typename ImageType::ElemType *im_data = (typename ImageType::ElemType*) im->data();
+ for (int y = 0; y < im->height(); y++) {
+ uint8_t *row = &data[(y*width)*3];
+ for (int x = 0; x < im->width(); x++) {
+ Internal::convert(*row++, im_data[(0*height+y)*width+x]);
+ Internal::convert(*row++, im_data[(1*height+y)*width+x]);
+ Internal::convert(*row++, im_data[(2*height+y)*width+x]);
+ }
+ }
+ } else if (bit_depth == 16) {
+ int little_endian = Internal::is_little_endian();
+ std::vector<uint16_t> data(width*height*3);
+ if (!check(fread((void *) data.data(), sizeof(uint16_t), width*height*3, f.f) == (size_t) (width*height*3), "Could not read PPM 16-bit data\n")) return false;
+ typename ImageType::ElemType *im_data = (typename ImageType::ElemType*) im->data();
+ for (int y = 0; y < im->height(); y++) {
+ uint16_t *row = &data[(y*width)*3];
+ for (int x = 0; x < im->width(); x++) {
+ uint16_t value;
+ value = *row++; Internal::swap_endian_16(little_endian, value); Internal::convert(value, im_data[(0*height+y)*width+x]);
+ value = *row++; Internal::swap_endian_16(little_endian, value); Internal::convert(value, im_data[(1*height+y)*width+x]);
+ value = *row++; Internal::swap_endian_16(little_endian, value); Internal::convert(value, im_data[(2*height+y)*width+x]);
+ }
+ }
+ }
+ (*im)(0,0,0) = (*im)(0,0,0); /* Mark dirty inside read/write functions. */
+
+ return true;
+}
+
+// "im" is not const-ref because copy_to_host() is not const.
+template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
+bool save_ppm(ImageType &im, const std::string &filename) {
+ im.copy_to_host();
+
+ unsigned int bit_depth = sizeof(typename ImageType::ElemType) == 1 ? 8: 16;
+
+ Internal::FileOpener f(filename.c_str(), "wb");
+ if (!check(f.f != nullptr, "File %s could not be opened for writing\n", filename.c_str())) return false;
+ fprintf(f.f, "P6\n%d %d\n%d\n", im.width(), im.height(), (1<<bit_depth)-1);
+ int width = im.width(), height = im.height();
+
+ if (bit_depth == 8) {
+ std::vector<uint8_t> data(width*height*3);
+ for (int y = 0; y < im.height(); y++) {
+ for (int x = 0; x < im.width(); x++) {
+ uint8_t *p = &data[(y*width+x)*3];
+ for (int c = 0; c < im.channels(); c++) {
+ Internal::convert(im(x, y, c), p[c]);
+ }
+ }
+ }
+ if (!check(fwrite((void *) data.data(), sizeof(uint8_t), width*height*3, f.f) == (size_t) (width*height*3), "Could not write PPM 8-bit data\n")) return false;
+ } else if (bit_depth == 16) {
+ int little_endian = Internal::is_little_endian();
+ std::vector<uint16_t> data(width*height*3);
+ for (int y = 0; y < im.height(); y++) {
+ for (int x = 0; x < im.width(); x++) {
+ uint16_t *p = &data[(y*width+x)*3];
+ for (int c = 0; c < im.channels(); c++) {
+ uint16_t value;
+ Internal::convert(im(x, y, c), value);
+ Internal::swap_endian_16(little_endian, value);
+ p[c] = value;
+ }
+ }
+ }
+ if (!check(fwrite((void *) data.data(), sizeof(uint16_t), width*height*3, f.f) == (size_t) (width*height*3), "Could not write PPM 16-bit data\n")) return false;
+ } else {
+ return check(false, "We only support saving 8- and 16-bit images.");
+ }
+ return true;
+}
+
+// Returns false upon failure.
+template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
+bool load(const std::string &filename, ImageType *im) {
+ if (Internal::ends_with_ignore_case(filename, ".ppm")) {
+ return load_ppm<ImageType, check>(filename, im);
+ } else if (Internal::ends_with_ignore_case(filename, ".bytes")) {
+ return (load_bytes<ImageType, check>(filename, im));
+ } else {
+ return check(false, "[load] unsupported file extension (bytes|ppm supported)");
+ }
+}
+
+// Returns false upon failure.
+template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
+bool save(ImageType &im, const std::string &filename) {
+ if (Internal::ends_with_ignore_case(filename, ".ppm")) {
+ return save_ppm<ImageType, check>(im, filename);
+ } else if (Internal::ends_with_ignore_case(filename, ".bytes")) {
+ return save_bytes<ImageType, check>(im, filename);
+ } else {
+ return check(false, "[save] unsupported file extension (bytes|ppm supported)");
+ }
+}
+
+// Fancy wrapper to call load() with CheckFail, inferring the return type;
+// this allows you to simply use
+//
+// Image im = load_image("filename");
+//
+// without bothering to check error results (all errors simply abort).
+class load_image {
+public:
+ load_image(const std::string &f) : filename(f) {}
+ template<typename ImageType>
+ inline operator ImageType() {
+ ImageType im;
+ (void) load<ImageType, Internal::CheckFail>(filename, &im);
+ return im;
+ }
+private:
+ const std::string filename;
+};
+
+// Fancy wrapper to call save() with CheckFail; this allows you to simply use
+//
+// save_image(im, "filename");
+//
+// without bothering to check error results (all errors simply abort).
+template<typename ImageType>
+void save_image(ImageType &im, const std::string &filename) {
+ (void) save<ImageType, Internal::CheckFail>(im, filename);
+}
+
+} // namespace Tools
+} // namespace Halide
+
+#endif // HALIDE_IMAGE_IO_H
diff --git a/final/Bitcode/Benchmarks/Halide/common/x86_halide_runtime.bc b/final/Bitcode/Benchmarks/Halide/common/x86_halide_runtime.bc
new file mode 100644
index 00000000..3c8ff350
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/common/x86_halide_runtime.bc
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/images/rgb.bytes b/final/Bitcode/Benchmarks/Halide/images/rgb.bytes
new file mode 100644
index 00000000..42ac0e97
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/images/rgb.bytes
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/images/rgba.bytes b/final/Bitcode/Benchmarks/Halide/images/rgba.bytes
new file mode 100644
index 00000000..cd0c9e50
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/images/rgba.bytes
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/local_laplacian/CMakeLists.txt b/final/Bitcode/Benchmarks/Halide/local_laplacian/CMakeLists.txt
new file mode 100644
index 00000000..006a3772
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/local_laplacian/CMakeLists.txt
@@ -0,0 +1,10 @@
+file(GLOB bcsources ${CMAKE_CURRENT_SOURCE_DIR}/../common/x86_halide_runtime.bc ${CMAKE_CURRENT_SOURCE_DIR}/local_laplacian.bc)
+SET_SOURCE_FILES_PROPERTIES(${bcsources} PROPERTIES LANGUAGE CXX)
+
+test_img_input(rgb 8 1 1 10)
+test_img_input(rgba 8 1 1 10)
+
+llvm_multisource(halide_local_laplacian
+ ${CMAKE_CURRENT_SOURCE_DIR}/driver.cpp
+ ${bcsources}
+)
diff --git a/final/Bitcode/Benchmarks/Halide/local_laplacian/driver.cpp b/final/Bitcode/Benchmarks/Halide/local_laplacian/driver.cpp
new file mode 100644
index 00000000..9e0f4806
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/local_laplacian/driver.cpp
@@ -0,0 +1,33 @@
+#include "../common/benchmark.h"
+#include "../common/halide_image.h"
+#include "../common/halide_image_io.h"
+#include "local_laplacian.h"
+
+using namespace Halide::Tools;
+
+int main(int argc, char **argv) {
+ if (argc < 7) {
+ printf("Usage: ./process input.png levels alpha beta timing_iterations output.png\n"
+ "e.g.: ./process input.png 8 1 1 10 output.png\n");
+ return 0;
+ }
+
+ Image<uint16_t> input = load_image(argv[1]);
+ int levels = atoi(argv[2]);
+ float alpha = atof(argv[3]), beta = atof(argv[4]);
+ Image<uint16_t> output(input.width(), input.height(), 3);
+ int timing = atoi(argv[5]);
+
+ local_laplacian(levels, alpha/(levels-1), beta, input, output);
+
+ // Timing code
+ double best = benchmark(timing, 1, [&]() {
+ local_laplacian(levels, alpha/(levels-1), beta, input, output);
+ });
+ printf("%gus\n", best * 1e6);
+
+ save_image(output, argv[6]);
+
+ return 0;
+}
+
diff --git a/final/Bitcode/Benchmarks/Halide/local_laplacian/local_laplacian.bc b/final/Bitcode/Benchmarks/Halide/local_laplacian/local_laplacian.bc
new file mode 100644
index 00000000..878ac6bf
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/local_laplacian/local_laplacian.bc
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/local_laplacian/local_laplacian.h b/final/Bitcode/Benchmarks/Halide/local_laplacian/local_laplacian.h
new file mode 100644
index 00000000..4d965fe7
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/local_laplacian/local_laplacian.h
@@ -0,0 +1,42 @@
+#ifndef HALIDE__local_laplacian_h
+#define HALIDE__local_laplacian_h
+#ifndef HALIDE_ATTRIBUTE_ALIGN
+ #ifdef _MSC_VER
+ #define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
+ #else
+ #define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
+ #endif
+#endif
+#ifndef BUFFER_T_DEFINED
+#define BUFFER_T_DEFINED
+#include <stdbool.h>
+#include <stdint.h>
+typedef struct buffer_t {
+ uint64_t dev;
+ uint8_t* host;
+ int32_t extent[4];
+ int32_t stride[4];
+ int32_t min[4];
+ int32_t elem_size;
+ HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;
+ HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;
+ HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];
+} buffer_t;
+#endif
+struct halide_filter_metadata_t;
+#ifndef HALIDE_FUNCTION_ATTRS
+#define HALIDE_FUNCTION_ATTRS
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int local_laplacian(int32_t _levels, float _alpha, float _beta, buffer_t *_input_buffer, buffer_t *_local_laplacian_buffer) HALIDE_FUNCTION_ATTRS;
+int local_laplacian_argv(void **args) HALIDE_FUNCTION_ATTRS;
+// Result is never null and points to constant static data
+const struct halide_filter_metadata_t *local_laplacian_metadata() HALIDE_FUNCTION_ATTRS;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif
diff --git a/final/Bitcode/Benchmarks/Halide/local_laplacian/output/rgb_out.bytes b/final/Bitcode/Benchmarks/Halide/local_laplacian/output/rgb_out.bytes
new file mode 100644
index 00000000..6aa3e72f
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/local_laplacian/output/rgb_out.bytes
Binary files differ
diff --git a/final/Bitcode/Benchmarks/Halide/local_laplacian/output/rgba_out.bytes b/final/Bitcode/Benchmarks/Halide/local_laplacian/output/rgba_out.bytes
new file mode 100644
index 00000000..6aa3e72f
--- /dev/null
+++ b/final/Bitcode/Benchmarks/Halide/local_laplacian/output/rgba_out.bytes
Binary files differ