diff options
author | Gil Pitney <gil.pitney@linaro.org> | 2014-12-10 14:19:29 -0800 |
---|---|---|
committer | Gil Pitney <gil.pitney@linaro.org> | 2014-12-10 14:19:29 -0800 |
commit | dafc5b461979fd1bba2dabfd32d01d7a20cd1005 (patch) | |
tree | fc749e68125555d468cf6a146ba4b43757315e0e | |
parent | 27a45b16ed4aa2e3947b794c79e88fe0b4215510 (diff) |
prefetch: Implemented prefetch builtin (noop'ed)
Also, updated CL_DEVICE_LOCAL_MEM_SIZE and Buffer::Buffer() constructor
so that Khronos basic "prefetch" test can pass.
Previously, it was trying to allocate huge global buffers (computed based
on the LOCAL_MEM_SIZE) and was failing.
Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r-- | include/cpu.h | 38 | ||||
-rw-r--r-- | src/core/cpu/device.cpp | 5 | ||||
-rw-r--r-- | src/core/memobject.cpp | 4 |
3 files changed, 25 insertions, 22 deletions
diff --git a/include/cpu.h b/include/cpu.h index 6fa0d90..c7c5976 100644 --- a/include/cpu.h +++ b/include/cpu.h @@ -31,23 +31,27 @@ #include "clc.h" #define PREFETCH_VECTORIZE(PRIM_TYPE) \ - _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE *p, size_t num_gentypes); \ - _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##2 *p, size_t num_gentypes); \ - _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##3 *p, size_t num_gentypes); \ - _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##4 *p, size_t num_gentypes); \ - _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##8 *p, size_t num_gentypes); \ - _CLC_OVERLOAD _CLC_DECL void prefetch(const __global PRIM_TYPE##16 *p, size_t num_gentypes); \ - -PREFETCH_VECTORIZE(char) -PREFETCH_VECTORIZE(uchar) -PREFETCH_VECTORIZE(short) -PREFETCH_VECTORIZE(ushort) -PREFETCH_VECTORIZE(int) -PREFETCH_VECTORIZE(uint) -PREFETCH_VECTORIZE(long) -PREFETCH_VECTORIZE(ulong) -PREFETCH_VECTORIZE(float) -PREFETCH_VECTORIZE(double) + _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE *p, size_t num_gentypes){} \ + _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##2 *p, size_t num_gentypes){} \ + _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##3 *p, size_t num_gentypes){} \ + _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##4 *p, size_t num_gentypes){} \ + _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##8 *p, size_t num_gentypes){} \ + _CLC_OVERLOAD _CLC_INLINE void prefetch(const __global PRIM_TYPE##16 *p, size_t num_gentypes){} \ + + +#define PREFETCH_TYPES() \ + PREFETCH_VECTORIZE(char) \ + PREFETCH_VECTORIZE(uchar) \ + PREFETCH_VECTORIZE(short) \ + PREFETCH_VECTORIZE(ushort) \ + PREFETCH_VECTORIZE(int) \ + PREFETCH_VECTORIZE(uint) \ + PREFETCH_VECTORIZE(long) \ + PREFETCH_VECTORIZE(ulong) \ + PREFETCH_VECTORIZE(float) \ + PREFETCH_VECTORIZE(double) \ + +PREFETCH_TYPES() 
/*----------------------------------------------------------------------------- * This can be empty since our copy routines are currently synchronous. When diff --git a/src/core/cpu/device.cpp b/src/core/cpu/device.cpp index b0732ac..2c85ea8 100644 --- a/src/core/cpu/device.cpp +++ b/src/core/cpu/device.cpp @@ -509,8 +509,11 @@ cl_int CPUDevice::info(cl_device_info param_name, "MemTotal:", 512*1024) * 1024); break; - case CL_DEVICE_MAX_MEM_ALLOC_SIZE: case CL_DEVICE_LOCAL_MEM_SIZE: + SIMPLE_ASSIGN(cl_ulong, 128 * 1024); + break; + + case CL_DEVICE_MAX_MEM_ALLOC_SIZE: case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: // TODO: 1 Gio seems to be enough for software acceleration diff --git a/src/core/memobject.cpp b/src/core/memobject.cpp index 5501ac1..3912740 100644 --- a/src/core/memobject.cpp +++ b/src/core/memobject.cpp @@ -408,11 +408,7 @@ Buffer::Buffer(Context *ctx, size_t size, void *host_ptr, cl_mem_flags flags, return; } -#if defined(__arm__) - if (size > 512*1024*1024) -#else if (size > 1*1024*1024*1024) -#endif { *errcode_ret = CL_INVALID_BUFFER_SIZE; return; |