diff options
author | Gil Pitney <gil.pitney@linaro.org> | 2014-11-19 10:40:13 -0800 |
---|---|---|
committer | Gil Pitney <gil.pitney@linaro.org> | 2014-11-19 10:40:13 -0800 |
commit | cc4318ce5886355ad1b06d5489820db6a3a2a044 (patch) | |
tree | c3c416f30f92a5ce5983238c458a7fbc9a94471c | |
parent | 54bfc22fee90d751d6e5d6ee89618b303234f6b3 (diff) |
clCreateBuffer(): Ensure allocation meets minimum alignment for double16 type
Previously, clCreateBuffer() was using malloc() to allocate buffers, which
caused havoc with NEON instructions expecting 128-bit alignment for float4
vectors.
Now, use posix_memalign() to ensure the alignment meets the requirements of the
largest OpenCL data type (double16).
Also, update clGetDeviceInfo()'s CL_DEVICE_MEM_BASE_ADDR_ALIGN and
CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE accordingly.
Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r-- | src/core/cpu/buffer.cpp | 6 | ||||
-rw-r--r-- | src/core/cpu/device.cpp | 8 |
2 files changed, 7 insertions, 7 deletions
diff --git a/src/core/cpu/buffer.cpp b/src/core/cpu/buffer.cpp index 9125872..00d9279 100644 --- a/src/core/cpu/buffer.cpp +++ b/src/core/cpu/buffer.cpp @@ -89,6 +89,7 @@ void *CPUBuffer::nativeGlobalPointer() const bool CPUBuffer::allocate() { size_t buf_size = p_buffer->size(); + int retval; if (buf_size == 0) // Something went wrong... @@ -97,9 +98,8 @@ bool CPUBuffer::allocate() if (!p_data) { // We don't use a host ptr, we need to allocate a buffer - p_data = std::malloc(buf_size); - - if (!p_data) + retval = posix_memalign(&p_data, 128, buf_size); // align for type double16 size. + if (retval) return false; p_data_malloced = true; diff --git a/src/core/cpu/device.cpp b/src/core/cpu/device.cpp index eb3fcb1..e444deb 100644 --- a/src/core/cpu/device.cpp +++ b/src/core/cpu/device.cpp @@ -460,12 +460,12 @@ cl_int CPUDevice::info(cl_device_info param_name, SIMPLE_ASSIGN(cl_uint, 0); //images not supported break; - case CL_DEVICE_MEM_BASE_ADDR_ALIGN: - SIMPLE_ASSIGN(cl_uint, 1024 /* sizeof(long16)*8) */); // 128 byte + case CL_DEVICE_MEM_BASE_ADDR_ALIGN: // in bits! + SIMPLE_ASSIGN(cl_uint, 1024 /* sizeof(double16)*8) */); // 128 byte break; - case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: - SIMPLE_ASSIGN(cl_uint, 16); + case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: // in bytes! + SIMPLE_ASSIGN(cl_uint, 128 /* sizeof(double16) */); break; case CL_DEVICE_SINGLE_FP_CONFIG: |