aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGil Pitney <gil.pitney@linaro.org>2014-11-19 10:40:13 -0800
committerGil Pitney <gil.pitney@linaro.org>2014-11-19 10:40:13 -0800
commitcc4318ce5886355ad1b06d5489820db6a3a2a044 (patch)
treec3c416f30f92a5ce5983238c458a7fbc9a94471c
parent54bfc22fee90d751d6e5d6ee89618b303234f6b3 (diff)
clCreateBuffer(): Ensure allocation meets minimum alignment for double16 type
Previously, clCreateBuffer() was using malloc to allocate buffers, which caused havoc with NEON instructions expecting 128-bit alignment for float4 vectors. Now, use posix_memalign() to ensure alignment meets the requirements of the largest OpenCL data type (double16). Also, update clGetDeviceInfo()'s CL_DEVICE_MEM_BASE_ADDR_ALIGN and CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE accordingly. Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r--src/core/cpu/buffer.cpp6
-rw-r--r--src/core/cpu/device.cpp8
2 files changed, 7 insertions, 7 deletions
diff --git a/src/core/cpu/buffer.cpp b/src/core/cpu/buffer.cpp
index 9125872..00d9279 100644
--- a/src/core/cpu/buffer.cpp
+++ b/src/core/cpu/buffer.cpp
@@ -89,6 +89,7 @@ void *CPUBuffer::nativeGlobalPointer() const
bool CPUBuffer::allocate()
{
size_t buf_size = p_buffer->size();
+ int retval;
if (buf_size == 0)
// Something went wrong...
@@ -97,9 +98,8 @@ bool CPUBuffer::allocate()
if (!p_data)
{
// We don't use a host ptr, we need to allocate a buffer
- p_data = std::malloc(buf_size);
-
- if (!p_data)
+ retval = posix_memalign(&p_data, 128, buf_size); // align for type double16 size.
+ if (retval)
return false;
p_data_malloced = true;
diff --git a/src/core/cpu/device.cpp b/src/core/cpu/device.cpp
index eb3fcb1..e444deb 100644
--- a/src/core/cpu/device.cpp
+++ b/src/core/cpu/device.cpp
@@ -460,12 +460,12 @@ cl_int CPUDevice::info(cl_device_info param_name,
SIMPLE_ASSIGN(cl_uint, 0); //images not supported
break;
- case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
- SIMPLE_ASSIGN(cl_uint, 1024 /* sizeof(long16)*8) */); // 128 byte
+ case CL_DEVICE_MEM_BASE_ADDR_ALIGN: // in bits!
+ SIMPLE_ASSIGN(cl_uint, 1024 /* sizeof(double16)*8) */); // 128 byte
break;
- case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
- SIMPLE_ASSIGN(cl_uint, 16);
+ case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: // in bytes!
+ SIMPLE_ASSIGN(cl_uint, 128 /* sizeof(double16) */);
break;
case CL_DEVICE_SINGLE_FP_CONFIG: