aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGil Pitney <gil.pitney@linaro.org>2015-06-12 23:14:58 +0000
committerGil Pitney <gil.pitney@linaro.org>2015-06-12 23:14:58 +0000
commit339e5752735eafd65a1cd72106391dcb635c2678 (patch)
treee5f114df5435faa9f645163e020d3378d6180d08
parentda4508f56fdb4dc5b0a10382a165eefc96a2f7d1 (diff)
Allow CPUBuffers for CPUDevices to share copied host pointer data
For Buffer objects created via clCreateBuffer() with the CL_MEM_COPY_HOST_PTR flag, the device data for CPUDevices should be allocated only once in global device memory and shared between the CPUDevices. Previously, shamrock created a brand new allocation for each device buffer of the same MemObject, which caused the Khronos test_device_partition test (for device fission) to fail. This is now fixed by enabling sharing of the device data. Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r--src/core/cpu/buffer.cpp36
-rw-r--r--src/core/memobject.cpp17
-rw-r--r--src/core/memobject.h4
3 files changed, 39 insertions, 18 deletions
diff --git a/src/core/cpu/buffer.cpp b/src/core/cpu/buffer.cpp
index 00d9279..97c61d9 100644
--- a/src/core/cpu/buffer.cpp
+++ b/src/core/cpu/buffer.cpp
@@ -90,25 +90,35 @@ bool CPUBuffer::allocate()
{
size_t buf_size = p_buffer->size();
int retval;
+ void *shared_ptr = p_buffer->shared_ptr();
if (buf_size == 0)
// Something went wrong...
return false;
- if (!p_data)
- {
- // We don't use a host ptr, we need to allocate a buffer
- retval = posix_memalign(&p_data, 128, buf_size); // align for type double16 size.
- if (retval)
- return false;
-
- p_data_malloced = true;
+ if (!shared_ptr) {
+ if (!p_data)
+ {
+ // We don't use a host ptr, we need to allocate a buffer
+ retval = posix_memalign(&p_data, 128, buf_size); // align for type double16 size.
+ if (retval)
+ return false;
+
+ p_data_malloced = true;
+
+ // Now set the shared data pointer, so we need not allocate again for this MemObject:
+ p_buffer->setSharedPtr(p_data);
+ }
+
+ if (p_buffer->type() != MemObject::SubBuffer &&
+ p_buffer->flags() & CL_MEM_COPY_HOST_PTR)
+ {
+ std::memcpy(p_data, p_buffer->host_ptr(), buf_size);
+ }
}
-
- if (p_buffer->type() != MemObject::SubBuffer &&
- p_buffer->flags() & CL_MEM_COPY_HOST_PTR)
- {
- std::memcpy(p_data, p_buffer->host_ptr(), buf_size);
+ else {
+ // If the CPUBuffer data has already been allocated by the first device, use it:
+ if (!p_data) p_data = shared_ptr;
}
// Say to the memobject that we are allocated
diff --git a/src/core/memobject.cpp b/src/core/memobject.cpp
index 3912740..bd8736f 100644
--- a/src/core/memobject.cpp
+++ b/src/core/memobject.cpp
@@ -51,7 +51,7 @@ using namespace Coal;
MemObject::MemObject(Context *ctx, cl_mem_flags flags, void *host_ptr,
cl_int *errcode_ret)
: Object(Object::T_MemObject, ctx), p_num_devices(0), p_flags(flags),
- p_host_ptr(host_ptr), p_devicebuffers(0), p_dtor_callback_stack()
+ p_host_ptr(host_ptr), p_devicebuffers(0), p_dtor_callback_stack(), p_shared_ptr(NULL)
{
// Check the flags value
const cl_mem_flags all_flags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY |
@@ -168,8 +168,7 @@ cl_int MemObject::init()
// If we have more than one device, the allocation on the devices is
// defered to first use, so host_ptr can become invalid. So, copy it in
- // a RAM location and keep it. Also, set a flag telling CPU devices that
- // they don't need to reallocate and re-copy host_ptr
+ // a RAM location and keep it.
// SubBuffer should simply reuse Buffer data
if (p_num_devices > 1 && (p_flags & CL_MEM_COPY_HOST_PTR)
&& type() != SubBuffer)
@@ -215,7 +214,7 @@ cl_int MemObject::init()
std::free((void *)devices);
devices = 0;
- // If we have only one device, already allocate the buffer
+ // If we have only one device, pre-allocate the buffer
if (p_num_devices == 1)
{
if (!p_devicebuffers[0]->allocate())
@@ -259,6 +258,16 @@ void *MemObject::host_ptr() const
}
}
+void *MemObject::shared_ptr() const
+{
+ return p_shared_ptr;
+}
+
+void MemObject::setSharedPtr(void *ptr)
+{
+ p_shared_ptr = ptr;
+}
+
DeviceBuffer *MemObject::deviceBuffer(DeviceInterface *device) const
{
for (unsigned int i=0; i<p_num_devices; ++i)
diff --git a/src/core/memobject.h b/src/core/memobject.h
index 82cbfab..ede6050 100644
--- a/src/core/memobject.h
+++ b/src/core/memobject.h
@@ -98,6 +98,8 @@ class MemObject : public Object
cl_mem_flags flags() const; /*!< \brief Flags */
void *host_ptr() const; /*!< \brief Host pointer */
+ void *shared_ptr() const; /*!< \brief Shared data pointer */
+ void setSharedPtr(void *ptr); /*!< \brief Set the shared data pointer */
DeviceBuffer *deviceBuffer(DeviceInterface *device) const; /*!< \brief \c Coal::DeviceBuffer for the given \p device */
void deviceAllocated(DeviceBuffer *buffer); /*!< \brief Is the \c Coal::DeviceBuffer for \p buffer allocated ? */
@@ -138,7 +140,7 @@ class MemObject : public Object
unsigned int p_num_devices, p_devices_to_allocate;
void *p_host_ptr;
DeviceBuffer **p_devicebuffers;
-
+ void *p_shared_ptr; // Copied host data shared between CPUBuffers.
typedef std::pair<void (CL_CALLBACK *)(cl_mem memobj, void *user_data), void*> dtor_callback_t;
concurrent_stack<dtor_callback_t> p_dtor_callback_stack;