aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGil Pitney <gil.pitney@linaro.org>2016-06-10 23:18:27 +0000
committerGil Pitney <gil.pitney@linaro.org>2016-06-29 22:40:58 +0000
commit52496644a788f2118ed2de7e99f3d184d3d70ca8 (patch)
treea7cdfd2213da21a4f597a98bb3f31dcb7c73ddb7
parentd10b2c2df4655afc8ecd00d6b9afc4aa634b0e83 (diff)
downloadshamrock-TI_01_01_08_03_Merge.tar.gz
Merge of select changes from TI OpenCL 01.01.08.03 from git.ti.com/ti-opencl/hostTI_01_01_08_03_Merge
Including: - Use of the Loki library v 0.1.7 for C++ Singleton implementation. - Updated Platform object to be a Singleton class instance. - Better handling of MemObject management with concurrent threads. - Events: added CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST error code. - MapImageEvent() fixes. - KernelEvent() fixes around retaining/releasing objects. - CopyBufferRectEvent() fixes. - ReadWriteBufferRectEvent() fixes. Revalidating Shamrock with these changes resulted in 12 more Khronos conformance tests passing in test_basic, test_buffers, and test_events, and no regressions. Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r--README1
-rw-r--r--cmake/modules/FindLLVM.cmake3
-rw-r--r--src/api/api_context.cpp4
-rw-r--r--src/api/api_device.cpp4
-rw-r--r--src/api/api_enqueue.cpp15
-rw-r--r--src/api/api_event.cpp4
-rw-r--r--src/api/api_memory.cpp3
-rw-r--r--src/api/api_platform.cpp6
-rw-r--r--src/api/api_program.cpp3
-rw-r--r--src/core/context.cpp4
-rw-r--r--src/core/cpu/device.cpp2
-rw-r--r--src/core/cpu/kernel.cpp2
-rw-r--r--src/core/cpu/kernel.h2
-rw-r--r--src/core/deviceinterface.h7
-rw-r--r--src/core/dsp/u_concurrent_map.h21
-rw-r--r--src/core/events.cpp59
-rw-r--r--src/core/events.h3
-rw-r--r--src/core/icd.cpp2
-rw-r--r--src/core/memobject.h3
-rw-r--r--src/core/object.cpp53
-rw-r--r--src/core/object.h6
-rw-r--r--src/core/platform.cpp11
-rw-r--r--src/core/platform.h8
-rw-r--r--src/core/u_concurrent_set.h80
-rw-r--r--src/core/u_concurrent_stack.h (renamed from src/core/dsp/u_concurrent_stack.h)12
-rw-r--r--src/core/u_lockable.h (renamed from src/core/dsp/u_lockable.h)16
-rw-r--r--src/core/u_locks_pthread.h (renamed from src/core/dsp/u_locks_pthread.h)30
27 files changed, 223 insertions, 141 deletions
diff --git a/README b/README
index 4b6584f..ee3f015 100644
--- a/README
+++ b/README
@@ -20,6 +20,7 @@ libtinfo-dev
mesa-common-dev
python 2.6+, and not greater or equal to v 3.0.
opencl-headers
+loki 0.1.7
BUILD
=====
diff --git a/cmake/modules/FindLLVM.cmake b/cmake/modules/FindLLVM.cmake
index 0e3e11b..e2391f2 100644
--- a/cmake/modules/FindLLVM.cmake
+++ b/cmake/modules/FindLLVM.cmake
@@ -97,8 +97,9 @@ find_program(LLVM_CONFIG_EXECUTABLE
exec_program(${LLVM_CONFIG_EXECUTABLE} ARGS --version OUTPUT_VARIABLE REPORTED_LLVM_VERSION )
STRING(REPLACE "." "" REPORTED_LLVM_VERSION ${REPORTED_LLVM_VERSION})
+STRING(REPLACE "svn" "" REPORTED_LLVM_VERSION ${REPORTED_LLVM_VERSION})
if(NOT ${REPORTED_LLVM_VERSION} STREQUAL ${LLVM_VERSION})
- message(FATAL_ERROR "ERROR!: llvm-config reports different version that what is expected \(${REPORTED_LLVM_VERSION} != ${LLVM_VERSION}" \))
+ message(FATAL_ERROR "ERROR!: llvm-config reports different version than what is expected \(${REPORTED_LLVM_VERSION} != ${LLVM_VERSION}\)")
endif()
# Macro to build up list of llvm libraries
diff --git a/src/api/api_context.cpp b/src/api/api_context.cpp
index 20b0d84..f66722e 100644
--- a/src/api/api_context.cpp
+++ b/src/api/api_context.cpp
@@ -89,14 +89,14 @@ clCreateContextFromType(const cl_context_properties *properties,
cl_int local_error;
cl_context result = 0;
- local_error = clGetDeviceIDs(&the_platform, device_type, 0, NULL,
+ local_error = clGetDeviceIDs((cl_platform_id) &the_platform::Instance(), device_type, 0, NULL,
&num_devices);
if (!num_devices) { local_error = CL_INVALID_DEVICE; goto bail; }
devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
if (!devices) { local_error = CL_OUT_OF_HOST_MEMORY; goto bail; }
- local_error = clGetDeviceIDs(&the_platform, device_type, num_devices,
+ local_error = clGetDeviceIDs((cl_platform_id) &the_platform::Instance(), device_type, num_devices,
devices, 0);
if (local_error != CL_SUCCESS) { free (devices); goto bail; }
diff --git a/src/api/api_device.cpp b/src/api/api_device.cpp
index 7bf66a4..e0260ef 100644
--- a/src/api/api_device.cpp
+++ b/src/api/api_device.cpp
@@ -45,9 +45,9 @@ clGetDeviceIDs(cl_platform_id platform,
/*-------------------------------------------------------------------------
* We currently implement only one platform
*------------------------------------------------------------------------*/
- if (!platform) platform = &the_platform;
+ if (!platform) platform = (cl_platform_id)&(the_platform::Instance());
- if (platform != &the_platform) return CL_INVALID_PLATFORM;
+ if (platform != &(the_platform::Instance())) return CL_INVALID_PLATFORM;
if (num_entries == 0 && devices != 0) return CL_INVALID_VALUE;
if (num_devices == 0 && devices == 0) return CL_INVALID_VALUE;
diff --git a/src/api/api_enqueue.cpp b/src/api/api_enqueue.cpp
index 759ed92..7fce1d9 100644
--- a/src/api/api_enqueue.cpp
+++ b/src/api/api_enqueue.cpp
@@ -103,7 +103,7 @@ static inline cl_int queueEvent(Coal::CommandQueue *queue,
delete command;
return rs;
}
- command->dereference();
+ clReleaseEvent(d_event);
}
return CL_SUCCESS;
@@ -599,7 +599,7 @@ clEnqueueMapBuffer(cl_command_queue d_command_queue,
if (*errcode_ret != CL_SUCCESS)
{
- delete command;
+ // delete command; // command already deleted in queueEvent()
return 0;
}
else
@@ -667,7 +667,7 @@ clEnqueueMapImage(cl_command_queue d_command_queue,
if (*errcode_ret != CL_SUCCESS)
{
- delete command;
+ // delete command; // command already deleted in queueEvent()
return 0;
}
else
@@ -912,13 +912,12 @@ clEnqueueMarkerWithWaitList(cl_command_queue d_command_queue,
}
if (!event_wait_list) {
- // Free events, they were memcpyed by CommandQueue::events()
- for (unsigned int i=0; i<count; ++i)
- {
+ // Free events, they were memcpyed by CommandQueue::events()
+ for (unsigned int i=0; i<count; ++i) {
events[i]->dereference();
}
- if (events != NULL) std::free(events);
- if (e_wait_list != NULL) std::free(e_wait_list);
+ if (events != NULL) std::free(events);
+ if (e_wait_list != NULL) std::free(e_wait_list);
}
return queueEvent(command_queue, command, event, false);
diff --git a/src/api/api_event.cpp b/src/api/api_event.cpp
index 9f94011..51139ec 100644
--- a/src/api/api_event.cpp
+++ b/src/api/api_event.cpp
@@ -84,6 +84,10 @@ clWaitForEvents(cl_uint num_events,
{
auto event = pobj(event_list[i]);
event->waitForStatus(CL_COMPLETE);
+ // Per OpenCL spec, we need to return this error if any event
+ // in the event_wait_list fails
+ if (event->status() < 0)
+ return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
}
return CL_SUCCESS;
diff --git a/src/api/api_memory.cpp b/src/api/api_memory.cpp
index 6cab011..a4e9cf1 100644
--- a/src/api/api_memory.cpp
+++ b/src/api/api_memory.cpp
@@ -386,6 +386,9 @@ static cl_image_format supported_formats[] = {
{ CL_INTENSITY, CL_FLOAT }
};
+#ifdef MIN
+#undef MIN
+#endif
#define MIN(a, b) ((a) < (b) ? (a) : (b))
cl_int
diff --git a/src/api/api_platform.cpp b/src/api/api_platform.cpp
index eb562a8..ccbbadf 100644
--- a/src/api/api_platform.cpp
+++ b/src/api/api_platform.cpp
@@ -52,7 +52,7 @@ clGetPlatformIDs(cl_uint num_entries,
/*-------------------------------------------------------------------------
* Only one "default" platform
*------------------------------------------------------------------------*/
- if (platforms != 0) *platforms = &the_platform;
+ if (platforms != 0) *platforms = (cl_platform_id) &the_platform::Instance();
return CL_SUCCESS;
}
@@ -70,7 +70,7 @@ clGetPlatformInfo(cl_platform_id platform,
/*-------------------------------------------------------------------------
* NULL or what is returned by clGetPlatformIDs, that's to say also NULL
*------------------------------------------------------------------------*/
- if (platform != &the_platform) return CL_INVALID_PLATFORM;
+ if (platform != (cl_platform_id) &the_platform::Instance()) return CL_INVALID_PLATFORM;
return platform->info(param_name, param_value_size, param_value,
param_value_size_ret);
@@ -93,7 +93,7 @@ void * clGetExtensionFunctionAddress(const char *funcname)
void * clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, const char *funcname)
{
- if (platform != &the_platform) return NULL;
+ if (platform != (cl_platform_id) &the_platform::Instance()) return NULL;
return clGetExtensionFunctionAddress(funcname);
}
diff --git a/src/api/api_program.cpp b/src/api/api_program.cpp
index 7a17156..11c67a8 100644
--- a/src/api/api_program.cpp
+++ b/src/api/api_program.cpp
@@ -446,7 +446,8 @@ clUnloadCompiler(void)
cl_int
clUnloadPlatformCompiler(cl_platform_id platform)
{
- if (platform != &the_platform) return CL_INVALID_PLATFORM;
+ if (platform != (cl_platform_id)&(the_platform::Instance()))
+ return CL_INVALID_PLATFORM;
return CL_SUCCESS;
}
diff --git a/src/core/context.cpp b/src/core/context.cpp
index cd91ad0..4d51a60 100644
--- a/src/core/context.cpp
+++ b/src/core/context.cpp
@@ -57,7 +57,7 @@ Context::Context(const cl_context_properties *properties,
cl_int *errcode_ret)
: Object(Object::T_Context, 0), p_properties(0), p_pfn_notify(pfn_notify),
p_user_data(user_data), p_devices(0), p_d_devices(0), p_num_devices(0), p_props_len(0),
- p_platform(&the_platform)
+ p_platform((cl_platform_id) &the_platform::Instance())
{
if (!p_pfn_notify)
p_pfn_notify = &default_pfn_notify;
@@ -123,7 +123,7 @@ Context::Context(const cl_context_properties *properties,
}
// Verify that the platform is good
- if (p_platform != &the_platform)
+ if (p_platform != (cl_platform_id) &the_platform::Instance())
{
*errcode_ret = CL_INVALID_PLATFORM;
return;
diff --git a/src/core/cpu/device.cpp b/src/core/cpu/device.cpp
index 38a7e8d..e915d78 100644
--- a/src/core/cpu/device.cpp
+++ b/src/core/cpu/device.cpp
@@ -700,7 +700,7 @@ cl_int CPUDevice::info(cl_device_info param_name,
break;
case CL_DEVICE_PLATFORM:
- SIMPLE_ASSIGN(cl_platform_id, &the_platform);
+ SIMPLE_ASSIGN(cl_platform_id, (cl_platform_id) &the_platform::Instance());
break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
diff --git a/src/core/cpu/kernel.cpp b/src/core/cpu/kernel.cpp
index b89aafa..922cdfe 100644
--- a/src/core/cpu/kernel.cpp
+++ b/src/core/cpu/kernel.cpp
@@ -187,7 +187,7 @@ CPUKernel::~CPUKernel()
pthread_mutex_destroy(&p_call_function_mutex);
}
-size_t CPUKernel::workGroupSize()
+size_t CPUKernel::workGroupSize() const
{
// Just use CL_DEVICE_MAX_WORK_GROUP_SIZE
size_t param_value;
diff --git a/src/core/cpu/kernel.h b/src/core/cpu/kernel.h
index ab4d1ac..54dd516 100644
--- a/src/core/cpu/kernel.h
+++ b/src/core/cpu/kernel.h
@@ -81,7 +81,7 @@ class CPUKernel : public DeviceKernel
CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function);
~CPUKernel();
- size_t workGroupSize();
+ size_t workGroupSize() const;
cl_ulong localMemSize() const;
cl_ulong privateMemSize() const;
size_t preferredWorkGroupSizeMultiple() const;
diff --git a/src/core/deviceinterface.h b/src/core/deviceinterface.h
index e15994e..c809cf4 100644
--- a/src/core/deviceinterface.h
+++ b/src/core/deviceinterface.h
@@ -313,8 +313,9 @@ class DeviceProgram
* \param native \c std::string returns native binary if not NULL
* \return true if the binary is indeed mixed
*/
- virtual bool ExtractMixedBinary(std::string *binary_str,
- std::string *bitcode, std::string *native)
+ virtual bool ExtractMixedBinary(const std::string *binary_str,
+ std::string *bitcode,
+ std::string *native)
{ return false; }
};
@@ -332,7 +333,7 @@ class DeviceKernel
* \return Maximum work-group size of the kernel based on device-specific
* data such as memory usage, register pressure, etc)
*/
- virtual size_t workGroupSize() = 0;
+ virtual size_t workGroupSize() const = 0;
/**
* \brief Local memory used by the kernel
diff --git a/src/core/dsp/u_concurrent_map.h b/src/core/dsp/u_concurrent_map.h
index 014c0b6..c9bb0e6 100644
--- a/src/core/dsp/u_concurrent_map.h
+++ b/src/core/dsp/u_concurrent_map.h
@@ -62,10 +62,10 @@ public:
* @brief Place an object in the map.
* @param data is the item to psh on the map
***************************************************************************/
- void push(I index, T const data)
+ void push(I index, T const data, unsigned cnt = 1)
{
Lock lock(this);
- M[index] = data;
+ M[index] = std::pair<T,unsigned int>(data, cnt);
num_elements++;
}
@@ -98,32 +98,33 @@ public:
bool try_pop(I idx, T& popped_value)
{
Lock lock(this);
- if (num_elements == 0) return false;
+ auto it = M.find(idx);
- typename std::map<I,T>::iterator it = M.find(idx);
-
- if (it != M.end())
+ if (it != M.end() && --it->second.second == 0)
{
- popped_value = it->second;
+ popped_value = it->second.first;
M.erase (it);
num_elements--;
return true;
}
+
return false;
}
void dump()
{
- for (typename std::map<I,T>::const_iterator i = M.begin(); i != M.end(); ++i)
- std::cout << i->first << " ==> " << i->second << std::endl;
+ for (auto &i : M)
+ std::cout << i.first << " ==> " << i.second.first
+ << "(" << i.second.second << ")"
+ << std::endl;
}
/*-------------------------------------------------------------------------
* The class's data
*------------------------------------------------------------------------*/
private:
- std::map<I,T> M; //!< standard stl map
+ std::map<I, std::pair<T, unsigned int>> M; //!< standard stl map
int num_elements;
/*-------------------------------------------------------------------------
diff --git a/src/core/events.cpp b/src/core/events.cpp
index f4b7c09..83e8b4b 100644
--- a/src/core/events.cpp
+++ b/src/core/events.cpp
@@ -329,8 +329,10 @@ MapImageEvent::MapImageEvent(CommandQueue *parent,
}
// Check for out-of-bounds
- if ((p_origin[0] + p_region[0]) > image->row_pitch() ||
- (p_origin[1] + p_region[1]) * image->row_pitch() > image->slice_pitch() ||
+ // cross-row and cross-slice transfers on Image should be allowed,
+ // as seen by OpenCV use cases, and as SPEC does not prohibit such cases
+ if ((p_origin[0] + p_region[0]) > image->size() ||
+ (p_origin[1] + p_region[1]) * image->row_pitch() > image->size() ||
(p_origin[2] + p_region[2]) * image->slice_pitch() > image->size())
{
*errcode_ret = CL_INVALID_VALUE;
@@ -720,18 +722,6 @@ KernelEvent::KernelEvent(CommandQueue *parent,
p_work_dim(work_dim), p_kernel(kernel)
{
clRetainKernel(desc(p_kernel));
- // Also, retain any buffers in case the client releases during execute,
- // as is done in the Khronos test_api release_during_execute test!.
- // Check arguments (buffer alignment, image size, ...)
- for (unsigned int i=0; i < kernel->numArgs(); ++i) {
- const Kernel::Arg *a = kernel->arg(i);
-
- if (a->kind() == Kernel::Arg::Buffer && a->file() != Kernel::Arg::Local)
- {
- MemObject *buffer = *(MemObject **)(a->value(0));
- clRetainMemObject(desc(buffer));
- }
- }
if (*errcode_ret != CL_SUCCESS) return;
@@ -895,17 +885,20 @@ KernelEvent::KernelEvent(CommandQueue *parent,
if (a->kind() == Kernel::Arg::Buffer && a->file() != Kernel::Arg::Local)
{
- const MemObject *buffer = *(const MemObject **)(a->value(0));
+ MemObject *buffer = *(MemObject **)(a->value(0));
if (!BufferEvent::isSubBufferAligned(buffer, device))
{
*errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET;
return;
}
+
+ clRetainMemObject(desc(buffer));
+ p_mem_objects.push_back((MemObject *) buffer);
}
else if (a->kind() == Kernel::Arg::Image2D)
{
- const Image2D *image = *(const Image2D **)(a->value(0));
+ Image2D *image = *(Image2D **)(a->value(0));
size_t maxWidth, maxHeight;
*errcode_ret = device->info(CL_DEVICE_IMAGE2D_MAX_WIDTH,
@@ -921,10 +914,13 @@ KernelEvent::KernelEvent(CommandQueue *parent,
*errcode_ret = CL_INVALID_IMAGE_SIZE;
return;
}
+
+ clRetainMemObject(desc(image));
+ p_mem_objects.push_back((MemObject *) image);
}
else if (a->kind() == Kernel::Arg::Image3D)
{
- const Image3D *image = *(const Image3D **)a->value(0);
+ Image3D *image = *(Image3D **)a->value(0);
size_t maxWidth, maxHeight, maxDepth;
*errcode_ret = device->info(CL_DEVICE_IMAGE3D_MAX_WIDTH,
@@ -943,21 +939,18 @@ KernelEvent::KernelEvent(CommandQueue *parent,
*errcode_ret = CL_INVALID_IMAGE_SIZE;
return;
}
+
+ clRetainMemObject(desc(image));
+ p_mem_objects.push_back((MemObject *) image);
}
}
}
KernelEvent::~KernelEvent()
{
- for (unsigned int i=0; i < p_kernel->numArgs(); ++i) {
- const Kernel::Arg *a = p_kernel->arg(i);
+ for (MemObject *mem_object : p_mem_objects)
+ clReleaseMemObject(desc(mem_object));
- if (a->kind() == Kernel::Arg::Buffer && a->file() != Kernel::Arg::Local)
- {
- MemObject *buffer = *(MemObject **)(a->value(0));
- clReleaseMemObject(desc(buffer));
- }
- }
clReleaseKernel(desc(p_kernel));
}
@@ -1203,16 +1196,18 @@ CopyBufferRectEvent::CopyBufferRectEvent(CommandQueue *parent,
}
// Check for out-of-bounds
- if ((p_src_origin[0] + p_region[0]) > p_src_row_pitch ||
- (p_src_origin[1] + p_region[1]) * p_src_row_pitch > p_src_slice_pitch ||
+ // cross-row and cross-slice transfers on BufferRect should be allowed,
+ // as seen by OpenCV use cases, and as SPEC does not prohibit such cases
+ if ((p_src_origin[0] + p_region[0]) > source->size() ||
+ (p_src_origin[1] + p_region[1]) * p_src_row_pitch > source->size() ||
(p_src_origin[2] + p_region[2]) * p_src_slice_pitch > source->size())
{
*errcode_ret = CL_INVALID_VALUE;
return;
}
- if ((p_dst_origin[0] + p_region[0]) > p_dst_row_pitch ||
- (p_dst_origin[1] + p_region[1]) * p_dst_row_pitch > p_dst_slice_pitch ||
+ if ((p_dst_origin[0] + p_region[0]) > destination->size() ||
+ (p_dst_origin[1] + p_region[1]) * p_dst_row_pitch > destination->size() ||
(p_dst_origin[2] + p_region[2]) * p_dst_slice_pitch > destination->size())
{
*errcode_ret = CL_INVALID_VALUE;
@@ -1300,8 +1295,10 @@ ReadWriteBufferRectEvent::ReadWriteBufferRectEvent(CommandQueue *parent,
}
// Check for out-of-bounds
- if ((p_src_origin[0] + p_region[0]) > p_src_row_pitch ||
- (p_src_origin[1] + p_region[1]) * p_src_row_pitch > p_src_slice_pitch ||
+ // cross-row and cross-slice transfers on BufferRect should be allowed,
+ // as seen by OpenCV use cases, and as SPEC does not prohibit such cases
+ if ((p_src_origin[0] + p_region[0]) > buffer->size() ||
+ (p_src_origin[1] + p_region[1]) * p_src_row_pitch > buffer->size() ||
(p_src_origin[2] + p_region[2]) * p_src_slice_pitch > buffer->size())
{
*errcode_ret = CL_INVALID_VALUE;
diff --git a/src/core/events.h b/src/core/events.h
index aad5792..9811d87 100644
--- a/src/core/events.h
+++ b/src/core/events.h
@@ -36,8 +36,8 @@
#include "commandqueue.h"
#include <core/config.h>
-
#include <vector>
+#include <list>
namespace Coal
{
@@ -649,6 +649,7 @@ class KernelEvent : public Event
p_max_work_item_sizes[MAX_WORK_DIMS];
Kernel *p_kernel;
DeviceKernel *p_dev_kernel;
+ std::list<MemObject *> p_mem_objects;
};
/**
diff --git a/src/core/icd.cpp b/src/core/icd.cpp
index 75b3815..6495501 100644
--- a/src/core/icd.cpp
+++ b/src/core/icd.cpp
@@ -177,7 +177,7 @@ clIcdGetPlatformIDsKHR(cl_uint num_entries,
/*-------------------------------------------------------------------------
* Only one "default" platform
*------------------------------------------------------------------------*/
- if (platforms != 0) *platforms = &the_platform;
+ if (platforms != 0) *platforms = (cl_platform_id) &the_platform::Instance();
return CL_SUCCESS;
}
diff --git a/src/core/memobject.h b/src/core/memobject.h
index 55f2fe3..3451ed8 100644
--- a/src/core/memobject.h
+++ b/src/core/memobject.h
@@ -36,7 +36,8 @@
#include "object.h"
#include "icd.h"
-#include "dsp/u_concurrent_stack.h"
+#include "u_concurrent_stack.h"
+#include <list>
#include <CL/cl.h>
diff --git a/src/core/object.cpp b/src/core/object.cpp
index 072c61d..ee7d01d 100644
--- a/src/core/object.cpp
+++ b/src/core/object.cpp
@@ -32,14 +32,22 @@
*/
#include "object.h"
+#include "u_concurrent_set.h"
using namespace Coal;
-static std::list<Object *>& getKnownObjects()
-{
- static std::list<Object *> known_objects;
- return known_objects;
-}
+/*-----------------------------------------------------------------------------
+* This static was previously inside the getKnownObjects function in order to
+* delay its construction until first use. Since we now delay the construction
+* of the platform until first use, we need to make sure that known_objects
+* lifetime is a superset of the the_platform and all opencl objects lifetimes.
+* Therefore we moved the definition of known_objects to global scope which
+* will ensure that it exists before the_platform and should also ensure that
+* it is destroyed after the_platform, since objects are destructed in reverse
+* order of construction. Singletons created with new and statics are both
+* placed in the same dtor queue.
+*----------------------------------------------------------------------------*/
+static concurrent_set<Object *> known_objects;
Object::Object(Type type, Object *parent)
@@ -49,8 +57,7 @@ Object::Object(Type type, Object *parent)
parent->reference();
// Add object in the list of known objects
- getKnownObjects().push_front(this);
- p_it = getKnownObjects().begin();
+ known_objects.insert(this);
}
Object::~Object()
@@ -59,18 +66,19 @@ Object::~Object()
delete p_parent;
// Remove object from the list of known objects
- getKnownObjects().erase(p_it);
+ known_objects.erase(this);
+ p_type = T_Invalid;
}
void Object::reference()
{
- p_references++;
+ __sync_fetch_and_add(&p_references, 1);
}
bool Object::dereference()
{
- p_references--;
- return (p_references == 0);
+ unsigned int oldval = __sync_fetch_and_sub(&p_references, 1);
+ return (oldval == 1);
}
void Object::setReleaseParent (bool release)
@@ -93,27 +101,10 @@ Object::Type Object::type() const
return p_type;
}
-#pragma clang diagnostic ignored "-Wtautological-undefined-compare"
-#pragma GCC diagnostic ignored "-Wtautological-undefined-compare"
-
bool Object::isA(Object::Type type) const
{
// Check for null values
- // NOTE: in clang 3.6+, this warns: we keep the code (as harmless), but suppress the warning.
- if (this == 0)
- return false;
-
- // Check that the value isn't garbage or freed pointer
- std::list<Object *>::const_iterator it = getKnownObjects().begin(),
- e = getKnownObjects().end();
- while (it != e)
- {
- if (*it == this)
- // OK, NOW it is safe to dereference this ptr:
- return this->type() == type;
-
- ++it;
- }
-
- return false;
+ if (this == 0) return false;
+
+ return known_objects.memberp((Object *) this) && type == p_type;
}
diff --git a/src/core/object.h b/src/core/object.h
index d83e326..1f481e4 100644
--- a/src/core/object.h
+++ b/src/core/object.h
@@ -33,8 +33,6 @@
#ifndef __REFCOUNTED_H__
#define __REFCOUNTED_H__
-#include <list>
-
namespace Coal
{
@@ -56,6 +54,7 @@ class Object
*/
enum Type
{
+ T_Invalid, /* Invalid type */
T_Device, /*!< \brief \c Coal::DeviceInterface */
T_CommandQueue, /*!< \brief \c Coal::CommandQueue */
T_Event, /*!< \brief \c Coal::Event */
@@ -121,10 +120,9 @@ class Object
bool isA(Type type) const;
private:
+ Type p_type;
unsigned int p_references;
Object *p_parent;
- Type p_type;
- std::list<Object *>::iterator p_it;
bool p_release_parent;
};
diff --git a/src/core/platform.cpp b/src/core/platform.cpp
index 30ace53..d498c68 100644
--- a/src/core/platform.cpp
+++ b/src/core/platform.cpp
@@ -51,15 +51,15 @@
using namespace Coal;
-// Ensure that Class Platform remains mutable to the ICD "POD" C structure, as expected
-// by the ICD loader
+// Ensure that Class Platform remains mutable to the ICD "POD" C structure, as
+// expected by the ICD loader
static_assert(std::is_standard_layout<Platform>::value,
"Class Platform must be of C++ standard layout type.");
/******************************************************************************
* begin_file_lock_crit_section
******************************************************************************/
-static int begin_file_lock_crit_section(char* fname)
+static int begin_file_lock_crit_section(const char* fname)
{
/*---------------------------------------------------------------------
* Create a lock, so only 1 OpenCL program can progress at a time.
@@ -106,8 +106,7 @@ namespace Coal
{
Platform::Platform(): dispatch(&dispatch_table)
{
- char filename[] = "/var/lock/opencl";
- p_lock_fd = begin_file_lock_crit_section(filename);
+ p_lock_fd = begin_file_lock_crit_section("/var/lock/opencl");
Coal::DeviceInterface * device = new Coal::CPUDevice(NULL,0);
p_devices.push_back(desc(device));
@@ -233,5 +232,3 @@ namespace Coal
return CL_SUCCESS;
}
};
-
-_cl_platform_id the_platform;
diff --git a/src/core/platform.h b/src/core/platform.h
index 809d12c..00b0691 100644
--- a/src/core/platform.h
+++ b/src/core/platform.h
@@ -33,6 +33,10 @@
#include <cstring>
#include "icd.h"
+#include <pthread.h>
+#define LOKI_PTHREAD_H
+#include <loki/Singleton.h>
+
namespace Coal
{
@@ -61,5 +65,7 @@ class Platform
struct _cl_platform_id : public Coal::Platform
{};
-extern _cl_platform_id the_platform;
+typedef Loki::SingletonHolder<Coal::Platform, Loki::CreateUsingNew,
+ Loki::NoDestroy, Loki::ClassLevelLockable> the_platform;
+
#endif
diff --git a/src/core/u_concurrent_set.h b/src/core/u_concurrent_set.h
new file mode 100644
index 0000000..c2767c9
--- /dev/null
+++ b/src/core/u_concurrent_set.h
@@ -0,0 +1,80 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+/**************************************************************************//**
+*
+* @file u_concurrent_set.h
+* @brief TI implementation class that implements a thread safe set.
+*
+******************************************************************************/
+#ifndef _U_CONCURRENT_SET_H_
+#define _U_CONCURRENT_SET_H_
+
+#include <iostream>
+#include <set>
+#include "u_lockable.h"
+
+/**************************************************************************//**
+* @class concurrent_set
+*
+* @brief A thread safe set implementation
+*
+* @details This implementation wraps a standard stl set with some locking
+* capability to make the member functions mutually exclusive
+* regions. It derives from the class Lockable which defines a type
+* Lock that can be used to define a type in a scope. The result will
+* be that the remainder of the scope (or until unlock is called) is a
+* mutex.
+*
+******************************************************************************/
+template<typename T>
+class concurrent_set : public Lockable
+{
+public:
+ concurrent_set() : S() {}
+ ~concurrent_set() {}
+
+ void insert (T const data) { Lock lock(this); S.insert(data); }
+ void erase (T data) { Lock lock(this); S.erase(data); }
+ bool memberp(T data) const
+ { Lock lock(this); return S.find(data) != S.end(); }
+
+ /*-------------------------------------------------------------------------
+ * The class's data
+ *------------------------------------------------------------------------*/
+private:
+ std::set<T> S; //!< standard stl set
+
+ /*-------------------------------------------------------------------------
+ * Prevent copy construction and assignment
+ *------------------------------------------------------------------------*/
+private:
+ concurrent_set(const concurrent_set&);
+ concurrent_set& operator=(const concurrent_set&);
+};
+
+#endif //_U_CONCURRENT_SET_H_
diff --git a/src/core/dsp/u_concurrent_stack.h b/src/core/u_concurrent_stack.h
index 6e9755b..abc91a2 100644
--- a/src/core/dsp/u_concurrent_stack.h
+++ b/src/core/u_concurrent_stack.h
@@ -15,7 +15,7 @@
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
@@ -43,11 +43,11 @@
*
* @brief A thread safe stack implementation
*
-* @details This implementation wraps a standard stl stack with some locking
-* capability to make the member functions mutually exclusive
-* regions. In derives from the class Lockable which defines a type
+* @details This implementation wraps a standard stl stack with some locking
+* capability to make the member functions mutually exclusive
+* regions. It derives from the class Lockable which defines a type
* Lock that can be used to define a type in a scope. The result will
-* be that the remainder of the scope (or until unlock is called) is a
+* be that the remainder of the scope (or until unlock is called) is a
* mutex.
*
******************************************************************************/
@@ -116,7 +116,7 @@ private:
/*-------------------------------------------------------------------------
* Prevent copy construction and assignment
*------------------------------------------------------------------------*/
-private:
+private:
concurrent_stack(const concurrent_stack&);
concurrent_stack& operator=(const concurrent_stack&);
};
diff --git a/src/core/dsp/u_lockable.h b/src/core/u_lockable.h
index 803197f..4e75527 100644
--- a/src/core/dsp/u_lockable.h
+++ b/src/core/u_lockable.h
@@ -24,18 +24,18 @@
* @version 1.00.00
*
* @note The Locakable class is a modified version of the ObjectLevelLockable
-* class from the LOKI library. The copyright from that library is
-* included at the top of this file.
+* class from the LOKI library. The copyright from that library is
+* included at the top of this file.
*
******************************************************************************/
#ifndef _U_LOCKABLE_H_
#define _U_LOCKABLE_H_
#include "u_locks_pthread.h"
-
+
/**************************************************************************//**
* @brief used as a base class to give your derived class a Lock type.
-* @details Have a class derive from this class and you can lock member
-* functions of your class by defining a lock like this
+* @details Have a class derive from this class and you can lock member
+* functions of your class by defining a lock like this
* Lock lock(this);
******************************************************************************/
class Lockable
@@ -49,13 +49,13 @@ class Lockable
* @brief The Lock type defined by inheriting from Lockable.
**************************************************************************/
class Lock
- {
+ {
public:
/*******************************************************************//**
* @brief Constructing a Lock object will lock the parent object's mutex
***********************************************************************/
- explicit Lock(const Lockable* host_) : host(*host_)
+ explicit Lock(const Lockable* host_) : host(*host_)
{ host.mutex.Lock(); }
/*******************************************************************//**
@@ -94,7 +94,7 @@ class Lockable_off
Lockable_off() {}
class Lock
- {
+ {
public:
explicit Lock(const Lockable_off* host_) { }
diff --git a/src/core/dsp/u_locks_pthread.h b/src/core/u_locks_pthread.h
index 4663a57..71f61b9 100644
--- a/src/core/dsp/u_locks_pthread.h
+++ b/src/core/u_locks_pthread.h
@@ -15,7 +15,7 @@
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
@@ -40,11 +40,11 @@
#define _U_LOCKS_PTHREAD_H_
#include <pthread.h>
-
+
/**************************************************************************//**
* @brief Simple mutex implemented using the pthreads library
*
-* @details This mutex is simply a wrapper around a pthread mutex. Two regions
+* @details This mutex is simply a wrapper around a pthread mutex. Two regions
* of code cannot have the mutex locked at the same time.
*
******************************************************************************/
@@ -62,19 +62,19 @@ class Mutex
pthread_mutex_t mutex; //!< The underlying pthread mutex
private: // prevent copy construction and assignment
- Mutex(const Mutex &);
+ Mutex(const Mutex &);
Mutex & operator = (const Mutex &);
};
/**************************************************************************//**
* @brief Simple condition variable implemented using the pthreads library.
*
-* @details Condition variables are synchronization primitives that enable
-* threads to wait until a particular condition occurs. Condition
-* variables enable threads to atomically release a lock and sleep.
-* Condition variables support operations that "wake one" or
-* "wake all" waiting threads. After a thread is woken, it
-* re-acquires the lock it released when the thread entered the
+* @details Condition variables are synchronization primitives that enable
+* threads to wait until a particular condition occurs. Condition
+* variables enable threads to atomically release a lock and sleep.
+* Condition variables support operations that "wake one" or
+* "wake all" waiting threads. After a thread is woken, it
+* re-acquires the lock it released when the thread entered the
* sleeping state.
*
******************************************************************************/
@@ -111,11 +111,11 @@ class CondVar
/**************************************************************************//**
* @brief Objects of this type lock the remainder of the enclosing scope.
*
-* @details Declare one of these in a scope and pass a mutex reference and the
-* mutex will be locked for the remainder of the scope. This is a
-* safer way to lock and unlock a mutex, because the mutex will
+* @details Declare one of these in a scope and pass a mutex reference and the
+* mutex will be locked for the remainder of the scope. This is a
+* safer way to lock and unlock a mutex, because the mutex will
* automatically be unlocked when the scope level is exited. This
-* helps prevent an unlocked mutex from occurring during exceptions or
+* helps prevent an unlocked mutex from occurring during exceptions or
* forgotten early function returns.
*
******************************************************************************/
@@ -126,7 +126,7 @@ class ScopedLock
~ScopedLock() { mutex.Unlock(); } //!< Destructor
private:
- //mutable
+ //mutable
Mutex& mutex; //!< The Underlying mutex reference
private: // prevent copy construction and assignment