aboutsummaryrefslogtreecommitdiff
path: root/final/libomptarget/deviceRTLs/nvptx/src/state-queuei.h
diff options
context:
space:
mode:
Diffstat (limited to 'final/libomptarget/deviceRTLs/nvptx/src/state-queuei.h')
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/src/state-queuei.h89
1 files changed, 89 insertions, 0 deletions
diff --git a/final/libomptarget/deviceRTLs/nvptx/src/state-queuei.h b/final/libomptarget/deviceRTLs/nvptx/src/state-queuei.h
new file mode 100644
index 0000000..c9ffd54
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/src/state-queuei.h
@@ -0,0 +1,89 @@
+//===------- state-queuei.h - NVPTX OpenMP GPU State Queue ------- CUDA -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of a queue to hand out OpenMP state
+// objects to teams of one or more kernels.
+//
+// Reference:
+// Thomas R.W. Scogland and Wu-chun Feng. 2015.
+// Design and Evaluation of Scalable Concurrent Queues for Many-Core
+// Architectures. International Conference on Performance Engineering.
+//
+//===----------------------------------------------------------------------===//
+
+#include "state-queue.h"
+
+// Reserves the next enqueue ticket.
+//
+// Atomically adds 1 to `tail` and returns whatever atomicAdd reports, which
+// per the CUDA atomics contract is the counter value before the increment.
+template <typename ElementType, uint32_t SIZE>
+INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
+  unsigned int *tailCounter = (unsigned int *)&tail;
+  return atomicAdd(tailCounter, 1);
+}
+
+// Reserves the next dequeue ticket.
+//
+// Atomically adds 1 to `head` and returns whatever atomicAdd reports, which
+// per the CUDA atomics contract is the counter value before the increment.
+template <typename ElementType, uint32_t SIZE>
+INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::DEQUEUE_TICKET() {
+  unsigned int *headCounter = (unsigned int *)&head;
+  return atomicAdd(headCounter, 1);
+}
+
+// Converts a ticket into the base turn id for its slot.
+//
+// The result is `ticket / SIZE` doubled, so it is always even. Dequeue() waits
+// on this value unchanged, whereas Enqueue() waits on this value plus one.
+template <typename ElementType, uint32_t SIZE>
+INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ID(uint32_t ticket) {
+  const uint32_t lap = ticket / SIZE; // how many full sweeps of SIZE tickets
+  return lap * 2;
+}
+
+// Reports whether ids[slot] currently equals `id`.
+//
+// The value is fetched with atomicAdd using an addend of 0, so the stored id
+// is left as it was and only the returned value is used for the comparison.
+template <typename ElementType, uint32_t SIZE>
+INLINE bool omptarget_nvptx_Queue<ElementType, SIZE>::IsServing(uint32_t slot,
+                                                                uint32_t id) {
+  unsigned int currentId = atomicAdd((unsigned int *)&ids[slot], 0);
+  return currentId == id;
+}
+
+// Stores `element` into elementQueue[slot].
+//
+// The pointer is written as an unsigned long long through atomicExch; the
+// value atomicExch returns is discarded.
+template <typename ElementType, uint32_t SIZE>
+INLINE void
+omptarget_nvptx_Queue<ElementType, SIZE>::PushElement(uint32_t slot,
+                                                      ElementType *element) {
+  unsigned long long *cell = (unsigned long long *)&elementQueue[slot];
+  unsigned long long payload = (unsigned long long)element;
+  atomicExch(cell, payload);
+}
+
+// Returns the pointer currently held in elementQueue[slot].
+//
+// Despite the name, the slot is only read: the value is fetched with
+// atomicAdd using an addend of 0, so the stored pointer is not cleared.
+template <typename ElementType, uint32_t SIZE>
+INLINE ElementType *
+omptarget_nvptx_Queue<ElementType, SIZE>::PopElement(uint32_t slot) {
+  unsigned long long *cell = (unsigned long long *)&elementQueue[slot];
+  unsigned long long raw = atomicAdd(cell, (unsigned long long)0);
+  return (ElementType *)raw;
+}
+
+// Hands `slot` over to the next turn.
+//
+// Writes `(id + 1) % MAX_ID` into ids[slot] with atomicExch, where `id` is
+// the turn the caller has just finished.
+//
+// NOTE(review): ids are wrapped modulo MAX_ID here, but ID() does not reduce
+// its result modulo MAX_ID. MAX_ID is declared outside this file; confirm
+// that tickets can never grow large enough for the two to disagree.
+template <typename ElementType, uint32_t SIZE>
+INLINE void omptarget_nvptx_Queue<ElementType, SIZE>::DoneServing(uint32_t slot,
+                                                                  uint32_t id) {
+  const uint32_t nextId = (id + 1) % MAX_ID;
+  atomicExch((unsigned int *)&ids[slot], nextId);
+}
+
+// Places `element` into the queue.
+//
+// Steps, in order:
+//   1. take an enqueue ticket;
+//   2. derive the slot (`ticket % SIZE`) and the turn id (`ID(ticket) + 1`);
+//   3. busy-wait until ids[slot] equals that turn id;
+//   4. store the pointer in the slot;
+//   5. advance ids[slot] via DoneServing().
+template <typename ElementType, uint32_t SIZE>
+INLINE void
+omptarget_nvptx_Queue<ElementType, SIZE>::Enqueue(ElementType *element) {
+  const uint32_t myTicket = ENQUEUE_TICKET();
+  const uint32_t mySlot = myTicket % SIZE;
+  // Enqueue turns are the base id plus one (Dequeue uses the base id itself).
+  const uint32_t myTurn = ID(myTicket) + 1;
+  // Spin until the slot reaches this ticket's turn.
+  while (!IsServing(mySlot, myTurn)) {
+  }
+  PushElement(mySlot, element);
+  DoneServing(mySlot, myTurn);
+}
+
+// Takes an element out of the queue and returns a pointer to it.
+//
+// Steps, in order:
+//   1. take a dequeue ticket;
+//   2. derive the slot (`ticket % SIZE`) and the turn id (`ID(ticket)`);
+//   3. busy-wait until ids[slot] equals that turn id;
+//   4. read the pointer held in the slot, falling back to the address of
+//      elements[slot] when the slot holds a null pointer;
+//   5. advance ids[slot] via DoneServing().
+template <typename ElementType, uint32_t SIZE>
+INLINE ElementType *omptarget_nvptx_Queue<ElementType, SIZE>::Dequeue() {
+  const uint32_t myTicket = DEQUEUE_TICKET();
+  const uint32_t mySlot = myTicket % SIZE;
+  const uint32_t myTurn = ID(myTicket);
+  // Spin until the slot reaches this ticket's turn.
+  while (!IsServing(mySlot, myTurn)) {
+  }
+  ElementType *taken = PopElement(mySlot);
+  // GPU code has no constructors to pre-fill the queue, so a null slot is
+  // populated on demand with the matching entry of the backing array.
+  if (!taken)
+    taken = &elements[mySlot];
+  DoneServing(mySlot, myTurn);
+  return taken;
+}