//===------------- task.h - NVPTX OpenMP tasks support ----------- CUDA -*-===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.txt for details. // //===----------------------------------------------------------------------===// // // Task implementation support. // // explicit task structure uses // omptarget_nvptx task // kmp_task // // where kmp_task is // - klegacy_TaskDescr <- task pointer // shared -> X // routine // part_id // descr // - private (of size given by task_alloc call). Accessed by // task+sizeof(klegacy_TaskDescr) // * private data * // - shared: X. Accessed by shared ptr in klegacy_TaskDescr // * pointer table to shared variables * // - end // //===----------------------------------------------------------------------===// #include "omptarget-nvptx.h" EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc( kmp_Indent *loc, // unused uint32_t global_tid, // unused int32_t flag, // unused (because in our impl, all are immediately exec size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable, kmp_TaskFctPtr taskSub) { PRINT(LD_IO, "call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, " "fct 0x%llx)\n", P64(sizeOfTaskInclPrivate), P64(sizeOfSharedTable), P64(taskSub)); // want task+priv to be a multiple of 8 bytes size_t padForTaskInclPriv = PadBytes(sizeOfTaskInclPrivate, sizeof(void *)); sizeOfTaskInclPrivate += padForTaskInclPriv; size_t kmpSize = sizeOfTaskInclPrivate + sizeOfSharedTable; ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0, "need task descr of size %d to be a multiple of %d\n", sizeof(omptarget_nvptx_TaskDescr), sizeof(void *)); size_t totSize = sizeof(omptarget_nvptx_TaskDescr) + kmpSize; omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = (omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc( totSize, "explicit task descriptor"); kmp_TaskDescr *newKmpTaskDescr = &newExplicitTaskDescr->kmpTaskDescr; ASSERT0(LT_FUSSY, (uint64_t)newKmpTaskDescr == (uint64_t)ADD_BYTES(newExplicitTaskDescr, sizeof(omptarget_nvptx_TaskDescr)), "bad size assumptions"); // init kmp_TaskDescr newKmpTaskDescr->sharedPointerTable = (void *)((char *)newKmpTaskDescr + sizeOfTaskInclPrivate); newKmpTaskDescr->sub = taskSub; newKmpTaskDescr->destructors = NULL; PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n", P64(newKmpTaskDescr), P64(newExplicitTaskDescr)); return newKmpTaskDescr; } EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr) { return __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0, 0); } EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n", P64(newKmpTaskDescr)); // 1. get explict task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES( newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr)); ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr, "bad assumptions"); omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr; ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr, "bad assumptions"); // 2. push new context: update new task descriptor int tid = GetLogicalThreadIdInBlock(); omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid); newTaskDescr->CopyForExplicitTask(parentTaskDescr); // set new task descriptor as top omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, newTaskDescr); // 3. call sub PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n", P64(newKmpTaskDescr->sub), P64(newKmpTaskDescr)); newKmpTaskDescr->sub(0, newKmpTaskDescr); PRINT(LD_TASK, "return from call task sub 0x%llx()\n", P64(newKmpTaskDescr->sub)); // 4. pop context omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, parentTaskDescr); // 5. free SafeFree(newExplicitTaskDescr, "explicit task descriptor"); return 0; } EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr) { PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n", P64(newKmpTaskDescr)); // 1. get explict task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES( newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr)); ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr, "bad assumptions"); omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr; ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr, "bad assumptions"); // 2. push new context: update new task descriptor int tid = GetLogicalThreadIdInBlock(); omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid); newTaskDescr->CopyForExplicitTask(parentTaskDescr); // set new task descriptor as top omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, newTaskDescr); // 3... noting to call... is inline // 4 & 5 ... done in complete } EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr) { PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n", P64(newKmpTaskDescr)); // 1. get explict task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES( newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr)); ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr, "bad assumptions"); omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr; ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr, "bad assumptions"); // 2. get parent omptarget_nvptx_TaskDescr *parentTaskDescr = newTaskDescr->GetPrevTaskDescr(); // 3... noting to call... is inline // 4. pop context int tid = GetLogicalThreadIdInBlock(); omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, parentTaskDescr); // 5. free SafeFree(newExplicitTaskDescr, "explicit task descriptor"); } EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n"); // nothing to do as all our tasks are executed as final } EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n"); // nothing to do as all our tasks are executed as final } EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n"); // nothing to do as all our tasks are executed as final } EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid, int end_part) { PRINT0(LD_IO, "call to __kmpc_taskyield()\n"); // do nothing: tasks are executed immediately, no yielding allowed return 0; } EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_taskwait()\n"); // nothing to do as all our tasks are executed as final return 0; } EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr, int if_val, uint64_t *lb, uint64_t *ub, int64_t st, int nogroup, int32_t sched, uint64_t grainsize, void *task_dup) { // skip task entirely if empty iteration space if (*lb > *ub) return; // the compiler has already stored lb and ub in the kmp_TaskDescr structure // as we are using a single task to execute the entire loop, we can leave // the initial task_t untouched __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0, 0); }