aboutsummaryrefslogtreecommitdiff
path: root/final/runtime/test
diff options
context:
space:
mode:
Diffstat (limited to 'final/runtime/test')
-rw-r--r--final/runtime/test/CMakeLists.txt37
-rw-r--r--final/runtime/test/api/has_openmp.c23
-rw-r--r--final/runtime/test/api/kmp_aligned_malloc.c62
-rw-r--r--final/runtime/test/api/kmp_set_defaults_lock_bug.c53
-rw-r--r--final/runtime/test/api/omp_get_num_threads.c39
-rw-r--r--final/runtime/test/api/omp_get_wtick.c24
-rw-r--r--final/runtime/test/api/omp_get_wtime.c33
-rw-r--r--final/runtime/test/api/omp_in_parallel.c39
-rw-r--r--final/runtime/test/atomic/omp_atomic.c366
-rw-r--r--final/runtime/test/barrier/omp_barrier.c44
-rw-r--r--final/runtime/test/critical/omp_critical.c37
-rw-r--r--final/runtime/test/env/kmp_aff_disable_hwloc.c21
-rw-r--r--final/runtime/test/env/kmp_set_dispatch_buf.c76
-rw-r--r--final/runtime/test/env/omp_thread_limit.c82
-rw-r--r--final/runtime/test/env/omp_wait_policy.c40
-rw-r--r--final/runtime/test/flush/omp_flush.c45
-rw-r--r--final/runtime/test/lit.cfg130
-rw-r--r--final/runtime/test/lit.site.cfg.in20
-rw-r--r--final/runtime/test/lock/omp_init_lock.c42
-rw-r--r--final/runtime/test/lock/omp_lock.c47
-rw-r--r--final/runtime/test/lock/omp_nest_lock.c45
-rw-r--r--final/runtime/test/lock/omp_test_lock.c47
-rw-r--r--final/runtime/test/lock/omp_test_nest_lock.c47
-rw-r--r--final/runtime/test/master/omp_master.c38
-rw-r--r--final/runtime/test/master/omp_master_3.c44
-rw-r--r--final/runtime/test/misc_bugs/cancellation_for_sections.c64
-rw-r--r--final/runtime/test/misc_bugs/many-microtask-args.c39
-rw-r--r--final/runtime/test/misc_bugs/omp_foreign_thread_team_reuse.c81
-rw-r--r--final/runtime/test/misc_bugs/teams-no-par.c64
-rw-r--r--final/runtime/test/misc_bugs/teams-reduction.c68
-rw-r--r--final/runtime/test/omp_my_sleep.h33
-rw-r--r--final/runtime/test/omp_testsuite.h79
-rwxr-xr-xfinal/runtime/test/ompt/callback.h764
-rw-r--r--final/runtime/test/ompt/cancel/cancel_parallel.c40
-rw-r--r--final/runtime/test/ompt/cancel/cancel_taskgroup.c89
-rw-r--r--final/runtime/test/ompt/cancel/cancel_worksharing.c67
-rw-r--r--final/runtime/test/ompt/loadtool/tool_available/tool_available.c74
-rw-r--r--final/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c104
-rw-r--r--final/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c69
-rw-r--r--final/runtime/test/ompt/misc/api_calls_from_other_thread.cpp92
-rw-r--r--final/runtime/test/ompt/misc/api_calls_misc.c72
-rw-r--r--final/runtime/test/ompt/misc/api_calls_places.c88
-rw-r--r--final/runtime/test/ompt/misc/control_tool.c29
-rw-r--r--final/runtime/test/ompt/misc/control_tool_no_ompt_support.c12
-rw-r--r--final/runtime/test/ompt/misc/idle.c32
-rw-r--r--final/runtime/test/ompt/misc/interoperability.cpp115
-rw-r--r--final/runtime/test/ompt/misc/threads.c34
-rw-r--r--final/runtime/test/ompt/misc/threads_nested.c40
-rw-r--r--final/runtime/test/ompt/misc/unset_callback.c29
-rw-r--r--final/runtime/test/ompt/ompt-signal.h31
-rw-r--r--final/runtime/test/ompt/parallel/dynamic_enough_threads.c43
-rw-r--r--final/runtime/test/ompt/parallel/dynamic_not_enough_threads.c43
-rw-r--r--final/runtime/test/ompt/parallel/max_active_levels_serialized.c73
-rw-r--r--final/runtime/test/ompt/parallel/nested.c298
-rw-r--r--final/runtime/test/ompt/parallel/nested_lwt.c334
-rw-r--r--final/runtime/test/ompt/parallel/nested_serialized.c128
-rw-r--r--final/runtime/test/ompt/parallel/nested_thread_num.c357
-rw-r--r--final/runtime/test/ompt/parallel/no_thread_num_clause.c95
-rw-r--r--final/runtime/test/ompt/parallel/normal.c132
-rw-r--r--final/runtime/test/ompt/parallel/not_enough_threads.c90
-rw-r--r--final/runtime/test/ompt/parallel/parallel_if0.c76
-rw-r--r--final/runtime/test/ompt/parallel/serialized.c77
-rw-r--r--final/runtime/test/ompt/synchronization/barrier/explicit.c58
-rw-r--r--final/runtime/test/ompt/synchronization/barrier/for_loop.c56
-rw-r--r--final/runtime/test/ompt/synchronization/barrier/for_simd.c33
-rw-r--r--final/runtime/test/ompt/synchronization/barrier/implicit_task_data.c150
-rw-r--r--final/runtime/test/ompt/synchronization/barrier/parallel_region.c40
-rw-r--r--final/runtime/test/ompt/synchronization/barrier/sections.c63
-rw-r--r--final/runtime/test/ompt/synchronization/barrier/single.c61
-rw-r--r--final/runtime/test/ompt/synchronization/critical.c32
-rw-r--r--final/runtime/test/ompt/synchronization/flush.c30
-rw-r--r--final/runtime/test/ompt/synchronization/lock.c44
-rw-r--r--final/runtime/test/ompt/synchronization/master.c38
-rw-r--r--final/runtime/test/ompt/synchronization/nest_lock.c52
-rw-r--r--final/runtime/test/ompt/synchronization/ordered.c32
-rw-r--r--final/runtime/test/ompt/synchronization/taskgroup.c49
-rw-r--r--final/runtime/test/ompt/synchronization/taskwait.c36
-rw-r--r--final/runtime/test/ompt/synchronization/test_lock.c54
-rw-r--r--final/runtime/test/ompt/synchronization/test_nest_lock.c42
-rw-r--r--final/runtime/test/ompt/synchronization/test_nest_lock_parallel.c60
-rw-r--r--final/runtime/test/ompt/tasks/dependences.c61
-rw-r--r--final/runtime/test/ompt/tasks/explicit_task.c102
-rw-r--r--final/runtime/test/ompt/tasks/serialized.c154
-rw-r--r--final/runtime/test/ompt/tasks/task_in_joinbarrier.c91
-rw-r--r--final/runtime/test/ompt/tasks/task_types.c222
-rw-r--r--final/runtime/test/ompt/tasks/task_types_serialized.c113
-rw-r--r--final/runtime/test/ompt/tasks/taskloop.c81
-rw-r--r--final/runtime/test/ompt/tasks/taskyield.c62
-rw-r--r--final/runtime/test/ompt/tasks/untied_task.c108
-rw-r--r--final/runtime/test/ompt/worksharing/for/auto.c7
-rw-r--r--final/runtime/test/ompt/worksharing/for/auto_serialized.c7
-rw-r--r--final/runtime/test/ompt/worksharing/for/auto_split.c8
-rw-r--r--final/runtime/test/ompt/worksharing/for/base.h43
-rw-r--r--final/runtime/test/ompt/worksharing/for/base_serialized.h28
-rw-r--r--final/runtime/test/ompt/worksharing/for/base_split.h66
-rw-r--r--final/runtime/test/ompt/worksharing/for/dynamic.c5
-rw-r--r--final/runtime/test/ompt/worksharing/for/dynamic_serialized.c5
-rw-r--r--final/runtime/test/ompt/worksharing/for/dynamic_split.c7
-rw-r--r--final/runtime/test/ompt/worksharing/for/guided.c5
-rw-r--r--final/runtime/test/ompt/worksharing/for/guided_serialized.c5
-rw-r--r--final/runtime/test/ompt/worksharing/for/guided_split.c7
-rw-r--r--final/runtime/test/ompt/worksharing/for/runtime.c5
-rw-r--r--final/runtime/test/ompt/worksharing/for/runtime_serialized.c5
-rw-r--r--final/runtime/test/ompt/worksharing/for/runtime_split.c7
-rw-r--r--final/runtime/test/ompt/worksharing/for/static.c7
-rw-r--r--final/runtime/test/ompt/worksharing/for/static_serialized.c7
-rw-r--r--final/runtime/test/ompt/worksharing/for/static_split.c8
-rw-r--r--final/runtime/test/ompt/worksharing/sections.c36
-rw-r--r--final/runtime/test/ompt/worksharing/single.c36
-rw-r--r--final/runtime/test/parallel/omp_nested.c46
-rw-r--r--final/runtime/test/parallel/omp_parallel_copyin.c47
-rw-r--r--final/runtime/test/parallel/omp_parallel_default.c43
-rw-r--r--final/runtime/test/parallel/omp_parallel_firstprivate.c46
-rw-r--r--final/runtime/test/parallel/omp_parallel_if.c40
-rw-r--r--final/runtime/test/parallel/omp_parallel_num_threads.c46
-rw-r--r--final/runtime/test/parallel/omp_parallel_private.c46
-rw-r--r--final/runtime/test/parallel/omp_parallel_reduction.c254
-rw-r--r--final/runtime/test/parallel/omp_parallel_shared.c46
-rw-r--r--final/runtime/test/tasking/bug_36720.c36
-rw-r--r--final/runtime/test/tasking/bug_nested_proxy_task.c131
-rw-r--r--final/runtime/test/tasking/bug_proxy_task_dep_waiting.c134
-rw-r--r--final/runtime/test/tasking/bug_serial_taskgroup.c16
-rw-r--r--final/runtime/test/tasking/kmp_task_reduction_nest.cpp376
-rw-r--r--final/runtime/test/tasking/kmp_taskloop.c159
-rw-r--r--final/runtime/test/tasking/nested_parallel_tasking.c32
-rw-r--r--final/runtime/test/tasking/nested_task_creation.c35
-rw-r--r--final/runtime/test/tasking/omp_task.c52
-rw-r--r--final/runtime/test/tasking/omp_task_final.c65
-rw-r--r--final/runtime/test/tasking/omp_task_firstprivate.c51
-rw-r--r--final/runtime/test/tasking/omp_task_if.c43
-rw-r--r--final/runtime/test/tasking/omp_task_imp_firstprivate.c47
-rw-r--r--final/runtime/test/tasking/omp_task_priority.c22
-rw-r--r--final/runtime/test/tasking/omp_task_private.c53
-rw-r--r--final/runtime/test/tasking/omp_task_shared.c41
-rw-r--r--final/runtime/test/tasking/omp_taskloop_grainsize.c113
-rw-r--r--final/runtime/test/tasking/omp_taskloop_num_tasks.c71
-rw-r--r--final/runtime/test/tasking/omp_taskwait.c74
-rw-r--r--final/runtime/test/tasking/omp_taskyield.c58
-rw-r--r--final/runtime/test/threadprivate/omp_threadprivate.c102
-rw-r--r--final/runtime/test/threadprivate/omp_threadprivate_for.c48
-rw-r--r--final/runtime/test/worksharing/for/bug_set_schedule_0.c40
-rw-r--r--final/runtime/test/worksharing/for/kmp_doacross_check.c62
-rw-r--r--final/runtime/test/worksharing/for/kmp_sch_simd_guided.c410
-rw-r--r--final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c221
-rw-r--r--final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c196
-rw-r--r--final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c201
-rw-r--r--final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c91
-rw-r--r--final/runtime/test/worksharing/for/omp_doacross.c60
-rw-r--r--final/runtime/test/worksharing/for/omp_for_bigbounds.c70
-rw-r--r--final/runtime/test/worksharing/for/omp_for_collapse.c51
-rw-r--r--final/runtime/test/worksharing/for/omp_for_firstprivate.c55
-rw-r--r--final/runtime/test/worksharing/for/omp_for_lastprivate.c52
-rw-r--r--final/runtime/test/worksharing/for/omp_for_nowait.c77
-rw-r--r--final/runtime/test/worksharing/for/omp_for_ordered.c60
-rw-r--r--final/runtime/test/worksharing/for/omp_for_private.c63
-rw-r--r--final/runtime/test/worksharing/for/omp_for_reduction.c339
-rw-r--r--final/runtime/test/worksharing/for/omp_for_schedule_auto.c69
-rw-r--r--final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c89
-rw-r--r--final/runtime/test/worksharing/for/omp_for_schedule_guided.c217
-rw-r--r--final/runtime/test/worksharing/for/omp_for_schedule_runtime.c82
-rw-r--r--final/runtime/test/worksharing/for/omp_for_schedule_static.c154
-rw-r--r--final/runtime/test/worksharing/for/omp_for_schedule_static_3.c202
-rw-r--r--final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c35
-rw-r--r--final/runtime/test/worksharing/for/omp_parallel_for_if.c42
-rw-r--r--final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c37
-rw-r--r--final/runtime/test/worksharing/for/omp_parallel_for_ordered.c64
-rw-r--r--final/runtime/test/worksharing/for/omp_parallel_for_private.c50
-rw-r--r--final/runtime/test/worksharing/for/omp_parallel_for_reduction.c266
-rw-r--r--final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c54
-rw-r--r--final/runtime/test/worksharing/sections/omp_parallel_sections_lastprivate.c71
-rw-r--r--final/runtime/test/worksharing/sections/omp_parallel_sections_private.c64
-rw-r--r--final/runtime/test/worksharing/sections/omp_parallel_sections_reduction.c508
-rw-r--r--final/runtime/test/worksharing/sections/omp_section_firstprivate.c55
-rw-r--r--final/runtime/test/worksharing/sections/omp_section_lastprivate.c76
-rw-r--r--final/runtime/test/worksharing/sections/omp_section_private.c66
-rw-r--r--final/runtime/test/worksharing/sections/omp_sections_nowait.c104
-rw-r--r--final/runtime/test/worksharing/sections/omp_sections_reduction.c543
-rw-r--r--final/runtime/test/worksharing/single/omp_single.c44
-rw-r--r--final/runtime/test/worksharing/single/omp_single_copyprivate.c60
-rw-r--r--final/runtime/test/worksharing/single/omp_single_nowait.c73
-rw-r--r--final/runtime/test/worksharing/single/omp_single_private.c57
181 files changed, 15167 insertions, 0 deletions
diff --git a/final/runtime/test/CMakeLists.txt b/final/runtime/test/CMakeLists.txt
new file mode 100644
index 0000000..8f367c5
--- /dev/null
+++ b/final/runtime/test/CMakeLists.txt
@@ -0,0 +1,37 @@
+# CMakeLists.txt file for unit testing the OpenMP host runtime library.
+include(CheckFunctionExists)
+include(CheckLibraryExists)
+
+# Some tests use math functions, so check whether libm provides sqrt.
+check_library_exists(m sqrt "" LIBOMP_HAVE_LIBM)
+# When using libgcc, -latomic may be needed for atomics
+# (but when using compiler-rt, the atomics will be built-in).
+# Note: we cannot check for __atomic_load because clang treats it
+# as a special built-in, and that breaks CMake checks.
+check_function_exists(__atomic_load_1 LIBOMP_HAVE_BUILTIN_ATOMIC)
+if(NOT LIBOMP_HAVE_BUILTIN_ATOMIC)
+ check_library_exists(atomic __atomic_load_1 "" LIBOMP_HAVE_LIBATOMIC)
+else()
+ # __atomic_load_1 is available without libatomic, so it is not needed
+ set(LIBOMP_HAVE_LIBATOMIC 0)
+endif()
+
+macro(pythonize_bool var) # overwrite the variable with the literal True or False
+ if (${var})
+ set(${var} True)
+ else()
+ set(${var} False)
+ endif()
+endmacro()
+
+pythonize_bool(LIBOMP_USE_HWLOC)
+pythonize_bool(LIBOMP_OMPT_SUPPORT)
+pythonize_bool(LIBOMP_OMPT_OPTIONAL)
+pythonize_bool(LIBOMP_HAVE_LIBM)
+pythonize_bool(LIBOMP_HAVE_LIBATOMIC)
+
+add_openmp_testsuite(check-libomp "Running libomp tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omp)
+
+# Generate lit.site.cfg from the lit.site.cfg.in template.
+set(AUTO_GEN_COMMENT "## Autogenerated by libomp configuration.\n# Do not edit!")
+configure_file(lit.site.cfg.in lit.site.cfg @ONLY)
diff --git a/final/runtime/test/api/has_openmp.c b/final/runtime/test/api/has_openmp.c
new file mode 100644
index 0000000..da95f59
--- /dev/null
+++ b/final/runtime/test/api/has_openmp.c
@@ -0,0 +1,23 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+int test_has_openmp()
+{
+#ifdef _OPENMP
+  return 1; /* the compiler defines _OPENMP */
+#else
+  return 0; /* _OPENMP is not defined */
+#endif
+}
+
+int main()
+{
+  /* Exit status: 0 when test_has_openmp() returns nonzero, 1 otherwise. */
+  int num_failed = 0;
+  if (!test_has_openmp()) {
+    num_failed++;
+  }
+  return num_failed;
+}
diff --git a/final/runtime/test/api/kmp_aligned_malloc.c b/final/runtime/test/api/kmp_aligned_malloc.c
new file mode 100644
index 0000000..5302fec
--- /dev/null
+++ b/final/runtime/test/api/kmp_aligned_malloc.c
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdint.h>
+#include <omp.h>
+#include "omp_testsuite.h"
+
+int alignments[] = {64, 128, 256, 512, 1024, 2048, 4096};
+
+unsigned aligned_by(uint64_t addr) {
+  /* Largest power of two dividing addr (truncated to unsigned); 0 for addr == 0. */
+  uint64_t alignment = 1;
+  while (addr != 0 && (addr & alignment) == 0) /* addr == 0 would never terminate */
+    alignment <<= 1;
+  return addr ? (unsigned)alignment : 0;
+}
+
+int test_kmp_aligned_malloc()
+{
+  /* Returns 1 when all allocations in all threads behave as expected, else 0. */
+  int err = 0;
+  #pragma omp parallel shared(err)
+  {
+    int i;
+    int* ptr;
+    uint64_t addr;
+    int tid = omp_get_thread_num();
+    for(i = 0; i < (int)(sizeof(alignments)/sizeof(alignments[0])); i++) {
+      int alignment = alignments[i];
+      // allocate 64 bytes with 64-byte alignment, 128 with 128-byte alignment, etc.
+      ptr = (int*)kmp_aligned_malloc(alignment, alignment);
+      addr = (uint64_t)(uintptr_t)ptr;
+      if(addr & (alignment-1)) {
+        printf("thread %d: addr = %p (aligned to %u bytes) but expected "
+               " alignment = %d\n", tid, (void*)ptr, aligned_by(addr), alignment);
+        err = 1;
+      }
+      kmp_free(ptr);
+    }
+
+    ptr = kmp_aligned_malloc(128, 127);
+    if (ptr != NULL) {
+      printf("thread %d: kmp_aligned_malloc() didn't return NULL when "
+             "alignment was not power of 2\n", tid);
+      kmp_free(ptr); // do not leak the unexpectedly successful allocation
+      err = 1;
+    }
+  } /* end of parallel */
+  return !err;
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+  /* the exit status is the number of failed repetitions */
+  while (rep < REPETITIONS) {
+    if (test_kmp_aligned_malloc() == 0)
+      failures++;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/api/kmp_set_defaults_lock_bug.c b/final/runtime/test/api/kmp_set_defaults_lock_bug.c
new file mode 100644
index 0000000..73a7afb
--- /dev/null
+++ b/final/runtime/test/api/kmp_set_defaults_lock_bug.c
@@ -0,0 +1,53 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+/* The bug occurs if the lock table is reallocated after
+ kmp_set_defaults() is called. If the table is reallocated,
+ then the lock will not point to a valid lock object after the
+ kmp_set_defaults() call.*/
+omp_lock_t lock;
+
+int test_kmp_set_defaults_lock_bug()
+{
+  /* Increments a counter under the global lock in two parallel regions that
+     are separated by a kmp_set_defaults() call. */
+  int team_size = -1; /* omp_get_num_threads() seen in the first region */
+  int counted = 0;    /* one increment per thread per region */
+
+  /* first region: count the threads and record the team size */
+  #pragma omp parallel
+  {
+    omp_set_lock(&lock);
+    counted += 1;
+    omp_unset_lock(&lock);
+    #pragma omp single
+    {
+      team_size = omp_get_num_threads ();
+    } /* end of single */
+  } /* end of parallel */
+
+  kmp_set_defaults("OMP_NUM_THREADS");
+  /* second region: the same lock is used again after the call */
+  #pragma omp parallel
+  {
+    omp_set_lock(&lock);
+    counted += 1;
+    omp_unset_lock(&lock);
+  } /* end of parallel */
+  return (2*team_size == counted);
+}
+
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  omp_init_lock(&lock); /* the global lock is shared by all repetitions */
+  for (rep = 0; rep < REPETITIONS; ++rep) {
+    if (test_kmp_set_defaults_lock_bug())
+      continue;
+    failures++;
+  }
+  omp_destroy_lock(&lock);
+  return failures;
+}
diff --git a/final/runtime/test/api/omp_get_num_threads.c b/final/runtime/test/api/omp_get_num_threads.c
new file mode 100644
index 0000000..daf286d
--- /dev/null
+++ b/final/runtime/test/api/omp_get_num_threads.c
@@ -0,0 +1,39 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+int test_omp_get_num_threads()
+{
+  /* Every thread of the region bumps a counter inside a critical section;
+     the total must match what omp_get_num_threads() returns there. */
+  int reported = -1; /* team size according to the runtime */
+  int counted = 0;   /* team size according to the counter */
+
+  #pragma omp parallel
+  {
+    #pragma omp critical
+    {
+      counted += 1;
+    } /* end of critical */
+
+    /* one thread records the runtime's answer */
+    #pragma omp single
+    {
+      reported = omp_get_num_threads ();
+    } /* end of single */
+  } /* end of parallel */
+  return (reported == counted);
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+  /* the exit status is the number of failed repetitions */
+  while (rep < REPETITIONS) {
+    if (test_omp_get_num_threads() == 0)
+      failures++;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/api/omp_get_wtick.c b/final/runtime/test/api/omp_get_wtick.c
new file mode 100644
index 0000000..8b35226
--- /dev/null
+++ b/final/runtime/test/api/omp_get_wtick.c
@@ -0,0 +1,24 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+int test_omp_get_wtick()
+{
+  /* Passes when omp_get_wtick() is strictly between 0 and 0.01. */
+  double resolution = omp_get_wtick ();
+  int positive = (resolution > 0.0);
+  return (positive && (resolution < 0.01));
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+  /* the exit status is the number of failed repetitions */
+  while (rep < REPETITIONS) {
+    if (test_omp_get_wtick() == 0)
+      failures++;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/api/omp_get_wtime.c b/final/runtime/test/api/omp_get_wtime.c
new file mode 100644
index 0000000..b309440
--- /dev/null
+++ b/final/runtime/test/api/omp_get_wtime.c
@@ -0,0 +1,33 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_get_wtime()
+{
+  /* Calls my_sleep(wait_time) between two omp_get_wtime() readings and
+     passes when their difference is within 3% of wait_time. */
+  const double wait_time = 5.0;
+  const double lower = 0.97 * wait_time;
+  const double upper = 1.03 * wait_time;
+  double begin;
+  double elapsed;
+  begin = omp_get_wtime();
+  my_sleep (wait_time);
+  elapsed = omp_get_wtime() - begin;
+  return ((elapsed > lower) && (elapsed < upper));
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+  /* the exit status is the number of failed repetitions */
+  while (rep < REPETITIONS) {
+    if (test_omp_get_wtime() == 0)
+      failures++;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/api/omp_in_parallel.c b/final/runtime/test/api/omp_in_parallel.c
new file mode 100644
index 0000000..d09313e
--- /dev/null
+++ b/final/runtime/test/api/omp_in_parallel.c
@@ -0,0 +1,39 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/*
+ * Checks that false is returned when called from serial region
+ * and true is returned when called within parallel region.
+ */
+int test_omp_in_parallel()
+{
+  /* Query once from the serial part ... */
+  int outside = omp_in_parallel();
+  int inside = 0;
+
+  /* ... and once, from a single thread, inside a parallel region. */
+  #pragma omp parallel
+  {
+    #pragma omp single
+    {
+      inside = omp_in_parallel();
+    }
+  }
+  if (outside)
+    return 0;
+  return (inside != 0);
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+  /* the exit status is the number of failed repetitions */
+  while (rep < REPETITIONS) {
+    if (test_omp_in_parallel() == 0)
+      failures++;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/atomic/omp_atomic.c b/final/runtime/test/atomic/omp_atomic.c
new file mode 100644
index 0000000..7cdd30d
--- /dev/null
+++ b/final/runtime/test/atomic/omp_atomic.c
@@ -0,0 +1,366 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */
+#define MAX_FACTOR 10
+#define KNOWN_PRODUCT 3628800 /* 10! */
+
+int test_omp_atomic()
+{
+ /* Runs one parallel loop per atomic update operator and compares each
+ result with its known value; returns 1 when no check failed, else 0. */
+ int sum;
+ int diff;
+ double dsum = 0;
+ double dt = 0.5; /* base of geometric row for + and - test*/
+ double ddiff;
+ int product;
+ int x;
+ int *logics;
+ int bit_and = 1;
+ int bit_or = 0;
+ int exclusiv_bit_or = 0;
+ int j;
+ int known_sum;
+ int known_diff;
+ int known_product;
+ int result = 0;
+ double dknown_sum;
+ double rounding_error = 1.E-9;
+ double dpt, div;
+ int logicsArray[LOOPCOUNT];
+ logics = logicsArray;
+
+ sum = 0;
+ diff = 0;
+ product = 1;
+
+ // sum of integers test
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ #pragma omp atomic
+ sum += i;
+ }
+
+ }
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ if (known_sum != sum)
+ {
+ fprintf(stderr,
+ "Error in sum with integers: Result was %d instead of %d.\n",
+ sum, known_sum);
+ result++;
+ }
+
+ // difference of integers test
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; i++) {
+ #pragma omp atomic
+ diff -= i;
+ }
+ }
+ known_diff = ((LOOPCOUNT - 1) * LOOPCOUNT) / 2 * -1;
+ if (diff != known_diff)
+ {
+ fprintf (stderr,
+ "Error in difference with integers: Result was %d instead of %d.\n",
+ diff, known_diff);
+ result++;
+ }
+
+ // sum of doubles test
+ dsum = 0;
+ dpt = 1;
+ for (j = 0; j < DOUBLE_DIGITS; ++j) {
+ dpt *= dt;
+ }
+ dknown_sum = (1 - dpt) / (1 -dt);
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < DOUBLE_DIGITS; ++i) {
+ #pragma omp atomic
+ dsum += pow (dt, i);
+ }
+ }
+ if (dsum != dknown_sum && (fabs (dsum - dknown_sum) > rounding_error)) {
+ fprintf (stderr, "Error in sum with doubles: Result was %f"
+ " instead of: %f (Difference: %E)\n",
+ dsum, dknown_sum, dsum - dknown_sum);
+ result++;
+ }
+
+ // difference of doubles test
+ dpt = 1;
+ for (j = 0; j < DOUBLE_DIGITS; ++j) {
+ dpt *= dt;
+ }
+ ddiff = (1 - dpt) / (1 - dt);
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < DOUBLE_DIGITS; ++i) {
+ #pragma omp atomic
+ ddiff -= pow (dt, i);
+ }
+ }
+ if (fabs (ddiff) > rounding_error) {
+ fprintf (stderr,
+ "Error in difference with doubles: Result was %E instead of 0.0\n",
+ ddiff);
+ result++;
+ }
+
+ // product of integers test
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 1; i <= MAX_FACTOR; i++) {
+ #pragma omp atomic
+ product *= i;
+ }
+ }
+ known_product = KNOWN_PRODUCT;
+ if (known_product != product) {
+ fprintf (stderr,
+ "Error in product with integers: Result was %d instead of %d\n",
+ product, known_product);
+ result++;
+ }
+
+ // division of integers test
+ product = KNOWN_PRODUCT;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 1; i <= MAX_FACTOR; ++i) {
+ #pragma omp atomic
+ product /= i; // exact in any order: the value stays a multiple of the remaining factors
+ }
+ }
+ if (product != 1) {
+ fprintf (stderr,
+ "Error in product division with integers: Result was %d"
+ " instead of 1\n",
+ product);
+ result++;
+ }
+
+ // division of doubles test
+ div = 5.0E+5;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 1; i <= MAX_FACTOR; i++) {
+ #pragma omp atomic
+ div /= i;
+ }
+ }
+ if (fabs(div-0.137787) >= 1.0E-4 ) {
+ result++;
+ fprintf (stderr, "Error in division with double: Result was %f"
+ " instead of 0.137787\n", div);
+ }
+
+ // ++ test
+ x = 0;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ #pragma omp atomic
+ x++;
+ }
+ }
+ if (x != LOOPCOUNT) {
+ result++;
+ fprintf (stderr, "Error in ++\n");
+ }
+
+ // -- test
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ #pragma omp atomic
+ x--;
+ }
+ }
+ if (x != 0) {
+ result++;
+ fprintf (stderr, "Error in --\n");
+ }
+
+ // bit-and test part 1
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ logics[j] = 1;
+ }
+ bit_and = 1;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ #pragma omp atomic
+ bit_and &= logics[i];
+ }
+ }
+ if (!bit_and) {
+ result++;
+ fprintf (stderr, "Error in BIT AND part 1\n");
+ }
+
+ // bit-and test part 2
+ bit_and = 1;
+ logics[LOOPCOUNT / 2] = 0;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ #pragma omp atomic
+ bit_and &= logics[i];
+ }
+ }
+ if (bit_and) {
+ result++;
+ fprintf (stderr, "Error in BIT AND part 2\n");
+ }
+
+ // bit-or test part 1
+ for (j = 0; j < LOOPCOUNT; j++) {
+ logics[j] = 0;
+ }
+ bit_or = 0;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ #pragma omp atomic
+ bit_or |= logics[i];
+ }
+ }
+ if (bit_or) {
+ result++;
+ fprintf (stderr, "Error in BIT OR part 1\n");
+ }
+
+ // bit-or test part 2
+ bit_or = 0;
+ logics[LOOPCOUNT / 2] = 1;
+ #pragma omp parallel
+ {
+
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ #pragma omp atomic
+ bit_or |= logics[i];
+ }
+ }
+ if (!bit_or) {
+ result++;
+ fprintf (stderr, "Error in BIT OR part 2\n");
+ }
+
+ // bit-xor test part 1
+ for (j = 0; j < LOOPCOUNT; j++) {
+ logics[j] = 0;
+ }
+ exclusiv_bit_or = 0;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ #pragma omp atomic
+ exclusiv_bit_or ^= logics[i];
+ }
+ }
+ if (exclusiv_bit_or) {
+ result++;
+ fprintf (stderr, "Error in EXCLUSIV BIT OR part 1\n");
+ }
+
+ // bit-xor test part 2
+ exclusiv_bit_or = 0;
+ logics[LOOPCOUNT / 2] = 1;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ #pragma omp atomic
+ exclusiv_bit_or ^= logics[i];
+ }
+
+ }
+ if (!exclusiv_bit_or) {
+ result++;
+ fprintf (stderr, "Error in EXCLUSIV BIT OR part 2\n");
+ }
+
+ // left shift test
+ x = 1;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < 10; ++i) {
+ #pragma omp atomic
+ x <<= 1;
+ }
+
+ }
+ if ( x != 1024) {
+ result++;
+ fprintf (stderr, "Error in <<\n");
+ x = 1024; // reset so the right shift test starts from the expected value
+ }
+
+ // right shift test
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for
+ for (i = 0; i < 10; ++i) {
+ #pragma omp atomic
+ x >>= 1;
+ }
+ }
+ if (x != 1) {
+ result++;
+ fprintf (stderr, "Error in >>\n");
+ }
+
+ return (result == 0);
+} // test_omp_atomic()
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+  /* the exit status is the number of failed repetitions */
+  while (rep < REPETITIONS) {
+    if (test_omp_atomic() == 0)
+      failures++;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/barrier/omp_barrier.c b/final/runtime/test/barrier/omp_barrier.c
new file mode 100644
index 0000000..a3fb060
--- /dev/null
+++ b/final/runtime/test/barrier/omp_barrier.c
@@ -0,0 +1,44 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_barrier()
+{
+  /* Thread 1 sets a flag after sleeping; thread 2 copies the flag after the
+     barrier. The test passes when the copy holds the value thread 1 wrote. */
+  int copied = 0;
+  int flag = 0;
+
+  #pragma omp parallel
+  {
+    int me = omp_get_thread_num ();
+    if (1 == me) {
+      my_sleep(((double)SLEEPTIME)/REPETITIONS); // give 1 sec to whole test
+      flag = 3;
+    }
+    /* all threads meet here before thread 2 reads the flag */
+    #pragma omp barrier
+    if (2 == me) {
+      copied = flag;
+    }
+  }
+  return (3 == copied);
+}
+
+int main()
+{
+  int failures = 0;
+  int rep;
+
+#ifdef _OPENMP
+  omp_set_dynamic(0); // prevent runtime to change number of threads
+  omp_set_num_threads(4); // the test expects at least 3 threads
+  rep = 0;
+  while (rep < REPETITIONS) {
+    failures += !test_omp_barrier();
+    rep++;
+  }
+#endif
+  return failures;
+}
diff --git a/final/runtime/test/critical/omp_critical.c b/final/runtime/test/critical/omp_critical.c
new file mode 100644
index 0000000..e07dbcb
--- /dev/null
+++ b/final/runtime/test/critical/omp_critical.c
@@ -0,0 +1,37 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+int test_omp_critical()
+{
+  /* Each thread adds up its share of 0..999 privately, then merges the
+     partial result into the shared total inside a critical section. */
+  const int expected = 999 * 1000 / 2;
+  int total = 0;
+
+  #pragma omp parallel
+  {
+    int partial = 0;
+    int k;
+    #pragma omp for
+    for (k = 0; k < 1000; k++)
+      partial += k;
+
+    #pragma omp critical
+    total += partial;
+  }
+  return (total == expected);
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+  /* the exit status is the number of failed repetitions */
+  while (rep < REPETITIONS) {
+    if (test_omp_critical() == 0)
+      failures++;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/env/kmp_aff_disable_hwloc.c b/final/runtime/test/env/kmp_aff_disable_hwloc.c
new file mode 100644
index 0000000..5f848ac
--- /dev/null
+++ b/final/runtime/test/env/kmp_aff_disable_hwloc.c
@@ -0,0 +1,21 @@
+// RUN: %libomp-compile && env KMP_AFFINITY=disabled KMP_TOPOLOGY_METHOD=hwloc %libomp-run
+// REQUIRES: hwloc
+#include <stdio.h>
+#include <stdlib.h>
+
+// Without the fix, this test hits an assert(); the RUN line sets the environment it runs in.
+int test_affinity_disabled_plus_hwloc() {
+ #pragma omp parallel
+ {} // an empty parallel region is all the test executes
+ return 1; // always reports success once the region has completed
+}
+
+int main(int argc, char **argv) {
+  /* Exit status: 0 when the test function returns nonzero, 1 otherwise. */
+  int failed = 0;
+
+  if (!test_affinity_disabled_plus_hwloc()) {
+    failed = 1;
+  }
+  return failed;
+}
diff --git a/final/runtime/test/env/kmp_set_dispatch_buf.c b/final/runtime/test/env/kmp_set_dispatch_buf.c
new file mode 100644
index 0000000..49eb7b5
--- /dev/null
+++ b/final/runtime/test/env/kmp_set_dispatch_buf.c
@@ -0,0 +1,76 @@
+// RUN: %libomp-compile && env KMP_DISP_NUM_BUFFERS=0 %libomp-run
+// RUN: env KMP_DISP_NUM_BUFFERS=1 %libomp-run && env KMP_DISP_NUM_BUFFERS=3 %libomp-run
+// RUN: env KMP_DISP_NUM_BUFFERS=4 %libomp-run && env KMP_DISP_NUM_BUFFERS=7 %libomp-run
+// RUN: %libomp-compile -DMY_SCHEDULE=guided && env KMP_DISP_NUM_BUFFERS=1 %libomp-run
+// RUN: env KMP_DISP_NUM_BUFFERS=3 %libomp-run && env KMP_DISP_NUM_BUFFERS=4 %libomp-run
+// RUN: env KMP_DISP_NUM_BUFFERS=7 %libomp-run
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+#define INCR 7
+#define MY_MAX 200
+#define MY_MIN -200
+#define NUM_LOOPS 100
+#ifndef MY_SCHEDULE
+# define MY_SCHEDULE dynamic
+#endif
+
+int a, b, a_known_value, b_known_value;
+
+int test_kmp_set_disp_num_buffers()
+{
+  /* Returns 1 when the global counters a and b reach a_known_value and
+     b_known_value; otherwise prints both pairs and returns 0. */
+  a = 0;
+  b = 0;
+  // run many small MY_SCHEDULE loops to stress the dispatch buffer system
+  #pragma omp parallel
+  {
+    int round, idx;
+    for (round = 0; round < NUM_LOOPS; round++) {
+      #pragma omp for schedule(MY_SCHEDULE) nowait
+      for (idx = MY_MIN; idx < MY_MAX; idx+=INCR) {
+        #pragma omp atomic
+        a++;
+      }
+      #pragma omp for schedule(MY_SCHEDULE) nowait
+      for (idx = MY_MAX; idx >= MY_MIN; idx-=INCR) {
+        #pragma omp atomic
+        b++;
+      }
+    }
+  }
+  // both counters match: success
+  if (a == a_known_value && b == b_known_value)
+    return 1;
+  printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value,
+         b, b_known_value);
+  return 0;
+}
+
+int main(int argc, char** argv)
+{
+  int rep, round, idx;
+  int failures = 0;
+
+  // replay the loops serially to obtain the expected counter values
+  a_known_value = 0;
+  b_known_value = 0;
+  for (round = 0; round < NUM_LOOPS; round++) {
+    for (idx = MY_MIN; idx < MY_MAX; idx+=INCR)
+      a_known_value += 1;
+    for (idx = MY_MAX; idx >= MY_MIN; idx-=INCR)
+      b_known_value += 1;
+  }
+
+  // the exit status is the number of failed repetitions
+  for (rep = 0; rep < REPETITIONS; rep++) {
+    if (test_kmp_set_disp_num_buffers())
+      continue;
+    failures++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/env/omp_thread_limit.c b/final/runtime/test/env/omp_thread_limit.c
new file mode 100644
index 0000000..800edc4
--- /dev/null
+++ b/final/runtime/test/env/omp_thread_limit.c
@@ -0,0 +1,82 @@
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=4 %libomp-run 4
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=7 %libomp-run 7
+//
+// OMP_THREAD_LIMIT=N should imply that no more than N threads are active in
+// a contention group
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+int failed = 0; // set to 1 by verify() on a team-size mismatch; main returns it
+
+// Prints the expected command line to stderr.
+void usage() {
+  fputs("usage: omp_thread_limit <n>\n", stderr);
+}
+
+// Compares the expected team size with omp_get_num_threads() at the call site.
+// On a mismatch it sets the global failure flag and prints a "file:line"
+// label with both values; the report runs inside the named critical section A.
+void verify(const char* file_name, int line_number, int team_size) {
+  const int actual = omp_get_num_threads();
+  if (team_size == actual)
+    return;
+#pragma omp critical(A)
+  {
+    char where[256];
+    snprintf(where, sizeof(where), "%s:%d", file_name, line_number);
+    failed = 1;
+    printf("failed: %s: team_size(%d) != omp_get_num_threads(%d)\n",
+           where, team_size, actual);
+  }
+}
+
+// Checks that the thread limit given as argv[1] (the RUN lines export the same
+// value as OMP_THREAD_LIMIT) bounds the team sizes of plain and nested
+// parallel regions. Returns 1 on a usage error, on a limit mismatch or when a
+// verify() call failed; returns 0 on success or when the test does not apply.
+int main(int argc, char** argv)
+{
+  int cl_thread_limit;
+
+  if (argc != 2) {
+    usage();
+    return 1;
+  }
+  cl_thread_limit = atoi(argv[1]);
+
+  omp_set_dynamic(0);
+  if (omp_get_thread_limit() != cl_thread_limit) {
+    fprintf(stderr, "omp_get_thread_limit failed with %d, should be %d\n",
+            omp_get_thread_limit(), cl_thread_limit);
+    return 1;
+  }
+  else if (omp_get_max_threads() > cl_thread_limit) {
+#if _OPENMP
+    int team_size = cl_thread_limit;
+#else
+    int team_size = 1;
+#endif
+    // Ask for more threads than the limit; outside a parallel region the
+    // team size is expected to be 1.
+    omp_set_num_threads(19);
+    verify(__FILE__, __LINE__, 1);
+#pragma omp parallel
+    {
+      verify(__FILE__, __LINE__, team_size);
+      verify(__FILE__, __LINE__, team_size);
+    }
+    verify(__FILE__, __LINE__, 1);
+
+    // Nested case: the master of a 3-thread team requests 21 threads. The
+    // test expects team_size-2, i.e. the limit minus the two other threads
+    // of the outer team.
+    omp_set_nested(1);
+#pragma omp parallel num_threads(3)
+    {
+      verify(__FILE__, __LINE__, 3);
+#pragma omp master
+#pragma omp parallel num_threads(21)
+      {
+        verify(__FILE__, __LINE__, team_size-2);
+        verify(__FILE__, __LINE__, team_size-2);
+      }
+    }
+    verify(__FILE__, __LINE__, 1);
+
+    return failed;
+  } else {
+    fprintf(stderr, "This test is not applicable for max num_threads='%d'\n",
+            omp_get_max_threads());
+    return 0;
+  }
+
+}
diff --git a/final/runtime/test/env/omp_wait_policy.c b/final/runtime/test/env/omp_wait_policy.c
new file mode 100644
index 0000000..b260ce4
--- /dev/null
+++ b/final/runtime/test/env/omp_wait_policy.c
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile && env OMP_WAIT_POLICY=active %libomp-run active
+// RUN: %libomp-compile && env OMP_WAIT_POLICY=passive %libomp-run passive
+//
+// OMP_WAIT_POLICY=active should imply blocktime == INT_MAX
+// i.e., threads spin-wait forever
+// OMP_WAIT_POLICY=passive should imply blocktime == 0
+// i.e., threads immediately sleep
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+// Prints the expected command line to stderr.
+void usage() {
+  fputs("usage: omp_wait_policy active|passive\n", stderr);
+}
+
+// Compares kmp_get_blocktime() with the value expected for the policy named
+// in argv[1]: INT_MAX for "active", 0 for "passive". Returns 0 on a match and
+// 1 on a mismatch, a wrong argument count or an unknown policy name.
+int main(int argc, char** argv)
+{
+  int blocktime;
+  const char* policy;
+
+  if (argc != 2) {
+    usage();
+    return 1;
+  }
+
+  blocktime = kmp_get_blocktime();
+  policy = argv[1];
+
+  if (strcmp(policy, "active") == 0)
+    return blocktime != INT_MAX;
+  if (strcmp(policy, "passive") == 0)
+    return blocktime != 0;
+
+  usage();
+  return 1;
+}
diff --git a/final/runtime/test/flush/omp_flush.c b/final/runtime/test/flush/omp_flush.c
new file mode 100644
index 0000000..3fd3cdf
--- /dev/null
+++ b/final/runtime/test/flush/omp_flush.c
@@ -0,0 +1,45 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+// Thread 1 stores 3 into result2, flushes it and copies it into dummy.
+// Thread 0 sleeps for SLEEPTIME, flushes and then copies result2 into result1.
+// Returns non-zero only when result1, result2 and dummy all equal 3.
+int test_omp_flush()
+{
+  int result1 = 0;
+  int result2 = 0;
+  // Initialized so the final comparison never reads an indeterminate value
+  // when the team has no thread 1 (e.g. a single-thread team).
+  int dummy = 0;
+
+  #pragma omp parallel
+  {
+    int rank;
+    rank = omp_get_thread_num ();
+    #pragma omp barrier
+    if (rank == 1) {
+      result2 = 3;
+      #pragma omp flush (result2)
+      dummy = result2;
+    }
+    if (rank == 0) {
+      my_sleep(SLEEPTIME);
+      #pragma omp flush (result2)
+      result1 = result2;
+    }
+  } /* end of parallel */
+  return ((result1 == result2) && (result2 == dummy) && (result2 == 3));
+}
+
+// Runs the test REPETITIONS times; the exit status is the failure count.
+int main()
+{
+  int failures = 0;
+  int run = 0;
+
+  while (run < REPETITIONS) {
+    if (test_omp_flush() == 0)
+      ++failures;
+    ++run;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/lit.cfg b/final/runtime/test/lit.cfg
new file mode 100644
index 0000000..e4561eb
--- /dev/null
+++ b/final/runtime/test/lit.cfg
@@ -0,0 +1,130 @@
+# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79:
+# Configuration file for the 'lit' test runner.
+
+import os
+import re
+import subprocess
+import lit.formats
+
+# Tell pylint that we know config and lit_config exist somewhere.
+if 'PYLINT_IMPORT' in os.environ:
+ # Placeholders so static analysis sees both names as defined.
+ config = object()
+ lit_config = object()
+
+def append_dynamic_library_path(path):
+ if config.operating_system == 'Windows':
+ name = 'PATH'
+ sep = ';'
+ elif config.operating_system == 'Darwin':
+ name = 'DYLD_LIBRARY_PATH'
+ sep = ':'
+ else:
+ name = 'LD_LIBRARY_PATH'
+ sep = ':'
+ if name in config.environment:
+ config.environment[name] = path + sep + config.environment[name]
+ else:
+ config.environment[name] = path
+
+# name: The name of this test suite.
+config.name = 'libomp'
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = ['.c', '.cpp']
+
+# test_source_root: The root path where tests are located (the directory of this file).
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root object directory where output is placed (set by the site config).
+config.test_exec_root = config.libomp_obj_root
+
+# test format: lit's shell-test format
+config.test_format = lit.formats.ShTest()
+
+# compiler flags
+config.test_flags = " -I " + config.test_source_root + \
+ " -I " + config.omp_header_directory + \
+ " -L " + config.library_dir + \
+ " " + config.test_extra_flags
+
+# extra libraries
+libs = ""
+if config.has_libm:
+ libs += " -lm"
+if config.has_libatomic:
+ libs += " -latomic"
+
+# Allow XFAIL to work
+config.target_triple = [ ]
+for feature in config.test_compiler_features:
+ config.available_features.add(feature)
+
+# Setup environment to find dynamic library at runtime
+append_dynamic_library_path(config.library_dir)
+if config.using_hwloc:
+ append_dynamic_library_path(config.hwloc_library_dir)
+ config.available_features.add('hwloc')
+
+# Rpath modifications for Darwin
+if config.operating_system == 'Darwin':
+ config.test_flags += " -Wl,-rpath," + config.library_dir
+ if config.using_hwloc:
+ config.test_flags += " -Wl,-rpath," + config.hwloc_library_dir
+
+# Find the SDK on Darwin
+if config.operating_system == 'Darwin':
+ cmd = subprocess.Popen(['xcrun', '--show-sdk-path'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = cmd.communicate()
+ out = out.strip()
+ res = cmd.wait()
+ if res == 0 and out:
+ config.test_flags += " -isysroot " + out
+
+# Disable OMPT tests if FileCheck was not found (test_filecheck is empty)
+if config.has_ompt and config.test_filecheck == "":
+ lit_config.note("Not testing OMPT because FileCheck was not found")
+ config.has_ompt = False
+
+# Advertise the "ompt" feature and add the ompt test directory to the include path
+if config.has_ompt:
+ config.available_features.add("ompt")
+ # for callback.h
+ config.test_flags += " -I " + config.test_source_root + "/ompt"
+
+# Advertise the "linux" feature when the operating system name contains 'Linux'
+if 'Linux' in config.operating_system:
+ config.available_features.add("linux")
+
+# to run with icc INTEL_LICENSE_FILE must be set; forward it to the test environment
+if 'INTEL_LICENSE_FILE' in os.environ:
+ config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE']
+
+
+# substitutions
+# NOTE(review): lit is understood to apply substitutions in list order, so the
+# longer patterns (e.g. %libomp-compile-and-run) are registered before the
+# patterns that are their prefixes (%libomp-compile) - keep this order.
+config.substitutions.append(("%libomp-compile-and-run", \
+ "%libomp-compile && %libomp-run"))
+config.substitutions.append(("%libomp-cxx-compile-and-run", \
+ "%libomp-cxx-compile && %libomp-run"))
+config.substitutions.append(("%libomp-cxx-compile", \
+ "%clangXX %openmp_flags %flags -std=c++11 %s -o %t" + libs))
+config.substitutions.append(("%libomp-compile", \
+ "%clang %openmp_flags %flags %s -o %t" + libs))
+config.substitutions.append(("%libomp-run", "%t"))
+config.substitutions.append(("%clangXX", config.test_cxx_compiler))
+config.substitutions.append(("%clang", config.test_c_compiler))
+config.substitutions.append(("%openmp_flags", config.test_openmp_flags))
+config.substitutions.append(("%flags", config.test_flags))
+
+# Substitutions that are only registered when OMPT testing is enabled
+if config.has_ompt:
+ config.substitutions.append(("FileCheck", config.test_filecheck))
+ config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable"))
+ if config.operating_system == 'Windows':
+ # No such environment variable on Windows.
+ config.substitutions.append(("%preload-tool", "true ||"))
+ config.substitutions.append(("%no-as-needed-flag", "-Wl,--no-as-needed"))
+ elif config.operating_system == 'Darwin':
+ config.substitutions.append(("%preload-tool", "env DYLD_INSERT_LIBRARIES=%T/tool.so"))
+ # No such linker flag on Darwin.
+ config.substitutions.append(("%no-as-needed-flag", ""))
+ else:
+ config.substitutions.append(("%preload-tool", "env LD_PRELOAD=%T/tool.so"))
+ config.substitutions.append(("%no-as-needed-flag", "-Wl,--no-as-needed"))
diff --git a/final/runtime/test/lit.site.cfg.in b/final/runtime/test/lit.site.cfg.in
new file mode 100644
index 0000000..c2825ee
--- /dev/null
+++ b/final/runtime/test/lit.site.cfg.in
@@ -0,0 +1,20 @@
+@AUTO_GEN_COMMENT@
+
+# Template: every @NAME@ placeholder is presumably replaced by the build
+# system's configure step (TODO confirm) before lit reads the file.
+
+# Compilers, compiler features and flags used to build the tests
+config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@"
+config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@"
+config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@
+config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@"
+config.test_openmp_flags = "@OPENMP_TEST_OPENMP_FLAGS@"
+config.test_extra_flags = "@OPENMP_TEST_FLAGS@"
+# Directories and platform information consumed by lit.cfg
+config.libomp_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.library_dir = "@LIBOMP_LIBRARY_DIR@"
+config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src"
+config.operating_system = "@CMAKE_SYSTEM_NAME@"
+config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@"
+# Feature switches; these placeholders are not quoted, so they must expand to
+# Python expressions
+config.using_hwloc = @LIBOMP_USE_HWLOC@
+config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@
+config.has_libm = @LIBOMP_HAVE_LIBM@
+config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@LIBOMP_BASE_DIR@/test/lit.cfg")
diff --git a/final/runtime/test/lock/omp_init_lock.c b/final/runtime/test/lock/omp_init_lock.c
new file mode 100644
index 0000000..24b60d1
--- /dev/null
+++ b/final/runtime/test/lock/omp_init_lock.c
@@ -0,0 +1,42 @@
+// RUN: %libomp-compile-and-run
+#include "omp_testsuite.h"
+#include <stdio.h>
+
+// This should be slightly less than KMP_I_LOCK_CHUNK, which is 1024
+#define LOCKS_PER_ITER 1000
+#define ITERATIONS (REPETITIONS + 1)
+
+// This tests concurrently using locks on one thread while initializing new
+// ones on another thread. This exercises the global lock pool.
+// Each of the ITERATIONS loop iterations initializes its own slice of
+// LOCKS_PER_ITER locks and then repeatedly sets and unsets them. All locks
+// are destroyed after the parallel loop. Always returns 0.
+int test_omp_init_lock() {
+  omp_lock_t lcks[ITERATIONS * LOCKS_PER_ITER];
+  int iter;
+  int k;
+#pragma omp parallel for schedule(static) num_threads(NUM_TASKS)
+  for (iter = 0; iter < ITERATIONS; iter++) {
+    omp_lock_t *slice = lcks + iter * LOCKS_PER_ITER;
+    int j;
+    for (j = 0; j < LOCKS_PER_ITER; j++)
+      omp_init_lock(slice + j);
+    for (j = 0; j < LOCKS_PER_ITER * 100; j++) {
+      omp_lock_t *current = slice + j % LOCKS_PER_ITER;
+      omp_set_lock(current);
+      omp_unset_lock(current);
+    }
+  }
+  // Wait until all repetitions are done. The test is exercising growth of
+  // the global lock pool, which does not shrink when no locks are allocated.
+  for (k = 0; k < ITERATIONS * LOCKS_PER_ITER; k++)
+    omp_destroy_lock(&lcks[k]);
+
+  return 0;
+}
+
+int main() {
+  // Run once only: the test exercises a private global pool which is not
+  // reset between test iterations, so repeating it adds nothing.
+  const int status = test_omp_init_lock();
+  return status;
+}
diff --git a/final/runtime/test/lock/omp_lock.c b/final/runtime/test/lock/omp_lock.c
new file mode 100644
index 0000000..1301f27
--- /dev/null
+++ b/final/runtime/test/lock/omp_lock.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile-and-run
+// RUN: env KMP_LOCK_KIND=tas KMP_SPIN_BACKOFF_PARAMS=2048,200 %libomp-run
+// RUN: env KMP_LOCK_KIND=futex %libomp-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// Lock shared by all threads of the test.
+omp_lock_t lck;
+
+// Every loop iteration acquires the lock with omp_set_lock(), enters and
+// leaves a counted region and adds the resulting occupancy to a running sum.
+// Returns non-zero when that sum stayed 0 and LOOPCOUNT visits were counted.
+int test_omp_lock()
+{
+  int inside = 0;        // threads currently inside the locked region
+  int occupancy_sum = 0; // sum of `inside` sampled after each exit
+  int visits = 0;        // iterations that went through the region
+  int i;
+
+  omp_init_lock(&lck);
+  #pragma omp parallel shared(lck)
+  {
+    #pragma omp for
+    for (i = 0; i < LOOPCOUNT; i++) {
+      omp_set_lock(&lck);
+      #pragma omp flush
+      inside++;
+      #pragma omp flush
+      visits++;
+      inside--;
+      occupancy_sum += inside;
+      omp_unset_lock(&lck);
+    }
+  }
+  omp_destroy_lock(&lck);
+
+  return occupancy_sum == 0 && visits == LOOPCOUNT;
+}
+
+// Runs the test REPETITIONS times; the exit status is the failure count.
+int main()
+{
+  int failures = 0;
+  int run = 0;
+
+  while (run < REPETITIONS) {
+    if (test_omp_lock() == 0)
+      ++failures;
+    ++run;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/lock/omp_nest_lock.c b/final/runtime/test/lock/omp_nest_lock.c
new file mode 100644
index 0000000..33d7c6a
--- /dev/null
+++ b/final/runtime/test/lock/omp_nest_lock.c
@@ -0,0 +1,45 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// Nestable lock shared by all threads of the test.
+omp_nest_lock_t lck;
+
+// Every loop iteration acquires the nestable lock once, enters and leaves a
+// counted region and adds the resulting occupancy to a running sum.
+// Returns non-zero when that sum stayed 0 and LOOPCOUNT visits were counted.
+int test_omp_nest_lock()
+{
+  int inside = 0;        // threads currently inside the locked region
+  int occupancy_sum = 0; // sum of `inside` sampled after each exit
+  int visits = 0;        // iterations that went through the region
+  int i;
+
+  omp_init_nest_lock(&lck);
+  #pragma omp parallel shared(lck)
+  {
+    #pragma omp for
+    for (i = 0; i < LOOPCOUNT; i++) {
+      omp_set_nest_lock(&lck);
+      #pragma omp flush
+      inside++;
+      #pragma omp flush
+      visits++;
+      inside--;
+      occupancy_sum += inside;
+      omp_unset_nest_lock(&lck);
+    }
+  }
+  omp_destroy_nest_lock(&lck);
+
+  return occupancy_sum == 0 && visits == LOOPCOUNT;
+}
+
+// Runs the test REPETITIONS times; the exit status is the failure count.
+int main()
+{
+  int failures = 0;
+  int run = 0;
+
+  while (run < REPETITIONS) {
+    if (test_omp_nest_lock() == 0)
+      ++failures;
+    ++run;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/lock/omp_test_lock.c b/final/runtime/test/lock/omp_test_lock.c
new file mode 100644
index 0000000..c512055
--- /dev/null
+++ b/final/runtime/test/lock/omp_test_lock.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile-and-run
+// RUN: env KMP_LOCK_KIND=tas %libomp-run
+// RUN: env KMP_LOCK_KIND=futex %libomp-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// Lock shared by all threads of the test.
+omp_lock_t lck;
+
+// Like the omp_set_lock() test, but the lock is acquired by spinning on
+// omp_test_lock() until it succeeds. Returns non-zero when the occupancy sum
+// stayed 0 and LOOPCOUNT visits were counted.
+int test_omp_test_lock()
+{
+  int inside = 0;        // threads currently inside the locked region
+  int occupancy_sum = 0; // sum of `inside` sampled after each exit
+  int visits = 0;        // iterations that went through the region
+  int i;
+
+  omp_init_lock(&lck);
+  #pragma omp parallel shared(lck)
+  {
+    #pragma omp for
+    for (i = 0; i < LOOPCOUNT; i++) {
+      while (omp_test_lock(&lck) == 0) {
+        /* spin until the lock is obtained */
+      }
+      #pragma omp flush
+      inside++;
+      #pragma omp flush
+      visits++;
+      inside--;
+      occupancy_sum += inside;
+      omp_unset_lock(&lck);
+    }
+  }
+  omp_destroy_lock(&lck);
+  return occupancy_sum == 0 && visits == LOOPCOUNT;
+}
+
+// Runs the test REPETITIONS times; the exit status is the failure count.
+int main()
+{
+  int failures = 0;
+  int run = 0;
+
+  while (run < REPETITIONS) {
+    if (test_omp_test_lock() == 0)
+      ++failures;
+    ++run;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/lock/omp_test_nest_lock.c b/final/runtime/test/lock/omp_test_nest_lock.c
new file mode 100644
index 0000000..2fa6fd2
--- /dev/null
+++ b/final/runtime/test/lock/omp_test_nest_lock.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// Nestable lock shared by all threads of the test.
+static omp_nest_lock_t lck;
+
+// Acquires the nestable lock by spinning on omp_test_nest_lock(), enters and
+// leaves a counted region and adds the resulting occupancy to a running sum.
+// Returns non-zero when that sum stayed 0 and LOOPCOUNT visits were counted.
+int test_omp_test_nest_lock()
+{
+  int inside = 0;        // threads currently inside the locked region
+  int occupancy_sum = 0; // sum of `inside` sampled after each exit
+  int visits = 0;        // iterations that went through the region
+  int i;
+
+  omp_init_nest_lock(&lck);
+  #pragma omp parallel shared(lck)
+  {
+    #pragma omp for
+    for (i = 0; i < LOOPCOUNT; i++) {
+      while (omp_test_nest_lock(&lck) == 0) {
+        /* spin until the lock is obtained */
+      }
+      #pragma omp flush
+      inside++;
+      #pragma omp flush
+      visits++;
+      inside--;
+      occupancy_sum += inside;
+      omp_unset_nest_lock(&lck);
+    }
+  }
+  omp_destroy_nest_lock(&lck);
+  return occupancy_sum == 0 && visits == LOOPCOUNT;
+}
+
+// Runs the test REPETITIONS times; the exit status is the failure count.
+int main()
+{
+  int failures = 0;
+  int run = 0;
+
+  while (run < REPETITIONS) {
+    if (test_omp_test_nest_lock() == 0)
+      ++failures;
+    ++run;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/master/omp_master.c b/final/runtime/test/master/omp_master.c
new file mode 100644
index 0000000..1cc7f9e
--- /dev/null
+++ b/final/runtime/test/master/omp_master.c
@@ -0,0 +1,38 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// Counts how many threads execute a master region and records the thread
+// number seen inside it. Returns non-zero when exactly one thread entered
+// and it reported thread number 0.
+int test_omp_master()
+{
+  int entries = 0;
+  int who = -1;
+
+  #pragma omp parallel
+  {
+    #pragma omp master
+    {
+      #pragma omp critical
+      {
+        entries++;
+      }
+      who = omp_get_thread_num();
+    } /* end of master*/
+  } /* end of parallel*/
+  return entries == 1 && who == 0;
+}
+
+// Runs the test REPETITIONS times; the exit status is the failure count.
+int main()
+{
+  int failures = 0;
+  int run = 0;
+
+  while (run < REPETITIONS) {
+    if (test_omp_master() == 0)
+      ++failures;
+    ++run;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/master/omp_master_3.c b/final/runtime/test/master/omp_master_3.c
new file mode 100644
index 0000000..2e9fdf8
--- /dev/null
+++ b/final/runtime/test/master/omp_master_3.c
@@ -0,0 +1,44 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// Same check as the plain master test, plus a counter of executions of the
+// master region by a thread whose number is not 0. Returns non-zero when one
+// thread entered, it reported thread number 0 and that counter stayed 0.
+int test_omp_master_3()
+{
+  int entries = 0;
+  int who = -1;
+  int wrong_tid = 0; /* number of wrong thread numbers seen for the master
+                        thread (must be 0) */
+
+  #pragma omp parallel
+  {
+    #pragma omp master
+    {
+      int tid = omp_get_thread_num();
+      if (tid != 0) {
+        #pragma omp critical
+        { wrong_tid++; }
+      }
+      #pragma omp critical
+      {
+        entries++;
+      }
+      who = omp_get_thread_num();
+    } /* end of master*/
+  } /* end of parallel*/
+  return entries == 1 && who == 0 && wrong_tid == 0;
+}
+
+// Runs the test REPETITIONS times; the exit status is the failure count.
+int main()
+{
+  int failures = 0;
+  int run = 0;
+
+  while (run < REPETITIONS) {
+    if (test_omp_master_3() == 0)
+      ++failures;
+    ++run;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/misc_bugs/cancellation_for_sections.c b/final/runtime/test/misc_bugs/cancellation_for_sections.c
new file mode 100644
index 0000000..07a61cb
--- /dev/null
+++ b/final/runtime/test/misc_bugs/cancellation_for_sections.c
@@ -0,0 +1,64 @@
+// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run
+// XFAIL: gcc
+// Clang had a bug until version 4.0.1 which resulted in a hang.
+// UNSUPPORTED: clang-3, clang-4.0.0
+
+// Regression test for a bug in cancellation to cover effect of `#pragma omp cancel`
+// in a loop construct, on sections construct.
+// Pass condition: Cancellation status from `for` does not persist
+// to `sections`.
+
+#include <stdio.h>
+#include <omp.h>
+
+int result[2] = {0, 0}; // [0] is written by the loop, [1] by the section; checked in main
+
+void cq416850_for_sections() { // called by every thread of main's parallel region
+
+ unsigned i;
+ // 1) loop
+ #pragma omp for
+ for (i = 0; i < 1; i++) {
+ result[0] = 1;
+ #pragma omp cancel for
+ result[0] = 2; // must be skipped: main expects result[0] == 1
+ }
+
+// printf("thread %d: result[0] = %d, result[1] = %d \n", omp_get_thread_num(), result[0], result[1]);
+
+
+ // 2) sections
+ #pragma omp sections
+ {
+ #pragma omp section
+ {
+ result[1] = 1;
+ #pragma omp cancellation point sections
+ result[1] = 2; // must still run: main expects result[1] == 2
+ }
+ }
+}
+
+// Runs the loop/sections sequence on 4 threads and checks the recorded
+// values. Returns 2 when cancellation is disabled, 1 on wrong values and 0
+// on success.
+int main(void) {
+  if (omp_get_cancellation() == 0) {
+    printf("Cancellation not enabled!\n");
+    return 2;
+  }
+
+  #pragma omp parallel num_threads(4)
+  {
+    cq416850_for_sections();
+  }
+
+  if (result[0] == 1 && result[1] == 2) {
+    printf("PASSED\n");
+    return 0;
+  }
+
+  printf("Incorrect values. result[0] = %d (expected 1), "
+         "result[1] = %d (expected 2).\n",
+         result[0], result[1]);
+  printf("FAILED\n");
+  return 1;
+}
diff --git a/final/runtime/test/misc_bugs/many-microtask-args.c b/final/runtime/test/misc_bugs/many-microtask-args.c
new file mode 100644
index 0000000..d644515
--- /dev/null
+++ b/final/runtime/test/misc_bugs/many-microtask-args.c
@@ -0,0 +1,39 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+
+int main() // passes 16 firstprivate variables plus a reduction into one parallel for
+{
+
+ int i;
+ int i1 = 0;
+ int i2 = 1;
+ int i3 = 2;
+ int i4 = 3;
+ int i5 = 4;
+ int i6 = 6; // note: 5 is skipped, so i1..i16 add up to 131
+ int i7 = 7;
+ int i8 = 8;
+ int i9 = 9;
+ int i10 = 10;
+ int i11 = 11;
+ int i12 = 12;
+ int i13 = 13;
+ int i14 = 14;
+ int i15 = 15;
+ int i16 = 16;
+
+ int r = 0;
+ #pragma omp parallel for firstprivate(i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16) reduction(+:r)
+ for (i = 0; i < i16; i++) { // 16 iterations, i = 0..15
+ r += i + i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10 + i11 + i12 + i13 + i14 + i15 + i16;
+ }
+
+ int rf = 2216; // expected sum: (0 + ... + 15) + 16 * 131 = 120 + 2096
+ if (r != rf) {
+ fprintf(stderr, "r should be %d but instead equals %d\n", rf, r);
+ return 1; // exit status 1 on a wrong sum
+ }
+
+ return 0;
+}
+
diff --git a/final/runtime/test/misc_bugs/omp_foreign_thread_team_reuse.c b/final/runtime/test/misc_bugs/omp_foreign_thread_team_reuse.c
new file mode 100644
index 0000000..4d70d47
--- /dev/null
+++ b/final/runtime/test/misc_bugs/omp_foreign_thread_team_reuse.c
@@ -0,0 +1,81 @@
+// RUN: %libomp-compile -lpthread && %libomp-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+#define NUM_THREADS 10
+
+/*
+ After hot teams were enabled by default, the library started using levels
+ kept in the team structure. The levels are broken in case foreign thread
+ exits and puts its team into the pool which is then re-used by another foreign
+ thread. The broken behavior observed is when printing the levels for each
+ new team, one gets 1, 2, 1, 2, 1, 2, etc. This makes the library believe that
+ every other team is nested which is incorrect. What is wanted is for the
+ levels to be 1, 1, 1, etc.
+*/
+
+int a = 0; // incremented atomically once per loop iteration in thread_function()
+int level; // omp_get_level() as recorded inside thread_function()'s parallel region
+
+typedef struct thread_arg_t {
+ int iterations; // trip count of the worksharing loop run by the thread
+} thread_arg_t;
+
+// Thread body: opens a parallel region, records omp_get_level() in the
+// global `level` and adds `iterations` (taken from the thread_arg_t passed in
+// `arg`) to the global counter `a`. Always returns NULL.
+void* thread_function(void* arg) {
+  int i;
+  thread_arg_t* targ = (thread_arg_t*)arg;
+  int iterations = targ->iterations;
+  #pragma omp parallel private(i)
+  {
+    // level should always be 1
+    #pragma omp single
+    level = omp_get_level();
+
+    #pragma omp for
+    for(i = 0; i < iterations; i++) {
+      #pragma omp atomic
+      a++;
+    }
+  }
+  // The function is declared to return void*, so return a value explicitly
+  // instead of falling off the end.
+  return NULL;
+}
+
+// Starts NUM_THREADS threads one after another (each is joined before the
+// next one is created) and checks that every one of them observed nesting
+// level 1, and that the global counter `a` ends up at 1 + 2 + ... +
+// NUM_THREADS. Returns 1 on success and 0 on failure.
+int test_omp_team_reuse()
+{
+  pthread_t thread[NUM_THREADS];
+  thread_arg_t thread_arg[NUM_THREADS];
+  int ok = 1;
+  int expected_sum;
+  int t;
+  // launch NUM_THREADS threads, one at a time to perform thread_function()
+  for (t = 0; t < NUM_THREADS; t++) {
+    thread_arg[t].iterations = t + 1;
+    pthread_create(&thread[t], NULL, thread_function, &thread_arg[t]);
+    pthread_join(thread[t], NULL);
+    // level read in thread_function()'s parallel region should be 1
+    if (level != 1) {
+      fprintf(stderr, "error: for pthread %d level should be 1 but "
+              "instead equals %d\n", t, level);
+      ok = 0;
+    }
+  }
+  // make sure the for loop works
+  expected_sum = (NUM_THREADS * (NUM_THREADS+1)) / 2;
+  if (a != expected_sum) {
+    fprintf(stderr, "a should be %d but instead equals %d\n", expected_sum, a);
+    ok = 0;
+  }
+  return ok;
+}
+
+// Runs the test REPETITIONS times, resetting the shared counter before each
+// run; the exit status is the failure count.
+int main()
+{
+  int failures = 0;
+  int run = 0;
+
+  while (run < REPETITIONS) {
+    a = 0;
+    if (test_omp_team_reuse() == 0)
+      ++failures;
+    ++run;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/misc_bugs/teams-no-par.c b/final/runtime/test/misc_bugs/teams-no-par.c
new file mode 100644
index 0000000..0ef8d9a
--- /dev/null
+++ b/final/runtime/test/misc_bugs/teams-no-par.c
@@ -0,0 +1,64 @@
+// RUN: %libomp-compile-and-run
+//
+// The test checks the teams construct pseudocode executed on host
+//
+
+#include <stdio.h>
+#include <omp.h>
+
+#ifndef N_TEAMS
+#define N_TEAMS 4
+#endif
+#ifndef N_THR
+#define N_THR 3
+#endif
+
+static int err = 0; // incremented by foo() on a NULL parameter; main returns it
+
+// Internal library stuff to emulate compiler's code generation:
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Location descriptor handed to the __kmpc_* entry points declared below.
+typedef struct {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char const *psource; // initialized from a string literal, hence const
+} ident_t;
+
+// Dummy location used for every runtime call in this test.
+static ident_t dummy_loc = {0, 2, 0, 0, ";dummyFile;dummyFunc;0;0;;"};
+
+// Runtime entry points called directly by this test.
+int __kmpc_global_thread_num(void *loc);
+void __kmpc_push_num_teams(ident_t const *loc, int global_tid, int num_teams,
+                           int num_threads);
+void __kmpc_fork_teams(ident_t const *loc, int argc, void *microtask, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+// Outlined entry point:
+// Prints the team number and the shared parameter when `nt` is non-NULL.
+// Otherwise reports the bad pointer and increments the error counter.
+void foo(int *gtid, int *tid, int *nt)
+{ // start "serial" execution by master threads of each team
+  if ( nt ) {
+    printf(" team %d, param %d\n", omp_get_team_num(), *nt);
+  } else {
+    // %p requires a void* argument, so cast explicitly
+    printf("ERROR: teams before parallel: gtid, tid: %d %d, bad pointer: %p\n", *gtid, *tid, (void *)nt);
+    err++;
+  }
+}
+
+// Requests N_TEAMS teams of N_THR threads, forks them onto foo() with one
+// shared argument and reports the outcome. The exit status is the error count.
+int main()
+{
+  int nt = 4;
+  int gtid = __kmpc_global_thread_num(NULL); // registers initial thread
+  __kmpc_push_num_teams(&dummy_loc, gtid, N_TEAMS, N_THR);
+  __kmpc_fork_teams(&dummy_loc, 1, &foo, &nt); // pass 1 shared parameter "nt"
+  if (err == 0) {
+    printf("passed\n");
+  } else {
+    printf("failed with %d errors\n",err);
+  }
+  return err;
+}
diff --git a/final/runtime/test/misc_bugs/teams-reduction.c b/final/runtime/test/misc_bugs/teams-reduction.c
new file mode 100644
index 0000000..6d7cd11
--- /dev/null
+++ b/final/runtime/test/misc_bugs/teams-reduction.c
@@ -0,0 +1,68 @@
+// RUN: %libomp-compile-and-run
+//
+// The test checks the teams construct with reduction executed on the host.
+//
+
+#include <stdio.h>
+#include <omp.h>
+
+#include <stdint.h>
+
+#ifndef N_TEAMS
+#define N_TEAMS 4
+#endif
+#ifndef N_THR
+#define N_THR 3
+#endif
+
+// Internal library stuff to emulate compiler's code generation:
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct { // location descriptor passed as first argument to the __kmpc_* calls below
+ int32_t reserved_1;
+ int32_t flags;
+ int32_t reserved_2;
+ int32_t reserved_3;
+ char const *psource;
+} ident_t;
+
+static ident_t dummy_loc = {0, 2, 0, 0, ";dummyFile;dummyFunc;0;0;;"}; // dummy location used for every runtime call
+
+typedef union {
+ // The global will be used as pointer, so we need to make sure that the
+ // compiler correctly aligns the global...
+ void *ptr;
+ int32_t data[8];
+} kmp_critical_name;
+kmp_critical_name crit; // passed as the `lck` argument of __kmpc_reduce/__kmpc_end_reduce
+
+int32_t __kmpc_global_thread_num(ident_t *);
+void __kmpc_push_num_teams(ident_t *, int32_t global_tid, int32_t num_teams,
+ int32_t num_threads);
+void __kmpc_fork_teams(ident_t *, int32_t argc, void *microtask, ...);
+int32_t __kmpc_reduce(ident_t *, int32_t global_tid, int32_t num_vars,
+ size_t reduce_size, void *reduce_data, void *reduce_func,
+ kmp_critical_name *lck);
+void __kmpc_end_reduce(ident_t *, int32_t global_tid, kmp_critical_name *lck);
+
+#ifdef __cplusplus
+}
+#endif
+
+// Outlined entry point:
+// Body run by the forked teams: starts and immediately ends an empty
+// reduction (no variables, no data, no reduce function).
+void outlined(int32_t *gtid, int32_t *tid) {
+  // The result is not needed; the test only checks that the calls return.
+  (void)__kmpc_reduce(&dummy_loc, *gtid, 0, 0, NULL, NULL, &crit);
+  __kmpc_end_reduce(&dummy_loc, *gtid, &crit);
+}
+
+// Requests N_TEAMS teams of N_THR threads and forks them onto outlined()
+// without shared arguments. Reaching the end means the test passed.
+int main() {
+  int32_t gtid = __kmpc_global_thread_num(NULL); // registers initial thread
+
+  __kmpc_push_num_teams(&dummy_loc, gtid, N_TEAMS, N_THR);
+  __kmpc_fork_teams(&dummy_loc, 0, &outlined);
+
+  // Test did not hang -> passed!
+  puts("passed");
+  return 0;
+}
diff --git a/final/runtime/test/omp_my_sleep.h b/final/runtime/test/omp_my_sleep.h
new file mode 100644
index 0000000..138d930
--- /dev/null
+++ b/final/runtime/test/omp_my_sleep.h
@@ -0,0 +1,33 @@
+#ifndef MY_SLEEP_H
+#define MY_SLEEP_H
+
+/*! Utility function to have a sleep function with better resolution and
+ * which only stops one thread. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <time.h>
+
+#if defined(_WIN32)
+# include <windows.h>
+// Windows version of my_sleep() function
+static void my_sleep(double sleeptime) {
+  // Sleep() takes milliseconds; convert from seconds and drop the fraction.
+  const double millis = sleeptime * 1000.0;
+  Sleep((DWORD) millis);
+}
+
+
+#else // _WIN32
+
+// Unices version of my_sleep() function
+static void my_sleep(double sleeptime) {
+  // Split the duration in seconds into whole seconds and nanoseconds.
+  struct timespec ts;
+  ts.tv_sec = (time_t)sleeptime;
+  ts.tv_nsec = (long)((sleeptime - (double)ts.tv_sec) * 1E9);
+  // When a signal interrupts nanosleep() it stores the remaining time in its
+  // second argument; keep sleeping until the whole interval has elapsed.
+  while (nanosleep(&ts, &ts) == -1 && errno == EINTR) {
+  }
+}
+
+#endif // _WIN32
+
+#endif // MY_SLEEP_H
diff --git a/final/runtime/test/omp_testsuite.h b/final/runtime/test/omp_testsuite.h
new file mode 100644
index 0000000..eef5470
--- /dev/null
+++ b/final/runtime/test/omp_testsuite.h
@@ -0,0 +1,79 @@
+/* Global headerfile of the OpenMP Testsuite */
+
+#ifndef OMP_TESTSUITE_H
+#define OMP_TESTSUITE_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+/* General */
+/**********************************************************/
+#define LOOPCOUNT 1000 /* Number of iterations to split amongst threads */
+#define REPETITIONS 10 /* Number of times to run each test */
+
+/* following times are in seconds */
+#define SLEEPTIME 1
+
+/* Definitions for tasks */
+/**********************************************************/
+#define NUM_TASKS 25
+#define MAX_TASKS_PER_THREAD 5
+
+#ifdef _WIN32
+// Windows versions of pthread_create() and pthread_join()
+# include <windows.h>
+typedef HANDLE pthread_t;
+
+// encapsulates the information about a pthread-callable function
+struct thread_func_info_t {
+  void* (*start_routine)(void*); // function the new thread runs
+  void* arg;                     // argument handed to start_routine
+};
+
+// Thread entry point given to CreateThread(): calls the stored
+// void* start_routine(void*) with its argument and frees the descriptor that
+// pthread_create() allocated. WINAPI gives the function the calling
+// convention CreateThread() requires of its start routine.
+static DWORD WINAPI __thread_func_wrapper(LPVOID lpParameter) {
+  struct thread_func_info_t* function_information;
+  function_information = (struct thread_func_info_t*)lpParameter;
+  function_information->start_routine(function_information->arg);
+  free(function_information);
+  return 0;
+}
+
+// attr is ignored
+// Minimal Windows replacement for pthread_create(): starts a thread that
+// runs start_routine(arg) and stores its handle in *thread. attr is ignored.
+// Returns 0 on success; exits the process with status 1 when the allocation
+// or the thread creation fails.
+static int pthread_create(pthread_t *thread, void *attr,
+                          void *(*start_routine) (void *), void *arg) {
+  pthread_t pthread;
+  struct thread_func_info_t* info;
+  // The descriptor is freed by __thread_func_wrapper() in the new thread.
+  info = (struct thread_func_info_t*)malloc(sizeof(struct thread_func_info_t));
+  if (info == NULL) {
+    fprintf(stderr, "malloc() failed in pthread_create().\n");
+    exit(1);
+  }
+  info->start_routine = start_routine;
+  info->arg = arg;
+  pthread = CreateThread(NULL, 0, __thread_func_wrapper, info, 0, NULL);
+  if (pthread == NULL) {
+    // GetLastError() returns a DWORD; print it as unsigned long
+    fprintf(stderr, "CreateThread() failed: Error #%lu.\n",
+            (unsigned long)GetLastError());
+    exit(1);
+  }
+  *thread = pthread;
+  return 0;
+}
+// retval is ignored for now
+// Minimal Windows replacement for pthread_join(): waits for the thread to
+// finish and closes its handle. retval is ignored for now. Returns 0 on
+// success; exits the process with status 1 when waiting or closing fails.
+static int pthread_join(pthread_t thread, void **retval) {
+  // Keep the API results in their own types rather than squeezing them into int.
+  DWORD wait_result;
+  BOOL closed;
+  wait_result = WaitForSingleObject(thread, INFINITE);
+  if (wait_result == WAIT_FAILED) {
+    fprintf(stderr, "WaitForSingleObject() failed: Error #%lu.\n",
+            (unsigned long)GetLastError());
+    exit(1);
+  }
+  closed = CloseHandle(thread);
+  if (!closed) {
+    fprintf(stderr, "CloseHandle() failed: Error #%lu.\n",
+            (unsigned long)GetLastError());
+    exit(1);
+  }
+  return 0;
+}
+#else
+# include <pthread.h>
+#endif
+
+#endif
diff --git a/final/runtime/test/ompt/callback.h b/final/runtime/test/ompt/callback.h
new file mode 100755
index 0000000..f1191ad
--- /dev/null
+++ b/final/runtime/test/ompt/callback.h
@@ -0,0 +1,764 @@
+#ifndef _BSD_SOURCE
+#define _BSD_SOURCE
+#endif
+#define _DEFAULT_SOURCE
+#include <stdio.h>
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+#include <inttypes.h>
+#include <omp.h>
+#include <ompt.h>
+#include "ompt-signal.h"
+
+// Used to detect architecture
+#include "../../src/kmp_platform.h"
+
+// Printable names for thread types; on_ompt_callback_thread_begin() indexes
+// this table directly with the thread type value. Index 0 has no name.
+static const char* ompt_thread_type_t_values[] = {
+ NULL,
+ "ompt_thread_initial",
+ "ompt_thread_worker",
+ "ompt_thread_other"
+};
+
+// Printable names for task statuses; on_ompt_callback_task_schedule() indexes
+// this table directly with the prior task status. Index 0 has no name.
+static const char* ompt_task_status_t_values[] = {
+ NULL,
+ "ompt_task_complete",
+ "ompt_task_yield",
+ "ompt_task_cancel",
+ "ompt_task_others"
+};
+// Printable names for cancel flags. on_ompt_callback_cancel() selects entries
+// 0-3 for the cancelled construct and entries 4-6 for the cancellation state.
+static const char* ompt_cancel_flag_t_values[] = {
+ "ompt_cancel_parallel",
+ "ompt_cancel_sections",
+ "ompt_cancel_do",
+ "ompt_cancel_taskgroup",
+ "ompt_cancel_activated",
+ "ompt_cancel_detected",
+ "ompt_cancel_discarded_task"
+};
+
+// Writes a textual description of the task-type bit mask 'type' to 'buffer'.
+// The names of the set kind bits (initial, implicit, explicit, target) are
+// written first, without a separator; each set modifier bit is then appended
+// with a leading '|'.
+// 'buffer' must be large enough for all names; no bounds checking is done.
+// The result is always NUL-terminated: if no known bit is set, 'buffer' holds
+// the empty string instead of being left uninitialized.
+static void format_task_type(int type, char *buffer) {
+  char *progress = buffer;
+  // Callers print the buffer with %s, so it must be a valid string even when
+  // none of the checks below writes anything.
+  *progress = '\0';
+  if (type & ompt_task_initial)
+    progress += sprintf(progress, "ompt_task_initial");
+  if (type & ompt_task_implicit)
+    progress += sprintf(progress, "ompt_task_implicit");
+  if (type & ompt_task_explicit)
+    progress += sprintf(progress, "ompt_task_explicit");
+  if (type & ompt_task_target)
+    progress += sprintf(progress, "ompt_task_target");
+  if (type & ompt_task_undeferred)
+    progress += sprintf(progress, "|ompt_task_undeferred");
+  if (type & ompt_task_untied)
+    progress += sprintf(progress, "|ompt_task_untied");
+  if (type & ompt_task_final)
+    progress += sprintf(progress, "|ompt_task_final");
+  if (type & ompt_task_mergeable)
+    progress += sprintf(progress, "|ompt_task_mergeable");
+  if (type & ompt_task_merged)
+    progress += sprintf(progress, "|ompt_task_merged");
+}
+
+// OMPT runtime entry points. Each pointer is filled in by ompt_initialize()
+// through the lookup function, using the entry point's name as the key.
+static ompt_set_callback_t ompt_set_callback;
+static ompt_get_callback_t ompt_get_callback;
+static ompt_get_state_t ompt_get_state;
+static ompt_get_task_info_t ompt_get_task_info;
+static ompt_get_thread_data_t ompt_get_thread_data;
+static ompt_get_parallel_info_t ompt_get_parallel_info;
+static ompt_get_unique_id_t ompt_get_unique_id;
+static ompt_get_num_procs_t ompt_get_num_procs;
+static ompt_get_num_places_t ompt_get_num_places;
+static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
+static ompt_get_place_num_t ompt_get_place_num;
+static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
+static ompt_get_proc_id_t ompt_get_proc_id;
+static ompt_enumerate_states_t ompt_enumerate_states;
+static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
+
+// Prints the parallel id, task id, frame addresses, task type and thread
+// number of the task at ancestor 'level', as reported by ompt_get_task_info().
+// Nothing is printed when no frame is reported for that level.
+static void print_ids(int level)
+{
+  // Initialize all outputs: ompt_get_task_info() may leave them untouched when
+  // there is no task at 'level', and they are inspected below regardless.
+  int task_type = 0, thread_num = 0;
+  omp_frame_t *frame = NULL;
+  ompt_data_t *task_parallel_data = NULL;
+  ompt_data_t *task_data = NULL;
+  int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
+                                       &task_parallel_data, &thread_num);
+  char buffer[2048];
+  // Keep the buffer a valid string even if no task-type bit is recognized.
+  buffer[0] = '\0';
+  format_task_type(task_type, buffer);
+  if (frame)
+    printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
+           ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
+           "task_type=%s=%d, thread_num=%d\n",
+           ompt_get_thread_data()->value, level,
+           (exists_task && task_parallel_data) ? task_parallel_data->value : 0,
+           (exists_task && task_data) ? task_data->value : 0,
+           frame->exit_frame, frame->enter_frame, buffer, task_type,
+           thread_num);
+}
+
+#define get_frame_address(level) __builtin_frame_address(level)
+
+#define print_frame(level) \
+ printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", \
+ ompt_get_thread_data()->value, level, get_frame_address(level))
+
+// clang (version 5.0 and above) adds an intermediate function call with debug flag (-g)
+#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
+ #if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
+ #define print_frame_from_outlined_fn(level) print_frame(level+1)
+ #else
+ #define print_frame_from_outlined_fn(level) print_frame(level)
+ #endif
+
+ #if defined(__clang__) && __clang_major__ >= 5
+ #warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
+ #warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
+ #endif
+#endif
+
+// This macro helps to define a label at the current position that can be used
+// to get the current address in the code.
+//
+// For print_current_address():
+// To reliably determine the offset between the address of the label and the
+// actual return address, we insert a NOP instruction as a jump target as the
+// compiler would otherwise insert an instruction that we can't control. The
+// instruction length is target dependent and is explained below.
+//
+// (The empty block between "#pragma omp ..." and the __asm__ statement is a
+// workaround for a bug in the Intel Compiler.)
+#define define_ompt_label(id) \
+ {} \
+ __asm__("nop"); \
+ompt_label_##id:
+
+// This macro helps to get the address of a label that is inserted by the above
+// macro define_ompt_label(). The address is obtained with a GNU extension
+// (&&label) that has been tested with gcc, clang and icc.
+#define get_ompt_label_address(id) (&& ompt_label_##id)
+
+// This macro prints the exact address that a previously called runtime function
+// returns to.
+#define print_current_address(id) \
+ define_ompt_label(id) \
+ print_possible_return_addresses(get_ompt_label_address(id))
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+// On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
+// a MOV instruction for non-void runtime functions which is 3 bytes long.
+#define print_possible_return_addresses(addr) \
+ printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
+ ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
+#elif KMP_ARCH_PPC64
+// On Power the NOP instruction is 4 bytes long. In addition, the compiler
+// inserts an LD instruction which accounts for another 4 bytes. In contrast to
+// X86 this instruction is always there, even for void runtime functions.
+#define print_possible_return_addresses(addr) \
+ printf("%" PRIu64 ": current_address=%p\n", ompt_get_thread_data()->value, \
+ ((char *)addr) - 8)
+#elif KMP_ARCH_AARCH64
+// On AArch64 the NOP instruction is 4 bytes long and can be followed by an
+// inserted store instruction (another 4 bytes long).
+#define print_possible_return_addresses(addr) \
+ printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
+ ((char *)addr) - 4, ((char *)addr) - 8)
+#else
+#error Unsupported target architecture, cannot determine address offset!
+#endif
+
+
+// This macro performs a somewhat similar job to print_current_address(), except
+// that it discards a certain number of nibbles from the address and only prints
+// the most significant bits / nibbles. This can be used for cases where the
+// return address can only be approximated.
+//
+// To account for overflows (ie the most significant bits / nibbles have just
+// changed as we are a few bytes above the relevant power of two) the addresses
+// of the "current" and of the "previous block" are printed.
+#define print_fuzzy_address(id) \
+ define_ompt_label(id) \
+ print_fuzzy_address_blocks(get_ompt_label_address(id))
+
+// If you change this define you need to adapt all capture patterns in the tests
+// to include or discard the new number of nibbles!
+#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
+#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))
+#define print_fuzzy_address_blocks(addr) \
+ printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 \
+ " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n", \
+ ompt_get_thread_data()->value, \
+ ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES - 1, \
+ ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES, \
+ ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 1, \
+ ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 2, addr)
+
+// Handler registered for ompt_callback_mutex_acquire. Prints one
+// "ompt_event_wait_*" line, prefixed with the current thread's id, for lock,
+// nest lock, critical, atomic and ordered mutex kinds. Any other kind prints
+// nothing.
+static void
+on_ompt_callback_mutex_acquire(
+ ompt_mutex_kind_t kind,
+ unsigned int hint,
+ unsigned int impl,
+ omp_wait_id_t wait_id,
+ const void *codeptr_ra)
+{
+ switch(kind)
+ {
+ case ompt_mutex_lock:
+ printf("%" PRIu64 ": ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+ break;
+ case ompt_mutex_nest_lock:
+ printf("%" PRIu64 ": ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+ break;
+ case ompt_mutex_critical:
+ printf("%" PRIu64 ": ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+ break;
+ case ompt_mutex_atomic:
+ printf("%" PRIu64 ": ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+ break;
+ case ompt_mutex_ordered:
+ printf("%" PRIu64 ": ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+ break;
+ default:
+ break;
+ }
+}
+
+// Handler registered for ompt_callback_mutex_acquired. Prints one
+// "ompt_event_acquired_*" line, prefixed with the current thread's id, for
+// lock, nest lock (reported as "nest_lock_first"), critical, atomic and
+// ordered mutex kinds. Any other kind prints nothing.
+static void
+on_ompt_callback_mutex_acquired(
+ ompt_mutex_kind_t kind,
+ omp_wait_id_t wait_id,
+ const void *codeptr_ra)
+{
+ switch(kind)
+ {
+ case ompt_mutex_lock:
+ printf("%" PRIu64 ": ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ case ompt_mutex_nest_lock:
+ printf("%" PRIu64 ": ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ case ompt_mutex_critical:
+ printf("%" PRIu64 ": ompt_event_acquired_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ case ompt_mutex_atomic:
+ printf("%" PRIu64 ": ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ case ompt_mutex_ordered:
+ printf("%" PRIu64 ": ompt_event_acquired_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ default:
+ break;
+ }
+}
+
+// Handler registered for ompt_callback_mutex_released. Prints one
+// "ompt_event_release_*" line, prefixed with the current thread's id, for
+// lock, nest lock (reported as "nest_lock_last"), critical, atomic and ordered
+// mutex kinds. Any other kind prints nothing.
+static void
+on_ompt_callback_mutex_released(
+ ompt_mutex_kind_t kind,
+ omp_wait_id_t wait_id,
+ const void *codeptr_ra)
+{
+ switch(kind)
+ {
+ case ompt_mutex_lock:
+ printf("%" PRIu64 ": ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ case ompt_mutex_nest_lock:
+ printf("%" PRIu64 ": ompt_event_release_nest_lock_last: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ case ompt_mutex_critical:
+ printf("%" PRIu64 ": ompt_event_release_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ case ompt_mutex_atomic:
+ printf("%" PRIu64 ": ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ case ompt_mutex_ordered:
+ printf("%" PRIu64 ": ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+ break;
+ default:
+ break;
+ }
+}
+
+// Handler for ompt_callback_nest_lock. The begin endpoint is logged as
+// "ompt_event_acquired_nest_lock_next", the end endpoint as
+// "ompt_event_release_nest_lock_prev"; both lines carry the wait id and the
+// return address and are prefixed with the current thread's id.
+static void
+on_ompt_callback_nest_lock(
+    ompt_scope_endpoint_t endpoint,
+    omp_wait_id_t wait_id,
+    const void *codeptr_ra)
+{
+  if (endpoint == ompt_scope_begin) {
+    printf("%" PRIu64 ": ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
+           ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  } else if (endpoint == ompt_scope_end) {
+    printf("%" PRIu64 ": ompt_event_release_nest_lock_prev: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
+           ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+}
+
+// Handler registered for ompt_callback_sync_region. Prints a
+// "ompt_event_{barrier,taskwait,taskgroup}_{begin,end}" line with the parallel
+// id, task id and return address, prefixed with the current thread's id.
+// A barrier begin additionally dumps the current task via print_ids(0).
+// For end events parallel_data may be NULL, in which case parallel_id is
+// printed as 0; begin events dereference parallel_data unconditionally.
+static void
+on_ompt_callback_sync_region(
+ ompt_sync_region_kind_t kind,
+ ompt_scope_endpoint_t endpoint,
+ ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ const void *codeptr_ra)
+{
+ switch(endpoint)
+ {
+ case ompt_scope_begin:
+ switch(kind)
+ {
+ case ompt_sync_region_barrier:
+ printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+ print_ids(0);
+ break;
+ case ompt_sync_region_taskwait:
+ printf("%" PRIu64 ": ompt_event_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+ break;
+ case ompt_sync_region_taskgroup:
+ printf("%" PRIu64 ": ompt_event_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+ break;
+ }
+ break;
+ case ompt_scope_end:
+ switch(kind)
+ {
+ case ompt_sync_region_barrier:
+ printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+ break;
+ case ompt_sync_region_taskwait:
+ printf("%" PRIu64 ": ompt_event_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+ break;
+ case ompt_sync_region_taskgroup:
+ printf("%" PRIu64 ": ompt_event_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+ break;
+ }
+ break;
+ }
+}
+
+// Handler registered for ompt_callback_sync_region_wait. Prints a
+// "ompt_event_wait_{barrier,taskwait,taskgroup}_{begin,end}" line with the
+// parallel id, task id and return address, prefixed with the current thread's
+// id. For end events parallel_data may be NULL, in which case parallel_id is
+// printed as 0; begin events dereference parallel_data unconditionally.
+static void
+on_ompt_callback_sync_region_wait(
+ ompt_sync_region_kind_t kind,
+ ompt_scope_endpoint_t endpoint,
+ ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ const void *codeptr_ra)
+{
+ switch(endpoint)
+ {
+ case ompt_scope_begin:
+ switch(kind)
+ {
+ case ompt_sync_region_barrier:
+ printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+ break;
+ case ompt_sync_region_taskwait:
+ printf("%" PRIu64 ": ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+ break;
+ case ompt_sync_region_taskgroup:
+ printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+ break;
+ }
+ break;
+ case ompt_scope_end:
+ switch(kind)
+ {
+ case ompt_sync_region_barrier:
+ printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+ break;
+ case ompt_sync_region_taskwait:
+ printf("%" PRIu64 ": ompt_event_wait_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+ break;
+ case ompt_sync_region_taskgroup:
+ printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+ break;
+ }
+ break;
+ }
+}
+
+// Handler for ompt_callback_flush: logs the return address of the flush,
+// prefixed with the id stored in the thread data passed to the callback.
+static void
+on_ompt_callback_flush(ompt_data_t *thread_data, const void *codeptr_ra)
+{
+  printf("%" PRIu64 ": ompt_event_flush: codeptr_ra=%p\n",
+         thread_data->value,
+         codeptr_ra);
+}
+
+// Handler for ompt_callback_cancel. Decodes 'flags' into the name of the
+// cancelled construct (parallel, sections, do, taskgroup) and the name of the
+// cancellation state (activated, detected, discarded_task), then prints them
+// with the numeric flag value and the return address. When several bits of a
+// group are set, the first match in the order above wins.
+static void
+on_ompt_callback_cancel(
+    ompt_data_t *task_data,
+    int flags,
+    const void *codeptr_ra)
+{
+  // Default both names so that printf never receives an uninitialized pointer
+  // when 'flags' carries none of the known bits of a group.
+  const char *first_flag_value = "unknown";
+  const char *second_flag_value = "unknown";
+  if (flags & ompt_cancel_parallel)
+    first_flag_value = ompt_cancel_flag_t_values[0];
+  else if (flags & ompt_cancel_sections)
+    first_flag_value = ompt_cancel_flag_t_values[1];
+  else if (flags & ompt_cancel_do)
+    first_flag_value = ompt_cancel_flag_t_values[2];
+  else if (flags & ompt_cancel_taskgroup)
+    first_flag_value = ompt_cancel_flag_t_values[3];
+
+  if (flags & ompt_cancel_activated)
+    second_flag_value = ompt_cancel_flag_t_values[4];
+  else if (flags & ompt_cancel_detected)
+    second_flag_value = ompt_cancel_flag_t_values[5];
+  else if (flags & ompt_cancel_discarded_task)
+    second_flag_value = ompt_cancel_flag_t_values[6];
+
+  printf("%" PRIu64 ": ompt_event_cancel: task_data=%" PRIu64 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, task_data->value, first_flag_value, second_flag_value, flags, codeptr_ra);
+}
+
+// Handler for ompt_callback_idle: logs "ompt_event_idle_begin" or
+// "ompt_event_idle_end" for the matching endpoint, prefixed with the current
+// thread's id.
+static void
+on_ompt_callback_idle(ompt_scope_endpoint_t endpoint)
+{
+  if (endpoint == ompt_scope_begin) {
+    printf("%" PRIu64 ": ompt_event_idle_begin:\n",
+           ompt_get_thread_data()->value);
+  } else if (endpoint == ompt_scope_end) {
+    printf("%" PRIu64 ": ompt_event_idle_end:\n",
+           ompt_get_thread_data()->value);
+  }
+}
+
+// Handler for ompt_callback_implicit_task.
+// On begin: warns if the task data is already set, assigns a fresh unique id
+// to it and logs "ompt_event_implicit_task_begin".
+// On end: logs "ompt_event_implicit_task_end"; parallel_id is printed as 0
+// when parallel_data is NULL.
+static void
+on_ompt_callback_implicit_task(
+    ompt_scope_endpoint_t endpoint,
+    ompt_data_t *parallel_data,
+    ompt_data_t *task_data,
+    unsigned int team_size,
+    unsigned int thread_num)
+{
+  if (endpoint == ompt_scope_begin) {
+    if (task_data->ptr) {
+      printf("%s\n", "0: task_data initially not null");
+    }
+    task_data->value = ompt_get_unique_id();
+    printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n",
+           ompt_get_thread_data()->value, parallel_data->value,
+           task_data->value, team_size, thread_num);
+  } else if (endpoint == ompt_scope_end) {
+    printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n",
+           ompt_get_thread_data()->value,
+           (parallel_data) ? parallel_data->value : 0,
+           task_data->value, team_size, thread_num);
+  }
+}
+
+// Handler for ompt_callback_lock_init: logs "ompt_event_init_lock" or
+// "ompt_event_init_nest_lock" with wait id, hint, implementation and return
+// address. Other mutex kinds are ignored.
+static void
+on_ompt_callback_lock_init(
+    ompt_mutex_kind_t kind,
+    unsigned int hint,
+    unsigned int impl,
+    omp_wait_id_t wait_id,
+    const void *codeptr_ra)
+{
+  if (kind == ompt_mutex_lock) {
+    printf("%" PRIu64 ": ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
+           ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+  } else if (kind == ompt_mutex_nest_lock) {
+    printf("%" PRIu64 ": ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
+           ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+  }
+}
+
+// Handler for ompt_callback_lock_destroy: logs "ompt_event_destroy_lock" or
+// "ompt_event_destroy_nest_lock" with wait id and return address. Other mutex
+// kinds are ignored.
+static void
+on_ompt_callback_lock_destroy(
+    ompt_mutex_kind_t kind,
+    omp_wait_id_t wait_id,
+    const void *codeptr_ra)
+{
+  if (kind == ompt_mutex_lock) {
+    printf("%" PRIu64 ": ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
+           ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  } else if (kind == ompt_mutex_nest_lock) {
+    printf("%" PRIu64 ": ompt_event_destroy_nest_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
+           ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+}
+
+// Handler registered for ompt_callback_work. For each worksharing type it
+// prints a "<event>_begin" or "<event>_end" line with the parallel id, task id,
+// return address and count, prefixed with the current thread's id.
+// ompt_work_workshare is accepted but prints nothing.
+// Note that the label of the task id field is not uniform: some lines print
+// it as "parent_task_id" and others as "task_id"; tests match these verbatim.
+static void
+on_ompt_callback_work(
+ ompt_work_type_t wstype,
+ ompt_scope_endpoint_t endpoint,
+ ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ uint64_t count,
+ const void *codeptr_ra)
+{
+ switch(endpoint)
+ {
+ case ompt_scope_begin:
+ switch(wstype)
+ {
+ case ompt_work_loop:
+ printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_sections:
+ printf("%" PRIu64 ": ompt_event_sections_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_single_executor:
+ printf("%" PRIu64 ": ompt_event_single_in_block_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_single_other:
+ printf("%" PRIu64 ": ompt_event_single_others_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_workshare:
+ //impl
+ break;
+ case ompt_work_distribute:
+ printf("%" PRIu64 ": ompt_event_distribute_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_taskloop:
+ //impl
+ printf("%" PRIu64 ": ompt_event_taskloop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ }
+ break;
+ case ompt_scope_end:
+ switch(wstype)
+ {
+ case ompt_work_loop:
+ printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_sections:
+ printf("%" PRIu64 ": ompt_event_sections_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_single_executor:
+ printf("%" PRIu64 ": ompt_event_single_in_block_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_single_other:
+ printf("%" PRIu64 ": ompt_event_single_others_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_workshare:
+ //impl
+ break;
+ case ompt_work_distribute:
+ printf("%" PRIu64 ": ompt_event_distribute_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ case ompt_work_taskloop:
+ //impl
+ printf("%" PRIu64 ": ompt_event_taskloop_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+ break;
+ }
+ break;
+ }
+}
+
+// Handler for ompt_callback_master: logs "ompt_event_master_begin" or
+// "ompt_event_master_end" with parallel id, task id and return address,
+// prefixed with the current thread's id.
+static void
+on_ompt_callback_master(
+    ompt_scope_endpoint_t endpoint,
+    ompt_data_t *parallel_data,
+    ompt_data_t *task_data,
+    const void *codeptr_ra)
+{
+  if (endpoint == ompt_scope_begin) {
+    printf("%" PRIu64 ": ompt_event_master_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
+           ompt_get_thread_data()->value, parallel_data->value,
+           task_data->value, codeptr_ra);
+  } else if (endpoint == ompt_scope_end) {
+    printf("%" PRIu64 ": ompt_event_master_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
+           ompt_get_thread_data()->value, parallel_data->value,
+           task_data->value, codeptr_ra);
+  }
+}
+
+// Handler for ompt_callback_parallel_begin. Warns if the parallel data is
+// already set, stores a fresh unique id in it, and logs the encountering
+// task's id and frame addresses, the new parallel id, the requested team
+// size, the return address and the invoker.
+static void
+on_ompt_callback_parallel_begin(
+    ompt_data_t *encountering_task_data,
+    const omp_frame_t *encountering_task_frame,
+    ompt_data_t* parallel_data,
+    uint32_t requested_team_size,
+    ompt_invoker_t invoker,
+    const void *codeptr_ra)
+{
+  if (parallel_data->ptr) {
+    printf("0: parallel_data initially not null\n");
+  }
+  parallel_data->value = ompt_get_unique_id();
+
+  printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n",
+         ompt_get_thread_data()->value,
+         encountering_task_data->value,
+         encountering_task_frame->exit_frame,
+         encountering_task_frame->enter_frame,
+         parallel_data->value,
+         requested_team_size,
+         codeptr_ra,
+         invoker);
+}
+
+// Handler for ompt_callback_parallel_end: logs the parallel id, the
+// encountering task's id, the invoker and the return address, prefixed with
+// the current thread's id.
+static void
+on_ompt_callback_parallel_end(
+    ompt_data_t *parallel_data,
+    ompt_data_t *encountering_task_data,
+    ompt_invoker_t invoker,
+    const void *codeptr_ra)
+{
+  printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n",
+         ompt_get_thread_data()->value,
+         parallel_data->value,
+         encountering_task_data->value,
+         invoker,
+         codeptr_ra);
+}
+
+// Handler for ompt_callback_task_create. Warns if the new task's data is
+// already set, stores a fresh unique id in it, and logs the parent task's id
+// and frame addresses (0 / NULL when absent), the new task id, the return
+// address, the decoded task type and whether the task has dependences.
+// For an initial task it also assigns an id to the enclosing parallel region.
+static void
+on_ompt_callback_task_create(
+    ompt_data_t *encountering_task_data,
+    const omp_frame_t *encountering_task_frame,
+    ompt_data_t* new_task_data,
+    int type,
+    int has_dependences,
+    const void *codeptr_ra)
+{
+  if(new_task_data->ptr)
+    printf("0: new_task_data initially not null\n");
+  new_task_data->value = ompt_get_unique_id();
+  char buffer[2048];
+
+  // Keep the buffer a valid string even if no task-type bit is recognized.
+  buffer[0] = '\0';
+  format_task_type(type, buffer);
+
+  //there is no parallel_begin callback for implicit parallel region
+  //thus it is initialized in initial task
+  if(type & ompt_task_initial)
+  {
+    // Start from NULL so a failed lookup is detected instead of dereferencing
+    // an uninitialized pointer.
+    ompt_data_t *parallel_data = NULL;
+    ompt_get_parallel_info(0, &parallel_data, NULL);
+    if(parallel_data)
+    {
+      if(parallel_data->ptr)
+        printf("%s\n", "0: parallel_data initially not null");
+      parallel_data->value = ompt_get_unique_id();
+    }
+  }
+
+  printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame : NULL, encountering_task_frame ? encountering_task_frame->enter_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
+}
+
+// Handler for ompt_callback_task_schedule: logs the ids of both tasks and the
+// prior task's status (name and number). When the prior task completed, an
+// additional "ompt_event_task_end" line is printed for it.
+static void
+on_ompt_callback_task_schedule(
+    ompt_data_t *first_task_data,
+    ompt_task_status_t prior_task_status,
+    ompt_data_t *second_task_data)
+{
+  printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
+         ompt_get_thread_data()->value,
+         first_task_data->value,
+         second_task_data->value,
+         ompt_task_status_t_values[prior_task_status],
+         prior_task_status);
+
+  if (prior_task_status != ompt_task_complete)
+    return;
+
+  printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value, first_task_data->value);
+}
+
+// Handler for ompt_callback_task_dependences: logs the task id, the address
+// of the dependence array and the number of dependences.
+static void
+on_ompt_callback_task_dependences(
+    ompt_data_t *task_data,
+    const ompt_task_dependence_t *deps,
+    int ndeps)
+{
+  printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n",
+         ompt_get_thread_data()->value,
+         task_data->value,
+         (void *)deps,
+         ndeps);
+}
+
+// Handler for ompt_callback_task_dependence: logs the ids of the two tasks
+// that form the dependence pair.
+static void
+on_ompt_callback_task_dependence(
+    ompt_data_t *first_task_data,
+    ompt_data_t *second_task_data)
+{
+  printf("%" PRIu64 ": ompt_event_task_dependence_pair: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value,
+         first_task_data->value,
+         second_task_data->value);
+}
+
+// Handler for ompt_callback_thread_begin. Warns if the thread data is already
+// set, stores a fresh unique id in it, and logs the thread type (name and
+// number) together with the new thread id.
+static void
+on_ompt_callback_thread_begin(
+    ompt_thread_type_t thread_type,
+    ompt_data_t *thread_data)
+{
+  if (thread_data->ptr) {
+    printf("%s\n", "0: thread_data initially not null");
+  }
+  thread_data->value = ompt_get_unique_id();
+
+  printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value,
+         ompt_thread_type_t_values[thread_type],
+         thread_type,
+         thread_data->value);
+}
+
+// Handler for ompt_callback_thread_end: logs the id stored in the ending
+// thread's data, prefixed with the current thread's id.
+static void
+on_ompt_callback_thread_end(ompt_data_t *thread_data)
+{
+  printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value,
+         thread_data->value);
+}
+
+// Handler for ompt_callback_control_tool: logs the command, modifier,
+// argument and return address together with the exit/reenter frame addresses
+// of the current task. The frame addresses are printed as NULL when no frame
+// is reported for the current task.
+// Always returns 0 (success).
+static int
+on_ompt_callback_control_tool(
+    uint64_t command,
+    uint64_t modifier,
+    void *arg,
+    const void *codeptr_ra)
+{
+  // Start from NULL so that a failed query is not followed by a dereference of
+  // an uninitialized pointer.
+  omp_frame_t* omptTaskFrame = NULL;
+  ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
+  printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame ? omptTaskFrame->exit_frame : NULL, omptTaskFrame ? omptTaskFrame->enter_frame : NULL);
+  return 0; //success
+}
+
+#define register_callback_t(name, type) \
+do{ \
+ type f_##name = &on_##name; \
+ if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \
+ ompt_set_never) \
+ printf("0: Could not register callback '" #name "'\n"); \
+}while(0)
+
+#define register_callback(name) register_callback_t(name, name##_t)
+
+// Tool initializer handed to the runtime through ompt_start_tool().
+// First resolves every OMPT entry point used by this header through 'lookup',
+// then registers all on_ompt_callback_* handlers. A callback the runtime
+// refuses (ompt_set_never) is reported as "0: Could not register callback".
+// Finally prints how NULL is rendered by %p so tests can capture it.
+// tool_data is unused. Always returns 1 (success).
+int ompt_initialize(
+ ompt_function_lookup_t lookup,
+ ompt_data_t *tool_data)
+{
+ // ompt_set_callback must be resolved before any register_callback() below.
+ ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
+ ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback");
+ ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state");
+ ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
+ ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
+ ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
+ ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
+
+ ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs");
+ ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
+ ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
+ ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
+ ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
+ ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
+ ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
+ ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");
+
+ // register_callback_t() is used where the handler's function pointer type
+ // is not named after the callback itself.
+ register_callback(ompt_callback_mutex_acquire);
+ register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
+ register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
+ register_callback(ompt_callback_nest_lock);
+ register_callback(ompt_callback_sync_region);
+ register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
+ register_callback(ompt_callback_control_tool);
+ register_callback(ompt_callback_flush);
+ register_callback(ompt_callback_cancel);
+ register_callback(ompt_callback_idle);
+ register_callback(ompt_callback_implicit_task);
+ register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
+ register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
+ register_callback(ompt_callback_work);
+ register_callback(ompt_callback_master);
+ register_callback(ompt_callback_parallel_begin);
+ register_callback(ompt_callback_parallel_end);
+ register_callback(ompt_callback_task_create);
+ register_callback(ompt_callback_task_schedule);
+ register_callback(ompt_callback_task_dependences);
+ register_callback(ompt_callback_task_dependence);
+ register_callback(ompt_callback_thread_begin);
+ register_callback(ompt_callback_thread_end);
+ printf("0: NULL_POINTER=%p\n", (void*)NULL);
+ return 1; //success
+}
+
+// Tool finalizer handed to the runtime through ompt_start_tool(): logs the
+// runtime shutdown event. tool_data is not used.
+void
+ompt_finalize(ompt_data_t *tool_data)
+{
+  (void)tool_data;
+  printf("0: ompt_event_runtime_shutdown\n");
+}
+
+// Tool entry point: returns a statically allocated descriptor that points the
+// runtime at ompt_initialize() and ompt_finalize(). Both arguments are
+// ignored.
+ompt_start_tool_result_t *
+ompt_start_tool(unsigned int omp_version, const char *runtime_version)
+{
+  static ompt_start_tool_result_t tool_result = {
+      ompt_initialize,
+      ompt_finalize,
+      0
+  };
+  (void)omp_version;
+  (void)runtime_version;
+  return &tool_result;
+}
diff --git a/final/runtime/test/ompt/cancel/cancel_parallel.c b/final/runtime/test/ompt/cancel/cancel_parallel.c
new file mode 100644
index 0000000..b03239d
--- /dev/null
+++ b/final/runtime/test/ompt/cancel/cancel_parallel.c
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// Current GOMP interface implementation does not support cancellation
+// XFAIL: gcc
+
+#include "callback.h"
+#include "omp.h"
+
+int main() { // Two-thread region: thread 0 cancels the region, the other thread reaches a cancellation point.
+ #pragma omp parallel num_threads(2)
+ {
+ if (omp_get_thread_num() == 0) {
+ print_fuzzy_address_blocks(get_ompt_label_address(1)); // printed address is compared with the cancel event's codeptr_ra by the directives below
+ #pragma omp cancel parallel
+ define_ompt_label(1);
+ // We cannot print at this location because the parallel region is cancelled!
+ } else {
+ delay(100); // NOTE(review): presumably lets thread 0 activate the cancellation first; delay() is declared outside this file — confirm its units
+ print_fuzzy_address_blocks(get_ompt_label_address(2)); // same address check, for the cancellation point of the second thread
+ #pragma omp cancellation point parallel
+ define_ompt_label(2);
+ // We cannot print at this location because the parallel region is cancelled!
+ }
+ }
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_activated=17, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_detected=33, codeptr_ra=[[OTHER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[OTHER_RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/cancel/cancel_taskgroup.c b/final/runtime/test/ompt/cancel/cancel_taskgroup.c
new file mode 100644
index 0000000..803fa97
--- /dev/null
+++ b/final/runtime/test/ompt/cancel/cancel_taskgroup.c
@@ -0,0 +1,89 @@
+// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: clang-3, clang-4.0.0
+// Current GOMP interface implementation does not support cancellation; icc 16 has a bug
+// XFAIL: gcc, icc-16
+
+#include "callback.h"
+#include <unistd.h>
+#include <stdio.h>
+
+int main() // Master thread creates three deferred tasks with cancellation points plus one if(0) task that cancels the taskgroup.
+{
+ int condition=0; // counter shared with the tasks below through OMPT_SIGNAL / OMPT_WAIT
+ #pragma omp parallel num_threads(2)
+ {}
+
+ print_frame(0);
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp master
+ {
+ #pragma omp taskgroup
+ {
+ #pragma omp task shared(condition)
+ {
+ printf("start execute task 1\n");
+ OMPT_SIGNAL(condition);
+ OMPT_WAIT(condition,2); // NOTE(review): presumably blocks until 'condition' reaches 2 — confirm against the macro definition
+ #pragma omp cancellation point taskgroup
+ printf("end execute task 1\n");
+ }
+ #pragma omp task shared(condition)
+ {
+ printf("start execute task 2\n");
+ OMPT_SIGNAL(condition);
+ OMPT_WAIT(condition,2);
+ #pragma omp cancellation point taskgroup
+ printf("end execute task 2\n");
+ }
+ #pragma omp task shared(condition)
+ {
+ printf("start execute task 3\n");
+ OMPT_SIGNAL(condition);
+ OMPT_WAIT(condition,2);
+ #pragma omp cancellation point taskgroup
+ printf("end execute task 3\n");
+ }
+ #pragma omp task if(0) shared(condition)
+ {
+ printf("start execute task 4\n"); // the directives below expect this task to be reported as explicit and undeferred
+ OMPT_WAIT(condition,1); // waits on the counter before issuing the cancel
+ #pragma omp cancel taskgroup
+ printf("end execute task 4\n");
+ }
+ OMPT_SIGNAL(condition); // issued by the master after the cancelling task, still inside the taskgroup
+ }
+ }
+ #pragma omp barrier
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[CANCEL_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_others=4
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[CANCEL_TASK_ID]], flags=ompt_cancel_taskgroup|ompt_cancel_activated=24, codeptr_ra={{0x[0-f]*}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[CANCEL_TASK_ID]], second_task_id=[[PARENT_TASK_ID]], prior_task_status=ompt_task_cancel=3
+
+ // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]]
+ // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_detected=40, codeptr_ra={{0x[0-f]*}}
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/cancel/cancel_worksharing.c b/final/runtime/test/ompt/cancel/cancel_worksharing.c
new file mode 100644
index 0000000..db3b168
--- /dev/null
+++ b/final/runtime/test/ompt/cancel/cancel_worksharing.c
@@ -0,0 +1,67 @@
+// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// Current GOMP interface implementation does not support cancellation; icc 16 does not distinguish between sections and loops
+// XFAIL: gcc, icc-16
+
+#include "callback.h"
+#include <unistd.h>
+
+int main() // Cancels a worksharing loop and then a sections construct, each in its own two-thread region.
+{
+ int condition=0; // one counter reused by both regions: the loop waits for 1, the sections wait for 2
+ #pragma omp parallel num_threads(2)
+ {
+ int x = 0; // only incremented below, never read
+ int i;
+ #pragma omp for
+ for(i = 0; i < 2; i++)
+ {
+ if(i == 0)
+ {
+ x++;
+ OMPT_SIGNAL(condition);
+ #pragma omp cancel for
+ }
+ else
+ {
+ x++;
+ OMPT_WAIT(condition,1);
+ delay(10000); // NOTE(review): presumably gives the other iteration time to activate the cancel — confirm delay() units
+ #pragma omp cancellation point for
+ }
+ }
+ }
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp sections
+ {
+ #pragma omp section
+ {
+ OMPT_SIGNAL(condition);
+ #pragma omp cancel sections
+ }
+ #pragma omp section
+ {
+ OMPT_WAIT(condition,2);
+ delay(10000);
+ #pragma omp cancellation point sections
+ }
+ }
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+
+ // cancel for and sections
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_activated=20, codeptr_ra={{0x[0-f]*}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_activated=18, codeptr_ra={{0x[0-f]*}}
+ // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_detected=36, codeptr_ra={{0x[0-f]*}}
+ // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_detected=34, codeptr_ra={{0x[0-f]*}}
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/loadtool/tool_available/tool_available.c b/final/runtime/test/ompt/loadtool/tool_available/tool_available.c
new file mode 100644
index 0000000..fbbdadd
--- /dev/null
+++ b/final/runtime/test/ompt/loadtool/tool_available/tool_available.c
@@ -0,0 +1,74 @@
+// The OpenMP standard defines 3 ways of providing ompt_start_tool:
+// 1. "statically-linking the tool’s definition of ompt_start_tool into an OpenMP application"
+// RUN: %libomp-compile -DCODE -DTOOL && %libomp-run | FileCheck %s
+
+// Note: We should compile the tool without -fopenmp, as other tool developers
+// would do. Otherwise this test may pass for the wrong reasons on Darwin.
+// RUN: %clang %flags -DTOOL -shared -fPIC %s -o %T/tool.so
+// 2. "introducing a dynamically-linked library that includes the tool’s definition of ompt_start_tool into the application’s address space"
+// 2.1 Link with tool during compilation
+// RUN: %libomp-compile -DCODE %no-as-needed-flag %T/tool.so && %libomp-run | FileCheck %s
+// 2.2 Link with tool during compilation, but AFTER the runtime
+// RUN: %libomp-compile -DCODE -lomp %no-as-needed-flag %T/tool.so && %libomp-run | FileCheck %s
+// 2.3 Inject tool via the dynamic loader
+// RUN: %libomp-compile -DCODE && %preload-tool %libomp-run | FileCheck %s
+
+// 3. "providing the name of a dynamically-linked library appropriate for the architecture and operating system used by the application in the tool-libraries-var ICV"
+// RUN: %libomp-compile -DCODE && env OMP_TOOL_LIBRARIES=%T/tool.so %libomp-run | FileCheck %s
+
+// REQUIRES: ompt
+
+/*
+ * This file contains code for an OMPT shared library tool to be
+ * loaded and the code for the OpenMP executable.
+ * -DTOOL enables the code for the tool during compilation
+ * -DCODE enables the code for the executable during compilation
+ */
+
+#ifdef CODE
+#include "omp.h"
+
+int main() // Application side of the test: runs one empty two-thread parallel region and exits.
+{
+ #pragma omp parallel num_threads(2)
+ {
+ } // body intentionally empty; all expected output is printed by the tool functions in the TOOL section
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+ // CHECK: {{^}}0: ompt_event_runtime_shutdown
+
+ return 0;
+}
+
+#endif /* CODE */
+
+#ifdef TOOL
+
+#include <stdio.h>
+#include <ompt.h>
+
+int ompt_initialize( // Tool initializer: registers no callbacks, only prints the marker line the test matches.
+ ompt_function_lookup_t lookup, // not used
+ ompt_data_t* tool_data) // not used
+{
+ printf("0: NULL_POINTER=%p\n", (void*)NULL); // shows how printf renders a null pointer on this platform
+ return 1; //success
+}
+
+void ompt_finalize(ompt_data_t* tool_data) // Tool finalizer; tool_data is not read.
+{
+ printf("0: ompt_event_runtime_shutdown\n"); // second line that main's directives expect
+}
+
+ompt_start_tool_result_t* ompt_start_tool( // Tool entry point: always returns a valid result, so the tool counts as available.
+ unsigned int omp_version, // not read
+ const char *runtime_version) // not read
+{
+ static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; // static, so the returned pointer stays valid after return
+ return &ompt_start_tool_result;
+}
+#endif /* TOOL */
diff --git a/final/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c b/final/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c
new file mode 100644
index 0000000..a6fe8e9
--- /dev/null
+++ b/final/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c
@@ -0,0 +1,104 @@
+// RUN: %clang %flags -shared -fPIC %s -o %T/first_tool.so
+// RUN: %clang %flags -DTOOL -DSECOND_TOOL -shared -fPIC %s -o %T/second_tool.so
+// RUN: %clang %flags -DTOOL -DTHIRD_TOOL -shared -fPIC %s -o %T/third_tool.so
+// RUN: %libomp-compile -DCODE && env OMP_TOOL_LIBRARIES=%T/non_existing_file.so:%T/first_tool.so:%T/second_tool.so:%T/third_tool.so %libomp-run | FileCheck %s
+
+// REQUIRES: ompt
+
+/*
+ * This file contains code for three OMPT shared library tools to be
+ * loaded and the code for the OpenMP executable.
+ * No option enables code for the first shared library
+ * (without an implementation of ompt_start_tool) during compilation
+ * -DTOOL -DSECOND_TOOL enables the code for the second tool during compilation
+ * -DTOOL -DTHIRD_TOOL enables the code for the third tool during compilation
+ * -DCODE enables the code for the executable during compilation
+ */
+
+#ifdef CODE
+#include "stdio.h"
+#include "omp.h"
+#include "ompt.h"
+
+int main() // Application side: the master thread calls omp_control_tool() once and prints its return value.
+{
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp master
+ {
+ int result = omp_control_tool(omp_control_tool_start, 0, NULL);
+ printf("0: control_tool()=%d\n", result); // directives below expect -1; NOTE(review): presumably because the loaded tool registers no control_tool callback — confirm
+ }
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback
+
+ // CHECK: {{^}}0: Do not initialize tool
+
+ // CHECK: {{^}}0: Do initialize tool
+ // CHECK: {{^}}0: Tool initialized
+ // CHECK: {{^}}0: ompt_event_thread_begin
+ // CHECK-DAG: {{^}}0: ompt_event_thread_begin
+ // CHECK-DAG: {{^}}0: control_tool()=-1
+ // CHECK: {{^}}0: Tool finalized
+
+
+ return 0;
+}
+
+#endif /* CODE */
+
+#ifdef TOOL
+
+#include <ompt.h>
+#include "stdio.h"
+
+#ifdef SECOND_TOOL
+// The second tool has an implementation of ompt_start_tool that returns NULL
+ompt_start_tool_result_t* ompt_start_tool( // Second tool's entry point: announces itself and declines by returning NULL.
+ unsigned int omp_version, // not read
+ const char *runtime_version) // not read
+{
+ printf("0: Do not initialize tool\n"); // first line main's directives expect, ahead of the third tool's output
+ return NULL;
+}
+#elif defined(THIRD_TOOL)
+// The third tool has an implementation of ompt_start_tool that returns a
+// pointer to a valid instance of ompt_start_tool_result_t
+
+static void
+on_ompt_callback_thread_begin( // thread_begin callback registered by ompt_initialize() below
+ ompt_thread_type_t thread_type, // not read
+ ompt_data_t *thread_data) // not read
+{
+ printf("0: ompt_event_thread_begin\n"); // always prefixed with "0:", whichever thread runs the callback
+}
+
+// Initializer of the third tool: looks up ompt_set_callback and registers the
+// thread_begin callback. If the lookup yields NULL or the runtime reports that
+// the callback can never be dispatched, it prints the "Could not register
+// callback" line that this test's negative directive looks for, so that
+// directive is no longer vacuous. Always returns 1, as before.
+int ompt_initialize(
+ ompt_function_lookup_t lookup,
+ ompt_data_t *tool_data)
+{
+ ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
+ // Inspect the registration result instead of discarding it.
+ if (!ompt_set_callback ||
+ ompt_set_callback(ompt_callback_thread_begin, (ompt_callback_t)on_ompt_callback_thread_begin) == ompt_set_never)
+ {
+ printf("0: Could not register callback 'ompt_callback_thread_begin'\n");
+ }
+ printf("0: Tool initialized\n");
+ return 1;
+}
+
+void ompt_finalize(ompt_data_t *tool_data) // Third tool's finalizer; tool_data is not read.
+{
+ printf("0: Tool finalized\n"); // last line main's directives expect
+}
+
+ompt_start_tool_result_t* ompt_start_tool( // Third tool's entry point: announces itself and returns a valid result.
+ unsigned int omp_version, // not read
+ const char *runtime_version) // not read
+{
+ printf("0: Do initialize tool\n");
+ static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; // static, so the returned pointer stays valid after return
+ return &ompt_start_tool_result;
+}
+#endif
+
+#endif /* TOOL */
diff --git a/final/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c b/final/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c
new file mode 100644
index 0000000..b0d3f2b
--- /dev/null
+++ b/final/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c
@@ -0,0 +1,69 @@
+// The OpenMP standard defines 3 ways of providing ompt_start_tool:
+// 1. "statically-linking the tool’s definition of ompt_start_tool into an OpenMP application"
+// RUN: %libomp-compile -DCODE -DTOOL && %libomp-run | FileCheck %s
+
+// Note: We should compile the tool without -fopenmp, as other tool developers
+// would do. Otherwise this test may pass for the wrong reasons on Darwin.
+// RUN: %clang %flags -DTOOL -shared -fPIC %s -o %T/tool.so
+// 2. "introducing a dynamically-linked library that includes the tool’s definition of ompt_start_tool into the application’s address space"
+// 2.1 Link with tool during compilation
+// RUN: %libomp-compile -DCODE %no-as-needed-flag %T/tool.so && %libomp-run | FileCheck %s
+// 2.2 Link with tool during compilation, but AFTER the runtime
+// RUN: %libomp-compile -DCODE -lomp %no-as-needed-flag %T/tool.so && %libomp-run | FileCheck %s
+// 2.3 Inject tool via the dynamic loader
+// RUN: %libomp-compile -DCODE && %preload-tool %libomp-run | FileCheck %s
+
+// 3. "providing the name of a dynamically-linked library appropriate for the architecture and operating system used by the application in the tool-libraries-var ICV"
+// RUN: %libomp-compile -DCODE && env OMP_TOOL_LIBRARIES=%T/tool.so %libomp-run | FileCheck %s
+
+// REQUIRES: ompt
+
+/*
+ * This file contains code for an OMPT shared library tool to be
+ * loaded and the code for the OpenMP executable.
+ * -DTOOL enables the code for the tool during compilation
+ * -DCODE enables the code for the executable during compilation
+ */
+
+#ifdef CODE
+#include "stdio.h"
+#include "omp.h"
+#include "ompt.h"
+
+int main() // Application side: the master thread calls omp_control_tool() once and prints its return value.
+{
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp master
+ {
+ int result = omp_control_tool(omp_control_tool_start, 0, NULL);
+ printf("0: control_tool()=%d\n", result); // directives below expect -2; the only tool in this test returns NULL from ompt_start_tool()
+ }
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback
+
+ // CHECK: {{^}}0: Do not initialize tool
+ // CHECK: {{^}}0: control_tool()=-2
+
+
+ return 0;
+}
+
+#endif /* CODE */
+
+#ifdef TOOL
+
+#include <ompt.h>
+#include "stdio.h"
+
+ompt_start_tool_result_t* ompt_start_tool( // Tool entry point that declines: prints a marker and returns NULL.
+ unsigned int omp_version, // not read
+ const char *runtime_version) // not read
+{
+ printf("0: Do not initialize tool\n"); // first line main's directives expect
+ return NULL;
+}
+#endif /* TOOL */
diff --git a/final/runtime/test/ompt/misc/api_calls_from_other_thread.cpp b/final/runtime/test/ompt/misc/api_calls_from_other_thread.cpp
new file mode 100644
index 0000000..470d7cd
--- /dev/null
+++ b/final/runtime/test/ompt/misc/api_calls_from_other_thread.cpp
@@ -0,0 +1,92 @@
+// RUN: %libomp-cxx-compile-and-run | FileCheck %s
+// REQUIRES: ompt, linux
+
+#include <thread>
+#include "callback.h"
+
+void f() { // Runs on the std::thread started by main() and prints the result of each OMPT inquiry function.
+ ompt_data_t *tdata = ompt_get_thread_data();
+ uint64_t tvalue = tdata ? tdata->value : 0; // falls back to 0 when no thread data is available; used as the prefix of every line below
+
+ printf("%" PRIu64 ": ompt_get_num_places()=%d\n", tvalue,
+ ompt_get_num_places());
+
+ printf("%" PRIu64 ": ompt_get_place_proc_ids()=%d\n", tvalue,
+ ompt_get_place_proc_ids(0, 0, NULL));
+
+ printf("%" PRIu64 ": ompt_get_place_num()=%d\n", tvalue,
+ ompt_get_place_num());
+
+ printf("%" PRIu64 ": ompt_get_partition_place_nums()=%d\n", tvalue,
+ ompt_get_partition_place_nums(0, NULL));
+
+ printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", tvalue, ompt_get_proc_id());
+
+ printf("%" PRIu64 ": ompt_get_num_procs()=%d\n", tvalue,
+ ompt_get_num_procs());
+
+ ompt_callback_t callback; // output slot only; just the return code of ompt_get_callback() is printed
+ printf("%" PRIu64 ": ompt_get_callback()=%d\n", tvalue,
+ ompt_get_callback(ompt_callback_thread_begin, &callback));
+
+ printf("%" PRIu64 ": ompt_get_state()=%d\n", tvalue, ompt_get_state(NULL));
+
+ int state = omp_state_undefined;
+ const char *state_name; // written by the call below, never read here
+ printf("%" PRIu64 ": ompt_enumerate_states()=%d\n", tvalue,
+ ompt_enumerate_states(state, &state, &state_name));
+
+ int impl = ompt_mutex_impl_unknown;
+ const char *impl_name; // written by the call below, never read here
+ printf("%" PRIu64 ": ompt_enumerate_mutex_impls()=%d\n", tvalue,
+ ompt_enumerate_mutex_impls(impl, &impl, &impl_name));
+
+ printf("%" PRIu64 ": ompt_get_thread_data()=%p\n", tvalue,
+ ompt_get_thread_data());
+
+ printf("%" PRIu64 ": ompt_get_parallel_info()=%d\n", tvalue,
+ ompt_get_parallel_info(0, NULL, NULL));
+
+ printf("%" PRIu64 ": ompt_get_task_info()=%d\n", tvalue,
+ ompt_get_task_info(0, NULL, NULL, NULL, NULL, NULL));
+}
+
+int main() { // Runs one single-thread parallel region, then calls f() from a separate std::thread.
+#pragma omp parallel num_threads(1)
+ {}
+
+ std::thread t1(f); // f() executes on a thread created outside of OpenMP
+ t1.join();
+
+ // Check if libomp supports the callbacks for this test.
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_get_num_places()={{[0-9]+}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids()={{[0-9]+}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=-1
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_partition_place_nums()=0
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=-1
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_procs()={{[0-9]+}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_callback()=1
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_state()=0
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_enumerate_states()=1
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_enumerate_mutex_impls()=1
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_thread_data()=[[NULL]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_parallel_info()=0
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_task_info()=0
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/misc/api_calls_misc.c b/final/runtime/test/ompt/misc/api_calls_misc.c
new file mode 100644
index 0000000..d567b1b
--- /dev/null
+++ b/final/runtime/test/ompt/misc/api_calls_misc.c
@@ -0,0 +1,72 @@
+// RUN: %libomp-compile && %libomp-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main() { // Exercises ompt_get_callback, ompt_get_state and the two enumerate functions inside a one-thread region.
+#pragma omp parallel num_threads(1)
+ {
+ // ompt_get_callback()
+ ompt_callback_t callback;
+ ompt_get_callback(ompt_callback_thread_begin, &callback); // return code is ignored; only the stored pointer is printed
+ printf("%" PRIu64 ": &on_ompt_callback_thread_begin=%p\n",
+ ompt_get_thread_data()->value, &on_ompt_callback_thread_begin);
+ printf("%" PRIu64 ": ompt_get_callback() result=%p\n",
+ ompt_get_thread_data()->value, callback); // directives below require this to equal the address printed just above
+
+ // ompt_get_state()
+ printf("%" PRIu64 ": ompt_get_state()=%d\n", ompt_get_thread_data()->value,
+ ompt_get_state(NULL));
+
+ // ompt_enumerate_states()
+ int state = omp_state_undefined; // starting value; each call overwrites it with the next state
+ const char *state_name;
+ int steps = 0;
+ while (ompt_enumerate_states(state, &state, &state_name) && steps < 1000) { // 1000 caps the loop if the enumeration never ends
+ steps++;
+ if (!state_name)
+ printf("%" PRIu64 ": state_name is NULL\n",
+ ompt_get_thread_data()->value);
+ }
+ if (steps >= 1000) {
+ // enumeration did not end after 1000 steps
+ printf("%" PRIu64 ": states enumeration did not end\n",
+ ompt_get_thread_data()->value);
+ }
+
+ // ompt_enumerate_mutex_impls()
+ int impl = ompt_mutex_impl_unknown; // starting value, same scheme as for the states above
+ const char *impl_name;
+ steps = 0;
+ while (ompt_enumerate_mutex_impls(impl, &impl, &impl_name) &&
+ steps < 1000) {
+ steps++;
+ if (!impl_name)
+ printf("%" PRIu64 ": impl_name is NULL\n",
+ ompt_get_thread_data()->value);
+ }
+ if (steps >= 1000) {
+ // enumeration did not end after 1000 steps
+ printf("%" PRIu64 ": mutex_impls enumeration did not end\n",
+ ompt_get_thread_data()->value);
+ }
+ }
+
+ // Check if libomp supports the callbacks for this test.
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: &on_ompt_callback_thread_begin
+ // CHECK-SAME: =[[FUNCTION_POINTER:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_get_callback() result=[[FUNCTION_POINTER]]
+
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_get_state()=1
+
+ // CHECK-NOT: {{^}}[[THREAD_ID]]: state_name is NULL
+ // CHECK-NOT: {{^}}[[THREAD_ID]]: states enumeration did not end
+
+ // CHECK-NOT: {{^}}[[THREAD_ID]]: impl_name is NULL
+ // CHECK-NOT: {{^}}[[THREAD_ID]]: mutex_impls enumeration did not end
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/misc/api_calls_places.c b/final/runtime/test/ompt/misc/api_calls_places.c
new file mode 100644
index 0000000..ad338a7
--- /dev/null
+++ b/final/runtime/test/ompt/misc/api_calls_places.c
@@ -0,0 +1,88 @@
+// RUN: %libomp-compile && env OMP_PLACES=cores %libomp-run | FileCheck %s
+// REQUIRES: ompt, linux
+#include "callback.h"
+#include <omp.h>
+#define __USE_GNU
+#include <sched.h>
+#undef __USE_GNU
+
+// Prints "<thread id>: <function_name>(0)=(a,b,...)" for the first `size`
+// entries of `list`; the thread id is ompt_get_thread_data()->value.
+// Both pointer parameters are const-qualified: callers pass string literals
+// and the list is only read. Output is unchanged for size >= 1; an empty list
+// now prints "()" instead of reading list[0] out of bounds.
+void print_list(const char *function_name, int size, const int list[]) {
+ printf("%" PRIu64 ": %s(0)=(", ompt_get_thread_data()->value, function_name);
+ int i;
+ for (i = 0; i < size; i++) {
+ // separator goes before every element except the first
+ printf("%s%d", i ? "," : "", list[i]);
+ }
+ printf(")\n");
+}
+
+int main() { // Prints each omp_* place/proc query next to its ompt_* counterpart so the directives below can compare them.
+#pragma omp parallel num_threads(1)
+ {
+ printf("%" PRIu64 ": omp_get_num_places()=%d\n",
+ ompt_get_thread_data()->value, omp_get_num_places());
+ printf("%" PRIu64 ": ompt_get_num_places()=%d\n",
+ ompt_get_thread_data()->value, ompt_get_num_places());
+
+ int omp_ids_size = omp_get_place_num_procs(0);
+ int omp_ids[omp_ids_size]; // variable-length array sized by the runtime's answer
+ omp_get_place_proc_ids(0, omp_ids);
+ print_list("omp_get_place_proc_ids", omp_ids_size, omp_ids);
+ int ompt_ids_size = ompt_get_place_proc_ids(0, 0, NULL); // called with an empty buffer; the return value is used as the array length
+ int ompt_ids[ompt_ids_size];
+ ompt_get_place_proc_ids(0, ompt_ids_size, ompt_ids);
+ print_list("ompt_get_place_proc_ids", ompt_ids_size, ompt_ids);
+
+ printf("%" PRIu64 ": omp_get_place_num()=%d\n",
+ ompt_get_thread_data()->value, omp_get_place_num());
+ printf("%" PRIu64 ": ompt_get_place_num()=%d\n",
+ ompt_get_thread_data()->value, ompt_get_place_num());
+
+ int omp_nums_size = omp_get_partition_num_places();
+ int omp_nums[omp_nums_size];
+ omp_get_partition_place_nums(omp_nums);
+ print_list("omp_get_partition_place_nums", omp_nums_size, omp_nums);
+ int ompt_nums_size = ompt_get_partition_place_nums(0, NULL); // same pattern: query the length first, then fill the array
+ int ompt_nums[ompt_nums_size];
+ ompt_get_partition_place_nums(ompt_nums_size, ompt_nums);
+ print_list("ompt_get_partition_place_nums", ompt_nums_size, ompt_nums);
+
+ printf("%" PRIu64 ": sched_getcpu()=%d\n", ompt_get_thread_data()->value,
+ sched_getcpu()); // directives below require ompt_get_proc_id() to report the same CPU
+ printf("%" PRIu64 ": ompt_get_proc_id()=%d\n",
+ ompt_get_thread_data()->value, ompt_get_proc_id());
+
+ printf("%" PRIu64 ": omp_get_num_procs()=%d\n",
+ ompt_get_thread_data()->value, omp_get_num_procs());
+ printf("%" PRIu64 ": ompt_get_num_procs()=%d\n",
+ ompt_get_thread_data()->value, ompt_get_num_procs());
+ }
+
+ // Check if libomp supports the callbacks for this test.
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: omp_get_num_places
+ // CHECK-SAME: ()=[[NUM_PLACES:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_places()=[[NUM_PLACES]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_proc_ids
+ // CHECK-SAME: (0)=([[PROC_IDS:[0-9\,]+]])
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids(0)=([[PROC_IDS]])
+
+ // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_num()=[[PLACE_NUM:[-]?[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=[[PLACE_NUM]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: omp_get_partition_place_nums
+ // CHECK-SAME: (0)=([[PARTITION_PLACE_NUMS:[0-9\,]+]])
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_partition_place_nums
+ // CHECK-SAME: (0)=([[PARTITION_PLACE_NUMS]])
+
+ // CHECK: {{^}}[[MASTER_ID]]: sched_getcpu()=[[CPU_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=[[CPU_ID]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: omp_get_num_procs()=[[NUM_PROCS:[-]?[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_procs()=[[NUM_PROCS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/misc/control_tool.c b/final/runtime/test/ompt/misc/control_tool.c
new file mode 100644
index 0000000..2c59666
--- /dev/null
+++ b/final/runtime/test/ompt/misc/control_tool.c
@@ -0,0 +1,29 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
+#include "callback.h"
+#include <omp.h>
+
+int main() // Issues one omp_control_tool() call and checks the arguments and frames reported by the control_tool callback.
+{
+ #pragma omp parallel num_threads(1)
+ {
+ print_frame_from_outlined_fn(1); // directives below match this value against current_task_frame.exit
+ print_frame(0); // ...and this one against current_task_frame.reenter
+ omp_control_tool(omp_control_tool_flush, 1, NULL); // expected to be reported as command=3, modifier=1 with a null arg
+ print_current_address(0); // must stay directly after the call: the directives compare it with the callback's codeptr_ra
+ }
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_control_tool'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address({{.}})=[[EXIT_FRAME:0x[0-f]*]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/misc/control_tool_no_ompt_support.c b/final/runtime/test/ompt/misc/control_tool_no_ompt_support.c
new file mode 100644
index 0000000..ee64da0
--- /dev/null
+++ b/final/runtime/test/ompt/misc/control_tool_no_ompt_support.c
@@ -0,0 +1,12 @@
+// RUN: %libomp-compile-and-run
+#include <omp.h>
+
+int main() // Calls omp_control_tool() with no tool code in this file; the test has no output checks and only needs to run to completion.
+{
+ #pragma omp parallel num_threads(1)
+ {
+ omp_control_tool(omp_control_tool_flush, 1, NULL); // return value intentionally ignored
+ }
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/misc/idle.c b/final/runtime/test/ompt/misc/idle.c
new file mode 100644
index 0000000..7413c32
--- /dev/null
+++ b/final/runtime/test/ompt/misc/idle.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main() // Runs a three-thread and then a two-thread region and expects idle begin/end events from some thread.
+{
+ int x = 0; // incremented atomically by every thread of both regions, printed at the end
+ #pragma omp parallel num_threads(3)
+ {
+ #pragma omp atomic
+ x++;
+ }
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp atomic
+ x++;
+ }
+
+
+ printf("x=%d\n", x);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_idle_begin:
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_idle_end:
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/misc/interoperability.cpp b/final/runtime/test/ompt/misc/interoperability.cpp
new file mode 100644
index 0000000..102e6de
--- /dev/null
+++ b/final/runtime/test/ompt/misc/interoperability.cpp
@@ -0,0 +1,115 @@
+// RUN: %libomp-cxx-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+
+#include <iostream>
+#include <thread>
+#include <alloca.h>
+
+#include "callback.h"
+#include "omp.h"
+
+int condition = 0;
+
+void f() { // Body of each std::thread started by main(); both instances share the global 'condition'.
+ // Call OpenMP API function to force initialization of OMPT.
+ // (omp_get_thread_num() does not work because it just returns 0 if the
+ // runtime isn't initialized yet...)
+ omp_get_num_threads();
+
+ // Call alloca() to force availability of frame pointer
+ void *p = alloca(0); // p itself is never read
+
+ OMPT_SIGNAL(condition);
+ // Wait for both initial threads to arrive that will eventually become the
+ // master threads in the following parallel region.
+ OMPT_WAIT(condition, 2);
+
+#pragma omp parallel num_threads(2)
+ {
+ // Wait for all threads to arrive so that no worker thread can be reused...
+ OMPT_SIGNAL(condition);
+ OMPT_WAIT(condition, 6); // 6 = 2 signals before the regions + 2 regions x 2 threads, assuming each OMPT_SIGNAL counts once
+ }
+}
+
+int main() { // Starts two std::threads that both run f(), then waits for them; returns 0 implicitly.
+ std::thread t1(f);
+ std::thread t2(f);
+ t1.join();
+ t2.join();
+}
+
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+
+// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+// first master thread
+// CHECK: {{^}}[[MASTER_ID_1:[0-9]+]]: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID_1]]
+
+// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_task_create: parent_task_id=0
+// CHECK-SAME: parent_task_frame.exit=[[NULL]]
+// CHECK-SAME: parent_task_frame.reenter=[[NULL]]
+// CHECK-SAME: new_task_id=[[PARENT_TASK_ID_1:[0-9]+]]
+// CHECK-SAME: codeptr_ra=[[NULL]], task_type=ompt_task_initial=1
+// CHECK-SAME: has_dependences=no
+
+// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID_1]]
+// CHECK-SAME: parent_task_frame.exit=[[NULL]]
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+// CHECK-SAME: parallel_id=[[PARALLEL_ID_1:[0-9]+]], requested_team_size=2
+// CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, invoker={{.*}}
+
+// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_parallel_end:
+// CHECK-SAME: parallel_id=[[PARALLEL_ID_1]], task_id=[[PARENT_TASK_ID_1]]
+// CHECK-SAME: invoker={{[0-9]+}}
+
+// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[MASTER_ID_1]]
+
+// second master thread
+// CHECK: {{^}}[[MASTER_ID_2:[0-9]+]]: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID_2]]
+
+// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_task_create: parent_task_id=0
+// CHECK-SAME: parent_task_frame.exit=[[NULL]]
+// CHECK-SAME: parent_task_frame.reenter=[[NULL]]
+// CHECK-SAME: new_task_id=[[PARENT_TASK_ID_2:[0-9]+]]
+// CHECK-SAME: codeptr_ra=[[NULL]], task_type=ompt_task_initial=1
+// CHECK-SAME: has_dependences=no
+
+// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID_2]]
+// CHECK-SAME: parent_task_frame.exit=[[NULL]]
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+// CHECK-SAME: parallel_id=[[PARALLEL_ID_2:[0-9]+]]
+// CHECK-SAME: requested_team_size=2, codeptr_ra=0x{{[0-f]+}}
+// CHECK-SAME: invoker={{.*}}
+
+// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_parallel_end:
+// CHECK-SAME: parallel_id=[[PARALLEL_ID_2]], task_id=[[PARENT_TASK_ID_2]]
+// CHECK-SAME: invoker={{[0-9]+}}
+
+// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[MASTER_ID_2]]
+
+// first worker thread
+// CHECK: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID_1]]
+
+// CHECK: {{^}}[[THREAD_ID_1]]: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[THREAD_ID_1]]
+
+// second worker thread
+// CHECK: {{^}}[[THREAD_ID_2:[0-9]+]]: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID_2]]
+
+// CHECK: {{^}}[[THREAD_ID_2]]: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[THREAD_ID_2]]
diff --git a/final/runtime/test/ompt/misc/threads.c b/final/runtime/test/ompt/misc/threads.c
new file mode 100644
index 0000000..4a0fc6f
--- /dev/null
+++ b/final/runtime/test/ompt/misc/threads.c
@@ -0,0 +1,34 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main() {
+  int arrivals = 0; // incremented once by every thread of the team
+#pragma omp parallel num_threads(4)
+  {
+#pragma omp atomic
+    arrivals += 1;
+  }
+  // Expected below: begin/end pair of the initial thread, then of three workers.
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin:
+  // CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_thread_end:
+  // CHECK-SAME: thread_id=[[MASTER_ID]]
+  // CHECK: {{^}}[[WORKER_ID1:[0-9]+]]: ompt_event_thread_begin:
+  // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID1]]
+  // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_thread_end:
+  // CHECK-SAME: thread_id=[[WORKER_ID1]]
+  // CHECK: {{^}}[[WORKER_ID2:[0-9]+]]: ompt_event_thread_begin:
+  // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID2]]
+  // CHECK: {{^}}[[WORKER_ID2]]: ompt_event_thread_end:
+  // CHECK-SAME: thread_id=[[WORKER_ID2]]
+  // CHECK: {{^}}[[WORKER_ID3:[0-9]+]]: ompt_event_thread_begin:
+  // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID3]]
+  // CHECK: {{^}}[[WORKER_ID3]]: ompt_event_thread_end:
+  // CHECK-SAME: thread_id=[[WORKER_ID3]]
+
+  return 0;
+}
diff --git a/final/runtime/test/ompt/misc/threads_nested.c b/final/runtime/test/ompt/misc/threads_nested.c
new file mode 100644
index 0000000..0d38dcf
--- /dev/null
+++ b/final/runtime/test/ompt/misc/threads_nested.c
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main() {
+  // Shared counter for OMPT_SIGNAL/OMPT_WAIT; it has to start at 0.
+  int condition = 0;
+  // Enable nesting so the inner region below can get its own team.
+  omp_set_nested(1);
+#pragma omp parallel num_threads(2)
+  {
+#pragma omp parallel num_threads(2)
+    {
+      OMPT_SIGNAL(condition);  // count this thread
+      OMPT_WAIT(condition, 4); // block until the counter has reached 4
+    }
+  }
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin:
+  // CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_thread_end:
+  // CHECK-SAME: thread_id=[[MASTER_ID]]
+  // CHECK: {{^}}[[WORKER_ID1:[0-9]+]]: ompt_event_thread_begin:
+  // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID1]]
+  // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_thread_end:
+  // CHECK-SAME: thread_id=[[WORKER_ID1]]
+  // CHECK: {{^}}[[WORKER_ID2:[0-9]+]]: ompt_event_thread_begin:
+  // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID2]]
+  // CHECK: {{^}}[[WORKER_ID2]]: ompt_event_thread_end:
+  // CHECK-SAME: thread_id=[[WORKER_ID2]]
+  // CHECK: {{^}}[[WORKER_ID3:[0-9]+]]: ompt_event_thread_begin:
+  // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID3]]
+  // CHECK: {{^}}[[WORKER_ID3]]: ompt_event_thread_end:
+  // CHECK-SAME: thread_id=[[WORKER_ID3]]
+
+  return 0;
+}
diff --git a/final/runtime/test/ompt/misc/unset_callback.c b/final/runtime/test/ompt/misc/unset_callback.c
new file mode 100644
index 0000000..9074ad3
--- /dev/null
+++ b/final/runtime/test/ompt/misc/unset_callback.c
@@ -0,0 +1,29 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel num_threads(1)
+  {
+    // First region: parallel_begin has not been unset yet, so begin and end are both expected.
+  }
+  ompt_set_callback(ompt_callback_parallel_begin, NULL);
+  #pragma omp parallel num_threads(1)
+  {
+    // Second region: parallel_begin was unset above, so only parallel_end may be reported.
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_parallel_begin:
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_parallel_end:
+  // CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin:
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_parallel_end:
+
+  return 0;
+}
diff --git a/final/runtime/test/ompt/ompt-signal.h b/final/runtime/test/ompt/ompt-signal.h
new file mode 100644
index 0000000..b5c28cf
--- /dev/null
+++ b/final/runtime/test/ompt/ompt-signal.h
@@ -0,0 +1,31 @@
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay(t) Sleep(1); // accepts and ignores t so that delay(10) compiles; Sleep() counts milliseconds
+#else
+#include <unistd.h>
+#define delay(t) usleep(t); // t is passed to usleep(), i.e. microseconds
+#endif
+
+// These functions provide a signal-wait mechanism that enforces the expected scheduling in the test cases.
+// The condition variable (s) must be shared between the threads and initialized to 0.
+
+#define OMPT_SIGNAL(s) ompt_signal(&s)
+// Adds one to the shared counter *s under an OpenMP atomic construct.
+void ompt_signal(int *s)
+{
+  #pragma omp atomic
+  *s += 1;
+}
+
+#define OMPT_WAIT(s,v) ompt_wait(&s,v)
+// Polls *s (atomic read) until it is >= v; always sleeps once before the first read.
+void ompt_wait(int *s, int v)
+{
+  int seen = 0;
+  for (;;) {
+    delay(10);
+    #pragma omp atomic read
+    seen = *s;
+    if (seen >= v) break;
+  }
+}
diff --git a/final/runtime/test/ompt/parallel/dynamic_enough_threads.c b/final/runtime/test/ompt/parallel/dynamic_enough_threads.c
new file mode 100644
index 0000000..4c340ba
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/dynamic_enough_threads.c
@@ -0,0 +1,43 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main()
+{
+ omp_set_dynamic(1);
+ // Request 4 threads; with dynamic adjustment enabled the runtime may provide fewer.
+ #pragma omp parallel num_threads(4)
+ {
+ print_ids(0);
+ print_ids(1);
+ }
+ print_fuzzy_address(1);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
+ // A team size of 1-4 is expected: 4 threads are requested, but dynamic adjustment may shrink the team.
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[1-4]}}
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // The fuzzy address printed right after the region must share its prefix with the codeptr_ra captured at parallel_begin.
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/dynamic_not_enough_threads.c b/final/runtime/test/ompt/parallel/dynamic_not_enough_threads.c
new file mode 100644
index 0000000..f3a6e17
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/dynamic_not_enough_threads.c
@@ -0,0 +1,43 @@
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main()
+{
+  omp_set_dynamic(1);
+  // The run line sets OMP_THREAD_LIMIT=2, so the 4 threads requested here cannot all be provided.
+  #pragma omp parallel num_threads(4)
+  {
+    print_ids(0);
+    print_ids(1);
+  }
+  print_fuzzy_address(1);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
+  // team-size of 1-2 is expected: 4 threads are requested, but OMP_THREAD_LIMIT=2 caps the team
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[1-2]}}
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/max_active_levels_serialized.c b/final/runtime/test/ompt/parallel/max_active_levels_serialized.c
new file mode 100644
index 0000000..bbe73ef
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/max_active_levels_serialized.c
@@ -0,0 +1,73 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ omp_set_nested(1);
+ omp_set_max_active_levels(1);
+ // Nesting is enabled but only one active level is allowed; the patterns below expect the inner region on the encountering thread only.
+ #pragma omp parallel num_threads(2)
+ {
+ print_ids(0);
+ print_ids(1);
+ #pragma omp parallel num_threads(2)
+ {
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ }
+ print_fuzzy_address(1);
+ }
+ print_fuzzy_address(2);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+ // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+ // master thread - outer implicit task, then the complete nested region on the same thread
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // second thread of the outer team - same sequence, without the outer parallel_begin/parallel_end
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/nested.c b/final/runtime/test/ompt/parallel/nested.c
new file mode 100644
index 0000000..035529c
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/nested.c
@@ -0,0 +1,298 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
+#include "callback.h"
+#include <omp.h>
+#include <unistd.h>
+
+int main()
+{
+ int condition=0;
+ omp_set_nested(1);
+ print_frame(0);
+
+ #pragma omp parallel num_threads(4)
+ {
+ print_frame_from_outlined_fn(1);
+ print_ids(0);
+ print_ids(1);
+ print_frame(0);
+
+ //get all implicit task events before starting nested:
+ #pragma omp barrier
+
+ #pragma omp parallel num_threads(4)
+ {
+ print_frame_from_outlined_fn(1);
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ print_frame(0);
+ OMPT_SIGNAL(condition);
+ OMPT_WAIT(condition,16);
+ #pragma omp barrier
+ print_fuzzy_address(1);
+ print_ids(0);
+ }
+ print_fuzzy_address(2);
+ print_ids(0);
+ }
+ print_fuzzy_address(3);
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+
+ // THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+ // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // nested parallel masters
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[NESTED_EXIT:0x[0-f]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]]
+ // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // explicit barrier
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
+ // implicit barrier
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // implicit barrier
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // nested parallel worker threads
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/nested_lwt.c b/final/runtime/test/ompt/parallel/nested_lwt.c
new file mode 100644
index 0000000..8348376
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/nested_lwt.c
@@ -0,0 +1,334 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+#include <unistd.h>
+
+int main()
+{
+ omp_set_nested(1);
+ int condition = 0;
+
+ #pragma omp parallel num_threads(4)
+ {
+ print_ids(0);
+ print_ids(1);
+ //get all implicit task events before starting nested:
+ #pragma omp barrier
+ #pragma omp parallel num_threads(1)
+ {
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ //get all implicit task events before starting nested:
+ #pragma omp barrier
+ #pragma omp parallel num_threads(4)
+ {
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ print_ids(3);
+ OMPT_SIGNAL(condition);
+ OMPT_WAIT(condition,16);
+ }
+ print_fuzzy_address(1);
+ }
+ print_fuzzy_address(2);
+ }
+ print_fuzzy_address(3);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+
+
+ // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+ // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // nested parallel masters
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // nested parallel worker threads
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // can't reliably tell which parallel region is the parent...
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+ // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/nested_serialized.c b/final/runtime/test/ompt/parallel/nested_serialized.c
new file mode 100644
index 0000000..f87b8f4
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/nested_serialized.c
@@ -0,0 +1,128 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+ // Test driver: a 4-thread parallel region whose inner parallel region is serialized; the directive comments inside main() are matched by FileCheck against the OMPT callback output.
+int main()
+{
+ omp_set_nested(0); // nested parallelism off: the inner region below is expected to run on the encountering thread only
+
+ #pragma omp parallel num_threads(4)
+ {
+ print_ids(0); // matched below as the "task level 0" line (helper presumably comes from callback.h)
+ print_ids(1); // matched below as the "task level 1" line
+ #pragma omp parallel num_threads(4)
+ {
+ print_ids(0); // inside the inner region the checks expect levels 0..2: inner region, outer region, then the task that encountered the outer region
+ print_ids(1);
+ print_ids(2);
+ }
+ print_fuzzy_address(1); // matched below as fuzzy_address against the inner region's codeptr_ra prefix
+ }
+ print_fuzzy_address(2); // matched below as fuzzy_address against the outer region's codeptr_ra prefix
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+
+ // Expectations for the second run of the test (output piped through %sort-threads); the master thread's events are listed first.
+
+ // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+ // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // Three worker threads follow; each is expected to open and close its own serialized inner region.
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/nested_thread_num.c b/final/runtime/test/ompt/parallel/nested_thread_num.c
new file mode 100644
index 0000000..e952f80
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/nested_thread_num.c
@@ -0,0 +1,357 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
+#include "callback.h"
+#include <omp.h>
+#include <unistd.h>
+
+int main() { // nested 2x2 parallel regions; the printed lines are matched by the FileCheck patterns after main
+ int condition = 0; // shared counter handed to OMPT_SIGNAL / OMPT_WAIT in the inner region
+ omp_set_nested(1); // enable nested parallelism so the inner region can get its own team
+ print_frame(0); // frame address of main; the sorted-thread patterns capture it as MAIN_REENTER
+ // Outer region: a team of two threads.
+#pragma omp parallel num_threads(2)
+ {
+ print_frame_from_outlined_fn(1); // frame address the patterns capture as EXIT (exit frame of this task)
+ print_ids(0); // emits the "task level 0" line: the outer implicit task
+ print_ids(1); // emits the "task level 1" line: the parent task (PARENT_TASK_ID)
+ print_frame(0); // captured as REENTER, the reenter frame reported by the nested parallel_begin
+
+// get all implicit task events before starting nested:
+#pragma omp barrier
+ // Inner region: each outer thread requests a nested team of two.
+#pragma omp parallel num_threads(2)
+ {
+ print_frame_from_outlined_fn(1); // captured as NESTED_EXIT
+ print_ids(0); // nested implicit task
+ print_ids(1); // enclosing outer implicit task
+ print_ids(2); // parent task of the outer region
+ print_frame(0); // captured as NESTED_REENTER
+ OMPT_SIGNAL(condition); // presumably increments the counter -- macro is not defined here, confirm in callback.h
+ OMPT_WAIT(condition, 4); // presumably waits until all 2x2 inner threads have signaled -- confirm in callback.h
+#pragma omp barrier
+ print_fuzzy_address(1); // matched against the explicit barrier's codeptr_ra (BARRIER_RETURN_ADDRESS)
+ print_ids(0); // after the barrier the patterns expect reenter_frame to be null again
+ }
+ print_fuzzy_address(2); // matched against the nested region's codeptr_ra (NESTED_RETURN_ADDRESS)
+ print_ids(0); // back in the outer implicit task
+ }
+ print_fuzzy_address(3); // matched against the outer region's codeptr_ra (RETURN_ADDRESS)
+
+ return 0;
+}
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback
+
+// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+// make sure initial data pointers are null
+// CHECK-NOT: 0: parallel_data initially not null
+// CHECK-NOT: 0: task_data initially not null
+// CHECK-NOT: 0: thread_data initially not null
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
+// CHECK-SAME: parent_task_frame.exit=[[NULL]],
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: requested_team_size=2,
+// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
+// CHECK-SAME: invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end:
+
+// Note that we cannot ensure that the worker threads have already called
+// barrier_end and implicit_task_end before parallel_end!
+
+// CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin:
+// CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin:
+
+
+// CHECK: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]],
+// CHECK-SAME: task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+// THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+// THREADS: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+// THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
+// THREADS-SAME: parent_task_frame.exit=[[NULL]],
+// THREADS-SAME: parent_task_frame.reenter=[[MAIN_REENTER]],
+// THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2,
+// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
+// THREADS-SAME: invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+// nested parallel masters
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+// THREADS-SAME: parallel_id=[[PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]],
+// THREADS-SAME: team_size=2, thread_num=0
+
+// THREADS: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
+// THREADS-SAME: reenter_frame=[[NULL]],
+// THREADS-SAME: thread_num=0
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 1:
+// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]],
+// THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]],
+// THREADS-SAME: reenter_frame=[[MAIN_REENTER]]
+
+// THREADS: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin:
+// THREADS-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
+// THREADS-SAME: parent_task_frame.exit=[[EXIT]],
+// THREADS-SAME: parent_task_frame.reenter=[[REENTER]],
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]],
+// THREADS-SAME: requested_team_size=2,
+// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
+// THREADS-SAME: invoker=[[PARALLEL_INVOKER]]
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// THREADS-SAME: thread_num=0
+
+// THREADS: __builtin_frame_address({{.}})=[[NESTED_EXIT:0x[0-f]+]]
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 0:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
+// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]],
+// THREADS-SAME: thread_num=0
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
+// THREADS-SAME: reenter_frame=[[REENTER]]
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 2:
+// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]],
+// THREADS-SAME: reenter_frame=[[MAIN_REENTER]]
+
+// THREADS: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]]
+
+// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+// explicit barrier
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
+// THREADS-SAME: codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 0:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
+// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]]
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 0:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
+// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
+
+// implicit barrier
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
+// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 0:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
+// THREADS-SAME: exit_frame=[[NULL]], reenter_frame=[[NULL]]
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]],
+// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]],
+// THREADS-SAME: invoker=[[PARALLEL_INVOKER]],
+// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+
+// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+
+// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
+// THREADS-SAME: reenter_frame=[[NULL]]
+
+// implicit barrier
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin:
+// THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]],
+// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+
+// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]],
+// THREADS-SAME: reenter_frame=[[NULL]]
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]],
+// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+// THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]],
+// THREADS-SAME: invoker=[[PARALLEL_INVOKER]],
+// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+
+// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+// Worker of first nesting level
+
+// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin:
+// THREADS-SAME: parallel_id=[[PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// THREADS-SAME: thread_num=[[OUTER_THREADNUM:[0-9]+]]
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]],
+// THREADS-SAME: thread_num=[[OUTER_THREADNUM]]
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 1:
+// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin:
+// THREADS-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
+// THREADS-SAME: parent_task_frame.exit={{0x[0-f]+}},
+// THREADS-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2,
+// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}},
+// THREADS-SAME: invoker=[[PARALLEL_INVOKER]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// THREADS-SAME: thread_num=[[INNER_THREADNUM:[0-9]+]]
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 0:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
+// THREADS-SAME: thread_num=[[INNER_THREADNUM]]
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]],
+// THREADS-SAME: thread_num=[[OUTER_THREADNUM]]
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 2:
+// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+
+// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+
+// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin:
+// THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+// nested parallel worker threads
+
+// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+// THREADS-SAME: thread_num=[[THREADNUM:[0-9]+]]
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 0:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]]
+// THREADS-SAME: thread_num=[[THREADNUM]]
+
+// can't reliably tell which parallel region is the parent...
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}},
+// THREADS-SAME: task_id={{[0-9]+}}
+// THREADS-SAME: thread_num={{[01]}}
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 2:
+// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+// THREADS-SAME: thread_num=0
+
+// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+// other nested parallel worker threads
+
+// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+// THREADS-SAME: thread_num=[[THREADNUM:[0-9]+]]
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 0:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]]
+// THREADS-SAME: thread_num=[[THREADNUM]]
+
+// can't reliably tell which parallel region is the parent...
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}},
+// THREADS-SAME: task_id={{[0-9]+}}
+// THREADS-SAME: thread_num={{[01]}}
+
+// THREADS: {{^}}[[THREAD_ID]]: task level 2:
+// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+// THREADS-SAME: thread_num=0
+
+// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin:
+// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
+// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end:
+// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
diff --git a/final/runtime/test/ompt/parallel/no_thread_num_clause.c b/final/runtime/test/ompt/parallel/no_thread_num_clause.c
new file mode 100644
index 0000000..e23d89a
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/no_thread_num_clause.c
@@ -0,0 +1,95 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main() // parallel region without a num_threads clause; team size is set through omp_set_num_threads
+{
+ omp_set_num_threads(4); // the patterns below expect requested_team_size=4
+ #pragma omp parallel
+ {
+ print_ids(0); // emits the "task level 0" line: the implicit task of this region
+ print_ids(1); // emits the "task level 1" line: the parent task (PARENT_TASK_ID)
+ }
+ print_fuzzy_address(1); // matched against parallel_begin's codeptr_ra (RETURN_ADDRESS)
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+
+ // NOTE(review): the task_create pattern below pins new_task_id to a literal; presumably this depends on how callback.h generates ids -- confirm before changing that scheme.
+ // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+ // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=281474976710658, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
+
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/normal.c b/final/runtime/test/ompt/parallel/normal.c
new file mode 100644
index 0000000..2cc9ce1
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/normal.c
@@ -0,0 +1,132 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads \
+// RUN: | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main() { // one 4-thread parallel region; the printed events are matched by the patterns inside this function
+#pragma omp parallel num_threads(4)
+ {
+ print_ids(0); // emits the "task level 0" line: the implicit task of this region
+ print_ids(1); // emits the "task level 1" line: the parent task (PARENT_TASK_ID)
+ }
+ print_fuzzy_address(1); // NOTE(review): no pattern in this test matches the resulting fuzzy_address line
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // Only check callback names, arguments are verified in THREADS below.
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin
+
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+
+ // Note that we cannot ensure that the worker threads have already called
+ // barrier_end and implicit_task_end before parallel_end!
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end
+
+ // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+ // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin
+ // THREADS-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+ // THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]]
+ // THREADS-SAME: parent_task_frame.exit=[[NULL]]
+ // THREADS-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4
+ // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1
+ // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]]
+ // THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end
+ // parallel_id is 0 because the region ended in the barrier!
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin
+ // THREADS-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1
+ // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end
+ // parallel_id is 0 because the region ended in the barrier!
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin
+ // THREADS-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1
+ // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end
+ // parallel_id is 0 because the region ended in the barrier!
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin
+ // THREADS-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1
+ // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end
+ // parallel_id is 0 because the region ended in the barrier!
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/not_enough_threads.c b/final/runtime/test/ompt/parallel/not_enough_threads.c
new file mode 100644
index 0000000..8a0469a
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/not_enough_threads.c
@@ -0,0 +1,90 @@
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | %sort-threads \
+// RUN: | FileCheck --check-prefix=THREADS %s
+
+// REQUIRES: ompt
+
+#include "callback.h"
+
+int main() { // requests 4 threads while the run lines set OMP_THREAD_LIMIT=2; patterns expect a master plus one worker
+#pragma omp parallel num_threads(4)
+ {
+ print_ids(0); // emits the "task level 0" line: the implicit task of this region
+ print_ids(1); // emits the "task level 1" line: the parent task (PARENT_TASK_ID)
+ }
+ print_fuzzy_address(1); // matched against parallel_begin's codeptr_ra (RETURN_ADDRESS)
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback
+
+ // Make sure initial data pointers are null.
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // Only check callback names, arguments are verified in THREADS below.
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin
+
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+
+ // Note that we cannot ensure that the worker threads have already called
+ // barrier_end and implicit_task_end before parallel_end!
+
+ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin
+ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end
+
+ // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+ // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin
+ // THREADS-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+ // THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]]
+ // THREADS-SAME: parent_task_frame.exit=[[NULL]]
+ // THREADS-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4
+ // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1
+ // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]]
+ // THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+
+ // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // parallel_id is 0 because the region ended in the barrier!
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin
+ // THREADS-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 0
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1
+ // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
+ // THREADS-SAME: task_id=[[PARENT_TASK_ID]]
+ // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+ // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // parallel_id is 0 because the region ended in the barrier!
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/parallel_if0.c b/final/runtime/test/ompt/parallel/parallel_if0.c
new file mode 100644
index 0000000..f5c4454
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/parallel_if0.c
@@ -0,0 +1,76 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+
+int main()
+{
+// print_frame(0);
+ #pragma omp parallel if(0)
+ {
+// print_frame(1);
+ print_ids(0);
+ print_ids(1);
+// print_frame(0);
+ #pragma omp parallel if(0)
+ {
+// print_frame(1);
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+// print_frame(0);
+ #pragma omp task
+ {
+// print_frame(1);
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ print_ids(3);
+ }
+ }
+ print_fuzzy_address(1);
+ }
+ print_fuzzy_address(2);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]]
+
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/parallel/serialized.c b/final/runtime/test/ompt/parallel/serialized.c
new file mode 100644
index 0000000..e7a9207
--- /dev/null
+++ b/final/runtime/test/ompt/parallel/serialized.c
@@ -0,0 +1,77 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+
+int main()
+{
+// print_frame(0);
+ #pragma omp parallel num_threads(1)
+ {
+// print_frame(1);
+ print_ids(0);
+ print_ids(1);
+// print_frame(0);
+ #pragma omp parallel num_threads(1)
+ {
+// print_frame(1);
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+// print_frame(0);
+ #pragma omp task
+ {
+// print_frame(1);
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ print_ids(3);
+ }
+ }
+ print_fuzzy_address(1);
+ }
+ print_fuzzy_address(2);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: parallel_data initially not null
+ // CHECK-NOT: 0: task_data initially not null
+ // CHECK-NOT: 0: thread_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[OUTER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[INNER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]]
+
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[INNER_RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[INNER_RETURN_ADDRESS]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[OUTER_RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[OUTER_RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/barrier/explicit.c b/final/runtime/test/ompt/synchronization/barrier/explicit.c
new file mode 100644
index 0000000..d60acd6
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/barrier/explicit.c
@@ -0,0 +1,58 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int x = 0;
+
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp atomic
+ x++;
+
+ #pragma omp barrier
+ print_current_address();
+
+ #pragma omp atomic
+ x++;
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // master thread explicit barrier
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+ // master thread implicit barrier at parallel end
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+
+
+ // worker thread explicit barrier
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+ // worker thread implicit barrier at parallel end
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/barrier/for_loop.c b/final/runtime/test/ompt/synchronization/barrier/for_loop.c
new file mode 100644
index 0000000..5259447
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/barrier/for_loop.c
@@ -0,0 +1,56 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int y[] = {0,1,2,3};
+
+ #pragma omp parallel num_threads(2)
+ {
+ //implicit barrier at end of for loop
+ int i;
+ #pragma omp for
+ for (i = 0; i < 4; i++)
+ {
+ y[i]++;
+ }
+ print_current_address();
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // master thread implicit barrier at loop end
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+ // master thread implicit barrier at parallel end
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+
+ // worker thread implicit barrier at loop end
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+ // worker thread implicit barrier after parallel
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/barrier/for_simd.c b/final/runtime/test/ompt/synchronization/barrier/for_simd.c
new file mode 100644
index 0000000..351b2c2
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/barrier/for_simd.c
@@ -0,0 +1,33 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// XFAIL: gcc-4
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int y[] = {0,1,2,3};
+
+ int i;
+ #pragma omp for simd
+ for (i = 0; i < 4; i++)
+ {
+ y[i]++;
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // master thread implicit barrier at simd loop end
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/barrier/implicit_task_data.c b/final/runtime/test/ompt/synchronization/barrier/implicit_task_data.c
new file mode 100644
index 0000000..0824b47
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/barrier/implicit_task_data.c
@@ -0,0 +1,150 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+
+// This test checks that values stored in task_data in a barrier_begin event
+// are still present in the corresponding barrier_end event.
+ // Therefore, callback implementations different from the ones in callback.h are necessary.
+// This is a test for an issue reported in
+// https://github.com/OpenMPToolsInterface/LLVM-openmp/issues/39
+
+#define _BSD_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <omp.h>
+#include <ompt.h>
+
+static const char* ompt_thread_type_t_values[] = {
+ NULL,
+ "ompt_thread_initial",
+ "ompt_thread_worker",
+ "ompt_thread_other"
+};
+
+static ompt_get_unique_id_t ompt_get_unique_id;
+static ompt_get_thread_data_t ompt_get_thread_data;
+
+int main()
+{
+ #pragma omp parallel num_threads(4)
+ {
+ #pragma omp master
+ {
+ sleep(1);
+ }
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // master thread implicit barrier at parallel end
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id=0, task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}}
+
+
+ // worker thread implicit barrier at parallel end
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id=0, task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]]
+
+ return 0;
+}
+
+static void
+on_ompt_callback_thread_begin(
+ ompt_thread_type_t thread_type,
+ ompt_data_t *thread_data)
+{
+ if(thread_data->ptr)
+ printf("%s\n", "0: thread_data initially not null");
+ thread_data->value = ompt_get_unique_id();
+ printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ompt_thread_type_t_values[thread_type], thread_type, thread_data->value);
+}
+
+static void
+on_ompt_callback_sync_region(
+ ompt_sync_region_kind_t kind,
+ ompt_scope_endpoint_t endpoint,
+ ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ const void *codeptr_ra)
+{
+ switch(endpoint)
+ {
+ case ompt_scope_begin:
+ task_data->value = ompt_get_unique_id();
+ if(kind == ompt_sync_region_barrier)
+ printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+ break;
+ case ompt_scope_end:
+ if(kind == ompt_sync_region_barrier)
+ printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+ break;
+ }
+}
+
+static void
+on_ompt_callback_sync_region_wait(
+ ompt_sync_region_kind_t kind,
+ ompt_scope_endpoint_t endpoint,
+ ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ const void *codeptr_ra)
+{
+ switch(endpoint)
+ {
+ case ompt_scope_begin:
+ if(kind == ompt_sync_region_barrier)
+ printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+ break;
+ case ompt_scope_end:
+ if(kind == ompt_sync_region_barrier)
+ printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+ break;
+ }
+}
+
+#define register_callback_t(name, type) \
+do{ \
+ type f_##name = &on_##name; \
+ if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \
+ ompt_set_never) \
+ printf("0: Could not register callback '" #name "'\n"); \
+}while(0)
+
+#define register_callback(name) register_callback_t(name, name##_t)
+
+int ompt_initialize(
+ ompt_function_lookup_t lookup,
+ ompt_data_t *tool_data)
+{
+ ompt_set_callback_t ompt_set_callback;
+ ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
+ ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
+ ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
+ register_callback(ompt_callback_sync_region);
+ register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
+ register_callback(ompt_callback_thread_begin);
+ printf("0: NULL_POINTER=%p\n", (void*)NULL);
+ return 1; //success
+}
+
+void ompt_finalize(ompt_data_t *tool_data)
+{
+ printf("0: ompt_event_runtime_shutdown\n");
+}
+
+ompt_start_tool_result_t* ompt_start_tool(
+ unsigned int omp_version,
+ const char *runtime_version)
+{
+ static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
+ return &ompt_start_tool_result;
+}
diff --git a/final/runtime/test/ompt/synchronization/barrier/parallel_region.c b/final/runtime/test/ompt/synchronization/barrier/parallel_region.c
new file mode 100644
index 0000000..ea0a23f
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/barrier/parallel_region.c
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int x = 0;
+
+ //implicit barrier at end of a parallel region
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp atomic
+ x++;
+ }
+ print_fuzzy_address();
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // master thread implicit barrier at parallel end
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+
+ // worker thread implicit barrier at parallel end
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/barrier/sections.c b/final/runtime/test/ompt/synchronization/barrier/sections.c
new file mode 100644
index 0000000..4e1dfdd
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/barrier/sections.c
@@ -0,0 +1,63 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int x = 0;
+
+ #pragma omp parallel num_threads(2)
+ {
+ //implicit barrier after sections with nowait but with lastprivates
+ //implicit barrier at end of sections
+ #pragma omp sections
+ {
+ #pragma omp section
+ {
+ #pragma omp atomic
+ x++;
+ }
+
+ #pragma omp section
+ {
+ #pragma omp atomic
+ x++;
+ }
+ }
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // master thread implicit barrier at sections end
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+ // master thread implicit barrier at parallel end
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+
+ // worker thread implicit barrier at sections end
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+ // worker thread implicit barrier at parallel end
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/barrier/single.c b/final/runtime/test/ompt/synchronization/barrier/single.c
new file mode 100644
index 0000000..8ba8b52
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/barrier/single.c
@@ -0,0 +1,61 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int x = 0;
+
+ #pragma omp parallel num_threads(2)
+ {
+ //implicit barrier at end of single
+ #pragma omp single
+ {
+ x++;
+ }
+ print_fuzzy_address();
+ //critical section to avoid merge of two barriers into one
+ #pragma omp critical
+ {
+ x++;
+ }
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // master thread implicit barrier at single end
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // master thread implicit barrier at parallel end
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+
+ // worker thread implicit barrier at single end
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // worker thread implicit barrier at parallel end
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/critical.c b/final/runtime/test/ompt/synchronization/critical.c
new file mode 100644
index 0000000..ed982b7
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/critical.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ #pragma omp critical
+ {
+ print_current_address(1);
+ print_ids(0);
+ }
+ print_current_address(2);
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_critical: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/flush.c b/final/runtime/test/ompt/synchronization/flush.c
new file mode 100644
index 0000000..287d035
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/flush.c
@@ -0,0 +1,30 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not call the runtime for the flush construct
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main() {
+#pragma omp parallel num_threads(2)
+ {
+ int tid = omp_get_thread_num();
+
+#pragma omp flush
+ print_current_address(1);
+ }
+
+ return 0;
+}
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_flush'
+
+// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_flush:
+// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+// CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+//
+// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_flush:
+// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+// CHECK: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
diff --git a/final/runtime/test/ompt/synchronization/lock.c b/final/runtime/test/ompt/synchronization/lock.c
new file mode 100644
index 0000000..eae1575
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/lock.c
@@ -0,0 +1,44 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ //need to use an OpenMP construct so that OMPT will be initialized
+ #pragma omp parallel num_threads(1)
+ print_ids(0);
+
+ omp_lock_t lock;
+ printf("%" PRIu64 ": &lock: %" PRIu64 "\n", ompt_get_thread_data()->value, (uint64_t) &lock);
+ omp_init_lock(&lock);
+ print_fuzzy_address(1);
+ omp_set_lock(&lock);
+ print_fuzzy_address(2);
+ omp_unset_lock(&lock);
+ print_fuzzy_address(3);
+ omp_destroy_lock(&lock);
+ print_fuzzy_address(4);
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: &lock: [[WAIT_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_init_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/master.c b/final/runtime/test/ompt/synchronization/master.c
new file mode 100644
index 0000000..8cc2d46
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/master.c
@@ -0,0 +1,38 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not call the runtime for the master construct
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main() {
+ int x = 0;
+#pragma omp parallel num_threads(2)
+ {
+#pragma omp master
+ {
+ print_fuzzy_address(1);
+ x++;
+ }
+ print_current_address(2);
+ }
+
+ printf("%" PRIu64 ": x=%d\n", ompt_get_thread_data()->value, x);
+
+ return 0;
+}
+
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
+
+// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin:
+// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]],
+// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+// CHECK: {{^}}[[MASTER_ID]]: ompt_event_master_end:
+// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]],
+// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS_END:0x[0-f]+]]
+// CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS_END]]
diff --git a/final/runtime/test/ompt/synchronization/nest_lock.c b/final/runtime/test/ompt/synchronization/nest_lock.c
new file mode 100644
index 0000000..c83ceaf
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/nest_lock.c
@@ -0,0 +1,52 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ //need to use an OpenMP construct so that OMPT will be initialized
+ #pragma omp parallel num_threads(1)
+ print_ids(0);
+
+ omp_nest_lock_t nest_lock;
+ printf("%" PRIu64 ": &nest_lock: %lli\n", ompt_get_thread_data()->value, (long long) &nest_lock);
+ omp_init_nest_lock(&nest_lock);
+ print_fuzzy_address(1);
+ omp_set_nest_lock(&nest_lock);
+ print_fuzzy_address(2);
+ omp_set_nest_lock(&nest_lock);
+ print_fuzzy_address(3);
+ omp_unset_nest_lock(&nest_lock);
+ print_fuzzy_address(4);
+ omp_unset_nest_lock(&nest_lock);
+ print_fuzzy_address(5);
+ omp_destroy_nest_lock(&nest_lock);
+ print_fuzzy_address(6);
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/ordered.c b/final/runtime/test/ompt/synchronization/ordered.c
new file mode 100644
index 0000000..14284a4
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/ordered.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ #pragma omp ordered
+ {
+ print_current_address(1);
+ print_ids(0);
+ }
+ print_current_address(2);
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_ordered: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/taskgroup.c b/final/runtime/test/ompt/synchronization/taskgroup.c
new file mode 100644
index 0000000..7309c0a
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/taskgroup.c
@@ -0,0 +1,49 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+
+#include "callback.h"
+#include <unistd.h>
+#include <stdio.h>
+
+int main()
+{
+ int condition=0;
+ int x=0;
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp master
+ {
+ #pragma omp taskgroup
+ {
+ print_current_address(1);
+ #pragma omp task
+ {
+ #pragma omp atomic
+ x++;
+ }
+ }
+ print_current_address(2);
+ }
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskgroup_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/taskwait.c b/final/runtime/test/ompt/synchronization/taskwait.c
new file mode 100644
index 0000000..c431024
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/taskwait.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int x = 0;
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp master
+ {
+ #pragma omp task
+ {
+ x++;
+ }
+ #pragma omp taskwait
+ print_current_address(1);
+ }
+ }
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: ompt_event_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/test_lock.c b/final/runtime/test/ompt/synchronization/test_lock.c
new file mode 100644
index 0000000..d24e4d6
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/test_lock.c
@@ -0,0 +1,54 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ omp_lock_t lock;
+ omp_init_lock(&lock);
+ print_fuzzy_address(1);
+
+ omp_test_lock(&lock);
+ print_fuzzy_address(2);
+ omp_unset_lock(&lock);
+ print_fuzzy_address(3);
+
+ omp_set_lock(&lock);
+ print_fuzzy_address(4);
+ omp_test_lock(&lock);
+ print_fuzzy_address(5);
+ omp_unset_lock(&lock);
+ print_fuzzy_address(6);
+
+ omp_destroy_lock(&lock);
+ print_fuzzy_address(7);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/test_nest_lock.c b/final/runtime/test/ompt/synchronization/test_nest_lock.c
new file mode 100644
index 0000000..ad02d32
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/test_nest_lock.c
@@ -0,0 +1,42 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ omp_nest_lock_t nest_lock;
+ omp_init_nest_lock(&nest_lock);
+
+ omp_test_nest_lock(&nest_lock);
+ omp_unset_nest_lock(&nest_lock);
+
+ omp_set_nest_lock(&nest_lock);
+ omp_test_nest_lock(&nest_lock);
+ omp_unset_nest_lock(&nest_lock);
+ omp_unset_nest_lock(&nest_lock);
+
+ omp_destroy_nest_lock(&nest_lock);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/synchronization/test_nest_lock_parallel.c b/final/runtime/test/ompt/synchronization/test_nest_lock_parallel.c
new file mode 100644
index 0000000..e9240f7
--- /dev/null
+++ b/final/runtime/test/ompt/synchronization/test_nest_lock_parallel.c
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ omp_nest_lock_t nest_lock;
+ omp_init_nest_lock(&nest_lock);
+
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp master
+ {
+ omp_set_nest_lock(&nest_lock);
+ print_fuzzy_address(1);
+ }
+ #pragma omp barrier
+ omp_test_nest_lock(&nest_lock); //should fail for non-master
+ print_fuzzy_address(2);
+ #pragma omp barrier
+ #pragma omp master
+ {
+ omp_unset_nest_lock(&nest_lock);
+ print_fuzzy_address(3);
+ omp_unset_nest_lock(&nest_lock);
+ print_fuzzy_address(4);
+ }
+ }
+
+ omp_destroy_nest_lock(&nest_lock);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]]
+ // CHECK-NEXT: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/dependences.c b/final/runtime/test/ompt/tasks/dependences.c
new file mode 100644
index 0000000..57b61f9
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/dependences.c
@@ -0,0 +1,61 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+
+#include "callback.h"
+#include <omp.h>
+#include <math.h>
+#include <unistd.h>
+
+int main()
+{
+ int x = 0;
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp master
+ {
+ print_ids(0);
+ #pragma omp task depend(out:x)
+ {
+ x++;
+ delay(100);
+ }
+ print_fuzzy_address(1);
+ print_ids(0);
+
+ #pragma omp task depend(in:x)
+ {
+ x = -1;
+ }
+ print_ids(0);
+ }
+ }
+
+ x++;
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence'
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: new_task_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/explicit_task.c b/final/runtime/test/ompt/tasks/explicit_task.c
new file mode 100644
index 0000000..01fb3f8
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/explicit_task.c
@@ -0,0 +1,102 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int condition=0;
+ omp_set_nested(0);
+ print_frame(0);
+ #pragma omp parallel num_threads(2)
+ {
+ print_frame_from_outlined_fn(1);
+ print_ids(0);
+ print_ids(1);
+ print_frame(0);
+ #pragma omp master
+ {
+ print_ids(0);
+ #pragma omp task shared(condition)
+ {
+ OMPT_SIGNAL(condition);
+ print_frame(1);
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ }
+ print_fuzzy_address(1);
+ OMPT_WAIT(condition,1);
+ print_ids(0);
+ }
+ #pragma omp barrier
+ print_ids(0);
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: new_task_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+ // nested parallel masters
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // <- ompt_event_task_create would be expected here
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // explicit barrier after master
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // implicit barrier parallel
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // this is expected to come earlier and at MASTER:
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/serialized.c b/final/runtime/test/ompt/tasks/serialized.c
new file mode 100644
index 0000000..12a0281
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/serialized.c
@@ -0,0 +1,154 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
+#include "callback.h"
+#include <omp.h>
+#include <math.h>
+
+// Test driver: a two-thread parallel region whose master thread creates one
+// task guarded by `if (t)`. t is (int)sin(0.1), which truncates to 0, so the
+// task's if clause is false. The FileCheck directives after the code give the
+// expected OMPT event trace.
+int main() {
+ omp_set_nested(0);
+ print_frame(0);
+#pragma omp parallel num_threads(2)
+ {
+ print_frame_from_outlined_fn(1);
+ print_ids(0);
+ print_ids(1);
+ print_frame(0);
+#pragma omp master
+ {
+ print_ids(0);
+ void *creator_frame = get_frame_address(0);
+ // sin(0.1) is about 0.0998, so t becomes 0; presumably computed through
+ // sin() so the compiler cannot fold the if clause -- TODO confirm.
+ int t = (int)sin(0.1);
+#pragma omp task if (t)
+ {
+ void *task_frame = get_frame_address(0);
+ if (creator_frame == task_frame) {
+ // Assume this code was inlined which the compiler is allowed to do.
+ print_frame(0);
+ } else {
+ // The exit frame must be our parent!
+ print_frame_from_outlined_fn(1);
+ }
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ }
+ // The printed fuzzy address is matched below against the prefix of the
+ // task's codeptr_ra that was captured as RETURN_ADDRESS.
+ print_fuzzy_address(1);
+ print_ids(0);
+ }
+ print_ids(0);
+ }
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: new_task_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create
+ // CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]]
+ // CHECK-SAME: parent_task_frame.reenter=[[NULL]]
+ // CHECK-SAME: new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_initial=1, has_dependences=no
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)
+ // CHECK-SAME: =[[MAIN_REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+ // CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]]
+ // CHECK-SAME: parent_task_frame.exit=[[NULL]]
+ // CHECK-SAME: parent_task_frame.reenter=[[MAIN_REENTER]]
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2
+ // CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
+
+ // nested parallel masters
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address
+ // CHECK-SAME: =[[EXIT:0x[0-f]+]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1
+ // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]]
+ // CHECK-SAME: task_id=[[PARENT_TASK_ID]],
+ // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK-SAME: parent_task_frame.exit=[[EXIT]]
+ // CHECK-SAME: parent_task_frame.reenter=[[REENTER]]
+ // CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]]
+ // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule:
+ // CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address
+ // CHECK-SAME: =[[TASK_EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]]
+ // CHECK-SAME: exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: task level 2
+ // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[PARENT_TASK_ID]]
+ // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule
+ // CHECK-SAME: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+
+ // implicit barrier parallel
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end
+ // parallel_id is 0 because the region ended in the barrier!
+ // CHECK-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // CHECK-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address
+ // CHECK-SAME: =[[EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1
+ // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[PARENT_TASK_ID]]
+ // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)={{0x[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // parallel_id is 0 because the region ended in the barrier!
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end
+ // CHECK-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+ // CHECK-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/task_in_joinbarrier.c b/final/runtime/test/ompt/tasks/task_in_joinbarrier.c
new file mode 100644
index 0000000..25b57a9
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/task_in_joinbarrier.c
@@ -0,0 +1,91 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
+#include "callback.h"
+#include <omp.h>
+
+// Test driver: in a two-thread parallel region the master thread creates one
+// task that shares `condition`, then waits on `condition` before continuing.
+// Per the expected trace after the code, the task is scheduled on the other
+// thread between that thread's barrier_begin and barrier_end events.
+int main()
+{
+ int condition=0;
+ omp_set_nested(0);
+ print_frame(0);
+ #pragma omp parallel num_threads(2)
+ {
+ print_frame_from_outlined_fn(1);
+ print_ids(0);
+ print_ids(1);
+ print_frame(0);
+ #pragma omp master
+ {
+ print_ids(0);
+ #pragma omp task shared(condition)
+ {
+ OMPT_SIGNAL(condition);
+ print_frame(1);
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ }
+ // Presumably blocks the master until the task has executed OMPT_SIGNAL;
+ // the macros are defined outside this file -- TODO confirm.
+ OMPT_WAIT(condition,1);
+ print_ids(0);
+ }
+ print_ids(0);
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: new_task_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+ // nested parallel masters
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // <- ompt_event_task_create would be expected here
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // implicit barrier parallel
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+ // implicit barrier parallel
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/task_types.c b/final/runtime/test/ompt/tasks/task_types.c
new file mode 100644
index 0000000..40ceb2d
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/task_types.c
@@ -0,0 +1,222 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+#include <math.h>
+
+// Test driver: prints the ids of the initial task, an implicit task, and of
+// explicit tasks created plain, with if (0), with untied, with final(1), and
+// nested inside the final task. The FileCheck directives after the code verify
+// the task_type flags that OMPT reports for each of them.
+int main() {
+ //initialize the OpenMP runtime
+ omp_get_num_threads();
+
+ // initial task
+ print_ids(0);
+
+ // x is only ever incremented inside the regions below; it starts at 0 so
+ // those increments never read an indeterminate value.
+ int x = 0;
+// implicit task
+#pragma omp parallel num_threads(1)
+ {
+ print_ids(0);
+ x++;
+ }
+
+#pragma omp parallel num_threads(2)
+ {
+// explicit task
+#pragma omp single
+#pragma omp task
+ {
+ print_ids(0);
+ x++;
+ }
+// explicit task with undeferred
+#pragma omp single
+#pragma omp task if (0)
+ {
+ print_ids(0);
+ x++;
+ }
+
+// explicit task with untied
+#pragma omp single
+#pragma omp task untied
+ {
+ // Output of thread_id is needed to know on which thread task is executed
+ printf("%" PRIu64 ": explicit_untied\n", ompt_get_thread_data()->value);
+ print_ids(0);
+ print_frame(1);
+ x++;
+#pragma omp taskyield
+ printf("%" PRIu64 ": explicit_untied(2)\n",
+ ompt_get_thread_data()->value);
+ print_ids(0);
+ print_frame(1);
+ x++;
+#pragma omp taskwait
+ printf("%" PRIu64 ": explicit_untied(3)\n",
+ ompt_get_thread_data()->value);
+ print_ids(0);
+ print_frame(1);
+ x++;
+ }
+// explicit task with final
+#pragma omp single
+#pragma omp task final(1)
+ {
+ print_ids(0);
+ x++;
+// nested explicit task with final and undeferred
+#pragma omp task
+ {
+ print_ids(0);
+ x++;
+ }
+ }
+
+ // Mergeable task test deactivated for now
+ // explicit task with mergeable
+ /*
+ #pragma omp task mergeable if((int)sin(0))
+ {
+ print_ids(0);
+ x++;
+ }
+ */
+
+ // TODO: merged task
+ }
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0
+ // CHECK-SAME: parent_task_frame.exit=[[NULL]]
+ // CHECK-SAME: parent_task_frame.reenter=[[NULL]]
+ // CHECK-SAME: new_task_id=[[INITIAL_TASK_ID:[0-9]+]], codeptr_ra=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_initial=1, has_dependences=no
+
+ // CHECK-NOT: 0: parallel_data initially not null
+
+ // initial task
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id={{[0-9]+}}
+ // CHECK-SAME: task_id=[[INITIAL_TASK_ID]], exit_frame=[[NULL]]
+ // CHECK-SAME: reenter_frame=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_initial=1, thread_num=0
+
+ // implicit task
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id={{[0-9]+}}
+ // CHECK-SAME: task_id={{[0-9]+}}, exit_frame={{0x[0-f]+}}
+ // CHECK-SAME: reenter_frame=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_implicit|ompt_task_undeferred=134217730
+ // CHECK-SAME: thread_num=0
+
+ // explicit task
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}
+ // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}
+ // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+ // CHECK-SAME: new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]]
+ // CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+ // CHECK-SAME: task_type=ompt_task_explicit=4
+ // CHECK-SAME: has_dependences=no
+
+ // CHECK: [[THREAD_ID_1:[0-9]+]]: ompt_event_task_schedule:
+ // CHECK-SAME: second_task_id=[[EXPLICIT_TASK_ID]]
+
+ // CHECK: [[THREAD_ID_1]]: task level 0: parallel_id=[[PARALLEL_ID:[0-9]+]]
+ // CHECK-SAME: task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}
+ // CHECK-SAME: reenter_frame=[[NULL]], task_type=ompt_task_explicit=4
+ // CHECK-SAME: thread_num={{[01]}}
+
+ // explicit task with undeferred
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}
+ // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}
+ // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+ // CHECK-SAME: new_task_id=[[EXPLICIT_UNDEFERRED_TASK_ID:[0-9]+]]
+ // CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+ // CHECK-SAME: has_dependences=no
+
+ // CHECK: [[THREAD_ID_2:[0-9]+]]: ompt_event_task_schedule:
+ // CHECK-SAME: second_task_id=[[EXPLICIT_UNDEFERRED_TASK_ID]]
+
+ // CHECK: [[THREAD_ID_2]]: task level 0: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[EXPLICIT_UNDEFERRED_TASK_ID]]
+ // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+ // CHECK-SAME: thread_num={{[01]}}
+
+ // explicit task with untied
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}
+ // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}
+ // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+ // CHECK-SAME: new_task_id=[[EXPLICIT_UNTIED_TASK_ID:[0-9]+]]
+ // CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460
+ // CHECK-SAME: has_dependences=no
+
+ // Here the thread_id cannot be taken from a schedule event as there
+ // may be multiple of those
+ // CHECK: [[THREAD_ID_3:[0-9]+]]: explicit_untied
+ // CHECK: [[THREAD_ID_3]]: task level 0: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]]
+ // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460
+ // CHECK-SAME: thread_num={{[01]}}
+
+ // after taskyield
+ // CHECK: [[THREAD_ID_3_2:[0-9]+]]: explicit_untied(2)
+ // CHECK: [[THREAD_ID_3_2]]: task level 0: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]]
+ // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460
+ // CHECK-SAME: thread_num={{[01]}}
+
+ // after taskwait
+ // CHECK: [[THREAD_ID_3_3:[0-9]+]]: explicit_untied(3)
+ // CHECK: [[THREAD_ID_3_3]]: task level 0: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]]
+ // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460
+ // CHECK-SAME: thread_num={{[01]}}
+
+ // explicit task with final
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}
+ // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}
+ // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+ // CHECK-SAME: new_task_id=[[EXPLICIT_FINAL_TASK_ID:[0-9]+]]
+ // CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_final=536870916
+ // CHECK-SAME: has_dependences=no
+
+ // CHECK: [[THREAD_ID_4:[0-9]+]]: ompt_event_task_schedule:
+ // CHECK-SAME: second_task_id=[[EXPLICIT_FINAL_TASK_ID]]
+
+ // CHECK: [[THREAD_ID_4]]: task level 0: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[EXPLICIT_FINAL_TASK_ID]]
+ // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_final=536870916
+ // CHECK-SAME: thread_num={{[01]}}
+
+ // nested explicit task with final and undeferred
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}
+ // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}
+ // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}
+ // CHECK-SAME: new_task_id=[[NESTED_FINAL_UNDEFERRED_TASK_ID:[0-9]+]]
+ // CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred
+ // CHECK-SAME:|ompt_task_final=671088644
+ // CHECK-SAME: has_dependences=no
+
+ // CHECK: [[THREAD_ID_5:[0-9]+]]: ompt_event_task_schedule:
+ // CHECK-SAME: second_task_id=[[NESTED_FINAL_UNDEFERRED_TASK_ID]]
+
+ // CHECK: [[THREAD_ID_5]]: task level 0: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[NESTED_FINAL_UNDEFERRED_TASK_ID]]
+ // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+ // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred
+ // CHECK-SAME:|ompt_task_final=671088644
+ // CHECK-SAME: thread_num={{[01]}}
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/task_types_serialized.c b/final/runtime/test/ompt/tasks/task_types_serialized.c
new file mode 100644
index 0000000..7726f5b
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/task_types_serialized.c
@@ -0,0 +1,113 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+// Prints one line holding the calling thread's OMPT thread-data value, the
+// caller-supplied id, and the type flags of the current task (ancestor level
+// 0), both as formatted text and as an integer. The body runs inside an
+// omp critical region.
+__attribute__ ((noinline)) // workaround for bug in icc
+void print_task_type(int id)
+{
+ #pragma omp critical
+ {
+ int task_type;
+ char buffer[2048];
+ ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL);
+ format_task_type(task_type, buffer);
+ printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type);
+ }
+}
+
+// Test driver: calls print_task_type() from the initial task, from an implicit
+// task, and from explicit tasks created inside a one-thread parallel region
+// (plain, if(0), untied, final(1), and nested in the final task). The FileCheck
+// directives after the code verify the reported task_type flags.
+int main()
+{
+ //initial task
+ print_task_type(0);
+
+ // x is only ever incremented inside the regions below; it starts at 0 so
+ // those increments never read an indeterminate value.
+ int x = 0;
+ //implicit task
+ #pragma omp parallel num_threads(1)
+ {
+ print_task_type(1);
+ x++;
+ }
+
+ #pragma omp parallel num_threads(1)
+ #pragma omp master
+ {
+ //explicit task
+ #pragma omp task
+ {
+ print_task_type(2);
+ x++;
+ }
+
+ //explicit task with undeferred
+ #pragma omp task if(0)
+ {
+ print_task_type(3);
+ x++;
+ }
+
+ //explicit task with untied
+ #pragma omp task untied
+ {
+ print_task_type(4);
+ x++;
+ }
+
+ //explicit task with final
+ #pragma omp task final(1)
+ {
+ print_task_type(5);
+ x++;
+ //nested explicit task with final and undeferred
+ #pragma omp task
+ {
+ print_task_type(6);
+ x++;
+ }
+ }
+
+/*
+ //TODO:not working
+ //explicit task with mergeable
+ #pragma omp task mergeable
+ {
+ print_task_type(7);
+ x++;
+ }
+*/
+
+ //TODO: merged task
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+ // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1
+ // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730
+
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+ // CHECK: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+ // CHECK: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188, has_dependences=no
+ // CHECK: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188
+
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no
+ // CHECK: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644
+
+ // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no
+ // CHECK: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644
+
+ // ___CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+ // ___CHECK: {{^[0-9]+}}: id=7 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/taskloop.c b/final/runtime/test/ompt/tasks/taskloop.c
new file mode 100644
index 0000000..59a47bf
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/taskloop.c
@@ -0,0 +1,81 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | FileCheck --check-prefix=TASKS %s
+// REQUIRES: ompt
+
+// These compilers don't support the taskloop construct
+// UNSUPPORTED: gcc-4, gcc-5, icc-16
+// GCC 6 has support for taskloops, but at least 6.3.0 is crashing on this test
+// UNSUPPORTED: gcc-6
+
+#include "callback.h"
+#include <omp.h>
+
+// Test driver: in a two-thread parallel region the master thread runs a
+// taskloop over i = 0 and i = 3. The FileCheck directives after the code
+// expect a taskloop count of 2 and two explicit task_create events, plus the
+// matching schedule events under the second prefix.
+int main() {
+ // x is only incremented in the loop body; it starts at 0 so the increment
+ // never reads an indeterminate value.
+ // NOTE(review): x appears to be shared by both generated tasks, so the
+ // increments may race; left unsynchronised so the event trace is unchanged.
+ unsigned int i, x = 0;
+
+#pragma omp parallel num_threads(2)
+ {
+#pragma omp barrier
+
+#pragma omp master
+#pragma omp taskloop
+ for (i = 0; i < 5; i += 3) {
+ x++;
+ }
+ }
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+ // CHECK-SAME: parent_task_id={{[0-9]+}}
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]]
+ // CHECK-SAME: requested_team_size=2
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1:[0-9]+]]
+ // CHECK-SAME: team_size=2, thread_num=0
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=2
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK-SAME: new_task_id=[[TASK_ID1:[0-9]+]]
+ // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-SAME: task_type=ompt_task_explicit=4
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK-SAME: new_task_id=[[TASK_ID2:[0-9]+]]
+ // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-SAME: task_type=ompt_task_explicit=4
+ // CHECK-NOT: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK-SAME: count=2
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin:
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0
+ // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]], team_size=2, thread_num=0
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+
+ // TASKS: ompt_event_task_create:{{.*}} new_task_id={{[0-9]+}}
+ // TASKS-SAME: task_type=ompt_task_initial
+ // TASKS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskloop_begin:
+ // TASKS: ompt_event_task_create:{{.*}} new_task_id=[[TASK_ID1:[0-9]+]]
+ // TASKS-SAME: task_type=ompt_task_explicit
+ // TASKS-DAG: ompt_event_task_create:{{.*}} new_task_id=[[TASK_ID2:[0-9]+]]
+ // Schedule events:
+ // TASKS-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID1]]
+ // TASKS-DAG: {{^.*}}first_task_id=[[TASK_ID1]], second_task_id={{[0-9]+}}
+ // TASKS-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID2]]
+ // TASKS-DAG: {{^.*}}first_task_id=[[TASK_ID2]], second_task_id={{[0-9]+}}
+ // TASKS-NOT: ompt_event_task_schedule
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/taskyield.c b/final/runtime/test/ompt/tasks/taskyield.c
new file mode 100644
index 0000000..56a4697
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/taskyield.c
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// Current GOMP interface implements taskyield as stub
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+#include <unistd.h>
+
+// Test driver: in a two-thread parallel region the master thread creates a
+// first task that signals and then waits on `condition`, waits for that
+// signal, creates a second task that increments x, and then executes a
+// taskyield. The expected trace after the code has the second task scheduled
+// on the master thread with prior_task_status=ompt_task_yield and the first
+// task scheduled on the other thread.
+int main()
+{
+ int condition=0, x=0;
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp master
+ {
+ #pragma omp task shared(condition)
+ {
+ OMPT_SIGNAL(condition);
+ OMPT_WAIT(condition,2);
+ }
+ // Presumably blocks until the first task has executed its OMPT_SIGNAL;
+ // the macros are defined outside this file -- TODO confirm.
+ OMPT_WAIT(condition,1);
+ #pragma omp task shared(x)
+ {
+ x++;
+ }
+ printf("%" PRIu64 ": before yield\n", ompt_get_thread_data()->value);
+ #pragma omp taskyield
+ printf("%" PRIu64 ": after yield\n", ompt_get_thread_data()->value);
+ OMPT_SIGNAL(condition);
+ }
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: new_task_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[0-9]+}}, thread_num={{[0-9]+}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[WORKER_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[MAIN_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[MAIN_TASK]], prior_task_status=ompt_task_yield=2
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[MAIN_TASK]], second_task_id=[[IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_others=4
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[WORKER_TASK]], second_task_id={{[0-9]+}}, prior_task_status=ompt_task_complete=1
+
+
+
+
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/tasks/untied_task.c b/final/runtime/test/ompt/tasks/untied_task.c
new file mode 100644
index 0000000..e68fa26
--- /dev/null
+++ b/final/runtime/test/ompt/tasks/untied_task.c
@@ -0,0 +1,108 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{ // Runs an untied explicit task, with an if(0) task nested inside it, in a 2-thread region.
+ int condition=0; // flag handed to OMPT_SIGNAL / OMPT_WAIT below
+ omp_set_nested(0); // nested parallelism is switched off for this test
+ print_frame(0);
+ #pragma omp parallel num_threads(2)
+ {
+ print_frame_from_outlined_fn(1);
+ print_ids(0);
+ print_ids(1);
+ print_frame(0);
+ #pragma omp master
+ {
+ print_ids(0);
+ #pragma omp task untied shared(condition)
+ {
+ OMPT_SIGNAL(condition); // NOTE(review): macro from callback.h, presumably bumps condition -- confirm
+ print_frame(1);
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ #pragma omp task if(0)
+ { // the false if clause makes this inner task undeferred
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ }
+ print_ids(0);
+ print_ids(1);
+ print_ids(2);
+ }
+ OMPT_WAIT(condition,1); // NOTE(review): presumably blocks until the task above has signalled -- confirm
+ print_ids(0);
+ }
+ #pragma omp barrier
+ print_ids(0);
+ }
+
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+
+ // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+ // make sure initial data pointers are null
+ // CHECK-NOT: 0: new_task_data initially not null
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+ // master thread: implicit task of the parallel region (this test has no nested parallel region)
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // <- ompt_event_task_create would be expected here
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // explicit barrier after master
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // implicit barrier parallel
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // this is expected to come earlier and at MASTER:
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/worksharing/for/auto.c b/final/runtime/test/ompt/worksharing/for/auto.c
new file mode 100644
index 0000000..17d26f5
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/auto.c
@@ -0,0 +1,7 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for auto = static schedule
+// XFAIL: gcc
+
+#define SCHEDULE auto
+#include "base.h"
diff --git a/final/runtime/test/ompt/worksharing/for/auto_serialized.c b/final/runtime/test/ompt/worksharing/for/auto_serialized.c
new file mode 100644
index 0000000..f756166
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/auto_serialized.c
@@ -0,0 +1,7 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for auto = static schedule
+// XFAIL: gcc
+
+#define SCHEDULE auto
+#include "base_serialized.h"
diff --git a/final/runtime/test/ompt/worksharing/for/auto_split.c b/final/runtime/test/ompt/worksharing/for/auto_split.c
new file mode 100644
index 0000000..d82e3fd
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/auto_split.c
@@ -0,0 +1,8 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for auto = static schedule
+// XFAIL: gcc
+
+#define SCHEDULE auto
+#include "base_split.h"
diff --git a/final/runtime/test/ompt/worksharing/for/base.h b/final/runtime/test/ompt/worksharing/for/base.h
new file mode 100644
index 0000000..8a496d9
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/base.h
@@ -0,0 +1,43 @@
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ unsigned int i;
+ // Empty four-iteration loop on a four-thread team; SCHEDULE is defined by the including test.
+ #pragma omp parallel for num_threads(4) schedule(SCHEDULE)
+ for (i = 0; i < 4; i++) {
+ }
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
+ // master thread: implicit task and loop events
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+ // the three remaining threads: one identical group of events each
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/worksharing/for/base_serialized.h b/final/runtime/test/ompt/worksharing/for/base_serialized.h
new file mode 100644
index 0000000..3376b37
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/base_serialized.h
@@ -0,0 +1,28 @@
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  unsigned int i;
+  // One-iteration loop on a one-thread team; SCHEDULE is defined by the including test.
+  #pragma omp parallel for num_threads(1) schedule(SCHEDULE)
+  for (i = 0; i < 1; i++) {
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
+  // The parallel_id printed at implicit_task_end is matched as any decimal number, as in base.h.
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  return 0;
+}
diff --git a/final/runtime/test/ompt/worksharing/for/base_split.h b/final/runtime/test/ompt/worksharing/for/base_split.h
new file mode 100644
index 0000000..0f1fed3
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/base_split.h
@@ -0,0 +1,66 @@
+#include "callback.h"
+#include <omp.h>
+
+/* With the combined parallel-for construct (base.h), the return addresses are hard to compare.
+ With separate parallel and for-nowait constructs, the addresses become more predictable,
+ but the beginning of the for-loop still generates additional code, so the offset from loop-begin
+ to the label is more than 4 bytes.
+*/
+
+int main()
+{
+ unsigned int i;
+ // Separate parallel and for-nowait constructs, with address markers printed around them.
+ #pragma omp parallel num_threads(4)
+ {
+ print_current_address(0);
+ #pragma omp for schedule(SCHEDULE) nowait
+ for (i = 0; i < 4; i++) {
+ print_fuzzy_address(1); // NOTE(review): presumably the address compared with the loop-begin codeptr_ra -- confirm
+ }
+ print_fuzzy_address(2); // NOTE(review): presumably the address compared with the loop-end codeptr_ra -- confirm
+ }
+ print_fuzzy_address(3); // NOTE(review): presumably the address compared with the parallel-end codeptr_ra -- confirm
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[PARALLEL_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, invoker={{[0-9]+}}, codeptr_ra=[[PARALLEL_RETURN_ADDRESS]]
+ // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[PARALLEL_RETURN_ADDRESS]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+ // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+ // Second pattern set, selected by the *_split tests with --check-prefix: loop-begin return address.
+ // CHECK-LOOP: 0: NULL_POINTER=[[NULL:.*$]]
+ // CHECK-LOOP: 0: ompt_event_runtime_shutdown
+ // CHECK-LOOP: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra={{0x[0-f]+}}, invoker={{[0-9]+}}
+ // CHECK-LOOP: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+ // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+ // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+ // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+
+
+ return 0;
+}
diff --git a/final/runtime/test/ompt/worksharing/for/dynamic.c b/final/runtime/test/ompt/worksharing/for/dynamic.c
new file mode 100644
index 0000000..ca5ae10
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/dynamic.c
@@ -0,0 +1,5 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h
+// REQUIRES: ompt
+
+#define SCHEDULE dynamic
+#include "base.h"
diff --git a/final/runtime/test/ompt/worksharing/for/dynamic_serialized.c b/final/runtime/test/ompt/worksharing/for/dynamic_serialized.c
new file mode 100644
index 0000000..0f80929
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/dynamic_serialized.c
@@ -0,0 +1,5 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h
+// REQUIRES: ompt
+
+#define SCHEDULE dynamic
+#include "base_serialized.h"
diff --git a/final/runtime/test/ompt/worksharing/for/dynamic_split.c b/final/runtime/test/ompt/worksharing/for/dynamic_split.c
new file mode 100644
index 0000000..cf14971
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/dynamic_split.c
@@ -0,0 +1,7 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+
+#define SCHEDULE dynamic
+#include "base_split.h"
diff --git a/final/runtime/test/ompt/worksharing/for/guided.c b/final/runtime/test/ompt/worksharing/for/guided.c
new file mode 100644
index 0000000..01bff4e
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/guided.c
@@ -0,0 +1,5 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h
+// REQUIRES: ompt
+
+#define SCHEDULE guided
+#include "base.h"
diff --git a/final/runtime/test/ompt/worksharing/for/guided_serialized.c b/final/runtime/test/ompt/worksharing/for/guided_serialized.c
new file mode 100644
index 0000000..4b5096d
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/guided_serialized.c
@@ -0,0 +1,5 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h
+// REQUIRES: ompt
+
+#define SCHEDULE guided
+#include "base_serialized.h"
diff --git a/final/runtime/test/ompt/worksharing/for/guided_split.c b/final/runtime/test/ompt/worksharing/for/guided_split.c
new file mode 100644
index 0000000..7d560c2
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/guided_split.c
@@ -0,0 +1,7 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+
+#define SCHEDULE guided
+#include "base_split.h"
diff --git a/final/runtime/test/ompt/worksharing/for/runtime.c b/final/runtime/test/ompt/worksharing/for/runtime.c
new file mode 100644
index 0000000..bcf160f
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/runtime.c
@@ -0,0 +1,5 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h
+// REQUIRES: ompt
+
+#define SCHEDULE runtime
+#include "base.h"
diff --git a/final/runtime/test/ompt/worksharing/for/runtime_serialized.c b/final/runtime/test/ompt/worksharing/for/runtime_serialized.c
new file mode 100644
index 0000000..231d67d
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/runtime_serialized.c
@@ -0,0 +1,5 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h
+// REQUIRES: ompt
+
+#define SCHEDULE runtime
+#include "base_serialized.h"
diff --git a/final/runtime/test/ompt/worksharing/for/runtime_split.c b/final/runtime/test/ompt/worksharing/for/runtime_split.c
new file mode 100644
index 0000000..7a677ed
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/runtime_split.c
@@ -0,0 +1,7 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+
+#define SCHEDULE runtime
+#include "base_split.h"
diff --git a/final/runtime/test/ompt/worksharing/for/static.c b/final/runtime/test/ompt/worksharing/for/static.c
new file mode 100644
index 0000000..4d99059
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/static.c
@@ -0,0 +1,7 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for static schedule
+// XFAIL: gcc
+
+#define SCHEDULE static
+#include "base.h"
diff --git a/final/runtime/test/ompt/worksharing/for/static_serialized.c b/final/runtime/test/ompt/worksharing/for/static_serialized.c
new file mode 100644
index 0000000..4860d49
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/static_serialized.c
@@ -0,0 +1,7 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for static schedule
+// XFAIL: gcc
+
+#define SCHEDULE static
+#include "base_serialized.h"
diff --git a/final/runtime/test/ompt/worksharing/for/static_split.c b/final/runtime/test/ompt/worksharing/for/static_split.c
new file mode 100644
index 0000000..d8c88dd
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/for/static_split.c
@@ -0,0 +1,8 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for static schedule
+// XFAIL: gcc
+
+#define SCHEDULE static
+#include "base_split.h"
diff --git a/final/runtime/test/ompt/worksharing/sections.c b/final/runtime/test/ompt/worksharing/sections.c
new file mode 100644
index 0000000..bafb743
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/sections.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// Some compilers generate code that does not distinguish between sections and loops
+// XFAIL: gcc, clang-3, clang-4, clang-5, icc-16, icc-17
+// UNSUPPORTED: icc-18
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel sections num_threads(2)
+  {
+    #pragma omp section
+    {
+      // PRIu64 rather than %lu: the same format single.c uses for this field.
+      printf("%" PRIu64 ": section 1\n", ompt_get_thread_data()->value);
+    }
+    #pragma omp section
+    { printf("%" PRIu64 ": section 2\n", ompt_get_thread_data()->value); }
+  }
+  // Above: two sections on a two-thread team, each printing the value held in its OMPT thread data.
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN:0x[0-f]+]], count=2
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END:0x[0-f]+]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN]], count=2
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END]]
+
+  return 0;
+}
diff --git a/final/runtime/test/ompt/worksharing/single.c b/final/runtime/test/ompt/worksharing/single.c
new file mode 100644
index 0000000..6b24f2d
--- /dev/null
+++ b/final/runtime/test/ompt/worksharing/single.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not call the runtime for the single construct
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+ int x = 0; // incremented inside the single block, printed after the region
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp single
+ {
+ printf("%" PRIu64 ": in single\n", ompt_get_thread_data()->value);
+ x++;
+ }
+ }
+
+ printf("x=%d\n", x);
+
+ // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+ // the thread that executes the single block
+ // CHECK-DAG: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_single_in_block_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1
+ // CHECK-DAG: {{^}}[[THREAD_ID_1]]: in single
+ // CHECK-DAG: {{^}}[[THREAD_ID_1]]: ompt_event_single_in_block_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1
+ // the other thread of the team
+ // CHECK-DAG: {{^}}[[THREAD_ID_2:[0-9]+]]: ompt_event_single_others_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1
+ // CHECK-DAG: {{^}}[[THREAD_ID_2]]: ompt_event_single_others_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1
+
+ return 0;
+}
diff --git a/final/runtime/test/parallel/omp_nested.c b/final/runtime/test/parallel/omp_nested.c
new file mode 100644
index 0000000..8b78088
--- /dev/null
+++ b/final/runtime/test/parallel/omp_nested.c
@@ -0,0 +1,46 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/*
+ * Test if the compiler supports nested parallelism
+ * By Chunhua Liao, University of Houston
+ * Oct. 2005
+ */
+int test_omp_nested()
+{
+  int balance = 0; /* +1 per outer thread, -1 per inner thread */
+
+#ifdef _OPENMP
+  /* Cap the team size at four threads and switch nesting on. */
+  if (omp_get_max_threads() > 4) {
+    omp_set_num_threads(4);
+  }
+  omp_set_nested(1);
+#endif
+
+  #pragma omp parallel shared(balance)
+  {
+    #pragma omp critical
+    balance += 1;
+    #pragma omp parallel
+    {
+      #pragma omp critical
+      balance -= 1;
+    }
+  }
+  return balance != 0; /* nonzero when the inner regions ran more than one thread each */
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  /* The exit status is the number of repetitions in which the test returned 0. */
+  while (rep < REPETITIONS) {
+    failures += (test_omp_nested() == 0);
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/parallel/omp_parallel_copyin.c b/final/runtime/test/parallel/omp_parallel_copyin.c
new file mode 100644
index 0000000..600f9b7
--- /dev/null
+++ b/final/runtime/test/parallel/omp_parallel_copyin.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+static int sum1 = 789;
+#pragma omp threadprivate(sum1)
+
+int test_omp_parallel_copyin()
+{
+  int total = 0;       /* sum of every thread's final sum1 */
+  int team_size = 0;   /* number of threads that contributed */
+  int expected;
+
+  /* Set on the calling thread; copyin hands this value to each thread's copy. */
+  sum1 = 7;
+
+  #pragma omp parallel copyin(sum1)
+  {
+    int k;
+
+    #pragma omp for
+    for (k = 1; k < 1000; k++)
+      sum1 += k;
+
+    #pragma omp critical
+    {
+      total += sum1;
+      team_size += 1;
+    }
+  }
+  expected = (999 * 1000) / 2 + 7 * team_size;
+  return expected == total;
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  /* The exit status is the number of repetitions in which the test returned 0. */
+  while (rep < REPETITIONS) {
+    failures += (test_omp_parallel_copyin() == 0);
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/parallel/omp_parallel_default.c b/final/runtime/test/parallel/omp_parallel_default.c
new file mode 100644
index 0000000..0a8e09e
--- /dev/null
+++ b/final/runtime/test/parallel/omp_parallel_default.c
@@ -0,0 +1,43 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+int test_omp_parallel_default()
+{
+  int idx;
+  int partial;
+  int total = 0;
+  const int expected = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+  int ok;
+
+  /* default(shared) leaves total shared; idx and partial are per-thread. */
+  #pragma omp parallel default(shared) private(idx) private(partial)
+  {
+    partial = 0;
+    #pragma omp for
+    for (idx = 1; idx <= LOOPCOUNT; idx++)
+      partial += idx;
+
+    #pragma omp critical
+    total += partial;
+  }
+
+  ok = (expected == total);
+  if (!ok)
+    fprintf(stderr, "KNOWN_SUM = %d; SUM = %d\n", expected, total);
+
+  return ok;
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  /* The exit status is the number of repetitions in which the test returned 0. */
+  while (rep < REPETITIONS) {
+    failures += (test_omp_parallel_default() == 0);
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/parallel/omp_parallel_firstprivate.c b/final/runtime/test/parallel/omp_parallel_firstprivate.c
new file mode 100644
index 0000000..dbee76c
--- /dev/null
+++ b/final/runtime/test/parallel/omp_parallel_firstprivate.c
@@ -0,0 +1,46 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+//static int sum1 = 789;
+
+int test_omp_parallel_firstprivate()
+{
+  int seed = 7;        /* every thread's private copy starts from this value */
+  int total = 0;
+  int team_size = 0;
+  int expected;
+
+  #pragma omp parallel firstprivate(seed)
+  {
+    int k;
+
+    #pragma omp for
+    for (k = 1; k < 1000; k++)
+      seed += k;
+
+    /* Fold this thread's partial sum into the shared total. */
+    #pragma omp critical
+    {
+      total += seed;
+      team_size += 1;
+    }
+  }
+
+  expected = (999 * 1000) / 2 + 7 * team_size;
+  return expected == total;
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  /* The exit status is the number of repetitions in which the test returned 0. */
+  while (rep < REPETITIONS) {
+    failures += (test_omp_parallel_firstprivate() == 0);
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/parallel/omp_parallel_if.c b/final/runtime/test/parallel/omp_parallel_if.c
new file mode 100644
index 0000000..abbf3cd
--- /dev/null
+++ b/final/runtime/test/parallel/omp_parallel_if.c
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+int test_omp_parallel_if()
+{
+  int idx;
+  int partial;           /* not in a private clause, hence shared */
+  int total = 0;
+  int control = 1;
+  int expected = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+
+  /*
+   * control is 1, so the if clause is false and the region must run on a
+   * single thread; total then equals exactly one full sum.
+   */
+  #pragma omp parallel private(idx) if(control==0)
+  {
+    partial = 0;
+    for (idx = 1; idx <= LOOPCOUNT; idx++)
+      partial += idx;
+
+    #pragma omp critical
+    total += partial;
+  }
+  return expected == total;
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  /* The exit status is the number of repetitions in which the test returned 0. */
+  while (rep < REPETITIONS) {
+    failures += (test_omp_parallel_if() == 0);
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/parallel/omp_parallel_num_threads.c b/final/runtime/test/parallel/omp_parallel_num_threads.c
new file mode 100644
index 0000000..8af1f9d
--- /dev/null
+++ b/final/runtime/test/parallel/omp_parallel_num_threads.c
@@ -0,0 +1,46 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+int test_omp_parallel_num_threads()
+{
+  int mismatches = 0;
+  int available = 0;
+  int requested;
+  int arrived;
+
+  /* Probe the default team size; only the master thread records it. */
+  #pragma omp parallel
+  {
+    #pragma omp master
+    available = omp_get_num_threads();
+  }
+
+  /* Request every team size from 1 up to the probed maximum. */
+  for (requested = 1; requested <= available; ++requested) {
+    arrived = 0;
+    #pragma omp parallel reduction(+:mismatches) num_threads(requested)
+    {
+      if (omp_get_num_threads() != requested)
+        mismatches += 1;
+      #pragma omp atomic
+      arrived += 1;
+    }
+    if (arrived != requested)
+      mismatches += 1;
+  }
+  return mismatches == 0;
+}
+
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  /* The exit status is the number of repetitions in which the test returned 0. */
+  while (rep < REPETITIONS) {
+    failures += (test_omp_parallel_num_threads() == 0);
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/parallel/omp_parallel_private.c b/final/runtime/test/parallel/omp_parallel_private.c
new file mode 100644
index 0000000..238e806
--- /dev/null
+++ b/final/runtime/test/parallel/omp_parallel_private.c
@@ -0,0 +1,46 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+//static int sum1 = 789;
+
+/*
+ * Checks the private clause on a parallel construct. Every thread starts
+ * its private accumulator at 7 and adds its share of 1..999; the shared
+ * total must then equal the loop sum plus 7 per participating thread.
+ * Returns 1 on success, 0 otherwise.
+ */
+int test_omp_parallel_private()
+{
+  int total = 0;
+  int team_size = 0;
+  int local_sum;
+  int expected;
+
+  #pragma omp parallel private(local_sum)
+  {
+    int k;
+    local_sum = 7;
+    #pragma omp for
+    for (k = 1; k < 1000; k++) {
+      local_sum += k;
+    }
+    /* fold the private value into the shared result, one thread at a time */
+    #pragma omp critical
+    {
+      team_size++;
+      total += local_sum;
+    }
+  }
+  expected = 7 * team_size + (999 * 1000) / 2;
+  return (expected == total);
+}
+
+/* Runs the test REPETITIONS times; the exit status is the number of
+ * failed runs (0 means every repetition passed). */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; rep++) {
+    /* the test returns 1 on success, so !result counts one failure */
+    failures += !test_omp_parallel_private();
+  }
+  return failures;
+}
diff --git a/final/runtime/test/parallel/omp_parallel_reduction.c b/final/runtime/test/parallel/omp_parallel_reduction.c
new file mode 100644
index 0000000..bb00939
--- /dev/null
+++ b/final/runtime/test/parallel/omp_parallel_reduction.c
@@ -0,0 +1,254 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */
+#define MAX_FACTOR 10
+#define KNOWN_PRODUCT 3628800 /* 10! */
+
+/*
+ * Exercises the reduction clause of "parallel for" with the operators
+ * +, -, *, &&, ||, &, | and ^. Each sub-test compares the reduced value
+ * against a value known in advance, reports a mismatch on stderr and
+ * increments result.
+ * Returns 1 if no sub-test failed, 0 otherwise.
+ */
+int test_omp_parallel_reduction()
+{
+ int sum;
+ int known_sum;
+ double dsum;
+ double dknown_sum;
+ double dt=0.5; /* base of geometric row for + and - test*/
+ double rounding_error= 1.E-9;
+ int diff;
+ double ddiff;
+ int product;
+ int known_product;
+ int logic_and;
+ int logic_or;
+ int bit_and;
+ int bit_or;
+ int exclusiv_bit_or;
+ int logics[LOOPCOUNT];
+ int i;
+ double dpt;
+ int result;
+
+ /* start every accumulator at the identity value of its operator */
+ sum =0;
+ dsum=0;
+ product=1;
+ logic_and=1;
+ logic_or=0;
+ bit_and=1;
+ bit_or=0;
+ exclusiv_bit_or=0;
+ result=0;
+ /* replaces the 0.5 given in the declaration of dt above */
+ dt = 1./3.;
+ known_sum = (LOOPCOUNT*(LOOPCOUNT+1))/2;
+
+ /* Tests for integers */
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:sum)
+ for (i=1;i<=LOOPCOUNT;i++) {
+ sum=sum+i;
+ }
+
+ if(known_sum!=sum) {
+ result++;
+ fprintf(stderr,"Error in sum with integers: Result was %d instead of %d\n",sum,known_sum);
+ }
+
+ /* start from the full sum and subtract every term: 0 must remain */
+ diff = (LOOPCOUNT*(LOOPCOUNT+1))/2;
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:diff)
+ for (i=1;i<=LOOPCOUNT;++i) {
+ diff=diff-i;
+ }
+
+ if(diff != 0) {
+ result++;
+ fprintf(stderr,"Error in difference with integers: Result was %d instead of 0.\n",diff);
+ }
+
+ /* Tests for doubles */
+ /* dpt becomes dt^DOUBLE_DIGITS, so that dknown_sum is the closed form
+ * (1 - dt^n) / (1 - dt) of the geometric sum computed in the loop */
+ dsum=0;
+ dpt=1;
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dpt*=dt;
+ }
+ dknown_sum = (1-dpt)/(1-dt);
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:dsum)
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dsum += pow(dt,i);
+ }
+
+ if( fabs(dsum-dknown_sum) > rounding_error ) {
+ result++;
+ fprintf(stderr,"Error in sum with doubles: Result was %f instead of %f (Difference: %E)\n",dsum,dknown_sum, dsum-dknown_sum);
+ }
+
+ dpt=1;
+
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dpt*=dt;
+ }
+ /* note: this writes an empty line to stderr on every call */
+ fprintf(stderr,"\n");
+ /* same series as above, this time subtracted from its closed form */
+ ddiff = (1-dpt)/(1-dt);
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:ddiff)
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ ddiff -= pow(dt,i);
+ }
+ if( fabs(ddiff) > rounding_error) {
+ result++;
+ fprintf(stderr,"Error in Difference with doubles: Result was %E instead of 0.0\n",ddiff);
+ }
+
+ /* Tests for product of integers */
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(*:product)
+ for(i=1;i<=MAX_FACTOR;i++) {
+ product *= i;
+ }
+
+ known_product = KNOWN_PRODUCT;
+ if(known_product != product) {
+ result++;
+ fprintf(stderr,"Error in Product with integers: Result was %d instead of %d\n\n",product,known_product);
+ }
+
+ /* Tests for logical and */
+ /* part 1: all inputs are 1, the result has to stay true */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=1;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(&&:logic_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_and = (logic_and && logics[i]);
+ }
+ if(!logic_and) {
+ result++;
+ fprintf(stderr,"Error in logic AND part 1.\n");
+ }
+
+ /* part 2: a single 0 in the middle has to turn the result false */
+ logic_and = 1;
+ logics[LOOPCOUNT/2]=0;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(&&:logic_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_and = logic_and && logics[i];
+ }
+ if(logic_and) {
+ result++;
+ fprintf(stderr,"Error in logic AND part 2.\n");
+ }
+
+ /* Tests for logical or */
+ /* part 1: all inputs are 0, the result has to stay false */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(||:logic_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_or = logic_or || logics[i];
+ }
+ if(logic_or) {
+ result++;
+ fprintf(stderr,"Error in logic OR part 1.\n");
+ }
+ /* part 2: a single 1 in the middle has to turn the result true */
+ logic_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(||:logic_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_or = logic_or || logics[i];
+ }
+ if(!logic_or) {
+ result++;
+ fprintf(stderr,"Error in logic OR part 2.\n");
+ }
+
+ /* Tests for bitwise and */
+ /* same two-part scheme as the logical AND test, using & */
+ for(i=0;i<LOOPCOUNT;++i) {
+ logics[i]=1;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(&:bit_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_and = (bit_and & logics[i]);
+ }
+ if(!bit_and) {
+ result++;
+ fprintf(stderr,"Error in BIT AND part 1.\n");
+ }
+
+ bit_and = 1;
+ logics[LOOPCOUNT/2]=0;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(&:bit_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_and = bit_and & logics[i];
+ }
+ if(bit_and) {
+ result++;
+ fprintf(stderr,"Error in BIT AND part 2.\n");
+ }
+
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ /* Tests for bitwise or */
+ /* same two-part scheme as the logical OR test, using | */
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(|:bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ if(bit_or) {
+ result++;
+ fprintf(stderr,"Error in BIT OR part 1\n");
+ }
+ bit_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(|:bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ if(!bit_or) {
+ result++;
+ fprintf(stderr,"Error in BIT OR part 2\n");
+ }
+
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ /* Tests for bitwise xor */
+ /* part 1: all inputs 0 gives 0; part 2: exactly one 1 gives non-zero */
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(^:exclusiv_bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ if(exclusiv_bit_or) {
+ result++;
+ fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n");
+ }
+
+ exclusiv_bit_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(^:exclusiv_bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ if(!exclusiv_bit_or) {
+ result++;
+ fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n");
+ }
+
+ /*printf("\nResult:%d\n",result);*/
+ /* result holds the number of failed sub-tests */
+ return (result==0);
+}
+
+/* Runs the test REPETITIONS times; the exit status is the number of
+ * failed runs (0 means every repetition passed). */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; rep++) {
+    /* the test returns 1 on success, so !result counts one failure */
+    failures += !test_omp_parallel_reduction();
+  }
+  return failures;
+}
diff --git a/final/runtime/test/parallel/omp_parallel_shared.c b/final/runtime/test/parallel/omp_parallel_shared.c
new file mode 100644
index 0000000..3146ca6
--- /dev/null
+++ b/final/runtime/test/parallel/omp_parallel_shared.c
@@ -0,0 +1,46 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/*
+ * Checks the shared clause on a parallel construct: every thread sums its
+ * part of 1..LOOPCOUNT into a local variable and then adds it to the one
+ * shared total inside a critical section.
+ * Returns 1 if the total matches the closed-form value; otherwise prints
+ * both values to stderr and returns 0.
+ */
+int test_omp_parallel_shared()
+{
+  int idx;
+  int total = 0;
+  int expected = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+
+  #pragma omp parallel private(idx) shared(total)
+  {
+    int partial = 0;
+    #pragma omp for
+    for (idx = 1; idx <= LOOPCOUNT; idx++) {
+      partial += idx;
+    }
+    #pragma omp critical
+    {
+      total += partial;
+    }
+  }
+  if (expected == total) {
+    return 1;
+  }
+  fprintf(stderr, "KNOWN_SUM = %d; SUM = %d\n", expected, total);
+  return 0;
+}
+
+/* Runs the test REPETITIONS times; the exit status is the number of
+ * failed runs (0 means every repetition passed). */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; rep++) {
+    /* the test returns 1 on success, so !result counts one failure */
+    failures += !test_omp_parallel_shared();
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/bug_36720.c b/final/runtime/test/tasking/bug_36720.c
new file mode 100644
index 0000000..684d675
--- /dev/null
+++ b/final/runtime/test/tasking/bug_36720.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run
+
+/*
+Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=36720
+
+Assertion failure at kmp_runtime.cpp(1715): nthreads > 0.
+OMP: Error #13: Assertion failure at kmp_runtime.cpp(1715).
+
+The assertion fails even with OMP_NUM_THREADS=1. If the second task is removed,
+everything runs to completion. If the "omp parallel for" directives are removed
+from inside the tasks, once again everything runs fine.
+*/
+
+#define N 1024
+
+/*
+ * Reproducer: two explicit tasks created outside of any parallel region,
+ * each running an empty "parallel for" loop, followed by a taskwait.
+ * The test only has to run to completion; it always returns 0.
+ */
+int main() {
+  /* first task */
+  #pragma omp task
+  {
+    int k;
+    #pragma omp parallel for
+    for (k = 0; k < N; k++) {
+      (void)0;
+    }
+  }
+
+  /* second task, same body */
+  #pragma omp task
+  {
+    int k;
+    #pragma omp parallel for
+    for (k = 0; k < N; ++k) {
+      (void)0;
+    }
+  }
+
+  #pragma omp taskwait
+
+  return 0;
+}
diff --git a/final/runtime/test/tasking/bug_nested_proxy_task.c b/final/runtime/test/tasking/bug_nested_proxy_task.c
new file mode 100644
index 0000000..6c00822
--- /dev/null
+++ b/final/runtime/test/tasking/bug_nested_proxy_task.c
@@ -0,0 +1,131 @@
+// RUN: %libomp-compile -lpthread && %libomp-run
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+#include <stdio.h>
+#include <omp.h>
+#include <pthread.h>
+#include "omp_my_sleep.h"
+
+/*
+ With task dependencies one can generate proxy tasks from an explicit task
+ being executed by a serial task team. The OpenMP runtime library did not
+ expect that and tried to free the explicit task that is the parent of the
+ proxy task while the proxy task was still working in the background. The
+ parent therefore had incomplete children, which triggered a debugging
+ assertion.
+*/
+
+// Compiler-generated code (emulation)
+// Integer type that stores the address of a dependence object
+// (see kmp_depend_info.base_addr); declared as long in this test.
+typedef long kmp_intptr_t;
+// Integer type used for thread ids, flags and counts in the entry points below.
+typedef int kmp_int32;
+
+// Local boolean stand-in; <stdbool.h> is not included directly by this file.
+typedef char bool;
+
+// Source-location descriptor taken as first argument by the runtime entry
+// points declared below. This test always passes NULL for it, so the fields
+// are never filled in here.
+// NOTE(review): presumably mirrors ident_t of the runtime's kmp.h - confirm.
+typedef struct ident {
+ kmp_int32 reserved_1; /**< might be used in Fortran; see above */
+ kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */
+ kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
+// The conditional section below contains comments only and adds no member.
+#if USE_ITT_BUILD
+ /* but currently used for storing region-specific ITT */
+ /* contextual information. */
+#endif /* USE_ITT_BUILD */
+ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
+ char const *psource; /**< String describing the source location.
+ The string is composed of semi-colon separated fields which describe the source file,
+ the function and a pair of line numbers that delimit the construct.
+ */
+} ident_t;
+
+// One entry of the dependence list handed to __kmpc_omp_task_with_deps().
+typedef struct kmp_depend_info {
+ kmp_intptr_t base_addr; // address of the object the dependence refers to
+ size_t len; // size of that object in bytes (main() stores sizeof(int))
+ struct {
+ bool in:1; // single-bit flag: dependence has "in" semantics
+ bool out:1; // single-bit flag: dependence has "out" semantics
+ } flags;
+} kmp_depend_info_t;
+
+// Forward declaration so the routine type can refer to the task structure.
+struct kmp_task;
+// Signature of a task entry routine: receives a kmp_int32 (named gtid by the
+// routine defined in this file) and the task descriptor, returns kmp_int32.
+typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, struct kmp_task * );
+
+// Task descriptor as seen by compiler-generated code; instances are obtained
+// from __kmpc_omp_task_alloc() and never created directly in this test.
+typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
+ void * shareds; /**< pointer to block of pointers to shared vars */
+ kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */
+ kmp_int32 part_id; /**< part id for the task */
+} kmp_task_t;
+
+// Hand-written prototypes of the runtime entry points this test calls in
+// place of compiler-generated code. They get C linkage when the file is
+// compiled as C++.
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Returns the global thread id that is passed to the other entry points.
+kmp_int32 __kmpc_global_thread_num ( ident_t * );
+// Allocates a task descriptor that will run task_entry.
+kmp_task_t*
+__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry );
+// Called from the helper thread (see target()) to report a proxy task as done.
+void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
+// Submits a task together with its dependence lists.
+kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
+// Submits a task without dependences.
+kmp_int32
+__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
+#ifdef __cplusplus
+}
+#endif
+
+// Body of the helper pthread that stands in for an asynchronous target
+// region: sleeps for 0.1 s, then reports the proxy task passed in `task`
+// as completed. Always returns NULL.
+void *target(void *task)
+{
+  kmp_task_t *proxy = (kmp_task_t*) task;
+
+  my_sleep( 0.1 );
+  __kmpc_proxy_task_completed_ooo(proxy);
+  return NULL;
+}
+
+pthread_t target_thread;
+
+// User's code
+// Entry routine of the proxy tasks: starts the helper thread that will
+// complete `task` later and returns 0 immediately. The thread handle is
+// stored in the global target_thread; the result of pthread_create() is
+// not checked.
+int task_entry(kmp_int32 gtid, kmp_task_t *task)
+{
+  (void)gtid; // not needed here
+  (void)pthread_create(&target_thread, NULL, target, task);
+  return 0;
+}
+
+// Creates a proxy task with an output dependence on `dep` inside a
+// taskgroup, then an explicit task depending on `dep` that itself creates
+// a second proxy task. The test passes if this runs to completion;
+// main() always returns 0.
+int main()
+{
+ int dep;
+
+#pragma omp taskgroup
+{
+/*
+ * Corresponds to:
+ #pragma omp target nowait depend(out: dep)
+ {
+ my_sleep( 0.1 );
+ }
+*/
+ kmp_depend_info_t dep_info;
+ dep_info.base_addr = (long) &dep;
+ dep_info.len = sizeof(int);
+ // out = inout per spec and runtime expects this
+ dep_info.flags.in = 1;
+ dep_info.flags.out = 1;
+
+ kmp_int32 gtid = __kmpc_global_thread_num(NULL);
+ // NOTE(review): the flags value 17 presumably marks the task as a tied
+ // proxy task - confirm against the runtime's task flag layout.
+ kmp_task_t *proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry);
+ // one dependence in the regular list, none in the noalias list
+ __kmpc_omp_task_with_deps(NULL,gtid,proxy_task,1,&dep_info,0,NULL);
+
+ #pragma omp task depend(in: dep)
+ {
+/*
+ * Corresponds to:
+ #pragma omp target nowait
+ {
+ my_sleep( 0.1 );
+ }
+*/
+ // proxy task created from inside an explicit task, without dependences
+ kmp_task_t *nested_proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry);
+ __kmpc_omp_task(NULL,gtid,nested_proxy_task);
+ }
+}
+
+ // only check that it didn't crash
+ return 0;
+}
diff --git a/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c
new file mode 100644
index 0000000..e6dd895
--- /dev/null
+++ b/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c
@@ -0,0 +1,134 @@
+// RUN: %libomp-compile -lpthread && %libomp-run
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+#include <stdio.h>
+#include <omp.h>
+#include <pthread.h>
+#include "omp_my_sleep.h"
+
+/*
+ An explicit task can have a dependency on a target task. If it is not
+ directly satisfied, the runtime should not wait but resume execution.
+*/
+
+// Compiler-generated code (emulation)
+// Integer type that stores the address of a dependence object
+// (see kmp_depend_info.base_addr); declared as long in this test.
+typedef long kmp_intptr_t;
+// Integer type used for thread ids, flags and counts in the entry points below.
+typedef int kmp_int32;
+
+// Local boolean stand-in; <stdbool.h> is not included directly by this file.
+typedef char bool;
+
+// Source-location descriptor taken as first argument by the runtime entry
+// points declared below. This test always passes NULL for it, so the fields
+// are never filled in here.
+// NOTE(review): presumably mirrors ident_t of the runtime's kmp.h - confirm.
+typedef struct ident {
+ kmp_int32 reserved_1; /**< might be used in Fortran; see above */
+ kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */
+ kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
+// The conditional section below contains comments only and adds no member.
+#if USE_ITT_BUILD
+ /* but currently used for storing region-specific ITT */
+ /* contextual information. */
+#endif /* USE_ITT_BUILD */
+ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
+ char const *psource; /**< String describing the source location.
+ The string is composed of semi-colon separated fields which describe the source file,
+ the function and a pair of line numbers that delimit the construct.
+ */
+} ident_t;
+
+// One entry of the dependence list handed to __kmpc_omp_task_with_deps().
+typedef struct kmp_depend_info {
+ kmp_intptr_t base_addr; // address of the object the dependence refers to
+ size_t len; // size of that object in bytes (main() stores sizeof(int))
+ struct {
+ bool in:1; // single-bit flag: dependence has "in" semantics
+ bool out:1; // single-bit flag: dependence has "out" semantics
+ } flags;
+} kmp_depend_info_t;
+
+// Forward declaration so the routine type can refer to the task structure.
+struct kmp_task;
+// Signature of a task entry routine: receives a kmp_int32 (named gtid by the
+// routine defined in this file) and the task descriptor, returns kmp_int32.
+typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, struct kmp_task * );
+
+// Task descriptor as seen by compiler-generated code; instances are obtained
+// from __kmpc_omp_task_alloc() and never created directly in this test.
+typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
+ void * shareds; /**< pointer to block of pointers to shared vars */
+ kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */
+ kmp_int32 part_id; /**< part id for the task */
+} kmp_task_t;
+
+// Hand-written prototypes of the runtime entry points this test calls in
+// place of compiler-generated code. They get C linkage when the file is
+// compiled as C++.
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Returns the global thread id that is passed to the other entry points.
+kmp_int32 __kmpc_global_thread_num ( ident_t * );
+// Allocates a task descriptor that will run task_entry.
+kmp_task_t*
+__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry );
+// Called from the helper thread (see target()) to report a proxy task as done.
+void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
+// Submits a task together with its dependence lists.
+kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
+// Submits a task without dependences (declared but not called in this test).
+kmp_int32
+__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
+#ifdef __cplusplus
+}
+#endif
+
+// Body of the helper pthread that stands in for an asynchronous target
+// region: sleeps for 0.1 s, then reports the proxy task passed in `task`
+// as completed. Always returns NULL.
+void *target(void *task)
+{
+  kmp_task_t *proxy = (kmp_task_t*) task;
+
+  my_sleep( 0.1 );
+  __kmpc_proxy_task_completed_ooo(proxy);
+  return NULL;
+}
+
+pthread_t target_thread;
+
+// User's code
+// Entry routine of the proxy task: starts the helper thread that will
+// complete `task` later and returns 0 immediately. The thread handle is
+// stored in the global target_thread; the result of pthread_create() is
+// not checked.
+int task_entry(kmp_int32 gtid, kmp_task_t *task)
+{
+  (void)gtid; // not needed here
+  (void)pthread_create(&target_thread, NULL, target, task);
+  return 0;
+}
+
+// Creates a proxy task with an output dependence on `dep`, then two
+// explicit tasks depending on `dep`. The exit status is the number of
+// failed checks: a task flag already set before the taskwait, or a task
+// flag still unset after it.
+int main()
+{
+ int dep;
+
+/*
+ * Corresponds to:
+ #pragma omp target nowait depend(out: dep)
+ {
+ my_sleep( 0.1 );
+ }
+*/
+ kmp_depend_info_t dep_info;
+ dep_info.base_addr = (long) &dep;
+ dep_info.len = sizeof(int);
+ // out = inout per spec and runtime expects this
+ dep_info.flags.in = 1;
+ dep_info.flags.out = 1;
+
+ kmp_int32 gtid = __kmpc_global_thread_num(NULL);
+ // NOTE(review): the flags value 17 presumably marks the task as a tied
+ // proxy task - confirm against the runtime's task flag layout.
+ kmp_task_t *proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry);
+ // one dependence in the regular list, none in the noalias list
+ __kmpc_omp_task_with_deps(NULL,gtid,proxy_task,1,&dep_info,0,NULL);
+
+ int first_task_finished = 0;
+ #pragma omp task shared(first_task_finished) depend(inout: dep)
+ {
+ first_task_finished = 1;
+ }
+
+ int second_task_finished = 0;
+ #pragma omp task shared(second_task_finished) depend(in: dep)
+ {
+ second_task_finished = 1;
+ }
+
+ // check that execution has been resumed and the runtime has not waited
+ // for the dependencies to be satisfied.
+ // (each comparison contributes 1 to error when its task has already run)
+ int error = (first_task_finished == 1);
+ error += (second_task_finished == 1);
+
+ #pragma omp taskwait
+
+ // by now all tasks should have finished
+ error += (first_task_finished != 1);
+ error += (second_task_finished != 1);
+
+ return error;
+}
diff --git a/final/runtime/test/tasking/bug_serial_taskgroup.c b/final/runtime/test/tasking/bug_serial_taskgroup.c
new file mode 100644
index 0000000..850bc90
--- /dev/null
+++ b/final/runtime/test/tasking/bug_serial_taskgroup.c
@@ -0,0 +1,16 @@
+// RUN: %libomp-compile-and-run
+
+/*
+ GCC failed this test because __kmp_get_gtid() instead of __kmp_entry_gtid()
+ was called in xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void).
+ __kmp_entry_gtid() will initialize the runtime if not yet done which does not
+ happen with __kmp_get_gtid().
+ */
+
+// The empty taskgroup is the only OpenMP construct in this program, so it
+// is reached before anything else could have initialized the runtime.
+// The test passes if the program runs to completion; it always returns 0.
+int main()
+{
+ #pragma omp taskgroup
+ { }
+
+ return 0;
+}
diff --git a/final/runtime/test/tasking/kmp_task_reduction_nest.cpp b/final/runtime/test/tasking/kmp_task_reduction_nest.cpp
new file mode 100644
index 0000000..63dffe4
--- /dev/null
+++ b/final/runtime/test/tasking/kmp_task_reduction_nest.cpp
@@ -0,0 +1,376 @@
+// RUN: %libomp-cxx-compile-and-run
+// RUN: %libomp-cxx-compile -DFLG=1 && %libomp-run
+// GCC-5 is needed for OpenMP 4.0 support (taskgroup)
+// XFAIL: gcc-4
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
+#include <cassert>
+#include <omp.h>
+
+// Total number of loop iterations, should be multiple of T for this test
+#define N 10000
+
+// Flag to request lazy (1) or eager (0) allocation of reduction objects
+#ifndef FLG
+#define FLG 0
+#endif
+
+/*
+ // initial user's code that corresponds to pseudo code of the test
+ #pragma omp taskgroup task_reduction(+:i,j) task_reduction(*:x)
+ {
+ for( int l = 0; l < N; ++l ) {
+ #pragma omp task firstprivate(l) in_reduction(+:i) in_reduction(*:x)
+ {
+ i += l;
+ if( l%2 )
+ x *= 1.0 / (l + 1);
+ else
+ x *= (l + 1);
+ }
+ }
+
+ #pragma omp taskgroup task_reduction(-:i,k) task_reduction(+:y)
+ {
+ for( int l = 0; l < N; ++l ) {
+ #pragma omp task firstprivate(l) in_reduction(+:j,y) \
+ in_reduction(*:x) in_reduction(-:k)
+ {
+ j += l;
+ k -= l;
+ y += (double)l;
+ if( l%2 )
+ x *= 1.0 / (l + 1);
+ else
+ x *= (l + 1);
+ }
+ #pragma omp task firstprivate(l) in_reduction(+:y) in_reduction(-:i,k)
+ {
+ i -= l;
+ k -= l;
+ y += (double)l;
+ }
+ #pragma omp task firstprivate(l) in_reduction(+:j) in_reduction(*:x)
+ {
+ j += l;
+ if( l%2 )
+ x *= 1.0 / (l + 1);
+ else
+ x *= (l + 1);
+ }
+ }
+ } // inner reduction
+
+ for( int l = 0; l < N; ++l ) {
+ #pragma omp task firstprivate(l) in_reduction(+:j)
+ j += l;
+ }
+ } // outer reduction
+*/
+
+//------------------------------------------------
+// OpenMP runtime library routines
+// Hand-written prototypes (C linkage) of the runtime entry points that the
+// emulated compiler-generated code in main() calls.
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Looks up the thread-specific copy of a reduction item. In this test `tg`
+// is a handle returned by __kmpc_task_reduction_init() or NULL, and `item`
+// is the address of either the shared variable or a thread-specific copy.
+extern void* __kmpc_task_reduction_get_th_data(int gtid, void* tg, void* item);
+// Registers `num` reduction items described by `data`; the returned handle
+// is the one passed as `tg` above.
+extern void* __kmpc_task_reduction_init(int gtid, int num, void* data);
+// Returns the global thread id of the calling thread (called with NULL here).
+extern int __kmpc_global_thread_num(void*);
+#ifdef __cplusplus
+}
+#endif
+
+//------------------------------------------------
+// Compiler-generated code
+
+// Descriptor of one reduction item; main() fills arrays of these and passes
+// them to __kmpc_task_reduction_init().
+// NOTE(review): the field order presumably has to match the runtime's own
+// definition - confirm before changing it.
+typedef struct _task_red_item {
+ void *shar; // shared reduction item
+ size_t size; // size of data item
+ void *f_init; // data initialization routine
+ void *f_fini; // data finalization routine
+ void *f_comb; // data combiner routine
+ unsigned flags; // set to FLG in this test (lazy/eager allocation request)
+} _task_red_item_t;
+
+// int:+ no need in init/fini callbacks, valid for subtraction
+// Combiner for int reductions: adds the int at rhs to the int at lhs.
+void __red_int_add_comb(void *lhs, void *rhs) // combiner
+{
+  int *acc = (int*)lhs;
+  const int *part = (const int*)rhs;
+  *acc = *acc + *part;
+}
+
+// long long:+ no need in init/fini callbacks, valid for subtraction
+// Combiner for long long reductions: adds the value at rhs to the one at lhs.
+void __red_llong_add_comb(void *lhs, void *rhs) // combiner
+{
+  long long *acc = (long long*)lhs;
+  const long long *part = (const long long*)rhs;
+  *acc = *acc + *part;
+}
+
+// double:* no need in fini callback
+// Initializer for double product reductions: stores the identity value 1.0.
+void __red_dbl_mul_init(void *data) // initializer
+{
+  double *item = (double*)data;
+  *item = 1.0;
+}
+// Combiner for double product reductions: multiplies the value at lhs by
+// the value at rhs.
+void __red_dbl_mul_comb(void *lhs, void *rhs) // combiner
+{
+  double *acc = (double*)lhs;
+  const double *part = (const double*)rhs;
+  *acc = *acc * *part;
+}
+
+// double:+ no need in init/fini callbacks
+// Combiner for double sum reductions: adds the value at rhs to the one at lhs.
+void __red_dbl_add_comb(void *lhs, void *rhs) // combiner
+{
+  double *acc = (double*)lhs;
+  const double *part = (const double*)rhs;
+  *acc = *acc + *part;
+}
+
+// ==============================
+
+// Serial reference computation for the test: applies to *pi, *pj, *px, *pk
+// and *py the same updates that the tasks of the pseudo code above perform,
+// in three passes over l = 0 .. N-1. The multiplications of *px are kept
+// in their original order so the floating-point result is unchanged.
+void calc_serial(int *pi, long long *pj, double *px, long long *pk, double *py)
+{
+  // pass 1: tasks of the outer taskgroup before the inner one
+  for( int l = 0; l < N; ++l ) {
+    const double factor = (l % 2) ? 1.0 / (l + 1) : (double)(l + 1);
+    *pi += l;
+    *px *= factor;
+  }
+  // pass 2: the three tasks created per iteration in the inner taskgroup
+  for( int l = 0; l < N; ++l ) {
+    const double factor = (l % 2) ? 1.0 / (l + 1) : (double)(l + 1);
+    const double dl = (double)l;
+
+    // first task
+    *pj += l;
+    *pk -= l;
+    *py += dl;
+    *px *= factor;
+
+    // second task
+    *pi -= l;
+    *pk -= l;
+    *py += dl;
+
+    // third task
+    *pj += l;
+    *px *= factor;
+  }
+  // pass 3: trailing tasks of the outer taskgroup
+  for( int l = 0; l < N; ++l ) {
+    *pj += l;
+  }
+}
+
+//------------------------------------------------
+// Test case
+// Test driver. Computes reference values serially (calc_serial), repeats
+// the same work with two nested taskgroups whose task reductions are
+// lowered by hand to runtime calls, and compares the results. While doing
+// so it also cross-checks the addresses returned by
+// __kmpc_task_reduction_get_th_data().
+// Returns 0 if all values match and no address mismatch was seen,
+// 1 otherwise, so that a wrong result fails the test.
+int main()
+{
+  int nthreads = omp_get_max_threads();
+  int err = 0; // number of thread-specific address mismatches
+  // per-thread slot remembering one thread-specific address of xp
+  void** ptrs = (void**)malloc(nthreads*sizeof(void*));
+  if (ptrs == NULL) {
+    printf("failed, cannot allocate %d pointers\n", nthreads);
+    return 1;
+  }
+
+  // user's code ======================================
+  // variables for serial calculations:
+  int is = 3;
+  long long js = -9999999;
+  double xs = 99999.0;
+  long long ks = 99999999;
+  double ys = -99999999.0;
+  // variables for parallel calculations:
+  int ip = 3;
+  long long jp = -9999999;
+  double xp = 99999.0;
+  long long kp = 99999999;
+  double yp = -99999999.0;
+
+  calc_serial(&is, &js, &xs, &ks, &ys);
+  // ==================================================
+  for (int i = 0; i < nthreads; ++i)
+    ptrs[i] = NULL;
+  #pragma omp parallel
+  {
+    #pragma omp single nowait
+    {
+      // outer taskgroup reduces (i,j,x)
+      #pragma omp taskgroup // task_reduction(+:i,j) task_reduction(*:x)
+      {
+        _task_red_item_t red_data[3];
+        red_data[0].shar = &ip;
+        red_data[0].size = sizeof(ip);
+        red_data[0].f_init = NULL; // RTL will zero thread-specific objects
+        red_data[0].f_fini = NULL; // no destructors needed
+        red_data[0].f_comb = (void*)&__red_int_add_comb;
+        red_data[0].flags = FLG;
+        red_data[1].shar = &jp;
+        red_data[1].size = sizeof(jp);
+        red_data[1].f_init = NULL; // RTL will zero thread-specific objects
+        red_data[1].f_fini = NULL; // no destructors needed
+        red_data[1].f_comb = (void*)&__red_llong_add_comb;
+        red_data[1].flags = FLG;
+        red_data[2].shar = &xp;
+        red_data[2].size = sizeof(xp);
+        red_data[2].f_init = (void*)&__red_dbl_mul_init;
+        red_data[2].f_fini = NULL; // no destructors needed
+        red_data[2].f_comb = (void*)&__red_dbl_mul_comb;
+        red_data[2].flags = FLG;
+        int gtid = __kmpc_global_thread_num(NULL);
+        void* tg1 = __kmpc_task_reduction_init(gtid, 3, red_data);
+
+        for( int l = 0; l < N; l += 2 ) {
+          // 2 iterations per task to get correct x value; actually any even
+          // number of iters per task will work, otherwise x loses precision
+          #pragma omp task firstprivate(l) //in_reduction(+:i) in_reduction(*:x)
+          {
+            int gtid = __kmpc_global_thread_num(NULL);
+            int *p_ip = (int*)__kmpc_task_reduction_get_th_data(gtid, tg1, &ip);
+            double *p_xp = (double*)__kmpc_task_reduction_get_th_data(
+                               gtid, tg1, &xp);
+            // remember this thread's copy of xp for the neighbour check below
+            if (!ptrs[gtid]) ptrs[gtid] = p_xp;
+
+            // user's pseudo-code ==============================
+            *p_ip += l;
+            *p_xp *= (l + 1);
+
+            *p_ip += l + 1;
+            *p_xp *= 1.0 / (l + 2);
+            // ==================================================
+          }
+        }
+        // inner taskgroup reduces (i,k,y), i is same object as in outer one
+        #pragma omp taskgroup // task_reduction(-:i,k) task_reduction(+:y)
+        {
+          _task_red_item_t red_data[3];
+          red_data[0].shar = &ip;
+          red_data[0].size = sizeof(ip);
+          red_data[0].f_init = NULL; // RTL will zero thread-specific objects
+          red_data[0].f_fini = NULL; // no destructors needed
+          red_data[0].f_comb = (void*)&__red_int_add_comb;
+          red_data[0].flags = FLG;
+          red_data[1].shar = &kp;
+          red_data[1].size = sizeof(kp);
+          red_data[1].f_init = NULL; // RTL will zero thread-specific objects
+          red_data[1].f_fini = NULL; // no destructors needed
+          red_data[1].f_comb = (void*)&__red_llong_add_comb; // same for + and -
+          red_data[1].flags = FLG;
+          red_data[2].shar = &yp;
+          red_data[2].size = sizeof(yp);
+          red_data[2].f_init = NULL; // RTL will zero thread-specific objects
+          red_data[2].f_fini = NULL; // no destructors needed
+          red_data[2].f_comb = (void*)&__red_dbl_add_comb;
+          red_data[2].flags = FLG;
+          int gtid = __kmpc_global_thread_num(NULL);
+          void* tg2 = __kmpc_task_reduction_init(gtid, 3, red_data);
+
+          for( int l = 0; l < N; l += 2 ) {
+            #pragma omp task firstprivate(l)
+            // in_reduction(+:j,y) in_reduction(*:x) in_reduction(-:k)
+            {
+              int gtid = __kmpc_global_thread_num(NULL);
+              long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data(
+                                    gtid, tg1, &jp);
+              long long *p_kp = (long long*)__kmpc_task_reduction_get_th_data(
+                                    gtid, tg2, &kp);
+              double *p_xp = (double*)__kmpc_task_reduction_get_th_data(
+                                 gtid, tg1, &xp);
+              double *p_yp = (double*)__kmpc_task_reduction_get_th_data(
+                                 gtid, tg2, &yp);
+              // user's pseudo-code ==============================
+              *p_jp += l;
+              *p_kp -= l;
+              *p_yp += (double)l;
+              *p_xp *= (l + 1);
+
+              *p_jp += l + 1;
+              *p_kp -= l + 1;
+              *p_yp += (double)(l + 1);
+              *p_xp *= 1.0 / (l + 2);
+              // =================================================
+              {
+                // the following code is here just to check
+                // __kmpc_task_reduction_get_th_data:
+                int tid = omp_get_thread_num();
+                void *addr1;
+                void *addr2;
+                addr1 = __kmpc_task_reduction_get_th_data(gtid, tg1, &xp); // from shared
+                addr2 = __kmpc_task_reduction_get_th_data(gtid, tg1, addr1); // from private
+                if (addr1 != addr2) {
+                  #pragma omp atomic
+                  ++err;
+                  printf("Wrong thread-specific addresses %d s:%p p:%p\n", tid, addr1, addr2);
+                }
+                // from neighbour w/o taskgroup (should start lookup from current tg2)
+                if (tid > 0) {
+                  if (ptrs[tid-1]) {
+                    addr2 = __kmpc_task_reduction_get_th_data(gtid, NULL, ptrs[tid-1]);
+                    if (addr1 != addr2) {
+                      #pragma omp atomic
+                      ++err;
+                      printf("Wrong thread-specific addresses %d s:%p n:%p\n",
+                             tid, addr1, addr2);
+                    }
+                  }
+                } else {
+                  if (ptrs[nthreads-1]) {
+                    addr2 = __kmpc_task_reduction_get_th_data(gtid, NULL, ptrs[nthreads-1]);
+                    if (addr1 != addr2) {
+                      #pragma omp atomic
+                      ++err;
+                      printf("Wrong thread-specific addresses %d s:%p n:%p\n",
+                             tid, addr1, addr2);
+                    }
+                  }
+                }
+                // ----------------------------------------------
+              }
+            }
+            #pragma omp task firstprivate(l)
+            // in_reduction(+:y) in_reduction(-:i,k)
+            {
+              int gtid = __kmpc_global_thread_num(NULL);
+              int *p_ip = (int*)__kmpc_task_reduction_get_th_data(
+                              gtid, tg2, &ip);
+              long long *p_kp = (long long*)__kmpc_task_reduction_get_th_data(
+                                    gtid, tg2, &kp);
+              double *p_yp = (double*)__kmpc_task_reduction_get_th_data(
+                                 gtid, tg2, &yp);
+
+              // user's pseudo-code ==============================
+              *p_ip -= l;
+              *p_kp -= l;
+              *p_yp += (double)l;
+
+              *p_ip -= l + 1;
+              *p_kp -= l + 1;
+              *p_yp += (double)(l + 1);
+              // =================================================
+            }
+            #pragma omp task firstprivate(l)
+            // in_reduction(+:j) in_reduction(*:x)
+            {
+              int gtid = __kmpc_global_thread_num(NULL);
+              long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data(
+                                    gtid, tg1, &jp);
+              double *p_xp = (double*)__kmpc_task_reduction_get_th_data(
+                                 gtid, tg1, &xp);
+              // user's pseudo-code ==============================
+              *p_jp += l;
+              *p_xp *= (l + 1);
+
+              *p_jp += l + 1;
+              *p_xp *= 1.0 / (l + 2);
+              // =================================================
+            }
+          }
+        } // inner reduction
+
+        for( int l = 0; l < N; l += 2 ) {
+          #pragma omp task firstprivate(l) // in_reduction(+:j)
+          {
+            int gtid = __kmpc_global_thread_num(NULL);
+            long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data(
+                                  gtid, tg1, &jp);
+            // user's pseudo-code ==============================
+            *p_jp += l;
+            *p_jp += l + 1;
+            // =================================================
+          }
+        }
+      } // outer reduction
+    } // end single
+  } // end parallel
+  // check results
+#if _DEBUG
+  printf("reduction flags = %u\n", (unsigned)FLG);
+#endif
+  // integers must match exactly, doubles within 0.01, and none of the
+  // address cross-checks above may have failed
+  int failed = 1;
+  if (ip == is && jp == js && ks == kp &&
+      fabs(xp - xs) < 0.01 && fabs(yp - ys) < 0.01 && err == 0) {
+    printf("passed\n");
+    failed = 0;
+  } else {
+    printf("failed,\n ser:(%d %lld %f %lld %f)\n par:(%d %lld %f %lld %f)\n",
+           is, js, xs, ks, ys,
+           ip, jp, xp, kp, yp);
+    if (err != 0)
+      printf("thread-specific address mismatches: %d\n", err);
+  }
+  free(ptrs);
+  // non-zero exit status makes the RUN line fail on a wrong result
+  return failed;
+}
diff --git a/final/runtime/test/tasking/kmp_taskloop.c b/final/runtime/test/tasking/kmp_taskloop.c
new file mode 100644
index 0000000..4b13793
--- /dev/null
+++ b/final/runtime/test/tasking/kmp_taskloop.c
@@ -0,0 +1,159 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+#include <stdio.h>
+#include <omp.h>
+#include "omp_my_sleep.h"
+
+#define N 4
+#define GRAIN 10
+#define STRIDE 3
+
+// globals
+int th_counter[N];
+int counter;
+
+
+// Compiler-generated code (emulation)
+// Placeholder for the source-location argument of the runtime entry points;
+// this test always passes NULL, so a single dummy member is enough.
+typedef struct ident {
+ void* dummy;
+} ident_t;
+
+// Block of pointers to the variables shared by all tasks of the taskloop;
+// main() points them at th_counter, counter and its local j.
+typedef struct shar {
+ int(*pth_counter)[N]; // pointer to the per-thread iteration counters
+ int *pcounter; // pointer to the total iteration counter
+ int *pj; // pointer to the lastprivate target variable
+} *pshareds;
+
+// Task descriptor as laid out by the emulated compiler: three leading
+// members followed by the task's private data. main() passes the addresses
+// of lb and ub to __kmpc_taskloop().
+typedef struct task {
+ pshareds shareds; // shared-variable block, filled in by main()
+ int(* routine)(int,struct task*); // entry routine of the task
+ int part_id;
+// privates:
+ unsigned long long lb; // library always uses ULONG
+ unsigned long long ub; // inclusive upper bound (task_entry loops while i <= ub)
+ int st; // loop stride
+ int last; // lastprivate flag, written by __task_dup_entry()
+ int i; // loop variable
+ int j; // value of i in the most recent iteration
+ int th; // thread number that ran the most recent iteration
+} *ptask, kmp_task_t;
+
+// Signature of a task entry routine taking the typed task pointer.
+typedef int(* task_entry_t)( int, ptask );
+
+// Task duplication routine handed to __kmpc_taskloop(): records in the
+// destination task whether it is the one carrying the lastprivate update.
+// The source task is not inspected; constructor calls for private data
+// could be added here.
+void
+__task_dup_entry(ptask task_dst, ptask task_src, int lastpriv)
+{
+  (void)task_src;
+  task_dst->last = lastpriv;
+}
+
+
+// OpenMP RTL interfaces
+// 64-bit integer aliases used in the taskloop prototype below.
+typedef unsigned long long kmp_uint64;
+typedef long long kmp_int64;
+
+// Hand-written prototypes of the runtime entry points this test calls in
+// place of compiler-generated code (C linkage when compiled as C++).
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Runs the taskloop described by `task`; the meaning of the individual
+// arguments is documented at the call site in main().
+void
+__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+ int nogroup, int sched, kmp_int64 grainsize, void *task_dup );
+// Allocates a task descriptor of the given size that will run task_entry.
+ptask
+__kmpc_omp_task_alloc( ident_t *loc, int gtid, int flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ task_entry_t task_entry );
+// Atomically adds rhs to the int at lhs.
+void __kmpc_atomic_fixed4_add(void *id_ref, int gtid, int * lhs, int rhs);
+// Returns the global thread id of the calling thread.
+int __kmpc_global_thread_num(void *id_ref);
+#ifdef __cplusplus
+}
+#endif
+
+
+// User's code
+// Entry routine of one taskloop chunk: walks i from lb to ub (inclusive) in
+// steps of st, bumping the total counter and the counter of the executing
+// thread for every iteration. Afterwards it sleeps 100 ms and, if this
+// chunk is marked as the last one, stores j into the shared variable.
+// Always returns 0.
+int task_entry(int gtid, ptask task)
+{
+  pshareds shared = task->shareds;
+
+  task->i = task->lb;
+  while( task->i <= (int)task->ub ) {
+    task->th = omp_get_thread_num();
+    __kmpc_atomic_fixed4_add(NULL,gtid,shared->pcounter,1);
+    __kmpc_atomic_fixed4_add(NULL,gtid,&((*shared->pth_counter)[task->th]),1);
+    task->j = task->i;
+    task->i += task->st;
+  }
+  my_sleep( 0.1 ); // sleep 100 ms in order to allow other threads to steal tasks
+  if( !task->last ) {
+    return 0;
+  }
+  *(shared->pj) = task->j; // lastprivate
+  return 0;
+}
+
+// Emulates a "taskloop num_tasks(N) lastprivate(j)" through direct runtime
+// calls from the master thread of an N-thread team, then checks the
+// lastprivate value, the total iteration count and that every per-thread
+// count is a multiple of GRAIN.
+// Returns 0 and prints "passed" on success, 1 after the first failed check.
+int main()
+{
+ int i, j, gtid = __kmpc_global_thread_num(NULL);
+ ptask task;
+ pshareds psh;
+ omp_set_dynamic(0);
+ counter = 0;
+ for( i=0; i<N; ++i )
+ th_counter[i] = 0;
+ #pragma omp parallel num_threads(N)
+ {
+ #pragma omp master
+ {
+ // this declaration hides the gtid declared at function scope
+ int gtid = __kmpc_global_thread_num(NULL);
+/*
+ * This is what the OpenMP runtime calls correspond to:
+ #pragma omp taskloop num_tasks(N) lastprivate(j)
+ for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE )
+ {
+ int th = omp_get_thread_num();
+ #pragma omp atomic
+ counter++;
+ #pragma omp atomic
+ th_counter[th]++;
+ j = i;
+ }
+*/
+ // NOTE(review): the flags value 1 presumably requests a tied task - confirm.
+ task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct task),sizeof(struct shar),&task_entry);
+ psh = task->shareds;
+ psh->pth_counter = &th_counter;
+ psh->pcounter = &counter;
+ psh->pj = &j;
+ // inclusive bounds of the loop shown in the comment above
+ task->lb = 0;
+ task->ub = N*GRAIN*STRIDE-2;
+ task->st = STRIDE;
+
+ __kmpc_taskloop(
+ NULL, // location
+ gtid, // gtid
+ task, // task structure
+ 1, // if clause value
+ &task->lb, // lower bound
+ &task->ub, // upper bound
+ STRIDE, // loop increment
+ 0, // 1 if nogroup specified
+ 2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
+ N, // schedule value (ignored for type 0)
+ (void*)&__task_dup_entry // tasks duplication routine
+ );
+ } // end master
+ } // end parallel
+// check results
+ // the last iteration value is N*GRAIN*STRIDE-STRIDE
+ if( j != N*GRAIN*STRIDE-STRIDE ) {
+ printf("Error in lastprivate, %d != %d\n",j,N*GRAIN*STRIDE-STRIDE);
+ return 1;
+ }
+ // the loop runs N*GRAIN iterations in total
+ if( counter != N*GRAIN ) {
+ printf("Error, counter %d != %d\n",counter,N*GRAIN);
+ return 1;
+ }
+ // every thread must have executed whole chunks of GRAIN iterations
+ for( i=0; i<N; ++i ) {
+ if( th_counter[i] % GRAIN ) {
+ printf("Error, th_counter[%d] = %d\n",i,th_counter[i]);
+ return 1;
+ }
+ }
+ printf("passed\n");
+ return 0;
+}
diff --git a/final/runtime/test/tasking/nested_parallel_tasking.c b/final/runtime/test/tasking/nested_parallel_tasking.c
new file mode 100644
index 0000000..4374d6e
--- /dev/null
+++ b/final/runtime/test/tasking/nested_parallel_tasking.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <omp.h>
+
+/*
+ * This test would hang when the nesting level, rather than the active
+ * level, was used to push the task state.
+ */
+
+int main() // Nested parallel region inside a one-thread team, followed by a task in a new parallel region
+{
+ // If num_threads is changed to a value greater than 1, then the test passes
+ #pragma omp parallel num_threads(1)
+ {
+ #pragma omp parallel
+ printf("Hello World from thread %d\n", omp_get_thread_num());
+ }
+ // Print the value reported by omp_get_max_threads() for diagnostics
+ printf("omp_num_threads: %d\n", omp_get_max_threads());
+ // Only the master thread creates the task; default(none) is satisfiable because the task references no variables
+ #pragma omp parallel
+ {
+ #pragma omp master
+ #pragma omp task default(none)
+ {
+ printf("%d is executing this task\n", omp_get_thread_num());
+ }
+ }
+ // Reaching this point means the program did not hang
+ printf("pass\n");
+ return 0;
+}
diff --git a/final/runtime/test/tasking/nested_task_creation.c b/final/runtime/test/tasking/nested_task_creation.c
new file mode 100644
index 0000000..c7c25fc
--- /dev/null
+++ b/final/runtime/test/tasking/nested_task_creation.c
@@ -0,0 +1,35 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <omp.h>
+#include "omp_my_sleep.h"
+
+/*
+ * This test creates tasks that themselves create a new task.
+ * The runtime has to take care that they are correctly freed.
+ */
+
+int main() // Creates a task that itself creates a task, first outside and then inside a parallel region
+{
+ #pragma omp task
+ { // case 1: no enclosing parallel region
+ #pragma omp task
+ {
+ my_sleep( 0.1 ); // my_sleep comes from omp_my_sleep.h; presumably the argument is in seconds
+ }
+ }
+ // case 2: the same nesting, generated by a single thread of a two-thread team
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp single
+ #pragma omp task
+ {
+ #pragma omp task
+ {
+ my_sleep( 0.1 );
+ }
+ }
+ }
+ // No result is checked; the test passes by running to completion
+ printf("pass\n");
+ return 0;
+}
diff --git a/final/runtime/test/tasking/omp_task.c b/final/runtime/test/tasking/omp_task.c
new file mode 100644
index 0000000..c534abe
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task.c
@@ -0,0 +1,52 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_task() /* returns 1 if at least two different threads executed tasks, otherwise 0 */
+{
+ int tids[NUM_TASKS]; /* tids[k] = number of the thread that executed task k */
+ int i;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ /* First we have to store the value of the loop index in a new variable
+ * which will be private for each task because otherwise it will be overwritten
+ * if the execution of the task takes longer than the time which is needed to
+ * enter the next step of the loop!
+ */
+ int myi;
+ myi = i;
+ #pragma omp task
+ {
+ my_sleep (SLEEPTIME);
+ tids[myi] = omp_get_thread_num();
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /*end of parallel */
+
+ /* Now we check if more than one thread executed the tasks. */
+ for (i = 1; i < NUM_TASKS; i++) {
+ if (tids[0] != tids[i])
+ return 1; /* found a task that ran on a different thread than task 0 */
+ }
+ return 0;
+} /* end of test_omp_task */
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_task() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_task_final.c b/final/runtime/test/tasking/omp_task_final.c
new file mode 100644
index 0000000..b531af6
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_final.c
@@ -0,0 +1,65 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_task_final() /* returns 1 if every final task and the task it generated ran on the same thread */
+{
+ int tids[NUM_TASKS]; /* thread that executed outer task k */
+ int includedtids[NUM_TASKS]; /* thread that executed the task generated inside outer task k (k >= 10 only) */
+ int i;
+ int error = 0;
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ /* First we have to store the value of the loop index in a new variable
+ * which will be private for each task because otherwise it will be overwritten
+ * if the execution of the task takes longer than the time which is needed to
+ * enter the next step of the loop!
+ */
+ int myi;
+ myi = i;
+ /* tasks with index >= 10 are created as final tasks */
+ #pragma omp task final(i>=10)
+ {
+ tids[myi] = omp_get_thread_num();
+ /* we generate included tasks for final tasks */
+ if(myi >= 10) {
+ int included = myi;
+ #pragma omp task
+ {
+ my_sleep (SLEEPTIME);
+ includedtids[included] = omp_get_thread_num();
+ } /* end of omp included task of the final task */
+ my_sleep (SLEEPTIME);
+ } /* end of if it is a final task*/
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /*end of parallel */
+
+ /* Now we check that each final task and its included task were executed by the same thread. */
+ for (i = 10; i < NUM_TASKS; i++) {
+ if (tids[i] != includedtids[i]) {
+ error++;
+ }
+ }
+ return (error==0);
+} /* end of test_omp_task_final */
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_task_final() == 0)
+      ++failures;
+  }
+  return failures;
+}
+
diff --git a/final/runtime/test/tasking/omp_task_firstprivate.c b/final/runtime/test/tasking/omp_task_firstprivate.c
new file mode 100644
index 0000000..d1f7c35
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_firstprivate.c
@@ -0,0 +1,51 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int test_omp_task_firstprivate() /* returns 1 if every task computed known_sum from its own copy of sum */
+{
+ int i;
+ int sum = 1234; /* initial value each task's firstprivate copy must start from */
+ int known_sum;
+ int result = 0; /* counts the wrong sums from tasks */
+
+ known_sum = 1234 + (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ #pragma omp task firstprivate(sum)
+ {
+ int j;
+ for (j = 0; j <= LOOPCOUNT; j++) {
+ #pragma omp flush
+ sum += j; /* accumulates 0..LOOPCOUNT into this task's copy */
+ }
+
+ /* check if calculated sum was right */
+ if (sum != known_sum) {
+ #pragma omp critical
+ { result++; }
+ }
+ } /* omp task */
+ } /* for loop */
+ } /* omp single */
+ } /* omp parallel */
+ return (result == 0);
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_task_firstprivate() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_task_if.c b/final/runtime/test/tasking/omp_task_if.c
new file mode 100644
index 0000000..8b4728e
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_if.c
@@ -0,0 +1,43 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_task_if() /* returns 1 if the if(false) task ran before the statement following it */
+{
+ int condition_false;
+ int count;
+ int result; /* written only inside the task */
+
+ count=0;
+ condition_false = (count == 1); /* evaluates to 0 because count was just set to 0 */
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ #pragma omp task if (condition_false) shared(count, result)
+ {
+ my_sleep (SLEEPTIME);
+ #pragma omp critical
+ result = (0 == count); /* count is still 0 only if the task body ran before count = 1 below */
+ } /* end of omp task */
+ #pragma omp critical
+ count = 1;
+ } /* end of single */
+ } /*end of parallel */
+ return result;
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_task_if() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_task_imp_firstprivate.c b/final/runtime/test/tasking/omp_task_imp_firstprivate.c
new file mode 100644
index 0000000..905ab9a
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_imp_firstprivate.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Checks that a variable that is private in the enclosing parallel region is implicitly firstprivate in a task */
+int test_omp_task_imp_firstprivate() /* returns 1 on success, 0 on failure */
+{
+ int i=5;
+ int k = 0;
+ int result = 0;
+ int task_result = 1; /* cleared by any task that does not see i == 5 on entry */
+ #pragma omp parallel firstprivate(i)
+ {
+ #pragma omp single
+ {
+ for (k = 0; k < NUM_TASKS; k++) {
+ #pragma omp task shared(result , task_result)
+ {
+ int j;
+ // each task must start from the value 5
+ if(i != 5)
+ task_result = 0;
+ for(j = 0; j < NUM_TASKS; j++)
+ i++;
+ // i appears in no clause on the task, so it should be firstprivate implicitly
+ }
+ }
+ #pragma omp taskwait
+ result = (task_result && i==5); // the generating thread's own i must be unaffected by the tasks' increments
+ }
+ }
+ return result;
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_task_imp_firstprivate() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_task_priority.c b/final/runtime/test/tasking/omp_task_priority.c
new file mode 100644
index 0000000..7b62360
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_priority.c
@@ -0,0 +1,22 @@
+// RUN: %libomp-compile && env OMP_MAX_TASK_PRIORITY=42 %libomp-run
+// Test OMP 4.5 task priorities
+// Currently only the API function and environment-variable parsing are implemented.
+// The test command sets the environment variable OMP_MAX_TASK_PRIORITY=42, as tested below.
+#include <stdio.h>
+#include <omp.h>
+
+/* Passes (exit status 0) when the runtime reports a maximum task priority of 42. */
+int main (void) {
+  const int matches = (42 == omp_get_max_task_priority());
+
+  printf("Got %d\n", omp_get_max_task_priority());
+
+  if (!matches) {
+    printf("failed\n");
+    return 1;
+  }
+
+  printf("passed\n");
+  return 0;
+}
+
diff --git a/final/runtime/test/tasking/omp_task_private.c b/final/runtime/test/tasking/omp_task_private.c
new file mode 100644
index 0000000..7a93716
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_private.c
@@ -0,0 +1,53 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Checks the private clause on a task: every task sums 0..LOOPCOUNT into its own copy of sum */
+int test_omp_task_private() /* returns 1 if no task computed a wrong sum */
+{
+ int i;
+ int known_sum;
+ int sum = 0;
+ int result = 0; /* counts the wrong sums from tasks */
+
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ #pragma omp task private(sum) shared(result, known_sum)
+ {
+ int j;
+ // the private copy is set explicitly here before it is used
+ sum = 0;
+ for (j = 0; j <= LOOPCOUNT; j++) {
+ #pragma omp flush
+ sum += j;
+ }
+ /* check if calculated sum was right */
+ if (sum != known_sum) {
+ #pragma omp critical
+ result++;
+ }
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /* end of parallel*/
+ return (result == 0);
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_task_private() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_task_shared.c b/final/runtime/test/tasking/omp_task_shared.c
new file mode 100644
index 0000000..0304026
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_shared.c
@@ -0,0 +1,41 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Checks that NUM_TASKS tasks all increment the same shared counter */
+int test_omp_task_imp_shared() /* returns 1 if the counter ends at NUM_TASKS */
+{
+ int i;
+ int k = 0;
+ int result = 0;
+ i=0;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ for (k = 0; k < NUM_TASKS; k++) {
+ #pragma omp task shared(i)
+ {
+ #pragma omp atomic
+ i++;
+ // NOTE(review): the function name and the original remark say "implicitly" shared, but the clause above makes it explicit
+ }
+ }
+ }
+ result = i;
+ return ((result == NUM_TASKS));
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_task_imp_shared() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_taskloop_grainsize.c b/final/runtime/test/tasking/omp_taskloop_grainsize.c
new file mode 100644
index 0000000..0833073
--- /dev/null
+++ b/final/runtime/test/tasking/omp_taskloop_grainsize.c
@@ -0,0 +1,113 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+
+// These compilers don't support the taskloop construct
+// UNSUPPORTED: gcc-4, gcc-5, icc-16
+// GCC 6 has support for taskloops, but at least 6.3.0 is crashing on this test
+// UNSUPPORTED: gcc-6
+
+/*
+ * Test for taskloop
+ * Method: calculate how many times the iteration space is dispatched
+ * and judge if each dispatch has the requested grainsize.
+ * It is possible for two adjacent chunks to be executed by the same thread.
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#define CFDMAX_SIZE 1120
+
+int test_omp_taskloop_grainsize() // returns 1 if no grainsize in 1..47 produced an error, otherwise 0
+{
+ int result = 0;
+ int i, grainsize, count, tmp_count, num_off;
+ int *tmp, *tids, *tidsArray;
+ // tids[i] records which thread executed iteration i; allocated once and reused for every grainsize
+ tidsArray = (int *)malloc(sizeof(int) * CFDMAX_SIZE);
+ tids = tidsArray;
+
+ for (grainsize = 1; grainsize < 48; ++grainsize) {
+ fprintf(stderr, "Grainsize %d\n", grainsize);
+ count = tmp_count = num_off = 0;
+ // -1 marks an iteration that no task has executed yet
+ for (i = 0; i < CFDMAX_SIZE; ++i) {
+ tids[i] = -1;
+ }
+
+ #pragma omp parallel shared(tids)
+ {
+ #pragma omp master
+ #pragma omp taskloop grainsize(grainsize)
+ for (i = 0; i < CFDMAX_SIZE; i++) {
+ tids[i] = omp_get_thread_num();
+ }
+ }
+
+ for (i = 0; i < CFDMAX_SIZE; ++i) {
+ if (tids[i] == -1) {
+ fprintf(stderr, " Iteration %d not touched!\n", i);
+ result++;
+ }
+ }
+ // count = number of positions where the executing thread changes between neighbouring iterations
+ for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+ if (tids[i] != tids[i + 1]) {
+ count++;
+ }
+ }
+ // tmp[k] = length of the k-th run of consecutive iterations executed by the same thread (count + 1 runs)
+ tmp = (int *)malloc(sizeof(int) * (count + 1));
+ tmp[0] = 1;
+
+ for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+ if (tmp_count > count) {
+ printf("--------------------\nTestinternal Error: List too "
+ "small!!!\n--------------------\n");
+ break;
+ }
+ if (tids[i] != tids[i + 1]) {
+ tmp_count++;
+ tmp[tmp_count] = 1;
+ } else {
+ tmp[tmp_count]++;
+ }
+ }
+
+ // is grainsize statement working?
+ int num_tasks = CFDMAX_SIZE / grainsize;
+ int multiple1 = CFDMAX_SIZE / num_tasks; // the two chunk sizes accepted by the check below
+ int multiple2 = CFDMAX_SIZE / num_tasks + 1;
+ for (i = 0; i < count; i++) { // only the first count runs are checked; the final run (index count) is not
+ // it is possible for 2 adjacent chunks assigned to a same thread
+ if (tmp[i] % multiple1 != 0 && tmp[i] % multiple2 != 0) {
+ num_off++;
+ }
+ }
+ // a single off-size run is tolerated; two or more count as a failure for this grainsize
+ if (num_off > 1) {
+ fprintf(stderr, " The number of bad chunks is %d\n", num_off);
+ result++;
+ } else {
+ fprintf(stderr, " Everything ok\n");
+ }
+
+ free(tmp);
+ }
+ free(tidsArray);
+ return (result==0);
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_taskloop_grainsize() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_taskloop_num_tasks.c b/final/runtime/test/tasking/omp_taskloop_num_tasks.c
new file mode 100644
index 0000000..7c3c704
--- /dev/null
+++ b/final/runtime/test/tasking/omp_taskloop_num_tasks.c
@@ -0,0 +1,71 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+
+// These compilers don't support the taskloop construct
+// UNSUPPORTED: gcc-4, gcc-5, icc-16
+
+/*
+ * Test for taskloop
+ * Method: count how many times the executing thread changes across the
+ * iteration space and check that this does not exceed the requested num_tasks.
+ * It is possible for two adjacent chunks to be executed by the same thread.
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#define CFDMAX_SIZE 1120
+
+int test_omp_taskloop_num_tasks() // returns 1 if no num_tasks value in 1..119 produced too many chunks
+{
+  int i;
+  int *tids;      // tids[i] = thread that executed iteration i
+  int *tidsArray; // owning pointer for the buffer behind tids
+  int count;      // number of thread changes between neighbouring iterations
+  int result = 0; // number of num_tasks values that failed the check
+  int num_tasks;
+
+  for (num_tasks = 1; num_tasks < 120; ++num_tasks) {
+    count = 0;
+    tidsArray = (int *)malloc(sizeof(int) * CFDMAX_SIZE);
+    tids = tidsArray;
+
+    #pragma omp parallel shared(tids)
+    {
+      int i;
+      #pragma omp master
+      #pragma omp taskloop num_tasks(num_tasks)
+      for (i = 0; i < CFDMAX_SIZE; i++) {
+        tids[i] = omp_get_thread_num();
+      }
+    }
+
+    for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+      if (tids[i] != tids[i + 1]) {
+        count++;
+      }
+    }
+    if (count > num_tasks) {
+      fprintf(stderr, "counted too many tasks: (wanted %d, got %d)\n",
+              num_tasks, count);
+      result++;
+    }
+    free(tidsArray); // release this round's buffer; it used to leak once per num_tasks value
+  }
+
+  return (result==0);
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_taskloop_num_tasks() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_taskwait.c b/final/runtime/test/tasking/omp_taskwait.c
new file mode 100644
index 0000000..c3a0ea7
--- /dev/null
+++ b/final/runtime/test/tasking/omp_taskwait.c
@@ -0,0 +1,74 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_taskwait() /* returns 1 if all tasks had finished at the taskwait and the final array is all 2s */
+{
+ int result1 = 0; /* Stores number of not finished tasks after the taskwait */
+ int result2 = 0; /* Stores number of wrong array elements at the end */
+ int array[NUM_TASKS];
+ int i;
+
+ /* fill array */
+ for (i = 0; i < NUM_TASKS; i++)
+ array[i] = 0;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ /* First we have to store the value of the loop index in a new variable
+ * which will be private for each task because otherwise it will be overwritten
+ * if the execution of the task takes longer than the time which is needed to
+ * enter the next step of the loop!
+ */
+ int myi;
+ myi = i;
+ #pragma omp task
+ {
+ my_sleep (SLEEPTIME);
+ array[myi] = 1;
+ } /* end of omp task */
+ } /* end of for */
+ #pragma omp taskwait
+ /* check if all tasks were finished */
+ for (i = 0; i < NUM_TASKS; i++)
+ if (array[i] != 1)
+ result1++;
+
+ /* generate some more tasks which now shall overwrite
+ * the values in the array */
+ for (i = 0; i < NUM_TASKS; i++) {
+ int myi;
+ myi = i;
+ #pragma omp task
+ {
+ array[myi] = 2;
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /*end of parallel */
+
+ /* final check, if all array elements contain the right values: */
+ for (i = 0; i < NUM_TASKS; i++) {
+ if (array[i] != 2)
+ result2++;
+ }
+ return ((result1 == 0) && (result2 == 0));
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_taskwait() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/tasking/omp_taskyield.c b/final/runtime/test/tasking/omp_taskyield.c
new file mode 100644
index 0000000..5bb6984
--- /dev/null
+++ b/final/runtime/test/tasking/omp_taskyield.c
@@ -0,0 +1,58 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_taskyield() /* returns 1 if, for at least one task, start_tid and current_tid differ */
+{
+ int i;
+ int count = 0; /* number of tasks whose two recorded thread ids are equal */
+ int start_tid[NUM_TASKS];
+ int current_tid[NUM_TASKS];
+
+ for (i=0; i< NUM_TASKS; i++) {
+ start_tid[i]=0;
+ current_tid[i]=0;
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ int myi = i;
+ #pragma omp task untied
+ {
+ my_sleep(SLEEPTIME);
+ start_tid[myi] = omp_get_thread_num();
+ #pragma omp taskyield
+ if((start_tid[myi] %2) ==0){ /* only tasks started on an even-numbered thread record a second id */
+ my_sleep(SLEEPTIME);
+ current_tid[myi] = omp_get_thread_num();
+ } /*end of if*/
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /* end of parallel */
+ for (i=0;i<NUM_TASKS; i++) {
+ //printf("start_tid[%d]=%d, current_tid[%d]=%d\n",
+ //i, start_tid[i], i , current_tid[i]);
+ if (current_tid[i] == start_tid[i]) /* NOTE(review): a task started on an odd thread keeps current_tid == 0, so it always compares unequal here */
+ count++;
+ }
+ return (count<NUM_TASKS);
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_taskyield() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/threadprivate/omp_threadprivate.c b/final/runtime/test/threadprivate/omp_threadprivate.c
new file mode 100644
index 0000000..a3dd80d
--- /dev/null
+++ b/final/runtime/test/threadprivate/omp_threadprivate.c
@@ -0,0 +1,102 @@
+// RUN: %libomp-compile-and-run
+/*
+ * Threadprivate is tested in 2 ways:
+ * 1. The global variable declared as threadprivate should have
+ * local copy for each thread. Otherwise race condition and
+ * wrong result.
+ * 2. If the value of local copy is retained for the two adjacent
+ * parallel regions
+ */
+#include "omp_testsuite.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+static int sum0=0;
+static int myvalue = 0;
+
+#pragma omp threadprivate(sum0)
+#pragma omp threadprivate(myvalue)
+
+int test_omp_threadprivate() /* returns 1 if the sum is correct and myvalue was retained in all 100 rounds */
+{
+ int sum = 0;
+ int known_sum;
+ int i;
+ int iter;
+ int *data; /* data[rank] = value the thread with that rank stored in myvalue */
+ int size;
+ int num_failed = 0;
+ int my_random;
+ omp_set_dynamic(0);
+ /* part 1: every thread accumulates its share of 1..LOOPCOUNT in its own sum0 */
+ #pragma omp parallel private(i)
+ {
+ sum0 = 0;
+ #pragma omp for
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ sum0 = sum0 + i;
+ } /*end of for*/
+ #pragma omp critical
+ {
+ sum = sum + sum0;
+ } /*end of critical */
+ } /* end of parallel */
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ if (known_sum != sum ) {
+ fprintf (stderr, " known_sum = %d, sum = %d\n", known_sum, sum);
+ }
+
+ /* the next parallel region is just used to get the number of threads*/
+ omp_set_dynamic(0); /* NOTE(review): repeats the call made at the top of the function */
+ #pragma omp parallel
+ {
+ #pragma omp master
+ {
+ size=omp_get_num_threads();
+ data=(int*) malloc(size*sizeof(int)); /* NOTE(review): the result is not checked for NULL */
+ }
+ }/* end parallel*/
+ /* part 2: a value stored in myvalue in one parallel region must still be there in the next */
+ srand(45);
+ for (iter = 0; iter < 100; iter++) {
+ my_random = rand(); /* random number generator is
+ called inside serial region*/
+
+ /* the first parallel region is used to initialize myvalue
+ and the array with my_random+rank */
+ #pragma omp parallel
+ {
+ int rank;
+ rank = omp_get_thread_num ();
+ myvalue = data[rank] = my_random + rank;
+ }
+
+ /* the second parallel region verifies that the
+ value of "myvalue" is retained */
+ #pragma omp parallel reduction(+:num_failed)
+ {
+ int rank;
+ rank = omp_get_thread_num ();
+ num_failed = num_failed + (myvalue != data[rank]);
+ if(myvalue != data[rank]) {
+ fprintf (stderr, " myvalue = %d, data[rank]= %d\n",
+ myvalue, data[rank]);
+ }
+ }
+ }
+ free (data);
+ return (known_sum == sum) && !num_failed;
+} /* end of test_omp_threadprivate */
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_threadprivate() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/threadprivate/omp_threadprivate_for.c b/final/runtime/test/threadprivate/omp_threadprivate_for.c
new file mode 100644
index 0000000..3342e63
--- /dev/null
+++ b/final/runtime/test/threadprivate/omp_threadprivate_for.c
@@ -0,0 +1,48 @@
+// RUN: %libomp-compile-and-run
+#include "omp_testsuite.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+static int i;
+#pragma omp threadprivate(i)
+
+int test_omp_threadprivate_for() /* returns 1 if the accumulated sum equals 1 + 2 + ... + LOOPCOUNT */
+{
+ int known_sum;
+ int sum;
+
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ sum = 0;
+
+ #pragma omp parallel
+ {
+ int sum0 = 0, i0;
+ #pragma omp for
+ for (i0 = 1; i0 <= LOOPCOUNT; i0++) {
+ i = i0; /* i is the file-scope threadprivate variable */
+ sum0 = sum0 + i; /* read back through the threadprivate copy */
+ }
+ #pragma omp critical
+ {
+ sum = sum + sum0;
+ }
+ } /* end of parallel */
+
+ if (known_sum != sum ) {
+ fprintf(stderr, " known_sum = %d, sum = %d\n", known_sum, sum);
+ }
+ return (known_sum == sum);
+} /* end of test_omp_threadprivate_for */
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_omp_threadprivate_for() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/bug_set_schedule_0.c b/final/runtime/test/worksharing/for/bug_set_schedule_0.c
new file mode 100644
index 0000000..889e239
--- /dev/null
+++ b/final/runtime/test/worksharing/for/bug_set_schedule_0.c
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <omp.h>
+#include "omp_testsuite.h"
+
+/* Test that the chunk size is set to default (1) when
+ chunk size <= 0 is specified */
+int a = 0;
+
+int test_set_schedule_0() /* returns 1 if exactly 10 iterations were executed */
+{
+ int i;
+ a = 0;
+ omp_set_schedule(omp_sched_dynamic,0); /* chunk size 0 is the case under test */
+
+ #pragma omp parallel
+ {
+ #pragma omp for schedule(runtime)
+ for(i = 0; i < 10; i++) {
+ #pragma omp atomic
+ a++;
+ if(a > 10) /* NOTE(review): plain read of a while other threads update it atomically; an atomic read or capture may be intended */
+ exit(1); /* more than 10 increments: abort the whole program with status 1 */
+ }
+ }
+ return a==10;
+}
+
+/* Run the test REPETITIONS times; the exit status is the number of failed runs. */
+int main()
+{
+  int failures = 0;
+  int remaining = REPETITIONS;
+
+  while (remaining-- > 0) {
+    if (test_set_schedule_0() == 0)
+      ++failures;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_doacross_check.c b/final/runtime/test/worksharing/for/kmp_doacross_check.c
new file mode 100644
index 0000000..59b61e3
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_doacross_check.c
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile-and-run
+// UNSUPPORTED: gcc
+// This test is incompatible with gcc because of the explicit call to
+// __kmpc_doacross_fini(). gcc relies on an implicit call to this function
+// when the last iteration is executed inside the GOMP_loop_*_next() functions.
+// Hence, in gcc, having the explicit call leads to __kmpc_doacross_fini()
+// being called twice.
+#include <stdio.h>
+
+#define N 1000
+
+struct dim { // bounds of one loop dimension, passed to __kmpc_doacross_init
+ long long lo; // lower bound
+ long long up; // upper bound (main passes N-1 for a loop running while i < N, so presumably inclusive)
+ long long st; // stride
+};
+extern void __kmpc_doacross_init(void*, int, int, struct dim *);
+extern void __kmpc_doacross_wait(void*, int, long long*);
+extern void __kmpc_doacross_post(void*, int, long long*);
+extern void __kmpc_doacross_fini(void*, int);
+extern int __kmpc_global_thread_num(void*);
+
+int main() // Runs a loop with a dependence on iteration i-1 through the __kmpc_doacross_* calls and checks the result
+{
+ int i;
+ int iter[N];
+ struct dim dims;
+ for( i = 0; i < N; ++i )
+ iter[i] = 1;
+ dims.lo = 1; // describes the loop below: i = 1 .. N-1, step 1
+ dims.up = N-1;
+ dims.st = 1;
+ #pragma omp parallel num_threads(4)
+ {
+ int i, gtid;
+ long long vec; // iteration index handed to the wait/post calls
+ gtid = __kmpc_global_thread_num(NULL);
+ __kmpc_doacross_init(NULL,gtid,1,&dims); // thread starts the loop
+ #pragma omp for nowait schedule(dynamic)
+ for( i = 1; i < N; ++i )
+ {
+ // runtime call corresponding to #pragma omp ordered depend(sink:i-1)
+ vec=i-1;
+ __kmpc_doacross_wait(NULL,gtid,&vec);
+ // user's code
+ iter[i] = iter[i-1] + 1;
+ // runtime call corresponding to #pragma omp ordered depend(source)
+ vec=i;
+ __kmpc_doacross_post(NULL,gtid,&vec);
+ }
+ // thread finishes the loop (should be before the loop barrier)
+ __kmpc_doacross_fini(NULL,gtid);
+ }
+ if( iter[N-1] == N ) { // iter[0] == 1 and each element adds 1 to its predecessor, so N is expected when every iteration saw its predecessor's final value
+ printf("passed\n");
+ } else {
+ printf("failed %d != %d\n", iter[N-1], N);
+ return 1;
+ }
+ return 0;
+}
+
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c
new file mode 100644
index 0000000..5c6f94b
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c
@@ -0,0 +1,410 @@
+// RUN: %libomp-compile-and-run
+/*
+ Test for the 'schedule(simd:guided)' clause.
+ Compiler needs to generate a dynamic dispatching and pass the schedule
+ value 46 to the OpenMP RTL. Test uses numerous loop parameter combinations.
+*/
+#include <stdio.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#endif
+
+// uncomment for debug diagnostics:
+//#define DEBUG
+
+#define SIMD_LEN 4
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+enum sched { // schedule kinds accepted by the __kmpc_dispatch_init_* prototypes below; values copied from the OpenMP RTL
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46, // the value the file header associates with schedule(simd:guided)
+ kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct { // descriptor passed as first argument to the __kmpc_* entry points; layout copied from the OpenMP RTL
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource; // text of the form ";file;func;0;0;;", as in the loc initializer in this file
+} id;
+
+extern int __kmpc_global_thread_num(id*);
+extern void __kmpc_barrier(id*, int gtid);
+extern void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+extern void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+extern int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+extern int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+// Checks the chunks that kmp_sch_guided_simd hands out for one loop with
+// 64-bit bounds. Thread 0 drains all chunks by itself and validates their
+// bounds, stride and sizes; every other thread waits until thread 0 is done
+// and then verifies that the dispatcher has nothing left to give.
+// The function contains barriers, so presumably every thread of the team is
+// expected to call it with the same arguments.
+// Returns the number of errors detected by the calling thread.
+int run_loop_64(i64 loop_lb, i64 loop_ub, i64 loop_st, int loop_chunk) {
+  int err = 0;
+  static int volatile loop_sync = 0; // set to 1 by thread 0 once all chunks are consumed
+  i64 lb; // Chunk lower bound
+  i64 ub; // Chunk upper bound
+  i64 st; // Chunk stride
+  int rc;
+  int tid = omp_get_thread_num();
+  int gtid = tid;
+  int last;
+  // #ifdef (not #if) so that the bare "#define DEBUG" suggested at the top of
+  // the file compiles; the format now names all seven arguments.
+#ifdef DEBUG
+  printf("run_loop_<%d>(gtid=%d, tid=%d, lb=%d, ub=%d, st=%d, ch=%d)\n",
+         (int)sizeof(i64), gtid, tid,
+         (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen
+  if (loop_st == 0)
+    return 0;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return 0;
+
+  __kmpc_dispatch_init_8(&loc, gtid, kmp_sch_guided_simd,
+                         loop_lb, loop_ub, loop_st, loop_chunk);
+  if (tid == 0) {
+    // Let the master thread handle the chunks alone
+    int chunk;             // No of current chunk
+    i64 next_lb;           // Lower bound of the next chunk
+    i64 last_ub = loop_lb; // Upper bound of the last processed chunk
+                           // (initialized so it is defined even if no chunk arrives)
+    u64 cur;               // Number of iterations in current chunk
+    u64 max;               // Max allowed iterations for current chunk
+    int undersized = 0;
+
+    chunk = 0;
+    next_lb = loop_lb;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations
+    while (__kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#ifdef DEBUG
+      printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub);
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized
+      if (undersized) {
+        printf("Error with chunk %d\n", chunk);
+        err++;
+      }
+      // Check lower and upper bounds
+      if (lb != next_lb) {
+        printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk);
+        err++;
+      }
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub)) {
+          printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb <= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      } else {
+        if (!(ub >= loop_ub)) {
+          printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb >= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      }
+      // Stride should not change
+      if (!(st == loop_st)) {
+        printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk);
+        err++;
+      }
+      cur = (ub - lb) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum
+      if (!(cur <= max + 1)) {
+        // cur and max are unsigned long long, so %llu is the matching conversion
+        printf("Error with iter %llu, %llu\n", cur, max);
+        err++;
+      }
+      // Update maximum for the next chunk
+      if (cur < max)
+        max = cur;
+      next_lb = ub + loop_st;
+      last_ub = ub;
+      undersized = (cur < loop_chunk);
+    }
+    // Must have at least one chunk
+    if (!(chunk > 0)) {
+      printf("Error with chunk %d\n", chunk);
+      err++;
+    }
+    // Must have the right last iteration index
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st > loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    } else {
+      if (!(last_ub >= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st < loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    }
+    // Let non-master threads go
+    loop_sync = 1;
+  } else {
+    int i;
+    // Workers wait for master thread to finish, then call __kmpc_dispatch_next
+    // First spin briefly, then fall back to sleeping between polls
+    for (i = 0; i < 1000000; ++ i) {
+      if (loop_sync != 0) {
+        break;
+      }
+    }
+    while (loop_sync == 0) {
+      delay();
+    }
+    // At this moment we do not have any more chunks -- all the chunks already
+    // processed by master thread
+    rc = __kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st);
+    if (rc) {
+      printf("Error return value\n");
+      err++;
+    }
+  }
+
+  __kmpc_barrier(&loc, gtid);
+  if (tid == 0) {
+    loop_sync = 0; // Restore original state
+#ifdef DEBUG
+    printf("run_loop_64(): at the end\n");
+#endif
+  }
+  __kmpc_barrier(&loc, gtid);
+  return err;
+} // run_loop
+
+// ---------------------------------------------------------------------------
+// Exercises kmp_sch_guided_simd dispatching for a loop with 32-bit bounds.
+// Thread 0 drains all chunks on its own and validates each of them; every
+// other thread waits until thread 0 is done and then checks that
+// __kmpc_dispatch_next_4() has nothing left to hand out.
+//   loop_lb, loop_ub - first and last iteration value (both inclusive)
+//   loop_st          - loop stride, may be negative
+//   loop_chunk       - chunk size passed to __kmpc_dispatch_init_4()
+// Returns the number of errors detected by the calling thread.
+int run_loop_32(int loop_lb, int loop_ub, int loop_st, int loop_chunk) {
+  int err = 0;
+  static int volatile loop_sync = 0;
+  int lb; // Chunk lower bound
+  int ub; // Chunk upper bound
+  int st; // Chunk stride
+  int rc;
+  int tid = omp_get_thread_num();
+  int gtid = tid;
+  int last;
+#if DEBUG
+  // gtid and tid are printed too, so that every argument has a conversion
+  printf("run_loop_<%d>(gtid=%d, tid=%d, lb=%d, ub=%d, st=%d, ch=%d)\n",
+         (int)sizeof(int), gtid, tid,
+         (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen
+  if (loop_st == 0)
+    return 0;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return 0;
+
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_guided_simd,
+                         loop_lb, loop_ub, loop_st, loop_chunk);
+  if (tid == 0) {
+    // Let the master thread handle the chunks alone
+    int chunk; // No of current chunk
+    int next_lb; // Lower bound of the next chunk
+    int last_ub; // Upper bound of the last processed chunk
+    u64 cur; // Number of iterations in current chunk
+    u64 max; // Max allowed iterations for current chunk
+    int undersized = 0;
+
+    chunk = 0;
+    next_lb = loop_lb;
+    // Keep last_ub defined even if no chunk is ever delivered; that case is
+    // reported by the "at least one chunk" check below
+    last_ub = loop_ub;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#if DEBUG
+      printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub);
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized
+      if (undersized) {
+        printf("Error with chunk %d\n", chunk);
+        err++;
+      }
+      // Check lower and upper bounds
+      if (lb != next_lb) {
+        printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk);
+        err++;
+      }
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub)) {
+          printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb <= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      } else {
+        if (!(ub >= loop_ub)) {
+          printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb >= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      }; // if
+      // Stride should not change
+      if (!(st == loop_st)) {
+        printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk);
+        err++;
+      }
+      cur = (ub - lb) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum
+      if (!(cur <= max + 1)) {
+        // cur and max are 64-bit; convert them to match the %d conversions
+        printf("Error with iter %d, %d\n", (int)cur, (int)max);
+        err++;
+      }
+      // Update maximum for the next chunk
+      if (cur < max)
+        max = cur;
+      next_lb = ub + loop_st;
+      last_ub = ub;
+      undersized = (cur < (u64)loop_chunk);
+    }; // while
+    // Must have at least one chunk
+    if (!(chunk > 0)) {
+      printf("Error with chunk %d\n", chunk);
+      err++;
+    }
+    // Must have the right last iteration index
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st > loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    } else {
+      if (!(last_ub >= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st < loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    }; // if
+    // Let non-master threads go
+    loop_sync = 1;
+  } else {
+    int i;
+    // Workers wait for master thread to finish, then call __kmpc_dispatch_next
+    // (a short busy-wait first, then sleeping waits)
+    for (i = 0; i < 1000000; ++ i) {
+      if (loop_sync != 0) {
+        break;
+      }; // if
+    }; // for i
+    while (loop_sync == 0) {
+      delay();
+    }; // while
+    // At this moment we do not have any more chunks -- all the chunks already
+    // processed by the master thread
+    rc = __kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st);
+    if (rc) {
+      printf("Error return value\n");
+      err++;
+    }
+  }; // if
+
+  __kmpc_barrier(&loc, gtid);
+  if (tid == 0) {
+    loop_sync = 0; // Restore original state
+#if DEBUG
+    printf("run_loop<>(): at the end\n");
+#endif
+  }; // if
+  __kmpc_barrier(&loc, gtid);
+  return err;
+} // run_loop_32
+
+// ---------------------------------------------------------------------------
+// Runs run_loop_64() for a grid of chunk sizes, strides and bounds (each loop
+// once forwards and once reversed) on a team of num_th threads.
+// Returns the number of errors summed over all threads.
+int run_64(int num_th)
+{
+  int err = 0;
+  // Each thread counts into its own copy of err and the reduction adds the
+  // copies up at the end of the region, so the shared counter is never
+  // updated concurrently.
+#pragma omp parallel num_threads(num_th) reduction(+:err)
+  {
+    int chunk;
+    i64 st, lb, ub;
+    for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) {
+      for (st = 1; st <= 3; ++ st) {
+        for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) {
+          for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) {
+            err += run_loop_64(lb, ub, st, chunk);
+            err += run_loop_64(ub, lb, -st, chunk);
+          }; // for ub
+        }; // for lb
+      }; // for st
+    }; // for chunk
+  }
+  return err;
+} // run_64
+
+// Runs run_loop_32() for a grid of chunk sizes, strides and bounds (each loop
+// once forwards and once reversed) on a team of num_th threads.
+// Returns the number of errors summed over all threads.
+int run_32(int num_th)
+{
+  int err = 0;
+  // Each thread counts into its own copy of err and the reduction adds the
+  // copies up at the end of the region, so the shared counter is never
+  // updated concurrently.
+#pragma omp parallel num_threads(num_th) reduction(+:err)
+  {
+    int chunk, st, lb, ub;
+    for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) {
+      for (st = 1; st <= 3; ++ st) {
+        for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) {
+          for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) {
+            err += run_loop_32(lb, ub, st, chunk);
+            err += run_loop_32(ub, lb, -st, chunk);
+          }; // for ub
+        }; // for lb
+      }; // for st
+    }; // for chunk
+  }
+  return err;
+} // run_32
+
+// ---------------------------------------------------------------------------
+// Runs the 32-bit and 64-bit test sets with teams of 1 to 4 threads and
+// prints the verdict.
+// Returns 0 when no error was found and 1 otherwise. The raw error count is
+// not used as exit status: on POSIX hosts only its low 8 bits reach the
+// parent process, so a count that is a multiple of 256 would read as success.
+int main()
+{
+  int n, err = 0;
+  for (n = 1; n <= 4; ++ n) {
+    err += run_32(n);
+    err += run_64(n);
+  }; // for n
+  if (err)
+    printf("failed with %d errors\n", err);
+  else
+    printf("passed\n");
+  return err ? 1 : 0;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
new file mode 100644
index 0000000..bb538d1
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
@@ -0,0 +1,221 @@
+// RUN: %libomp-compile-and-run
+
+// The test checks schedule(simd:runtime)
+// in combination with omp_set_schedule()
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched { // Schedule kinds accepted by the __kmpc_dispatch_init_* declarations below
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47, // the kind run_loop() in this file dispatches with
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct { // Descriptor passed as first argument to every __kmpc_* call declared below
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource; // presumably a source-location string in the RTL's format -- TODO confirm
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*);
+ void __kmpc_barrier(id*, int gtid);
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; // the one descriptor shared by all calls in this file
+
+// ---------------------------------------------------------------------------
+// Drives one schedule(simd:runtime) loop through the dispatch interface and
+// validates every chunk handed to the calling thread.
+// Meant to be called by all threads of a parallel region: each thread pulls
+// its own chunks with __kmpc_dispatch_next_4() and the function ends in a
+// barrier.
+//   loop_lb, loop_ub - first and last iteration value (both inclusive)
+//   loop_st          - loop stride
+//   lchunk           - chunk size of the active schedule, counted in units of
+//                      SIMD_LEN iterations; 0 means "schedule has no chunk"
+// Every problem is printed and counted in the global err.
+// NOTE(review): err is incremented by several threads without
+// synchronization, so the printed running count may be inexact.
+void
+run_loop(
+    int loop_lb,   // Loop lower bound.
+    int loop_ub,   // Loop upper bound.
+    int loop_st,   // Loop stride.
+    int lchunk
+) {
+  int lb;        // Chunk lower bound.
+  int ub;        // Chunk upper bound.
+  int st;        // Chunk stride.
+  int tid = omp_get_thread_num();
+  int gtid = __kmpc_global_thread_num(&loc);
+  // Read after the dispatch loop; stays 0 if this thread never gets a chunk.
+  int last = 0;
+  int ch;
+  int no_chunk = 0;
+  if (lchunk == 0) {
+    no_chunk = 1;
+    lchunk = 1;
+  }
+  ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen.
+  if (loop_st == 0)
+    return;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return;
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+                         loop_lb, loop_ub, loop_st, SIMD_LEN);
+  {
+    // Every thread checks the chunks it receives.
+    int chunk;     // No of current chunk.
+    int last_ub;   // Upper bound of the last processed chunk.
+    u64 cur;       // Number of iterations in current chunk.
+    u64 max;       // Max allowed iterations for current chunk.
+    int undersized = 0;
+    last_ub = loop_ub;
+    chunk = 0;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations.
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#if _DEBUG
+      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized.
+      if (undersized)
+        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub))
+          printf("Error with ub %d, %d, ch %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb <= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      } else {
+        if (!(ub >= loop_ub))
+          printf("Error with ub %d, %d, %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb >= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      }; // if
+      // Stride should not change.
+      if (!(st == loop_st))
+        printf("Error with st %d, %d, ch %d, err %d\n",
+               (int)st, (int)loop_st, chunk, ++err);
+      cur = ( ub - lb ) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum.
+      if (!( cur <= max + 1))
+        // cur and max are 64-bit; convert them to match the %d conversions.
+        printf("Error with iter %d, %d, err %d\n", (int)cur, (int)max, ++err);
+      // Only the last chunk may be a partial one; with an explicit chunk size
+      // it must not exceed ch. All other chunks are multiples of ch.
+      if (last) {
+        if (!no_chunk && cur > ch)
+          printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+                 (int)cur, ch, tid, ++err);
+      } else {
+        if (cur % ch)
+          printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+                 chunk, (int)cur, ch, tid, ++err);
+      }
+      // Update maximum for the next chunk.
+      if (cur < max)
+        max = cur;
+      last_ub = ub;
+      undersized = (cur < ch);
+#if _DEBUG > 1
+      if (last)
+        printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+               undersized,(int)cur,ch,tid,ub,lb,loop_st);
+#endif
+    } // while
+    // Must have the right last iteration index.
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st > loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } else {
+      if (!(last_ub >= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st < loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } // if
+  }
+  __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+// Runs the same 27-iteration loop under every runtime schedule selected
+// through omp_set_schedule() and reports the global error count.
+// Returns 0 on success and 1 if any check failed.
+int main(int argc, char *argv[])
+{
+  // One row per scenario: the schedule kind and chunk handed to
+  // omp_set_schedule(), and the chunk size run_loop() is told to expect
+  // (0 = schedule without a chunk).
+  static const struct {
+    omp_sched_t kind;
+    int modifier;
+    int expected;
+  } cases[] = {
+    {omp_sched_static,  0, 0}, // static (no chunk)
+    {omp_sched_auto,    0, 0}, // auto (chunk should be ignored)
+    {omp_sched_static,  1, 1}, // static,1
+    {omp_sched_dynamic, 1, 1}, // dynamic,1
+    {omp_sched_guided,  1, 1}, // guided,1
+    {omp_sched_dynamic, 0, 1}, // dynamic,0 - use default chunk size 1
+    {omp_sched_guided,  0, 1}, // guided,0 - use default chunk size 1
+  };
+  const int ncases = (int)(sizeof(cases) / sizeof(cases[0]));
+  int k;
+
+  for (k = 0; k < ncases; ++k) {
+    omp_set_schedule(cases[k].kind, cases[k].modifier);
+#pragma omp parallel
+    run_loop(0, 26, 1, cases[k].expected);
+  }
+
+  if (!err) {
+    printf("passed\n");
+    return 0;
+  }
+  printf("failed, err = %d\n", err);
+  return 1;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
new file mode 100644
index 0000000..d137831
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
@@ -0,0 +1,196 @@
+// RUN: %libomp-compile
+// RUN: env OMP_SCHEDULE=guided %libomp-run
+// RUN: env OMP_SCHEDULE=guided,1 %libomp-run 1
+// RUN: env OMP_SCHEDULE=guided,2 %libomp-run 2
+// RUN: env OMP_SCHEDULE=dynamic %libomp-run
+// RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1
+// RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2
+// RUN: env OMP_SCHEDULE=auto %libomp-run
+
+// The test checks schedule(simd:runtime)
+// in combination with OMP_SCHEDULE=guided[,chunk]
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define UBOUND 100
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched { // Schedule kinds accepted by the __kmpc_dispatch_init_* declarations below
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47, // the kind run_loop() in this file dispatches with
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct { // Descriptor passed as first argument to every __kmpc_* call declared below
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource; // presumably a source-location string in the RTL's format -- TODO confirm
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*);
+ void __kmpc_barrier(id*, int gtid);
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; // the one descriptor shared by all calls in this file
+
+// ---------------------------------------------------------------------------
+// Drives one schedule(simd:runtime) loop through the dispatch interface and
+// validates every chunk handed to the calling thread.
+// Meant to be called by all threads of a parallel region: each thread pulls
+// its own chunks with __kmpc_dispatch_next_4() and the function ends in a
+// barrier.
+//   loop_lb, loop_ub - first and last iteration value (both inclusive)
+//   loop_st          - loop stride
+//   lchunk           - chunk size of the active schedule, counted in units of
+//                      SIMD_LEN iterations; 0 means "schedule has no chunk"
+// Every problem is printed and counted in the global err.
+// NOTE(review): err is incremented by several threads without
+// synchronization, so the printed running count may be inexact.
+void
+run_loop(
+    int loop_lb,   // Loop lower bound.
+    int loop_ub,   // Loop upper bound.
+    int loop_st,   // Loop stride.
+    int lchunk
+) {
+  int lb;        // Chunk lower bound.
+  int ub;        // Chunk upper bound.
+  int st;        // Chunk stride.
+  int tid = omp_get_thread_num();
+  int gtid = __kmpc_global_thread_num(&loc);
+  // Read after the dispatch loop; stays 0 if this thread never gets a chunk.
+  int last = 0;
+  int ch;
+  int no_chunk = 0;
+  if (lchunk == 0) {
+    no_chunk = 1;
+    lchunk = 1;
+  }
+  ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen.
+  if (loop_st == 0)
+    return;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return;
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+                         loop_lb, loop_ub, loop_st, SIMD_LEN);
+  {
+    // Every thread checks the chunks it receives.
+    int chunk;     // No of current chunk.
+    int last_ub;   // Upper bound of the last processed chunk.
+    u64 cur;       // Number of iterations in current chunk.
+    u64 max;       // Max allowed iterations for current chunk.
+    int undersized = 0;
+    last_ub = loop_ub;
+    chunk = 0;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations.
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#if _DEBUG
+      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized.
+      if (undersized)
+        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub))
+          printf("Error with ub %d, %d, ch %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb <= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      } else {
+        if (!(ub >= loop_ub))
+          printf("Error with ub %d, %d, %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb >= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      }; // if
+      // Stride should not change.
+      if (!(st == loop_st))
+        printf("Error with st %d, %d, ch %d, err %d\n",
+               (int)st, (int)loop_st, chunk, ++err);
+      cur = ( ub - lb ) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum.
+      if (!( cur <= max + 1))
+        // cur and max are 64-bit; convert them to match the %d conversions.
+        printf("Error with iter %d, %d, err %d\n", (int)cur, (int)max, ++err);
+      // Chunks other than the last one must be multiples of ch.
+      if (!last && cur % ch)
+        printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+               chunk, (int)cur, ch, tid, ++err);
+      // With an explicit chunk size the last chunk must not exceed ch.
+      if (last && !no_chunk && cur > ch)
+        printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+               (int)cur, ch, tid, ++err);
+      // Update maximum for the next chunk.
+      if (cur < max)
+        max = cur;
+      last_ub = ub;
+      undersized = (cur < ch);
+#if _DEBUG > 1
+      if (last)
+        printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+               undersized,(int)cur,ch,tid,ub,lb,loop_st);
+#endif
+    } // while
+    // Must have the right last iteration index.
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st > loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } else {
+      if (!(last_ub >= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st < loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } // if
+  }
+  __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+// Runs the loop check once under the schedule taken from OMP_SCHEDULE.
+// The optional first argument is the chunk size that was put into
+// OMP_SCHEDULE; without it the schedule is treated as having no chunk.
+// Returns 0 on success and 1 if any check failed.
+int main(int argc, char *argv[])
+{
+  int chunk = (argc > 1) ? atoi(argv[1]) : 0;
+#pragma omp parallel
+  run_loop(0, UBOUND, 1, chunk);
+  if (!err) {
+    printf("passed\n");
+    return 0;
+  }
+  printf("failed, err = %d\n", err);
+  return 1;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
new file mode 100644
index 0000000..4cb15d6
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
@@ -0,0 +1,201 @@
+// RUN: %libomp-compile && %libomp-run
+// RUN: %libomp-run 1 && %libomp-run 2
+
+// The test checks schedule(simd:runtime)
+// in combination with OMP_SCHEDULE=static[,chunk]
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched { // Schedule kinds accepted by the __kmpc_dispatch_init_* declarations below
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47, // the kind run_loop() in this file dispatches with
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct { // Descriptor passed as first argument to every __kmpc_* call declared below
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource; // presumably a source-location string in the RTL's format -- TODO confirm
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*);
+ void __kmpc_barrier(id*, int gtid);
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; // the one descriptor shared by all calls in this file
+
+// ---------------------------------------------------------------------------
+// Drives one schedule(simd:runtime) loop through the dispatch interface and
+// validates every chunk handed to the calling thread.
+// Meant to be called by all threads of a parallel region: each thread pulls
+// its own chunks with __kmpc_dispatch_next_4() and the function ends in a
+// barrier.
+//   loop_lb, loop_ub - first and last iteration value (both inclusive)
+//   loop_st          - loop stride
+//   lchunk           - chunk size of the active schedule, counted in units of
+//                      SIMD_LEN iterations; 0 means "schedule has no chunk"
+// Every problem is printed and counted in the global err.
+// NOTE(review): err is incremented by several threads without
+// synchronization, so the printed running count may be inexact.
+void
+run_loop(
+    int loop_lb,   // Loop lower bound.
+    int loop_ub,   // Loop upper bound.
+    int loop_st,   // Loop stride.
+    int lchunk
+) {
+  int lb;        // Chunk lower bound.
+  int ub;        // Chunk upper bound.
+  int st;        // Chunk stride.
+  int tid = omp_get_thread_num();
+  int gtid = __kmpc_global_thread_num(&loc);
+  // Read after the dispatch loop; stays 0 if this thread never gets a chunk.
+  int last = 0;
+  int ch;
+  int no_chunk = 0;
+  if (lchunk == 0) {
+    no_chunk = 1;
+    lchunk = 1;
+  }
+  ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen.
+  if (loop_st == 0)
+    return;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return;
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+                         loop_lb, loop_ub, loop_st, SIMD_LEN);
+  {
+    // Every thread checks the chunks it receives.
+    int chunk;     // No of current chunk.
+    int last_ub;   // Upper bound of the last processed chunk.
+    u64 cur;       // Number of iterations in current chunk.
+    u64 max;       // Max allowed iterations for current chunk.
+    int undersized = 0;
+    last_ub = loop_ub;
+    chunk = 0;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations.
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#if _DEBUG
+      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized.
+      if (undersized)
+        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub))
+          printf("Error with ub %d, %d, ch %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb <= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      } else {
+        if (!(ub >= loop_ub))
+          printf("Error with ub %d, %d, %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb >= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      }; // if
+      // Stride should not change.
+      if (!(st == loop_st))
+        printf("Error with st %d, %d, ch %d, err %d\n",
+               (int)st, (int)loop_st, chunk, ++err);
+      cur = ( ub - lb ) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum.
+      if (!( cur <= max + 1))
+        // cur and max are 64-bit; convert them to match the %d conversions.
+        printf("Error with iter %d, %d, err %d\n", (int)cur, (int)max, ++err);
+      // Only the last chunk may be a partial one; with an explicit chunk size
+      // it must not exceed ch. All other chunks are multiples of ch.
+      if (last) {
+        if (!no_chunk && cur > ch)
+          printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+                 (int)cur, ch, tid, ++err);
+      } else {
+        if (cur % ch)
+          printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+                 chunk, (int)cur, ch, tid, ++err);
+      }
+      // Update maximum for the next chunk.
+      if (cur < max)
+        max = cur;
+      last_ub = ub;
+      undersized = (cur < ch);
+#if _DEBUG > 1
+      if (last)
+        printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+               undersized,(int)cur,ch,tid,ub,lb,loop_st);
+#endif
+    } // while
+    // Must have the right last iteration index.
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st > loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } else {
+      if (!(last_ub >= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st < loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } // if
+  }
+  __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+// Sets OMP_SCHEDULE to "static" or, when a chunk size is given as the first
+// argument, to "static,<chunk>", then runs the loop check once.
+// Returns 0 on success and 1 if a check failed or the environment could not
+// be prepared.
+int main(int argc, char *argv[])
+{
+  int chunk = 0;
+  if (argc > 1) {
+    // expect chunk size as a parameter
+    // The value is built with snprintf() (declared in <stdio.h>, which this
+    // file includes); the first call only measures the required length.
+    int len = snprintf(NULL, 0, "static,%s", argv[1]);
+    char *buf = (len < 0) ? NULL : (char *)malloc((size_t)len + 1);
+    if (buf == NULL) {
+      printf("failed, cannot build OMP_SCHEDULE value\n");
+      return 1;
+    }
+    snprintf(buf, (size_t)len + 1, "static,%s", argv[1]);
+    chunk = atoi(argv[1]);
+    // Without the variable the run would silently test a different schedule.
+    if (seten("OMP_SCHEDULE",buf,1) != 0) {
+      printf("failed, cannot set OMP_SCHEDULE\n");
+      free(buf);
+      return 1;
+    }
+    printf("Testing schedule(simd:%s)\n", buf);
+    free(buf);
+  } else {
+    if (seten("OMP_SCHEDULE","static",1) != 0) {
+      printf("failed, cannot set OMP_SCHEDULE\n");
+      return 1;
+    }
+    printf("Testing schedule(simd:static)\n");
+  }
+#pragma omp parallel// num_threads(num_th)
+  run_loop(0, 26, 1, chunk);
+  if (err) {
+    printf("failed, err = %d\n", err);
+    return 1;
+  } else {
+    printf("passed\n");
+    return 0;
+  }
+}
diff --git a/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c b/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c
new file mode 100644
index 0000000..a6378fe
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c
@@ -0,0 +1,91 @@
+// RUN: %libomp-compile && %libomp-run 7
+// RUN: %libomp-run 0 && %libomp-run -1
+// RUN: %libomp-run 1 && %libomp-run 2 && %libomp-run 5
+// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run 7
+// RUN: %libomp-run 1 && %libomp-run 2 && %libomp-run 5
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+#define INCR 7
+#define MY_MAX 200
+#define MY_MIN -200
+#ifndef MY_SCHEDULE
+# define MY_SCHEDULE dynamic
+#endif
+
+int num_disp_buffers, num_loops;
+int a, b, a_known_value, b_known_value;
+
+// Runs num_loops pairs of small nowait loops (one counting up, one counting
+// down) and compares the iteration totals gathered in the globals a and b
+// with a_known_value and b_known_value.
+// Returns 1 when both totals match, 0 (after printing them) otherwise.
+int test_kmp_set_disp_num_buffers()
+{
+  a = 0;
+  b = 0;
+  // run many small dynamic loops to stress the dispatch buffer system
+  #pragma omp parallel
+  {
+    int idx, round;
+    for (round = 0; round < num_loops; ++round) {
+      #pragma omp for schedule(MY_SCHEDULE) nowait
+      for (idx = MY_MIN; idx < MY_MAX; idx += INCR) {
+        #pragma omp atomic
+        ++a;
+      }
+      #pragma omp for schedule(MY_SCHEDULE) nowait
+      for (idx = MY_MAX; idx >= MY_MIN; idx -= INCR) {
+        #pragma omp atomic
+        ++b;
+      }
+    }
+  }
+  // detect failure
+  if (a == a_known_value && b == b_known_value)
+    return 1;
+  printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value,
+         b, b_known_value);
+  return 0;
+}
+
+// Usage: <program> num_disp_buffers
+// Applies the requested number of dispatch buffers, works out the expected
+// iteration totals serially and then repeats the parallel test REPETITIONS
+// times. Returns the number of failed repetitions.
+int main(int argc, char** argv)
+{
+  int idx, round;
+  int num_failed = 0;
+
+  if (argc != 2) {
+    fprintf(stderr, "usage: %s num_disp_buffers\n", argv[0]);
+    exit(1);
+  }
+
+  // set the number of dispatch buffers
+  num_disp_buffers = atoi(argv[1]);
+  kmp_set_disp_num_buffers(num_disp_buffers);
+
+  // if specified to use bad num_disp_buffers set num_loops
+  // to something reasonable
+  num_loops = (num_disp_buffers <= 0) ? 10 : num_disp_buffers * 10;
+
+  // figure out the known values to compare with calculated result
+  a_known_value = 0;
+  b_known_value = 0;
+  for (round = 0; round < num_loops; ++round) {
+    for (idx = MY_MIN; idx < MY_MAX; idx += INCR)
+      ++a_known_value;
+    for (idx = MY_MAX; idx >= MY_MIN; idx -= INCR)
+      ++b_known_value;
+  }
+
+  for (idx = 0; idx < REPETITIONS; ++idx) {
+    if (test_kmp_set_disp_num_buffers())
+      continue;
+    ++num_failed;
+  }
+  return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_doacross.c b/final/runtime/test/worksharing/for/omp_doacross.c
new file mode 100644
index 0000000..4187112
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_doacross.c
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile-and-run
+// XFAIL: gcc-4, gcc-5, clang-3.7, clang-3.8, icc-15, icc-16
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#ifndef N
+#define N 750
+#endif
+
+// Fills an N x N matrix with a wavefront computation whose cross-iteration
+// dependences are expressed with ordered(2) / depend(sink) / depend(source).
+// Returns 1 if the bottom-right element has the expected value 2 * (N - 1),
+// 0 otherwise (including when the matrix cannot be allocated).
+int test_doacross() {
+  int i, j;
+  // Allocate the matrix already zeroed; an allocation failure is reported as
+  // a test failure instead of dereferencing a null pointer
+  int *m = (int *)calloc((size_t)N * N, sizeof(int));
+  if (m == NULL) {
+    fprintf(stderr, "test_doacross: cannot allocate %d x %d matrix\n", N, N);
+    return 0;
+  }
+  // Have first row and column be 0, 1, 2, 3, etc.
+  for (i = 0; i < N; ++i)
+    m[i * N] = i;
+  for (j = 0; j < N; ++j)
+    m[j] = j;
+  // Perform wavefront which results in matrix:
+  // 0 1 2 3 4
+  // 1 2 3 4 5
+  // 2 3 4 5 6
+  // 3 4 5 6 7
+  // 4 5 6 7 8
+  #pragma omp parallel shared(m)
+  {
+    int row, col;
+    #pragma omp for ordered(2)
+    for (row = 1; row < N; ++row) {
+      for (col = 1; col < N; ++col) {
+        #pragma omp ordered depend(sink : row - 1, col) depend(sink : row, col - 1)
+        m[row * N + col] = m[(row - 1) * N + col] + m[row * N + (col - 1)] -
+                           m[(row - 1) * N + (col - 1)];
+        #pragma omp ordered depend(source)
+      }
+    }
+  }
+
+  // Check the bottom right element to see if iteration dependencies were held
+  int retval = (m[(N - 1) * N + N - 1] == 2 * (N - 1));
+  free(m);
+  return retval;
+}
+
+// Repeats the doacross test REPETITIONS times.
+// Returns the number of repetitions that failed.
+int main(int argc, char **argv) {
+  int num_failed = 0;
+  int rep = 0;
+  while (rep < REPETITIONS) {
+    if (test_doacross() == 0)
+      ++num_failed;
+    ++rep;
+  }
+  return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_bigbounds.c b/final/runtime/test/worksharing/for/omp_for_bigbounds.c
new file mode 100644
index 0000000..901d760
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_bigbounds.c
@@ -0,0 +1,70 @@
+// RUN: %libomp-compile -DMY_SCHEDULE=static && %libomp-run
+// RUN: %libomp-compile -DMY_SCHEDULE=dynamic && %libomp-run
+// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run
+
+// Only works with Intel Compiler since at least version 15.0
+// XFAIL: gcc, clang
+
+/*
+ * Test that large bounds are handled properly and calculations of
+ * loop iterations don't accidentally overflow
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+#define INCR 50000000
+#define MY_MAX 2000000000
+#define MY_MIN -2000000000
+#ifndef MY_SCHEDULE
+# define MY_SCHEDULE static
+#endif
+
+int a, b, a_known_value, b_known_value;
+
+// Counts the iterations of two worksharing loops whose bounds sit at the
+// edges of the int range (one ascending from INT_MIN, one descending from
+// INT_MAX) into the globals a and b.
+// Prints both totals and returns 1 if they equal a_known_value and
+// b_known_value, 0 otherwise.
+int test_omp_for_bigbounds()
+{
+  int matches;
+
+  a = 0;
+  b = 0;
+  #pragma omp parallel
+  {
+    int idx;
+    #pragma omp for schedule(MY_SCHEDULE)
+    for (idx = INT_MIN; idx < MY_MAX; idx += INCR) {
+      #pragma omp atomic
+      ++a;
+    }
+    #pragma omp for schedule(MY_SCHEDULE)
+    for (idx = INT_MAX; idx >= MY_MIN; idx -= INCR) {
+      #pragma omp atomic
+      ++b;
+    }
+  }
+  printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, b, b_known_value);
+  matches = (a == a_known_value);
+  if (matches)
+    matches = (b == b_known_value);
+  return matches;
+}
+
+// Computes the expected iteration counts serially, then repeats the parallel
+// test REPETITIONS times. Returns the number of failed repetitions.
+int main()
+{
+  int idx;
+  int num_failed = 0;
+
+  // Expected count of the ascending loop
+  a_known_value = 0;
+  for (idx = INT_MIN; idx < MY_MAX; idx += INCR)
+    ++a_known_value;
+
+  // Expected count of the descending loop
+  b_known_value = 0;
+  for (idx = INT_MAX; idx >= MY_MIN; idx -= INCR)
+    ++b_known_value;
+
+  for (idx = 0; idx < REPETITIONS; ++idx) {
+    if (test_omp_for_bigbounds())
+      continue;
+    ++num_failed;
+  }
+  return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_collapse.c b/final/runtime/test/worksharing/for/omp_for_collapse.c
new file mode 100644
index 0000000..a08086d
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_collapse.c
@@ -0,0 +1,51 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Checks that successive calls see i grow by at most one step.
+   The value of the previous call is remembered between calls; a call with
+   i == 1 restarts the sequence from 0.
+   Returns 1 if i is not below the remembered value and exceeds it by no
+   more than 1, otherwise 0. */
+static int check_i_islarger (int i)
+{
+  static int last_i;
+  int previous = (i == 1) ? 0 : last_i;
+
+  last_i = i;
+  if (i < previous)
+    return 0;
+  return (i - previous <= 1);
+}
+
+// Runs a collapsed, ordered 99 x 99 loop nest and feeds the outer index of
+// every iteration, inside the ordered region, to check_i_islarger().
+// Returns 1 if no call reported a violation, 0 otherwise.
+int test_omp_for_collapse()
+{
+  int is_larger = 1;
+
+  #pragma omp parallel
+  {
+    int i,j;
+    int my_islarger = 1; /* this thread's verdict */
+    #pragma omp for private(i,j) schedule(static,1) collapse(2) ordered
+    for (i = 1; i < 100; i++) {
+      for (j =1; j <100; j++) {
+        #pragma omp ordered
+        {
+          if (!check_i_islarger(i))
+            my_islarger = 0;
+        }
+      }
+    }
+    /* fold the per-thread verdict into the shared one */
+    #pragma omp critical
+    {
+      if (!my_islarger)
+        is_larger = 0;
+    }
+  }
+  return (is_larger);
+}
+
+// Repeats the collapse test REPETITIONS times.
+// Returns the number of repetitions that failed.
+int main()
+{
+  int num_failed = 0;
+  int rep = 0;
+
+  while (rep < REPETITIONS) {
+    if (test_omp_for_collapse() == 0)
+      ++num_failed;
+    ++rep;
+  }
+  return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_firstprivate.c b/final/runtime/test/worksharing/for/omp_for_firstprivate.c
new file mode 100644
index 0000000..6c4121c
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_firstprivate.c
@@ -0,0 +1,55 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int sum1;
+#pragma omp threadprivate(sum1)
+
+// Sums 1..LOOPCOUNT in a worksharing loop whose accumulator sum0 is
+// firstprivate and starts at 12345. Each thread stores its running value in
+// the threadprivate sum1, and the sum1 values are then added up.
+// Returns 1 if the total equals 12345 per thread plus the sum of
+// 1..LOOPCOUNT, 0 otherwise.
+int test_omp_for_firstprivate()
+{
+  int sum = 0;
+  int sum0 = 12345;
+  int known_sum;
+  int threadsnum;
+
+  sum1 = 0;
+
+  #pragma omp parallel
+  {
+    int i;
+
+    #pragma omp single
+    threadsnum = omp_get_num_threads();
+
+    #pragma omp for firstprivate(sum0)
+    for (i = 1; i <= LOOPCOUNT; ++i) {
+      sum0 += i;
+      sum1 = sum0;
+    }
+
+    #pragma omp critical
+    sum += sum1;
+  }
+  known_sum = 12345* threadsnum+ (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+  return (known_sum == sum);
+}
+
+// Repeats the firstprivate test REPETITIONS times.
+// Returns the number of repetitions that failed.
+int main()
+{
+  int num_failed = 0;
+  int rep = 0;
+
+  while (rep < REPETITIONS) {
+    if (test_omp_for_firstprivate() == 0)
+      ++num_failed;
+    ++rep;
+  }
+  return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_lastprivate.c b/final/runtime/test/worksharing/for/omp_for_lastprivate.c
new file mode 100644
index 0000000..88694b8
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_lastprivate.c
@@ -0,0 +1,52 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int sum0;
+#pragma omp threadprivate(sum0)
+
+/*
+ * Checks the lastprivate clause on a worksharing loop: after the loop, i0
+ * must equal LOOPCOUNT and the per-thread partial sums must add up to
+ * 1 + 2 + ... + LOOPCOUNT. Returns non-zero when both conditions hold.
+ */
+int test_omp_for_lastprivate()
+{
+  int sum = 0;   /* shared total of the per-thread partial sums */
+  int i0 = -1;   /* overwritten by the lastprivate copy-back */
+  int known_sum;
+
+  #pragma omp parallel
+  {
+    int i;
+
+    sum0 = 0; /* threadprivate partial sum, reset in every thread */
+
+    #pragma omp for schedule(static,7) lastprivate(i0)
+    for (i = 1; i <= LOOPCOUNT; i++) {
+      sum0 += i;
+      i0 = i;
+    }
+
+    #pragma omp critical
+    {
+      sum += sum0;
+    }
+  }
+
+  known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+  fprintf(stderr, "known_sum = %d , sum = %d\n",known_sum,sum);
+  fprintf(stderr, "LOOPCOUNT = %d , i0 = %d\n",LOOPCOUNT,i0);
+  return (known_sum == sum) && (i0 == LOOPCOUNT);
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_lastprivate();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_nowait.c b/final/runtime/test/worksharing/for/omp_for_nowait.c
new file mode 100644
index 0000000..95a4775
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_nowait.c
@@ -0,0 +1,77 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/*
+ * This test will hang if the nowait is not working properly.
+ *
+ * It relies on a thread skipping to the second for construct to
+ * release the threads in the first for construct.
+ *
+ * Also, we use static scheduling to guarantee that one
+ * thread will make it to the second for construct.
+ */
+volatile int release;
+volatile int count;
+
+/* Logs the caller's rank, spins until `release` becomes non-zero, then
+ * atomically bumps the shared `count`. */
+void wait_for_release_then_increment(int rank)
+{
+  fprintf(stderr, "Thread nr %d enters first for construct"
+    " and waits.\n", rank);
+
+  /* Busy-wait on the volatile flag set by release_and_increment(). */
+  while (!release) {
+  }
+
+  #pragma omp atomic
+  count++;
+}
+
+/* Logs the caller's rank, sets the `release` flag that waiting threads spin
+ * on, then atomically bumps the shared `count`. */
+void release_and_increment(int rank)
+{
+  fprintf(stderr, "Thread nr %d sets release to 1\n", rank);
+
+  release = 1;
+
+  #pragma omp atomic
+  count += 1;
+}
+
+/*
+ * Checks the nowait clause: iterations 0..2 of the first loop block until
+ * `release` is set, which only happens in the second loop. The thread that
+ * runs iteration 3 must therefore skip the first loop's barrier.
+ * Returns non-zero if all eight increments of `count` were observed.
+ */
+int test_omp_for_nowait()
+{
+  release = 0;
+  count = 0;
+
+  #pragma omp parallel num_threads(4)
+  {
+    int i;
+    int rank = omp_get_thread_num();
+
+    #pragma omp for schedule(static) nowait
+    for (i = 0; i < 4; i++) {
+      if (i >= 3) {
+        fprintf(stderr, "Thread nr %d enters first for and goes "
+          "immediately to the next for construct to release.\n", rank);
+        #pragma omp atomic
+        count++;
+      } else {
+        wait_for_release_then_increment(rank);
+      }
+    }
+
+    #pragma omp for schedule(static)
+    for (i = 0; i < 4; i++) {
+      release_and_increment(rank);
+    }
+  }
+
+  /* 4 increments from each of the two loops. */
+  return count == 8;
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_nowait();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_ordered.c b/final/runtime/test/worksharing/for/omp_for_ordered.c
new file mode 100644
index 0000000..18ac7eb
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_ordered.c
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+static int last_i = 0;
+
+/* Utility function to check that i is increasing monotonically
+ with each call */
+static int check_i_islarger (int i)
+{
+  /* Compare against the previously recorded value before overwriting it. */
+  int grew = (i > last_i);
+
+  last_i = i;
+  return grew;
+}
+
+/*
+ * Checks the ordered clause: inside the ordered region each iteration
+ * verifies that its index is larger than the previous one seen and adds the
+ * index to a shared sum. Returns non-zero if every check passed and the sum
+ * equals 1 + 2 + ... + 99.
+ */
+int test_omp_for_ordered()
+{
+  int sum = 0;
+  int is_larger = 1;
+  int known_sum;
+
+  last_i = 0;
+
+  #pragma omp parallel
+  {
+    int i;
+    int thread_ok = 1; /* per-thread verdict, merged into is_larger below */
+
+    #pragma omp for schedule(static,1) ordered
+    for (i = 1; i < 100; i++) {
+      #pragma omp ordered
+      {
+        thread_ok = check_i_islarger(i) && thread_ok;
+        sum += i;
+      }
+    }
+
+    #pragma omp critical
+    {
+      is_larger = is_larger && thread_ok;
+    }
+  }
+
+  known_sum = (99 * 100) / 2;
+  return (known_sum == sum) && is_larger;
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_ordered();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_private.c b/final/runtime/test/worksharing/for/omp_for_private.c
new file mode 100644
index 0000000..1f537b9
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_private.c
@@ -0,0 +1,63 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Utility function to spend some time in a loop */
+static void do_some_work()
+{
+  /* The accumulated value is never read; the loop only burns time. */
+  double acc = 0;
+  int k = 0;
+
+  while (k < 1000) {
+    acc += sqrt ((double) k);
+    k++;
+  }
+}
+
+int sum1;
+#pragma omp threadprivate(sum1)
+
+/*
+ * Checks the private clause on a worksharing loop: each thread accumulates
+ * its loop indices through a private sum0 and stores the running total in
+ * the threadprivate sum1. Returns non-zero if the per-thread totals add up
+ * to 1 + 2 + ... + LOOPCOUNT.
+ */
+int test_omp_for_private()
+{
+  int sum = 0;
+  int sum0 = 0; /* outer copy; the loop works on private copies */
+  int known_sum;
+
+  #pragma omp parallel
+  {
+    int i;
+
+    sum1 = 0; /* reset the threadprivate total in every thread */
+
+    #pragma omp for private(sum0) schedule(static,1)
+    for (i = 1; i <= LOOPCOUNT; i++) {
+      sum0 = sum1;
+      #pragma omp flush
+      sum0 += i;
+      do_some_work ();
+      #pragma omp flush
+      sum1 = sum0;
+    }
+
+    #pragma omp critical
+    {
+      sum += sum1;
+    }
+  }
+
+  known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+  return known_sum == sum;
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_private();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_reduction.c b/final/runtime/test/worksharing/for/omp_for_reduction.c
new file mode 100644
index 0000000..28f0907
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_reduction.c
@@ -0,0 +1,339 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */
+#define MAX_FACTOR 10
+#define KNOWN_PRODUCT 3628800 /* 10! */
+
+/*
+ * Exercises the reduction clause of the worksharing loop with the operators
+ * +, -, *, &&, ||, &, | and ^ on int operands, and + and - on double
+ * operands. Each failed sub-test prints a message to stderr.
+ * Returns non-zero if every sub-test passed.
+ */
+int test_omp_for_reduction ()
+{
+  double dt;
+  int sum;
+  int diff;
+  int product = 1;
+  double dsum;
+  double dknown_sum;
+  double ddiff;
+  int logic_and;
+  int logic_or;
+  int bit_and;
+  int bit_or;
+  int exclusiv_bit_or;
+  int *logics;
+  int i;
+  int known_sum;
+  int known_product;
+  double rounding_error = 1.E-9; /* overall rounding error to be
+                                    ignored in the double tests */
+  double dpt;
+  int result = 0; /* number of failed sub-tests */
+  int logicsArray[LOOPCOUNT];
+
+  /* Variables for integer tests */
+  sum = 0;
+  product = 1;
+  known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+  /* Variables for double tests */
+  dt = 1. / 3.; /* base of geometric row for + and - test */
+  dsum = 0.;
+  /* Variables for logic tests; logics aliases the automatic array above,
+   * so it must never be passed to free(). */
+  logics = logicsArray;
+  logic_and = 1;
+  logic_or = 0;
+  /* Variables for bit operator tests */
+  bit_and = 1;
+  bit_or = 0;
+  /* Variable for exclusive bit or */
+  exclusiv_bit_or = 0;
+
+  /************************************************************************/
+  /** Tests for integers                                                 **/
+  /************************************************************************/
+
+  /**** Testing integer addition ****/
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(+:sum)
+    for (j = 1; j <= LOOPCOUNT; j++) {
+      sum = sum + j;
+    }
+  }
+  if (known_sum != sum) {
+    result++;
+    fprintf (stderr, "Error in sum with integers: Result was %d"
+      " instead of %d.\n", sum, known_sum);
+  }
+
+  /**** Testing integer subtraction ****/
+  diff = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(-:diff)
+    for (j = 1; j <= LOOPCOUNT; j++) {
+      diff = diff - j;
+    }
+  }
+  if (diff != 0) {
+    result++;
+    fprintf (stderr, "Error in difference with integers: Result was %d"
+      " instead of 0.\n", diff);
+  }
+
+  /**** Testing integer multiplication ****/
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(*:product)
+    for (j = 1; j <= MAX_FACTOR; j++) {
+      product *= j;
+    }
+  }
+  known_product = KNOWN_PRODUCT;
+  if(known_product != product) {
+    result++;
+    fprintf (stderr,"Error in Product with integers: Result was %d"
+      " instead of %d\n",product,known_product);
+  }
+
+  /************************************************************************/
+  /** Tests for doubles                                                  **/
+  /************************************************************************/
+
+  /**** Testing double addition ****/
+  dsum = 0.;
+  dpt = 1.;
+  for (i = 0; i < DOUBLE_DIGITS; ++i) {
+    dpt *= dt;
+  }
+  /* Closed form of the geometric series sum_{j<DOUBLE_DIGITS} dt^j. */
+  dknown_sum = (1 - dpt) / (1 - dt);
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(+:dsum)
+    for (j = 0; j < DOUBLE_DIGITS; j++) {
+      dsum += pow (dt, j);
+    }
+  }
+  if (fabs (dsum - dknown_sum) > rounding_error) {
+    result++;
+    fprintf (stderr, "\nError in sum with doubles: Result was %f"
+      " instead of: %f (Difference: %E)\n",
+      dsum, dknown_sum, dsum-dknown_sum);
+  }
+
+  /**** Testing double subtraction ****/
+  ddiff = (1 - dpt) / (1 - dt);
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(-:ddiff)
+    for (j = 0; j < DOUBLE_DIGITS; ++j) {
+      ddiff -= pow (dt, j);
+    }
+  }
+  if (fabs (ddiff) > rounding_error) {
+    result++;
+    fprintf (stderr, "Error in Difference with doubles: Result was %E"
+      " instead of 0.0\n", ddiff);
+  }
+
+
+  /************************************************************************/
+  /** Tests for logical values                                           **/
+  /************************************************************************/
+
+  /**** Testing logic and ****/
+  for (i = 0; i < LOOPCOUNT; i++) {
+    logics[i] = 1;
+  }
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(&&:logic_and)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      logic_and = (logic_and && logics[j]);
+    }
+  }
+  if(!logic_and) {
+    result++;
+    fprintf (stderr, "Error in logic AND part 1\n");
+  }
+
+  logic_and = 1;
+  logics[LOOPCOUNT / 2] = 0;
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(&&:logic_and)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      logic_and = logic_and && logics[j];
+    }
+  }
+  if(logic_and) {
+    result++;
+    fprintf (stderr, "Error in logic AND part 2\n");
+  }
+
+  /**** Testing logic or ****/
+  for (i = 0; i < LOOPCOUNT; i++) {
+    logics[i] = 0;
+  }
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(||:logic_or)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      logic_or = logic_or || logics[j];
+    }
+  }
+  if (logic_or) {
+    result++;
+    fprintf (stderr, "Error in logic OR part 1\n");
+  }
+
+  logic_or = 0;
+  logics[LOOPCOUNT / 2] = 1;
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(||:logic_or)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      logic_or = logic_or || logics[j];
+    }
+  }
+  if(!logic_or) {
+    result++;
+    fprintf (stderr, "Error in logic OR part 2\n");
+  }
+
+  /************************************************************************/
+  /** Tests for bit values                                               **/
+  /************************************************************************/
+
+  /**** Testing bit and ****/
+  for (i = 0; i < LOOPCOUNT; ++i) {
+    logics[i] = 1;
+  }
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(&:bit_and)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      bit_and = (bit_and & logics[j]);
+    }
+  }
+  if (!bit_and) {
+    result++;
+    fprintf (stderr, "Error in BIT AND part 1\n");
+  }
+
+  bit_and = 1;
+  logics[LOOPCOUNT / 2] = 0;
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(&:bit_and)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      bit_and = bit_and & logics[j];
+    }
+  }
+  if (bit_and) {
+    result++;
+    fprintf (stderr, "Error in BIT AND part 2\n");
+  }
+
+  /**** Testing bit or ****/
+  for (i = 0; i < LOOPCOUNT; i++) {
+    logics[i] = 0;
+  }
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(|:bit_or)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      bit_or = bit_or | logics[j];
+    }
+  }
+  if (bit_or) {
+    result++;
+    fprintf (stderr, "Error in BIT OR part 1\n");
+  }
+
+  bit_or = 0;
+  logics[LOOPCOUNT / 2] = 1;
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(|:bit_or)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      bit_or = bit_or | logics[j];
+    }
+  }
+  if (!bit_or) {
+    result++;
+    fprintf (stderr, "Error in BIT OR part 2\n");
+  }
+
+  /**** Testing exclusive bit or ****/
+  for (i = 0; i < LOOPCOUNT; i++) {
+    logics[i] = 0;
+  }
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(^:exclusiv_bit_or)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      exclusiv_bit_or = exclusiv_bit_or ^ logics[j];
+    }
+  }
+  if (exclusiv_bit_or) {
+    result++;
+    fprintf (stderr, "Error in EXCLUSIV BIT OR part 1\n");
+  }
+
+  exclusiv_bit_or = 0;
+  logics[LOOPCOUNT / 2] = 1;
+
+  #pragma omp parallel
+  {
+    int j;
+    #pragma omp for schedule(dynamic,1) reduction(^:exclusiv_bit_or)
+    for (j = 0; j < LOOPCOUNT; ++j) {
+      exclusiv_bit_or = exclusiv_bit_or ^ logics[j];
+    }
+  }
+  if (!exclusiv_bit_or) {
+    result++;
+    fprintf (stderr, "Error in EXCLUSIV BIT OR part 2\n");
+  }
+
+  /* Nothing to release: all buffers used here have automatic storage. */
+  return (result == 0);
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_reduction();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_auto.c b/final/runtime/test/worksharing/for/omp_for_schedule_auto.c
new file mode 100644
index 0000000..075617c
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_auto.c
@@ -0,0 +1,69 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int sum1;
+#pragma omp threadprivate(sum1)
+
+/*
+ * Checks schedule(auto) together with firstprivate: every thread that
+ * executes at least one iteration starts from its own copy of sum0 (12345)
+ * and publishes its running total through the threadprivate sum1.
+ * Returns non-zero if the combined total matches the expected value.
+ */
+int test_omp_for_auto()
+{
+  int sum = 0;
+  int sum0 = 12345;
+  int threadsnum = 0; /* number of threads that ran at least one iteration */
+  int known_sum;
+  int j;
+
+  /* One flag per possible thread, set to 1 by threads that took part in the
+   * loop; e.g. with 4 threads, [ 0 | 1 | 1 | 0 ] means only threads 1 and 2
+   * executed iterations. */
+  int max_threads = omp_get_max_threads();
+  int* active_threads = (int*)malloc(sizeof(int)*max_threads);
+
+  for (j = 0; j < max_threads; j++) {
+    active_threads[j] = 0;
+  }
+
+  #pragma omp parallel
+  {
+    int i;
+
+    sum1 = 0;
+
+    #pragma omp for firstprivate(sum0) schedule(auto)
+    for (i = 1; i <= LOOPCOUNT; i++) {
+      active_threads[omp_get_thread_num()] = 1;
+      sum0 += i;
+      sum1 = sum0;
+    }
+
+    #pragma omp critical
+    {
+      sum += sum1;
+    }
+  }
+
+  /* Count the participating threads. */
+  for (j = 0; j < max_threads; j++) {
+    threadsnum += (active_threads[j] != 0);
+  }
+  free(active_threads);
+
+  /* One seed per participating thread plus 1 + 2 + ... + LOOPCOUNT. */
+  known_sum = 12345 * threadsnum + (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+  return known_sum == sum;
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_auto();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c b/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c
new file mode 100644
index 0000000..6d4f59b
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c
@@ -0,0 +1,89 @@
+// RUN: %libomp-compile-and-run
+/*
+ * Test for dynamic scheduling with chunk size
+ * Method: calculate how many times the iteration space is dispatched
+ * and judge if each dispatch has the requested chunk size
+ * unless it is the last one.
+ * It is possible for two adjacent chunks to be assigned to the same thread
+ * Modified by Chunhua Liao
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#define CFDMAX_SIZE 100
+const int chunk_size = 7;
+
+/*
+ * Checks schedule(dynamic,chunk_size): records which thread executed each
+ * iteration, splits the iteration space into runs of equal thread id and
+ * verifies that every run but the last is a multiple of chunk_size long and
+ * that the last run has the matching remainder.
+ * Returns non-zero on success, 0 on failure (including allocation failure).
+ */
+int test_omp_for_schedule_dynamic()
+{
+  int *tids;
+  int i;
+  int tidsArray[CFDMAX_SIZE];
+  int count = 0;     /* number of thread-id changes between neighbours */
+  int tmp_count = 0; /* dispatch times */
+  int *tmp;          /* store chunk size for each dispatch */
+  int result = 0;    /* number of detected errors */
+
+  tids = tidsArray;
+
+  #pragma omp parallel shared(tids)
+  { /* begin of parallel */
+    int tid;
+    tid = omp_get_thread_num ();
+    #pragma omp for schedule(dynamic,chunk_size)
+    for (i = 0; i < CFDMAX_SIZE; i++) {
+      tids[i] = tid;
+    }
+  }
+
+  for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+    if (tids[i] != tids[i + 1]) {
+      count++;
+    }
+  }
+
+  /* count changes of thread id delimit count + 1 runs. */
+  tmp = (int *) malloc (sizeof (int) * (count + 1));
+  if (tmp == NULL) {
+    fprintf(stderr, "Testinternal Error: out of memory.\n");
+    return 0;
+  }
+  tmp[0] = 1;
+
+  for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+    if (tmp_count > count) {
+      printf ("--------------------\nTestinternal Error: List too small!!!\n--------------------\n"); /* Error handling */
+      break;
+    }
+    if (tids[i] != tids[i + 1]) {
+      tmp_count++;
+      tmp[tmp_count] = 1;
+    } else {
+      tmp[tmp_count]++;
+    }
+  }
+  /* is dynamic statement working? */
+  for (i = 0; i < count; i++) {
+    if ((tmp[i]%chunk_size)!=0) {
+      /* it is possible for 2 adjacent chunks assigned to a same thread */
+      result++;
+      fprintf(stderr,"The intermediate dispatch has wrong chunksize.\n");
+    }
+  }
+  if ((tmp[count]%chunk_size)!=(CFDMAX_SIZE%chunk_size)) {
+    result++;
+    fprintf(stderr,"the last dispatch has wrong chunksize.\n");
+  }
+
+  /* Release the run-length table; the test is repeated many times. */
+  free (tmp);
+  return (result==0);
+}
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_schedule_dynamic();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_guided.c b/final/runtime/test/worksharing/for/omp_for_schedule_guided.c
new file mode 100644
index 0000000..1ee7449
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_guided.c
@@ -0,0 +1,217 @@
+// RUN: %libomp-compile-and-run
+
+/* Test for guided scheduling
+ * Ensure threads get chunks in an interleaved fashion first
+ * Then check that the chunk sizes decrease to a stable value
+ * Modified by Chunhua Liao
+ * For example, 100 iteration on 2 threads, chunksize 7
+ * one line for each dispatch, 0/1 means thread id
+ * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24
+ * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 18
+ * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14
+ * 1 1 1 1 1 1 1 1 1 1 10
+ * 0 0 0 0 0 0 0 0 8
+ * 1 1 1 1 1 1 1 7
+ * 0 0 0 0 0 0 0 7
+ * 1 1 1 1 1 1 1 7
+ * 0 0 0 0 0 5
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+#define CFSMAX_SIZE 1000
+#define MAX_TIME 0.005
+
+#ifdef SLEEPTIME
+#undef SLEEPTIME
+#define SLEEPTIME 0.0001
+#endif
+
+/*
+ * Checks schedule(guided): records which thread executed each iteration,
+ * derives the sizes of the consecutive chunks from that record and compares
+ * them with the expected decreasing sequence.
+ * Returns non-zero on success, 0 on failure. All buffers allocated here are
+ * released on every return path.
+ */
+int test_omp_for_schedule_guided()
+{
+  int * tids;
+  int * chunksizes;
+  int notout;
+  int maxiter;
+  int threads;
+  int i;
+  int result;
+
+  /* One slot per iteration plus one end marker. */
+  tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1));
+  maxiter = 0;
+  result = 1;
+  notout = 1;
+
+  /* Testing if enough threads are available for this check. */
+  #pragma omp parallel
+  {
+    #pragma omp single
+    {
+      threads = omp_get_num_threads();
+    }
+  }
+
+  /* ensure there are at least two threads */
+  if (threads < 2) {
+    omp_set_num_threads(2);
+    threads = 2;
+  }
+
+  /* Now the real parallel work:
+   * Each thread will start immediately with the first chunk.
+   */
+  #pragma omp parallel shared(tids,maxiter)
+  { /* begin of parallel */
+    double count;
+    int tid;
+    int j;
+
+    tid = omp_get_thread_num ();
+
+    #pragma omp for nowait schedule(guided)
+    for(j = 0; j < CFSMAX_SIZE; ++j) {
+      count = 0.;
+      #pragma omp flush(maxiter)
+      if (j > maxiter) {
+        #pragma omp critical
+        {
+          maxiter = j;
+        }
+      }
+      /* Wait (bounded by MAX_TIME) while this is the highest iteration
+       * started so far and no thread has left the loop yet. */
+      #pragma omp flush(maxiter,notout)
+      while (notout && (count < MAX_TIME) && (maxiter == j)) {
+        #pragma omp flush(maxiter,notout)
+        my_sleep (SLEEPTIME);
+        count += SLEEPTIME;
+#ifdef VERBOSE
+        printf(".");
+#endif
+      }
+#ifdef VERBOSE
+      if (count > 0.) printf(" waited %lf s\n", count);
+#endif
+      tids[j] = tid;
+#ifdef VERBOSE
+      printf("%d finished by %d\n",j,tid);
+#endif
+    } /* end of for */
+    notout = 0;
+    #pragma omp flush(maxiter,notout)
+  } /* end of parallel */
+
+  /*******************************************************
+   * evaluation of the values                            *
+   *******************************************************/
+  {
+    int determined_chunksize = 1;
+    int last_threadnr = tids[0];
+    int global_chunknr = 0;
+    int openwork = CFSMAX_SIZE;
+    int expected_chunk_size;
+    int* local_chunknr = (int*)malloc(threads * sizeof(int));
+    double c = 1;
+
+    for (i = 0; i < threads; i++)
+      local_chunknr[i] = 0;
+
+    /* End marker: differs from every thread id, so the last chunk is
+     * closed by the loops below. */
+    tids[CFSMAX_SIZE] = -1;
+
+    /*
+     * determine the number of global chunks
+     */
+    for(i = 1; i <= CFSMAX_SIZE; ++i) {
+      if (last_threadnr==tids[i]) {
+        determined_chunksize++;
+      } else {
+        global_chunknr++;
+        local_chunknr[last_threadnr]++;
+        last_threadnr = tids[i];
+        determined_chunksize = 1;
+      }
+    }
+    /* now allocate the memory for saving the sizes of the global chunks */
+    chunksizes = (int*)malloc(global_chunknr * sizeof(int));
+
+    /*
+     * Evaluate the sizes of the global chunks
+     */
+    global_chunknr = 0;
+    determined_chunksize = 1;
+    last_threadnr = tids[0];
+    for (i = 1; i <= CFSMAX_SIZE; ++i) {
+      /* If the threadnumber was the same as before increase the
+       * detected chunksize for this chunk otherwise set the detected
+       * chunksize again to one and save the number of the next
+       * thread in last_threadnr.
+       */
+      if (last_threadnr == tids[i]) {
+        determined_chunksize++;
+      } else {
+        chunksizes[global_chunknr] = determined_chunksize;
+        global_chunknr++;
+        local_chunknr[last_threadnr]++;
+        last_threadnr = tids[i];
+        determined_chunksize = 1;
+      }
+    }
+
+#ifdef VERBOSE
+    fprintf(stderr, "found\texpected\tconstant\n");
+#endif
+
+    /* identify the constant c for the exponential
+       decrease of the chunksize */
+    expected_chunk_size = openwork / threads;
+    c = (double) chunksizes[0] / expected_chunk_size;
+
+    for (i = 0; i < global_chunknr; i++) {
+      /* calculate the new expected chunksize */
+      if (expected_chunk_size > 1)
+        expected_chunk_size = c * openwork / threads;
+#ifdef VERBOSE
+      fprintf(stderr, "%8d\t%8d\t%lf\n", chunksizes[i],
+        expected_chunk_size, c * chunksizes[i]/expected_chunk_size);
+#endif
+      /* check if chunksize is inside the rounding errors */
+      if (abs (chunksizes[i] - expected_chunk_size) >= 2) {
+        result = 0;
+#ifndef VERBOSE
+        fprintf(stderr, "Chunksize differed from expected "
+          "value: %d instead of %d\n", chunksizes[i],
+          expected_chunk_size);
+        /* Early exit: release everything allocated so far. */
+        free (local_chunknr);
+        free (chunksizes);
+        free (tids);
+        return 0;
+#endif
+      } /* end if */
+
+#ifndef VERBOSE
+      if (expected_chunk_size - chunksizes[i] < 0)
+        fprintf(stderr, "Chunksize did not decrease: %d"
+          " instead of %d\n", chunksizes[i],expected_chunk_size);
+#endif
+
+      /* calculating the remaining amount of work */
+      openwork -= chunksizes[i];
+    }
+
+    free (local_chunknr);
+    free (chunksizes);
+  }
+  free (tids);
+  return result;
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_schedule_guided();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c b/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c
new file mode 100644
index 0000000..b957fc3
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c
@@ -0,0 +1,82 @@
+// RUN: %libomp-compile
+// RUN: env OMP_SCHEDULE=static %libomp-run 1 0
+// RUN: env OMP_SCHEDULE=static,10 %libomp-run 1 10
+// RUN: env OMP_SCHEDULE=dynamic %libomp-run 2 1
+// RUN: env OMP_SCHEDULE=dynamic,11 %libomp-run 2 11
+// RUN: env OMP_SCHEDULE=guided %libomp-run 3 1
+// RUN: env OMP_SCHEDULE=guided,12 %libomp-run 3 12
+// RUN: env OMP_SCHEDULE=auto %libomp-run 4 1
+// RUN: env OMP_SCHEDULE=trapezoidal %libomp-run 101 1
+// RUN: env OMP_SCHEDULE=trapezoidal,13 %libomp-run 101 13
+// RUN: env OMP_SCHEDULE=static_steal %libomp-run 102 1
+// RUN: env OMP_SCHEDULE=static_steal,14 %libomp-run 102 14
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int sum;
+char* correct_kind_string;
+omp_sched_t correct_kind;
+int correct_chunk_size;
+
+/*
+ * Checks schedule(runtime): compares the schedule reported by
+ * omp_get_schedule() with the expected kind and chunk size held in
+ * correct_kind / correct_chunk_size, then runs a loop under the runtime
+ * schedule and verifies the sum 1 + 2 + ... + LOOPCOUNT.
+ * Returns non-zero if all checks passed.
+ */
+int test_omp_for_runtime()
+{
+  omp_sched_t kind;
+  int chunk_size;
+  int sum = 0;   /* local accumulator; hides the file-level sum */
+  int error = 0;
+  int known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+
+  omp_get_schedule(&kind, &chunk_size);
+
+  printf("omp_get_schedule() returns: Schedule = %d, Chunk Size = %d\n",
+    kind, chunk_size);
+  if (kind != correct_kind) {
+    printf("kind(%d) != correct_kind(%d)\n", kind, correct_kind);
+    error = 1;
+  }
+  if (chunk_size != correct_chunk_size) {
+    printf("chunk_size(%d) != correct_chunk_size(%d)\n", chunk_size,
+      correct_chunk_size);
+    error = 1;
+  }
+
+  #pragma omp parallel
+  {
+    int i;
+    #pragma omp for schedule(runtime)
+    for (i = 1; i <= LOOPCOUNT; i++) {
+      #pragma omp critical
+      {
+        sum += i;
+      }
+    }
+  }
+
+  if (known_sum != sum) {
+    printf("Known Sum = %d, Calculated Sum = %d\n", known_sum, sum);
+    error = 1;
+  }
+  return !error;
+}
+
+/*
+ * Expects two arguments: the numeric schedule kind and the chunk size that
+ * omp_get_schedule() should report. Runs the test REPETITIONS times and
+ * returns the number of failed runs (1 on a usage error).
+ */
+int main(int argc, char** argv)
+{
+  int failures = 0;
+  int rep;
+
+  if (argc != 3) {
+    fprintf(stderr, "usage: %s schedule_kind chunk_size\n", argv[0]);
+    fprintf(stderr, " Run with envirable OMP_SCHEDULE=kind[,chunk_size]\n");
+    return 1;
+  }
+
+  correct_kind = atoi(argv[1]);
+  correct_chunk_size = atoi(argv[2]);
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_runtime();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_static.c b/final/runtime/test/worksharing/for/omp_for_schedule_static.c
new file mode 100644
index 0000000..f46a544
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_static.c
@@ -0,0 +1,154 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+#define CFSMAX_SIZE 1000
+#define MAX_TIME 0.01
+
+#ifdef SLEEPTIME
+#undef SLEEPTIME
+#define SLEEPTIME 0.0005
+#endif
+
+/*
+ * Checks schedule(static,chunk_size): records which thread executed each
+ * iteration and verifies that the iteration space is split into chunks of
+ * chunk_size iterations handed to the threads in round-robin order.
+ * Returns non-zero on success, 0 on failure. The thread-id record is
+ * released before returning.
+ */
+int test_omp_for_schedule_static()
+{
+  int threads;
+  int i,lasttid;
+  int * tids;
+  int notout;
+  int maxiter;
+  int chunk_size;
+  int tmp_count;
+  int result = 1;
+
+  chunk_size = 7;
+  /* One slot per iteration plus one end marker. */
+  tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1));
+  notout = 1;
+  maxiter = 0;
+
+  #pragma omp parallel shared(tids)
+  { /* begin of parallel*/
+    #pragma omp single
+    {
+      threads = omp_get_num_threads ();
+    } /* end of single */
+  } /* end of parallel */
+
+  if (threads < 2) {
+    omp_set_num_threads(2);
+    threads = 2;
+  }
+  fprintf (stderr,"Using an internal count of %d\nUsing a specified"
+    " chunksize of %d\n", CFSMAX_SIZE, chunk_size);
+  tids[CFSMAX_SIZE] = -1; /* setting endflag */
+  #pragma omp parallel shared(tids)
+  { /* begin of parallel */
+    double count;
+    int tid;
+    int j;
+
+    tid = omp_get_thread_num ();
+
+    #pragma omp for nowait schedule(static,chunk_size)
+    for(j = 0; j < CFSMAX_SIZE; ++j) {
+      count = 0.;
+      #pragma omp flush(maxiter)
+      if (j > maxiter) {
+        #pragma omp critical
+        {
+          maxiter = j;
+        }
+      }
+      /* Wait (bounded by MAX_TIME) while this is the highest iteration
+       * started so far and no thread has left the loop yet. */
+      while (notout && (count < MAX_TIME) && (maxiter == j)) {
+        #pragma omp flush(maxiter,notout)
+        my_sleep (SLEEPTIME);
+        count += SLEEPTIME;
+        printf(".");
+      }
+#ifdef VERBOSE
+      if (count > 0.) printf(" waited %lf s\n", count);
+#endif
+      tids[j] = tid;
+#ifdef VERBOSE
+      printf("%d finished by %d\n",j,tid);
+#endif
+    } /* end of for */
+    notout = 0;
+    #pragma omp flush(maxiter,notout)
+  } /* end of parallel */
+
+  /**** analysing the data in array tids ****/
+
+  lasttid = tids[0];
+  tmp_count = 0;
+
+  for (i = 0; i < CFSMAX_SIZE + 1; ++i) {
+    /* If the work was done by the same thread increase tmp_count by one. */
+    if (tids[i] == lasttid) {
+      tmp_count++;
+#ifdef VERBOSE
+      fprintf (stderr, "%d: %d \n", i, tids[i]);
+#endif
+      continue;
+    }
+
+    /* Check if the next thread has the right thread number. When finding
+     * threadnumber -1 the end should be reached.
+     */
+    if (tids[i] == (lasttid + 1) % threads || tids[i] == -1) {
+      /* checking for the right chunk size */
+      if (tmp_count == chunk_size) {
+        tmp_count = 1;
+        lasttid = tids[i];
+#ifdef VERBOSE
+        fprintf (stderr, "OK\n");
+#endif
+      } else {
+        /* If the chunk size was wrong, check if the end was reached */
+        if (tids[i] == -1) {
+          if (i == CFSMAX_SIZE) {
+            fprintf (stderr, "Last thread had chunk size %d\n",
+              tmp_count);
+            break;
+          } else {
+            fprintf (stderr, "ERROR: Last thread (thread with"
+              " number -1) was found before the end.\n");
+            result = 0;
+          }
+        } else {
+          fprintf (stderr, "ERROR: chunk size was %d. (assigned"
+            " was %d)\n", tmp_count, chunk_size);
+          result = 0;
+        }
+      }
+    } else {
+      fprintf(stderr, "ERROR: Found thread with number %d (should be"
+        " inbetween 0 and %d).", tids[i], threads - 1);
+      result = 0;
+    }
+#ifdef VERBOSE
+    fprintf (stderr, "%d: %d \n", i, tids[i]);
+#endif
+  }
+
+  /* Release the record; the test is repeated many times. */
+  free (tids);
+  return result;
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_schedule_static();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c b/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c
new file mode 100644
index 0000000..922f27a
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c
@@ -0,0 +1,202 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+#define CFSMAX_SIZE 1000
+#define MAX_TIME 0.01
+
+#ifdef SLEEPTIME
+#undef SLEEPTIME
+#define SLEEPTIME 0.0005
+#endif
+
+#define VERBOSE 0
+
+/*
+ * Two-part check of static scheduling.
+ * Part 1: with schedule(static,chunk_size), records which thread executed
+ * each iteration and verifies chunk length and round-robin thread order.
+ * Part 2: runs two schedule(static) nowait loops with the same iteration
+ * count in one parallel region and verifies that each logical iteration was
+ * executed by the same thread in both loops.
+ * Returns non-zero on success, 0 on failure.
+ * Note: VERBOSE is #defined (as 0) earlier in this file, so every
+ * "#ifdef VERBOSE" section below is compiled in.
+ */
+int test_omp_for_schedule_static_3()
+{
+ int threads;
+ int i,lasttid;
+
+ int * tids;
+ int * tids2;
+ int notout;
+ int maxiter;
+ int chunk_size;
+
+ int counter = 0;
+ int tmp_count=1;
+ int lastthreadsstarttid = -1;
+ int result = 1;
+ chunk_size = 7;
+
+ /* One slot per iteration plus one end marker. */
+ tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1));
+ notout = 1;
+ maxiter = 0;
+
+ /* Determine the team size used by a parallel region. */
+ #pragma omp parallel shared(tids,counter)
+ { /* begin of parallel*/
+ #pragma omp single
+ {
+ threads = omp_get_num_threads ();
+ } /* end of single */
+ } /* end of parallel */
+
+ /* Ensure that at least two threads are created */
+ if (threads < 2) {
+ omp_set_num_threads(2);
+ threads = 2;
+ }
+ fprintf (stderr,"Using an internal count of %d\nUsing a"
+ " specified chunksize of %d\n", CFSMAX_SIZE, chunk_size);
+ tids[CFSMAX_SIZE] = -1; /* setting endflag */
+
+ #pragma omp parallel shared(tids)
+ { /* begin of parallel */
+ double count;
+ int tid;
+ int j;
+
+ tid = omp_get_thread_num ();
+
+ #pragma omp for nowait schedule(static,chunk_size)
+ for(j = 0; j < CFSMAX_SIZE; ++j) {
+ count = 0.;
+ #pragma omp flush(maxiter)
+ if (j > maxiter) {
+ #pragma omp critical
+ {
+ maxiter = j;
+ }
+ }
+ /*printf ("thread %d sleeping\n", tid);*/
+ /* Wait (bounded by MAX_TIME) while this is the highest iteration
+ * started so far and no thread has left the loop yet. */
+ while (notout && (count < MAX_TIME) && (maxiter == j)) {
+ #pragma omp flush(maxiter,notout)
+ my_sleep (SLEEPTIME);
+ count += SLEEPTIME;
+ printf(".");
+ }
+#ifdef VERBOSE
+ if (count > 0.) printf(" waited %lf s\n", count);
+#endif
+ /*printf ("thread %d awake\n", tid);*/
+ tids[j] = tid;
+#ifdef VERBOSE
+ printf("%d finished by %d\n",j,tid);
+#endif
+ } /* end of omp parallel for */
+
+ /* Signal the other threads that one thread has left the loop. */
+ notout = 0;
+ #pragma omp flush(maxiter,notout)
+ } /* end of parallel */
+
+ /**** analysing the data in array tids ****/
+
+ lasttid = tids[0];
+ tmp_count = 0;
+
+ for (i = 0; i < CFSMAX_SIZE + 1; ++i) {
+ /* If the work was done by the same thread
+ increase tmp_count by one. */
+ if (tids[i] == lasttid) {
+ tmp_count++;
+#ifdef VERBOSE
+ fprintf (stderr, "%d: %d \n", i, tids[i]);
+#endif
+ continue;
+ }
+
+ /* Check if the next thread has the right thread number.
+ * When finding threadnumber -1 the end should be reached.
+ */
+ if (tids[i] == (lasttid + 1) % threads || tids[i] == -1) {
+ /* checking for the right chunk size */
+ if (tmp_count == chunk_size) {
+ tmp_count = 1;
+ lasttid = tids[i];
+#ifdef VERBOSE
+ fprintf (stderr, "OK\n");
+#endif
+ } else {
+ /* If the chunk size was wrong, check if the end was reached */
+ if (tids[i] == -1) {
+ if (i == CFSMAX_SIZE) {
+ fprintf (stderr, "Last thread had chunk size %d\n",
+ tmp_count);
+ break;
+ } else {
+ fprintf (stderr, "ERROR: Last thread (thread with"
+ " number -1) was found before the end.\n");
+ result = 0;
+ }
+ } else {
+ fprintf (stderr, "ERROR: chunk size was %d. (assigned"
+ " was %d)\n", tmp_count, chunk_size);
+ result = 0;
+ }
+ }
+ } else {
+ fprintf(stderr, "ERROR: Found thread with number %d (should be"
+ " inbetween 0 and %d).", tids[i], threads - 1);
+ result = 0;
+ }
+#ifdef VERBOSE
+ fprintf (stderr, "%d: %d \n", i, tids[i]);
+#endif
+ }
+
+ /* Now we check if several loop regions in one parallel region have the
+ * same logical assignment of chunks to threads. We use the nowait
+ * clause to increase the probability to get an error. */
+
+ /* First we allocate some more memory */
+ free (tids);
+ tids = (int *) malloc (sizeof (int) * LOOPCOUNT);
+ tids2 = (int *) malloc (sizeof (int) * LOOPCOUNT);
+
+ #pragma omp parallel
+ {
+ {
+ int n;
+ #pragma omp for schedule(static) nowait
+ for (n = 0; n < LOOPCOUNT; n++) {
+ /* Delay the last iteration so that its thread lags behind the others. */
+ if (LOOPCOUNT == n + 1 )
+ my_sleep(SLEEPTIME);
+
+ tids[n] = omp_get_thread_num();
+ }
+ }
+ {
+ int m;
+ /* Same iteration count as above, shifted by one: logical iteration
+ * m-1 is recorded in tids2[m-1]. */
+ #pragma omp for schedule(static) nowait
+ for (m = 1; m <= LOOPCOUNT; m++) {
+ tids2[m-1] = omp_get_thread_num();
+ }
+ }
+ }
+
+ /* Both loops must have mapped every logical iteration to the same thread. */
+ for (i = 0; i < LOOPCOUNT; i++)
+ if (tids[i] != tids2[i]) {
+ fprintf (stderr, "Chunk no. %d was assigned once to thread %d and"
+ " later to thread %d.\n", i, tids[i],tids2[i]);
+ result = 0;
+ }
+
+ free (tids);
+ free (tids2);
+ return result;
+}
+
+/* Runs the test REPETITIONS times; the exit status is the failure count. */
+int main()
+{
+  int failures = 0;
+  int rep;
+
+  for (rep = 0; rep < REPETITIONS; ++rep)
+    failures += !test_omp_for_schedule_static_3();
+
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c b/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c
new file mode 100644
index 0000000..3b3bf7d
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c
@@ -0,0 +1,35 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises firstprivate on a combined "parallel for".  i2 is set to 3
+  * before the loop and listed as firstprivate; every iteration adds i + i2
+  * to a '+' reduction variable.  Returns nonzero when the reduced sum equals
+  * the closed-form value.
+  */
+ int test_omp_parallel_for_firstprivate()
+ {
+   int i;
+   int i2 = 3;
+   int sum = 0;
+   int expected;
+
+   #pragma omp parallel for reduction(+:sum) private(i) firstprivate(i2)
+   for (i = 1; i <= LOOPCOUNT; i++) {
+     sum += i + i2;
+   }
+
+   /* 1 + 2 + ... + LOOPCOUNT, plus i2 once per iteration. */
+   expected = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2 + i2 * LOOPCOUNT;
+   return expected == sum;
+ } /* end of test_omp_parallel_for_firstprivate */
+
+ /* Test driver: calls test_omp_parallel_for_firstprivate() REPETITIONS times
+  * and returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_for_firstprivate();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_if.c b/final/runtime/test/worksharing/for/omp_parallel_for_if.c
new file mode 100644
index 0000000..57fe498
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_if.c
@@ -0,0 +1,42 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises the if clause on a combined "parallel for".
+  *
+  * control is 0, so the clause expression (control==1) is false.  The loop
+  * body records omp_get_num_threads() and accumulates i into sum; both are
+  * plain shared variables with no synchronisation, so the check below only
+  * holds when the loop really ran on a single thread.
+  *
+  * Returns nonzero when sum equals 0 + 1 + ... + LOOPCOUNT and the recorded
+  * thread count is 1.  The recorded thread count is also printed to stderr.
+  */
+ int test_omp_parallel_for_if()
+ {
+   int known_sum;
+   int num_threads;
+   int sum;
+   int i;
+   int control;
+
+   control = 0;
+   num_threads=0;
+   sum = 0;
+
+   #pragma omp parallel for private(i) if (control==1)
+   for (i=0; i <= LOOPCOUNT; i++) {
+     num_threads = omp_get_num_threads();
+     sum = sum + i;
+   }
+
+   known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+   fprintf(stderr, "Number of threads determined by"
+     " omp_get_num_threads: %d\n", num_threads);
+   return (known_sum == sum && num_threads == 1);
+ } /* end of test_omp_parallel_for_if */
+
+ /* Test driver: calls test_omp_parallel_for_if() REPETITIONS times and
+  * returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_for_if();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c b/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c
new file mode 100644
index 0000000..a53cfb2
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c
@@ -0,0 +1,37 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises lastprivate on a combined "parallel for".  Every iteration
+  * stores its index in the lastprivate variable i0 and adds the index to a
+  * '+' reduction variable.  Returns nonzero when the reduced sum is
+  * 1 + 2 + ... + LOOPCOUNT and i0 ends up equal to LOOPCOUNT, the index of
+  * the final iteration.
+  */
+ int test_omp_parallel_for_lastprivate()
+ {
+   int i;
+   int i0 = -1;
+   int sum = 0;
+   int expected_sum;
+
+   #pragma omp parallel for reduction(+:sum) \
+     schedule(static,7) private(i) lastprivate(i0)
+   for (i = 1; i <= LOOPCOUNT; i++) {
+     sum += i;
+     i0 = i;
+   } /* end of parallel for */
+
+   expected_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+   if (expected_sum != sum) {
+     return 0;
+   }
+   return i0 == LOOPCOUNT;
+ } /* end of test_omp_parallel_for_lastprivate */
+
+ /* Test driver: calls test_omp_parallel_for_lastprivate() REPETITIONS times
+  * and returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_for_lastprivate();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c b/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c
new file mode 100644
index 0000000..5fef460
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c
@@ -0,0 +1,64 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /* Argument passed to the most recent check_i_islarger2() call; the test
+  * resets it to 0 before each run. */
+ static int last_i = 0;
+
+ /* File-scope threadprivate i.  The functions in this file each declare
+  * their own i (local or parameter), which hides this one inside them. */
+ int i;
+ #pragma omp threadprivate(i)
+
+ /* Variable ii is used to avoid problems with a threadprivate variable used as a loop
+ * index. See test omp_threadprivate_for.
+ */
+ static int ii;
+ #pragma omp threadprivate(ii)
+
+/*!
+ Utility function: returns true if the passed argument is larger than
+ the argument of the last call of this function.
+ */
+static int check_i_islarger2(int i)
+{
+ int islarger;
+ islarger = (i > last_i);
+ last_i = i;
+ return (islarger);
+}
+
+ /*
+  * Exercises the ordered clause/construct on a combined "parallel for".
+  *
+  * Iterations 1..99 each enter an ordered region in which the iteration
+  * number is compared with the one seen by the previous entry (via
+  * check_i_islarger2) and added to sum.  Returns nonzero when sum equals
+  * 1 + 2 + ... + 99 and every entry saw a larger number than the one before.
+  * Both values are also printed to stderr.
+  */
+ int test_omp_parallel_for_ordered()
+ {
+ int sum;
+ int is_larger;
+ int known_sum;
+ /* Local loop index; hides the file-scope threadprivate i. */
+ int i;
+
+ sum = 0;
+ is_larger = 1;
+ /* Start the "previous value" below the first iteration number (1). */
+ last_i = 0;
+ #pragma omp parallel for schedule(static,1) private(i) ordered
+ for (i = 1; i < 100; i++) {
+ /* Copy the index into the threadprivate ii used inside the ordered region. */
+ ii = i;
+ #pragma omp ordered
+ {
+ /* The call is the left operand of &&, so it runs (and updates last_i)
+  * even after is_larger has already dropped to 0.  sum, is_larger and
+  * last_i are shared and are only touched inside this ordered region. */
+ is_larger = check_i_islarger2 (ii) && is_larger;
+ sum = sum + ii;
+ }
+ }
+ /* 1 + 2 + ... + 99 */
+ known_sum = (99 * 100) / 2;
+ fprintf (stderr," known_sum = %d , sum = %d \n", known_sum, sum);
+ fprintf (stderr," is_larger = %d\n", is_larger);
+ return (known_sum == sum) && is_larger;
+ }
+
+ /* Test driver: calls test_omp_parallel_for_ordered() REPETITIONS times and
+  * returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_for_ordered();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_private.c b/final/runtime/test/worksharing/for/omp_parallel_for_private.c
new file mode 100644
index 0000000..1231d36
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_private.c
@@ -0,0 +1,50 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+ /*! Busy-work helper: accumulates sqrt(k) for k = 0..999 into a local that
+  *  is never read afterwards; it exists only to spend some time. */
+ static void do_some_work (void)
+ {
+   double total = 0;
+   int k = 0;
+
+   while (k < 1000) {
+     total += sqrt (k);
+     ++k;
+   }
+ }
+
+ /*
+  * Exercises private on a combined "parallel for".  Each iteration stores
+  * its index in the private variable i2, calls do_some_work() between two
+  * flushes, and then adds i2 to a '+' reduction variable.  Returns nonzero
+  * when the reduced sum equals 1 + 2 + ... + LOOPCOUNT.
+  */
+ int test_omp_parallel_for_private()
+ {
+   int i;
+   int i2 = 0;
+   int sum = 0;
+   int expected;
+
+   #pragma omp parallel for reduction(+:sum) schedule(static,1) private(i) private(i2)
+   for (i = 1; i <= LOOPCOUNT; i++) {
+     i2 = i;
+     #pragma omp flush
+     do_some_work ();
+     #pragma omp flush
+     sum += i2;
+   } /* end of for */
+
+   expected = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+   return expected == sum;
+ } /* end of test_omp_parallel_for_private */
+
+ /* Test driver: calls test_omp_parallel_for_private() REPETITIONS times and
+  * returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_for_private();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c b/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c
new file mode 100644
index 0000000..118d730
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c
@@ -0,0 +1,266 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */
+#define MAX_FACTOR 10
+#define KNOWN_PRODUCT 3628800 /* 10! */
+
+ /*
+  * Exercises the reduction clause on a combined "parallel for" with
+  * schedule(dynamic,1) for the operators +, -, *, &&, ||, &, | and ^.
+  *
+  * Each sub-test compares the reduced value with an independently known
+  * result; a mismatch prints a message to stderr and increments result.
+  * Returns 1 when no sub-test failed, 0 otherwise.
+  */
+ int test_omp_parallel_for_reduction()
+ {
+ int sum;
+ int known_sum;
+ double dsum;
+ double dknown_sum;
+ double dt=0.5; /* base of geometric row for + and - test*/
+ double rounding_error= 1.E-9;
+ int diff;
+ double ddiff;
+ int product;
+ int known_product;
+ int logic_and;
+ int logic_or;
+ int bit_and;
+ int bit_or;
+ int exclusiv_bit_or;
+ int logics[LOOPCOUNT];
+ int i;
+ double dpt;
+ int result;
+
+ sum =0;
+ dsum=0;
+ /* Replaces the 0.5 given in the declaration above. */
+ dt = 1./3.;
+ /* Number of failed sub-tests. */
+ result = 0;
+ product = 1;
+ logic_and=1;
+ logic_or=0;
+ bit_and=1;
+ bit_or=0;
+ exclusiv_bit_or=0;
+
+ /* Tests for integers */
+ known_sum = (LOOPCOUNT*(LOOPCOUNT+1))/2;
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:sum)
+ for (i=1;i<=LOOPCOUNT;i++) {
+ sum=sum+i;
+ }
+ if(known_sum!=sum) {
+ result++;
+ fprintf(stderr,"Error in sum with integers: Result was %d"
+ " instead of %d\n",sum,known_sum);
+ }
+
+ /* Start from the full sum and subtract every index: 0 is expected. */
+ diff = (LOOPCOUNT*(LOOPCOUNT+1))/2;
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:diff)
+ for (i=1;i<=LOOPCOUNT;++i) {
+ diff=diff-i;
+ }
+ if(diff != 0) {
+ result++;
+ fprintf(stderr,"Error in difference with integers: Result was %d"
+ " instead of 0.\n",diff);
+ }
+
+ /* Tests for doubles */
+ dsum=0;
+ /* dpt = dt^DOUBLE_DIGITS, used for the closed form of the geometric sum
+  * dt^0 + dt^1 + ... + dt^(DOUBLE_DIGITS-1) = (1 - dpt) / (1 - dt). */
+ dpt=1;
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dpt*=dt;
+ }
+ dknown_sum = (1-dpt)/(1-dt);
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:dsum)
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dsum += pow(dt,i);
+ }
+ if( fabs(dsum-dknown_sum) > rounding_error ) {
+ result++;
+ fprintf(stderr,"Error in sum with doubles: Result was %f"
+ " instead of %f (Difference: %E)\n",
+ dsum, dknown_sum, dsum-dknown_sum);
+ }
+
+ /* Same geometric series, this time subtracted from its closed-form value;
+  * the result should be 0 within rounding_error. */
+ dpt=1;
+
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dpt*=dt;
+ }
+ fprintf(stderr,"\n");
+ ddiff = (1-dpt)/(1-dt);
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:ddiff)
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ ddiff -= pow(dt,i);
+ }
+ if( fabs(ddiff) > rounding_error) {
+ result++;
+ fprintf(stderr,"Error in Difference with doubles: Result was %E"
+ " instead of 0.0\n",ddiff);
+ }
+
+ /* Tests for integers */
+ /* Product 1 * 2 * ... * MAX_FACTOR, compared with KNOWN_PRODUCT. */
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(*:product)
+ for(i=1;i<=MAX_FACTOR;i++) {
+ product *= i;
+ }
+ known_product = KNOWN_PRODUCT;
+ if(known_product != product) {
+ result++;
+ fprintf(stderr,"Error in Product with integers: Result was %d"
+ " instead of %d\n\n",product,known_product);
+ }
+
+ /* Tests for logic AND */
+ /* Part 1: every element is 1, so the reduction must stay true. */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=1;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(&&:logic_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_and = (logic_and && logics[i]);
+ }
+ if(!logic_and) {
+ result++;
+ fprintf(stderr,"Error in logic AND part 1.\n");
+ }
+
+ /* Part 2: a single 0 in the middle must turn the reduction false. */
+ logic_and = 1;
+ logics[LOOPCOUNT/2]=0;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(&&:logic_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_and = logic_and && logics[i];
+ }
+ if(logic_and) {
+ result++;
+ fprintf(stderr,"Error in logic AND part 2.\n");
+ }
+
+ /* Tests for logic OR */
+ /* Part 1: every element is 0, so the reduction must stay false. */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(||:logic_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_or = logic_or || logics[i];
+ }
+ if(logic_or) {
+ result++;
+ fprintf(stderr,"Error in logic OR part 1.\n");
+ }
+ /* Part 2: a single 1 in the middle must turn the reduction true. */
+ logic_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(||:logic_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_or = logic_or || logics[i];
+ }
+ if(!logic_or) {
+ result++;
+ fprintf(stderr,"Error in logic OR part 2.\n");
+ }
+
+ /* Tests for bitwise AND */
+ /* Part 1: every element is 1, so bit_and must stay 1. */
+ for(i=0;i<LOOPCOUNT;++i) {
+ logics[i]=1;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(&:bit_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_and = (bit_and & logics[i]);
+ }
+ if(!bit_and) {
+ result++;
+ fprintf(stderr,"Error in BIT AND part 1.\n");
+ }
+
+ /* Part 2: a single 0 in the middle must clear bit_and. */
+ bit_and = 1;
+ logics[LOOPCOUNT/2]=0;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(&:bit_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_and = bit_and & logics[i];
+ }
+ if(bit_and) {
+ result++;
+ fprintf(stderr,"Error in BIT AND part 2.\n");
+ }
+
+ /* Tests for bitwise OR */
+ /* Part 1: every element is 0, so bit_or must stay 0. */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(|:bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ if(bit_or) {
+ result++;
+ fprintf(stderr,"Error in BIT OR part 1\n");
+ }
+ /* Part 2: a single 1 in the middle must set bit_or. */
+ bit_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(|:bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ if(!bit_or) {
+ result++;
+ fprintf(stderr,"Error in BIT OR part 2\n");
+ }
+
+ /* Tests for bitwise XOR */
+ /* Part 1: every element is 0, so the XOR must stay 0. */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(^:exclusiv_bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ if(exclusiv_bit_or) {
+ result++;
+ fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n");
+ }
+
+ /* Part 2: exactly one element is 1, so the XOR must come out nonzero. */
+ exclusiv_bit_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(^:exclusiv_bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ if(!exclusiv_bit_or) {
+ result++;
+ fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n");
+ }
+
+ /*printf("\nResult:%d\n",result);*/
+ return (result==0);
+ }
+
+ /* Test driver: calls test_omp_parallel_for_reduction() REPETITIONS times
+  * and returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_for_reduction();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c
new file mode 100644
index 0000000..1780fab
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c
@@ -0,0 +1,54 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises firstprivate on a combined "parallel sections" construct.
+  * sum0 holds 11 before the construct and is listed as firstprivate; each of
+  * the three sections adds sum0 to the shared sum (initially 7) inside a
+  * critical region.  Returns nonzero when sum ends up as 7 + 3 * 11.
+  */
+ int test_omp_parallel_sections_firstprivate()
+ {
+   int sum = 7;
+   int sum0 = 11;
+   int expected;
+
+   #pragma omp parallel sections firstprivate(sum0)
+   {
+     #pragma omp section
+     {
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+     #pragma omp section
+     {
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+     #pragma omp section
+     {
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+   }
+
+   expected = 11 * 3 + 7;
+   return expected == sum;
+ } /* end of test_omp_parallel_sections_firstprivate */
+
+ /* Test driver: calls test_omp_parallel_sections_firstprivate() REPETITIONS
+  * times and returns the number of calls that reported failure. */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_sections_firstprivate();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_lastprivate.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_lastprivate.c
new file mode 100644
index 0000000..9b775ec
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_lastprivate.c
@@ -0,0 +1,71 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises lastprivate on a combined "parallel sections" construct.
+  * Three sections sum the index ranges [1,400), [400,700) and [700,1000)
+  * into a private sum0, record each index in the lastprivate i0, and add
+  * their partial sum to the shared sum inside a critical region.
+  * Returns nonzero when sum is 1 + ... + 999 and i0 is 999, the last index
+  * written by the third section.
+  */
+ int test_omp_parallel_sections_lastprivate()
+ {
+   int i;
+   int i0 = -1;
+   int sum = 0;
+   int sum0 = 0;
+   int expected_sum;
+
+   #pragma omp parallel sections private(i,sum0) lastprivate(i0)
+   {
+     #pragma omp section
+     {
+       sum0 = 0;
+       for (i = 1; i < 400; i++) {
+         sum0 += i;
+         i0 = i;
+       }
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+     #pragma omp section
+     {
+       sum0 = 0;
+       for (i = 400; i < 700; i++) {
+         sum0 += i;
+         i0 = i;
+       }
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+     #pragma omp section
+     {
+       sum0 = 0;
+       for (i = 700; i < 1000; i++) {
+         sum0 += i;
+         i0 = i;
+       }
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+   }
+
+   expected_sum = (999 * 1000) / 2;
+   if (expected_sum != sum) {
+     return 0;
+   }
+   return i0 == 999;
+ }
+
+ /* Test driver: calls test_omp_parallel_sections_lastprivate() REPETITIONS
+  * times and returns the number of calls that reported failure. */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_sections_lastprivate();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_private.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_private.c
new file mode 100644
index 0000000..7dab295
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_private.c
@@ -0,0 +1,64 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises private on a combined "parallel sections" construct.
+  * Three sections sum the index ranges [1,400), [400,700) and [700,1000)
+  * into the private sum0 and then add it to the shared sum (initially 7)
+  * inside a critical region.  Returns nonzero when sum ends up as
+  * 7 + (1 + ... + 999).
+  */
+ int test_omp_parallel_sections_private()
+ {
+   int i;
+   int sum = 7;
+   int sum0 = 0;
+   int expected;
+
+   #pragma omp parallel sections private(sum0, i)
+   {
+     #pragma omp section
+     {
+       sum0 = 0;
+       for (i = 1; i < 400; i++) {
+         sum0 += i;
+       }
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+     #pragma omp section
+     {
+       sum0 = 0;
+       for (i = 400; i < 700; i++) {
+         sum0 += i;
+       }
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+     #pragma omp section
+     {
+       sum0 = 0;
+       for (i = 700; i < 1000; i++) {
+         sum0 += i;
+       }
+       #pragma omp critical
+       {
+         sum += sum0;
+       }
+     }
+   }
+
+   expected = (999 * 1000) / 2 + 7;
+   return expected == sum;
+ } /* end of test_omp_parallel_sections_private */
+
+ /* Test driver: calls test_omp_parallel_sections_private() REPETITIONS times
+  * and returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_sections_private();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_reduction.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_reduction.c
new file mode 100644
index 0000000..0d49865
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_reduction.c
@@ -0,0 +1,508 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises the reduction clause on the combined "parallel sections"
+  * construct for the operators +, -, *, &&, ||, &, | and ^.
+  *
+  * Every sub-test splits its index range over three sections and compares
+  * the reduced value with an independently known result.  A failing
+  * sub-test prints one newline-terminated message to stderr and increments
+  * an error counter.
+  *
+  * Returns 1 when every sub-test passed, 0 otherwise.
+  */
+ int test_omp_parallel_sections_reduction()
+ {
+   int sum;
+   int known_sum;
+   double dpt;
+   double dsum;
+   double dknown_sum;
+   double dt=0.5; /* base of geometric row for + and - test*/
+   double rounding_error= 1.E-5;
+   int diff;
+   double ddiff;
+   int product;
+   int known_product;
+   int logic_and;
+   int bit_and;
+   int logic_or;
+   int bit_or;
+   int exclusiv_bit_or;
+   int logics[1000];
+   int i;
+   int result;   /* number of failed sub-tests */
+
+   sum = 7;
+   dsum=0;
+   product =1;
+   dpt = 1;
+   logic_and=1;
+   bit_and=1;
+   logic_or=0;
+   bit_or=0;
+   exclusiv_bit_or=0;
+   result =0;
+
+   // Test summation of integers: 7 + (1 + ... + 999)
+   known_sum = (999*1000)/2+7;
+   #pragma omp parallel sections private(i) reduction(+:sum)
+   {
+     #pragma omp section
+     {
+       for (i=1;i<300;i++) {
+         sum=sum+i;
+       }
+     }
+     #pragma omp section
+     {
+       for (i=300;i<700;i++) {
+         sum=sum+i;
+       }
+     }
+     #pragma omp section
+     {
+       for (i=700;i<1000;i++) {
+         sum=sum+i;
+       }
+     }
+   }
+   if(known_sum!=sum) {
+     result++;
+     fprintf(stderr,"Error in sum with integers: Result was %d"
+       " instead of %d.\n",sum, known_sum);
+   }
+
+   // Test differences of integers: start at the full sum, subtract every
+   // index, expect 0
+   diff = (999*1000)/2;
+   #pragma omp parallel sections private(i) reduction(-:diff)
+   {
+     #pragma omp section
+     {
+       for (i=1;i<300;i++) {
+         diff=diff-i;
+       }
+     }
+     #pragma omp section
+     {
+       for (i=300;i<700;i++) {
+         diff=diff-i;
+       }
+     }
+     #pragma omp section
+     {
+       for (i=700;i<1000;i++) {
+         diff=diff-i;
+       }
+     }
+   }
+   if(diff != 0) {
+     result++;
+     fprintf(stderr,"Error in Difference with integers: Result was %d"
+       " instead of 0.\n",diff);
+   }
+
+   // Test summation of doubles: dpt becomes dt^20, giving the closed form
+   // (1 - dt^20) / (1 - dt) of dt^0 + ... + dt^19
+   for (i=0;i<20;++i) {
+     dpt*=dt;
+   }
+   dknown_sum = (1-dpt)/(1-dt);
+   #pragma omp parallel sections private(i) reduction(+:dsum)
+   {
+     #pragma omp section
+     {
+       for (i=0;i<6;++i) {
+         dsum += pow(dt,i);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=6;i<12;++i) {
+         dsum += pow(dt,i);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=12;i<20;++i) {
+         dsum += pow(dt,i);
+       }
+     }
+   }
+   if( fabs(dsum-dknown_sum) > rounding_error ) {
+     result++;
+     fprintf(stderr,"Error in sum with doubles: Result was %f"
+       " instead of %f (Difference: %E)\n",
+       dsum, dknown_sum, dsum-dknown_sum);
+   }
+
+   // Test differences of doubles: the same series subtracted from its
+   // closed-form value, expect 0 within rounding_error
+   dpt=1;
+   for (i=0;i<20;++i) {
+     dpt*=dt;
+   }
+   fprintf(stderr,"\n");
+   ddiff = (1-dpt)/(1-dt);
+   #pragma omp parallel sections private(i) reduction(-:ddiff)
+   {
+     #pragma omp section
+     {
+       for (i=0;i<6;++i) {
+         ddiff -= pow(dt,i);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=6;i<12;++i) {
+         ddiff -= pow(dt,i);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=12;i<20;++i) {
+         ddiff -= pow(dt,i);
+       }
+     }
+   }
+   if( fabs(ddiff) > rounding_error) {
+     result++;
+     fprintf(stderr,"Error in Difference with doubles: Result was %E"
+       " instead of 0.0\n",ddiff);
+   }
+
+   // Test product of integers: 1 * 2 * ... * 10 = 3628800
+   known_product = 3628800;
+   #pragma omp parallel sections private(i) reduction(*:product)
+   {
+     #pragma omp section
+     {
+       for(i=1;i<3;i++) {
+         product *= i;
+       }
+     }
+     #pragma omp section
+     {
+       for(i=3;i<7;i++) {
+         product *= i;
+       }
+     }
+     #pragma omp section
+     {
+       for(i=7;i<11;i++) {
+         product *= i;
+       }
+     }
+   }
+   if(known_product != product) {
+     result++;
+     fprintf(stderr,"Error in Product with integers: Result was %d"
+       " instead of %d\n",product,known_product);
+   }
+
+   // Test logical AND, part 1: all elements are 1, result must stay true.
+   // (The logical AND/OR sections start at index 1, so logics[0] is not
+   // examined by them.)
+   for(i=0;i<1000;i++) {
+     logics[i]=1;
+   }
+
+   #pragma omp parallel sections private(i) reduction(&&:logic_and)
+   {
+     #pragma omp section
+     {
+       for (i=1;i<300;i++) {
+         logic_and = (logic_and && logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=300;i<700;i++) {
+         logic_and = (logic_and && logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=700;i<1000;i++) {
+         logic_and = (logic_and && logics[i]);
+       }
+     }
+   }
+   if(!logic_and) {
+     result++;
+     fprintf(stderr,"Error in logic AND part 1\n");
+   }
+
+   // Logical AND, part 2: a single 0 at index 501 must make the result false
+   logic_and = 1;
+   logics[501] = 0;
+
+   #pragma omp parallel sections private(i) reduction(&&:logic_and)
+   {
+     #pragma omp section
+     {
+       for (i=1;i<300;i++) {
+         logic_and = (logic_and && logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=300;i<700;i++) {
+         logic_and = (logic_and && logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=700;i<1000;i++) {
+         logic_and = (logic_and && logics[i]);
+       }
+     }
+   }
+   if(logic_and) {
+     result++;
+     /* Newline added so this message does not run into the next one. */
+     fprintf(stderr,"Error in logic AND part 2\n");
+   }
+
+   // Test logical OR, part 1: all elements are 0, result must stay false
+   for(i=0;i<1000;i++) {
+     logics[i]=0;
+   }
+
+   #pragma omp parallel sections private(i) reduction(||:logic_or)
+   {
+     #pragma omp section
+     {
+       for (i=1;i<300;i++) {
+         logic_or = (logic_or || logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=300;i<700;i++) {
+         logic_or = (logic_or || logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=700;i<1000;i++) {
+         logic_or = (logic_or || logics[i]);
+       }
+     }
+   }
+   if(logic_or) {
+     result++;
+     fprintf(stderr,"Error in logic OR part 1\n");
+   }
+
+   // Logical OR, part 2: a single 1 at index 501 must make the result true
+   logic_or = 0;
+   logics[501]=1;
+
+   #pragma omp parallel sections private(i) reduction(||:logic_or)
+   {
+     #pragma omp section
+     {
+       for (i=1;i<300;i++) {
+         logic_or = (logic_or || logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=300;i<700;i++) {
+         logic_or = (logic_or || logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for (i=700;i<1000;i++) {
+         logic_or = (logic_or || logics[i]);
+       }
+     }
+   }
+   if(!logic_or) {
+     result++;
+     fprintf(stderr,"Error in logic OR part 2\n");
+   }
+
+   // Test bitwise AND, part 1: all elements are 1, bit_and must stay 1
+   for(i=0;i<1000;++i) {
+     logics[i]=1;
+   }
+
+   #pragma omp parallel sections private(i) reduction(&:bit_and)
+   {
+     #pragma omp section
+     {
+       for(i=0;i<300;++i) {
+         bit_and = (bit_and & logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for(i=300;i<700;++i) {
+         bit_and = (bit_and & logics[i]);
+       }
+     }
+     #pragma omp section
+     {
+       for(i=700;i<1000;++i) {
+         bit_and = (bit_and & logics[i]);
+       }
+     }
+   }
+   if(!bit_and) {
+     result++;
+     fprintf(stderr,"Error in BIT AND part 1\n");
+   }
+
+   // Bitwise AND, part 2: a single 0 at index 501 must clear bit_and
+   bit_and = 1;
+   logics[501]=0;
+
+   #pragma omp parallel sections private(i) reduction(&:bit_and)
+   {
+     #pragma omp section
+     {
+       for(i=0;i<300;++i) {
+         bit_and = bit_and & logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=300;i<700;++i) {
+         bit_and = bit_and & logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=700;i<1000;++i) {
+         bit_and = bit_and & logics[i];
+       }
+     }
+   }
+   if(bit_and) {
+     result++;
+     /* Newline added so this message does not run into the next one. */
+     fprintf(stderr,"Error in BIT AND part 2\n");
+   }
+
+   // Test bitwise OR, part 1: all elements are 0, bit_or must stay 0
+   for(i=0;i<1000;i++) {
+     logics[i]=0;
+   }
+
+   #pragma omp parallel sections private(i) reduction(|:bit_or)
+   {
+     #pragma omp section
+     {
+       for(i=0;i<300;++i) {
+         bit_or = bit_or | logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=300;i<700;++i) {
+         bit_or = bit_or | logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=700;i<1000;++i) {
+         bit_or = bit_or | logics[i];
+       }
+     }
+   }
+   if(bit_or) {
+     result++;
+     fprintf(stderr,"Error in BIT OR part 1\n");
+   }
+
+   // Bitwise OR, part 2: a single 1 at index 501 must set bit_or
+   bit_or = 0;
+   logics[501]=1;
+
+   #pragma omp parallel sections private(i) reduction(|:bit_or)
+   {
+     #pragma omp section
+     {
+       for(i=0;i<300;++i) {
+         bit_or = bit_or | logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=300;i<700;++i) {
+         bit_or = bit_or | logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=700;i<1000;++i) {
+         bit_or = bit_or | logics[i];
+       }
+     }
+   }
+   if(!bit_or) {
+     result++;
+     fprintf(stderr,"Error in BIT OR part 2\n");
+   }
+
+   // Test bitwise XOR, part 1: all elements are 0, the XOR must stay 0
+   for(i=0;i<1000;i++) {
+     logics[i]=0;
+   }
+
+   #pragma omp parallel sections private(i) reduction(^:exclusiv_bit_or)
+   {
+     #pragma omp section
+     {
+       for(i=0;i<300;++i) {
+         exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=300;i<700;++i) {
+         exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=700;i<1000;++i) {
+         exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+       }
+     }
+   }
+   if(exclusiv_bit_or) {
+     result++;
+     fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n");
+   }
+
+   // Bitwise XOR, part 2: exactly one element (index 501) is 1, so the XOR
+   // must come out nonzero
+   exclusiv_bit_or = 0;
+   logics[501]=1;
+
+   #pragma omp parallel sections private(i) reduction(^:exclusiv_bit_or)
+   {
+     #pragma omp section
+     {
+       for(i=0;i<300;++i) {
+         exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=300;i<700;++i) {
+         exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+       }
+     }
+     #pragma omp section
+     {
+       for(i=700;i<1000;++i) {
+         exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+       }
+     }
+   }
+   if(!exclusiv_bit_or) {
+     result++;
+     fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n");
+   }
+
+   return (result==0);
+ }
+
+ /* Test driver: calls test_omp_parallel_sections_reduction() REPETITIONS
+  * times and returns the number of calls that reported failure. */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_parallel_sections_reduction();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_section_firstprivate.c b/final/runtime/test/worksharing/sections/omp_section_firstprivate.c
new file mode 100644
index 0000000..5526475
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_section_firstprivate.c
@@ -0,0 +1,55 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises firstprivate on a "sections" construct nested in a parallel
+  * region.  sum0 holds 11 before the construct and is listed as
+  * firstprivate; each of the three sections adds sum0 to the shared sum
+  * (initially 7) inside a critical region.  Returns nonzero when sum ends up
+  * as 7 + 3 * 11.
+  */
+ int test_omp_section_firstprivate()
+ {
+   int sum0 = 11;
+   int sum = 7;
+   int expected;
+
+   #pragma omp parallel
+   {
+     #pragma omp sections firstprivate(sum0)
+     {
+       #pragma omp section
+       {
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+       #pragma omp section
+       {
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+       #pragma omp section
+       {
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+     }
+   }
+   expected = 11 * 3 + 7;
+   return expected == sum;
+ } /* end of test_omp_section_firstprivate */
+
+ /* Test driver: calls test_omp_section_firstprivate() REPETITIONS times and
+  * returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_section_firstprivate();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_section_lastprivate.c b/final/runtime/test/worksharing/sections/omp_section_lastprivate.c
new file mode 100644
index 0000000..0dbbea9
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_section_lastprivate.c
@@ -0,0 +1,76 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises lastprivate on a "sections" construct nested in a parallel
+  * region.  Three sections sum the index ranges [1,400), [400,700) and
+  * [700,1000) into a private sum0, record each index in the lastprivate i0,
+  * and add their partial sum to the shared sum inside a critical region.
+  * Returns nonzero when sum is 1 + ... + 999 and i0 is 999, the last index
+  * written by the third section.
+  */
+ int test_omp_section_lastprivate()
+ {
+   int i;
+   int i0 = -1;
+   int sum = 0;
+   int sum0 = 0;
+   int expected_sum;
+
+   #pragma omp parallel
+   {
+     #pragma omp sections lastprivate(i0) private(i,sum0)
+     {
+       #pragma omp section
+       {
+         sum0 = 0;
+         for (i = 1; i < 400; i++) {
+           sum0 += i;
+           i0 = i;
+         }
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+       #pragma omp section
+       {
+         sum0 = 0;
+         for (i = 400; i < 700; i++) {
+           sum0 += i;
+           i0 = i;
+         }
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+       #pragma omp section
+       {
+         sum0 = 0;
+         for (i = 700; i < 1000; i++) {
+           sum0 += i;
+           i0 = i;
+         }
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+     } /* end of sections */
+   } /* end of parallel */
+
+   expected_sum = (999 * 1000) / 2;
+   if (expected_sum != sum) {
+     return 0;
+   }
+   return i0 == 999;
+ }
+
+ /* Test driver: calls test_omp_section_lastprivate() REPETITIONS times and
+  * returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_section_lastprivate();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_section_private.c b/final/runtime/test/worksharing/sections/omp_section_private.c
new file mode 100644
index 0000000..bf2a30d
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_section_private.c
@@ -0,0 +1,66 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+ /*
+  * Exercises private on a "sections" construct nested in a parallel region.
+  * Three sections sum the index ranges [1,400), [400,700) and [700,1000)
+  * into the private sum0 and then add it to the shared sum (initially 7)
+  * inside a critical region.  Returns nonzero when sum ends up as
+  * 7 + (1 + ... + 999).
+  */
+ int test_omp_section_private()
+ {
+   int i;
+   int sum = 7;
+   int sum0 = 0;
+   int expected;
+
+   #pragma omp parallel
+   {
+     #pragma omp sections private(sum0,i)
+     {
+       #pragma omp section
+       {
+         sum0 = 0;
+         for (i = 1; i < 400; i++) {
+           sum0 += i;
+         }
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+       #pragma omp section
+       {
+         sum0 = 0;
+         for (i = 400; i < 700; i++) {
+           sum0 += i;
+         }
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+       #pragma omp section
+       {
+         sum0 = 0;
+         for (i = 700; i < 1000; i++) {
+           sum0 += i;
+         }
+         #pragma omp critical
+         {
+           sum += sum0;
+         }
+       }
+     } /* end of sections */
+   } /* end of parallel */
+
+   expected = (999 * 1000) / 2 + 7;
+   return expected == sum;
+ } /* end of test_omp_section_private */
+
+ /* Test driver: calls test_omp_section_private() REPETITIONS times and
+  * returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_section_private();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_sections_nowait.c b/final/runtime/test/worksharing/sections/omp_sections_nowait.c
new file mode 100644
index 0000000..caff254
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_sections_nowait.c
@@ -0,0 +1,104 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/*
+ * This test will hang if the nowait is not working properly
+ *
+ * It relies on a thread skipping to the second sections construct to
+ * release the threads in the first sections construct
+ *
+ * Also, since scheduling of sections is implementation defined, it is
+ * necessary to have all four sections in the second sections construct
+ * release the threads since we can't guarantee which section a single thread
+ * will execute.
+ */
+ /* Flag polled by wait_for_release_then_increment(); set to 1 by
+  * release_and_increment() and reset to 0 at the start of each test run. */
+ volatile int release;
+ /* Number of sections executed so far in the current test run; incremented
+  * atomically by every section and compared with 8 at the end. */
+ volatile int count;
+
+ /*
+  * Body of a blocking section: logs the caller's rank to stderr, spins until
+  * the global release flag becomes nonzero, then atomically increments the
+  * global count.
+  *
+  * rank: thread number of the caller, used only in the log message.
+  */
+ void wait_for_release_then_increment(int rank)
+ {
+ fprintf(stderr, "Thread nr %d enters first section"
+ " and waits.\n", rank);
+ /* Busy-wait with an empty body; release is volatile, so it is re-read on
+  * every pass.  This never returns unless some thread sets release. */
+ while (release == 0);
+ #pragma omp atomic
+ count++;
+ }
+
+ /*
+  * Body of a releasing section: logs the caller's rank to stderr, sets the
+  * global release flag to 1, flushes it, then atomically increments the
+  * global count.
+  *
+  * rank: thread number of the caller, used only in the log message.
+  */
+ void release_and_increment(int rank)
+ {
+ fprintf(stderr, "Thread nr %d sets release to 1\n", rank);
+ release = 1;
+ /* Flush the flag right after the store so that the threads spinning in
+  * wait_for_release_then_increment() can observe it. */
+ #pragma omp flush(release)
+ #pragma omp atomic
+ count++;
+ }
+
+ /*
+  * Exercises nowait on a "sections" construct.
+  *
+  * A parallel region requesting 4 threads runs two sections constructs back
+  * to back.  Three sections of the first construct block until release is
+  * set; the fourth returns immediately.  release is only set by the sections
+  * of the second construct, so the blocked sections can finish only if a
+  * thread is allowed to leave the first construct early.
+  *
+  * Returns nonzero when all eight sections incremented count.
+  */
+ int test_omp_sections_nowait()
+ {
+ /* Reset the shared state left over from a previous run. */
+ release = 0;
+ count = 0;
+
+ #pragma omp parallel num_threads(4)
+ {
+ int rank;
+ rank = omp_get_thread_num ();
+ #pragma omp sections nowait
+ {
+ #pragma omp section
+ {
+ wait_for_release_then_increment(rank);
+ }
+ #pragma omp section
+ {
+ wait_for_release_then_increment(rank);
+ }
+ #pragma omp section
+ {
+ wait_for_release_then_increment(rank);
+ }
+ #pragma omp section
+ {
+ /* The only non-blocking section of the first construct. */
+ fprintf(stderr, "Thread nr %d enters first sections and goes "
+ "immediately to next sections construct to release.\n", rank);
+ #pragma omp atomic
+ count++;
+ }
+ }
+ /* Begin of second sections environment */
+ /* Every section here sets release, so whichever one runs first frees the
+  * threads still spinning in the first construct. */
+ #pragma omp sections
+ {
+ #pragma omp section
+ {
+ release_and_increment(rank);
+ }
+ #pragma omp section
+ {
+ release_and_increment(rank);
+ }
+ #pragma omp section
+ {
+ release_and_increment(rank);
+ }
+ #pragma omp section
+ {
+ release_and_increment(rank);
+ }
+ }
+ }
+ // Check to make sure all eight sections were executed
+ return (count==8);
+ }
+
+ /* Test driver: calls test_omp_sections_nowait() REPETITIONS times and
+  * returns the number of calls that reported failure (0 = all passed). */
+ int main()
+ {
+   int failures = 0;
+   int run;
+
+   for (run = 0; run < REPETITIONS; ++run) {
+     failures += !test_omp_sections_nowait();
+   }
+
+   return failures;
+ }
diff --git a/final/runtime/test/worksharing/sections/omp_sections_reduction.c b/final/runtime/test/worksharing/sections/omp_sections_reduction.c
new file mode 100644
index 0000000..1fdb5ec
--- /dev/null
+++ b/final/runtime/test/worksharing/sections/omp_sections_reduction.c
@@ -0,0 +1,543 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/*
+ * Exercises the reduction clause on the sections construct for the operators
+ * +, -, *, &&, ||, &, | and ^. Each sub-test splits the work over three
+ * sections, compares the reduced value with a value computed up front, and
+ * on mismatch increments result and prints a message to stderr.
+ *
+ * Returns non-zero when no sub-test failed.
+ */
+int test_omp_sections_reduction()
+{
+ int sum;
+ int known_sum;
+ double dpt,dsum;
+ double dknown_sum;
+ double dt=0.5; /* base of geometric row for + and - test*/
+ double rounding_error= 1.E-9;
+ int diff;
+ double ddiff;
+ int product;
+ int known_product;
+ int logic_and;
+ int bit_and;
+ int logic_or;
+ int bit_or;
+ int exclusiv_bit_or;
+ int logics[1000];
+ int i;
+ int result;
+ /* int my_islarger; */
+ /*int is_larger=1;*/
+ sum =7;
+ dpt =1;
+ dsum=0;
+ product =1;
+ logic_and=1;
+ bit_and=1;
+ logic_or=0;
+ bit_or=0;
+ exclusiv_bit_or=0;
+ result = 0;
+ /* Replaces the 0.5 given in the declaration above. */
+ dt = 1./3.;
+
+ /* Integer '+': 1..999 added to the initial value 7. */
+ known_sum = (999*1000)/2+7;
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(+:sum)
+ {
+ #pragma omp section
+ {
+ for (i=1;i<300;i++) {
+ sum=sum+i;
+ }
+ }
+ #pragma omp section
+ {
+ for (i=300;i<700;i++) {
+ sum=sum+i;
+ }
+ }
+ #pragma omp section
+ {
+ for (i=700;i<1000;i++) {
+ sum=sum+i;
+ }
+ }
+ }
+ }
+ if(known_sum!=sum) {
+ ++result;
+ fprintf(stderr,"Error in sum with integers: Result was %d"
+ " instead of %d\n", sum,known_sum);
+ }
+
+ /* Integer '-': start at the sum of 1..999 and subtract every term; expect 0. */
+ diff = (999*1000)/2;
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(-:diff)
+ {
+ #pragma omp section
+ {
+ for (i=1;i<300;i++) {
+ diff=diff-i;
+ }
+ }
+ #pragma omp section
+ {
+ for (i=300;i<700;i++) {
+ diff=diff-i;
+ }
+ }
+ #pragma omp section
+ {
+ for (i=700;i<1000;i++) {
+ diff=diff-i;
+ }
+ }
+ }
+ }
+ if(diff != 0) {
+ result++;
+ fprintf(stderr,"Error in Difference with integers: Result was %d"
+ " instead of 0.\n",diff);
+ }
+
+ /* Double '+': dpt becomes dt^20, giving the closed form of the 20-term
+ * geometric sum that the sections accumulate term by term with pow(). */
+ for (i=0;i<20;++i) {
+ dpt*=dt;
+ }
+ dknown_sum = (1-dpt)/(1-dt);
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(+:dsum)
+ {
+ #pragma omp section
+ {
+ for (i=0;i<6;++i) {
+ dsum += pow(dt,i);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=6;i<12;++i) {
+ dsum += pow(dt,i);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=12;i<20;++i) {
+ dsum += pow(dt,i);
+ }
+ }
+ }
+ }
+ if( fabs(dsum-dknown_sum) > rounding_error ) {
+ result++;
+ fprintf(stderr,"Error in sum with doubles: Result was %f"
+ " instead of %f (Difference: %E)\n",
+ dsum, dknown_sum, dsum-dknown_sum);
+ }
+
+ /* Double '-': start at the closed-form sum and subtract each term; the
+ * result must be within rounding_error of 0. */
+ dpt=1;
+ for (i=0;i<20;++i) {
+ dpt*=dt;
+ }
+ fprintf(stderr,"\n");
+ ddiff = (1-dpt)/(1-dt);
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(-:ddiff)
+ {
+ #pragma omp section
+ {
+ for (i=0;i<6;++i) {
+ ddiff -= pow(dt,i);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=6;i<12;++i) {
+ ddiff -= pow(dt,i);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=12;i<20;++i) {
+ ddiff -= pow(dt,i);
+ }
+ }
+ }
+ }
+
+ if(fabs(ddiff) > rounding_error) {
+ result++;
+ fprintf(stderr,"Error in Difference with doubles: Result was %E"
+ " instead of 0.0\n",ddiff);
+ }
+
+ /* Integer '*': product of 1..10 (10! = 3628800). */
+ known_product = 3628800;
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(*:product)
+ {
+ #pragma omp section
+ {
+ for(i=1;i<3;i++) {
+ product *= i;
+ }
+ }
+ #pragma omp section
+ {
+ for(i=3;i<7;i++) {
+ product *= i;
+ }
+ }
+ #pragma omp section
+ {
+ for(i=7;i<11;i++) {
+ product *= i;
+ }
+ }
+ }
+ }
+ if(known_product != product) {
+ result++;
+ fprintf(stderr,"Error in Product with integers: Result was %d"
+ " instead of %d\n",product,known_product);
+ }
+
+ /* Logical '&&', part 1: every element is 1, so the result must stay true.
+ * Note that the && and || loops below start at index 1, not 0. */
+ for(i=0;i<1000;i++) {
+ logics[i]=1;
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(&&:logic_and)
+ {
+ #pragma omp section
+ {
+ for (i=1;i<300;i++) {
+ logic_and = (logic_and && logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=300;i<700;i++) {
+ logic_and = (logic_and && logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=700;i<1000;i++) {
+ logic_and = (logic_and && logics[i]);
+ }
+ }
+ }
+ }
+ if(!logic_and) {
+ result++;
+ fprintf(stderr,"Error in logic AND part 1\n");
+ }
+
+ /* Logical '&&', part 2: a single 0 at index 501 must make the result false. */
+ logic_and = 1;
+ logics[501] = 0;
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(&&:logic_and)
+ {
+ #pragma omp section
+ {
+ for (i=1;i<300;i++) {
+ logic_and = (logic_and && logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=300;i<700;i++) {
+ logic_and = (logic_and && logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=700;i<1000;i++) {
+ logic_and = (logic_and && logics[i]);
+ }
+ }
+ }
+ }
+ if(logic_and) {
+ result++;
+ fprintf(stderr,"Error in logic AND part 2\n");
+ }
+
+ /* Logical '||', part 1: every element is 0, so the result must stay false. */
+ for(i=0;i<1000;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(||:logic_or)
+ {
+ #pragma omp section
+ {
+ for (i=1;i<300;i++) {
+ logic_or = (logic_or || logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=300;i<700;i++) {
+ logic_or = (logic_or || logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=700;i<1000;i++) {
+ logic_or = (logic_or || logics[i]);
+ }
+ }
+ }
+ }
+ if(logic_or) {
+ result++;
+ fprintf(stderr,"\nError in logic OR part 1\n");
+ }
+
+ /* Logical '||', part 2: a single 1 at index 501 must make the result true. */
+ logic_or = 0;
+ logics[501]=1;
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(||:logic_or)
+ {
+ #pragma omp section
+ {
+ for (i=1;i<300;i++) {
+ logic_or = (logic_or || logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=300;i<700;i++) {
+ logic_or = (logic_or || logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for (i=700;i<1000;i++) {
+ logic_or = (logic_or || logics[i]);
+ }
+ }
+ }
+ }
+ if(!logic_or) {
+ result++;
+ fprintf(stderr,"Error in logic OR part 2\n");
+ }
+
+ /* Bitwise '&', part 1: every element is 1, so bit_and must stay non-zero. */
+ for(i=0;i<1000;++i) {
+ logics[i]=1;
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(&:bit_and)
+ {
+ #pragma omp section
+ {
+ for(i=0;i<300;++i) {
+ bit_and = (bit_and & logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for(i=300;i<700;++i) {
+ bit_and = (bit_and & logics[i]);
+ }
+ }
+ #pragma omp section
+ {
+ for(i=700;i<1000;++i) {
+ bit_and = (bit_and & logics[i]);
+ }
+ }
+ }
+ }
+ if(!bit_and) {
+ result++;
+ fprintf(stderr,"Error in BIT AND part 1\n");
+ }
+
+ /* Bitwise '&', part 2: a single 0 at index 501 must clear bit_and. */
+ bit_and = 1;
+ logics[501]=0;
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(&:bit_and)
+ {
+ #pragma omp section
+ {
+ for(i=0;i<300;++i) {
+ bit_and = bit_and & logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=300;i<700;++i) {
+ bit_and = bit_and & logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=700;i<1000;++i) {
+ bit_and = bit_and & logics[i];
+ }
+ }
+ }
+ }
+ if(bit_and) {
+ result++;
+ fprintf(stderr,"Error in BIT AND part 2\n");
+ }
+
+ /* Bitwise '|', part 1: every element is 0, so bit_or must stay 0. */
+ for(i=0;i<1000;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(|:bit_or)
+ {
+ #pragma omp section
+ {
+ for(i=0;i<300;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=300;i<700;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=700;i<1000;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ }
+ }
+ }
+ if(bit_or) {
+ result++;
+ fprintf(stderr,"Error in BIT OR part 1\n");
+ }
+ /* Bitwise '|', part 2: a single 1 at index 501 must set bit_or. */
+ bit_or = 0;
+ logics[501]=1;
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(|:bit_or)
+ {
+ #pragma omp section
+ {
+ for(i=0;i<300;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=300;i<700;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=700;i<1000;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ }
+ }
+ }
+ if(!bit_or) {
+ result++;
+ fprintf(stderr,"Error in BIT OR part 2\n");
+ }
+
+ /* Bitwise '^', part 1: every element is 0, so the XOR must stay 0. */
+ for(i=0;i<1000;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(^:exclusiv_bit_or)
+ {
+ #pragma omp section
+ {
+ for(i=0;i<300;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=300;i<700;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=700;i<1000;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ }
+ }
+ }
+ if(exclusiv_bit_or) {
+ result++;
+ fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n");
+ }
+
+ /* Bitwise '^', part 2: exactly one element (index 501) is 1, so the XOR
+ * must be non-zero. */
+ exclusiv_bit_or = 0;
+ logics[501]=1;
+
+ #pragma omp parallel
+ {
+ #pragma omp sections private(i) reduction(^:exclusiv_bit_or)
+ {
+ #pragma omp section
+ {
+ for(i=0;i<300;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=300;i<700;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ }
+ #pragma omp section
+ {
+ for(i=700;i<1000;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ }
+ }
+ }
+ if(!exclusiv_bit_or) {
+ result++;
+ fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n");
+ }
+
+ /*printf("\nResult:%d\n",result);*/
+ return (result==0);
+}
+/* Repeats the sections-reduction test REPETITIONS times and returns the
+ * number of repetitions that failed. */
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  while (rep < REPETITIONS) {
+    if (test_omp_sections_reduction() == 0)
+      failures += 1;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/single/omp_single.c b/final/runtime/test/worksharing/single/omp_single.c
new file mode 100644
index 0000000..4963579
--- /dev/null
+++ b/final/runtime/test/worksharing/single/omp_single.c
@@ -0,0 +1,44 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/*
+ * Checks the single construct inside a loop of LOOPCOUNT iterations executed
+ * by every thread of a parallel region.
+ *
+ * nr_threads_in_single is incremented on entry to the single body and
+ * decremented again before its value is added to result, so result stays 0
+ * as long as the counter is back at 0 every time. nr_iterations counts how
+ * often the single body ran.
+ *
+ * Returns non-zero when result is 0 and the body ran LOOPCOUNT times in total.
+ */
+int test_omp_single()
+{
+ int nr_threads_in_single;
+ int result;
+ int nr_iterations;
+ int i;
+
+ nr_threads_in_single = 0;
+ result = 0;
+ nr_iterations = 0;
+
+ #pragma omp parallel private(i)
+ {
+ for (i = 0; i < LOOPCOUNT; i++) {
+ #pragma omp single
+ {
+ #pragma omp flush
+ nr_threads_in_single++;
+ #pragma omp flush
+ nr_iterations++;
+ nr_threads_in_single--;
+ /* Adds a non-zero value only if the counter did not return to 0. */
+ result = result + nr_threads_in_single;
+ }
+ }
+ }
+ return ((result == 0) && (nr_iterations == LOOPCOUNT));
+} /* end of test_omp_single */
+
+/* Repeats the single test REPETITIONS times and returns the number of
+ * repetitions that failed. */
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  while (rep < REPETITIONS) {
+    if (test_omp_single() == 0)
+      failures += 1;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/single/omp_single_copyprivate.c b/final/runtime/test/worksharing/single/omp_single_copyprivate.c
new file mode 100644
index 0000000..2fece5c
--- /dev/null
+++ b/final/runtime/test/worksharing/single/omp_single_copyprivate.c
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile-and-run
+#include "omp_testsuite.h"
+
+/* Set to 1 to print each thread's view of j and i (this also adds an extra
+ * barrier after the single construct). */
+#define DEBUG_TEST 0
+
+/* One copy per thread; assigned inside the single construct and named in its
+ * copyprivate clause. */
+int j;
+#pragma omp threadprivate(j)
+
+/*
+ * Checks the copyprivate clause of the single construct with 4 threads.
+ *
+ * In every one of the LOOPCOUNT iterations the single body sets the
+ * threadprivate j to the loop index i. Afterwards each thread adds (j - i) to
+ * result inside a critical section, which stays 0 only if that thread's j
+ * equals i.
+ *
+ * Returns non-zero when result is 0 and the single body ran LOOPCOUNT times.
+ */
+int test_omp_single_copyprivate()
+{
+ int result;
+ int nr_iterations;
+
+ result = 0;
+ nr_iterations = 0;
+ #pragma omp parallel num_threads(4)
+ {
+ int i;
+ for (i = 0; i < LOOPCOUNT; i++)
+ {
+#if DEBUG_TEST
+ int thread;
+ thread = omp_get_thread_num ();
+#endif
+ #pragma omp single copyprivate(j)
+ {
+ nr_iterations++;
+ j = i;
+#if DEBUG_TEST
+ printf ("thread %d assigns, j = %d, i = %d\n", thread, j, i);
+#endif
+ }
+#if DEBUG_TEST
+ #pragma omp barrier
+#endif
+ /* Serialise the update of the shared result. */
+ #pragma omp critical
+ {
+#if DEBUG_TEST
+ printf ("thread = %d, j = %d, i = %d\n", thread, j, i);
+#endif
+ result = result + j - i;
+ }
+ /* All threads synchronise before the next iteration starts. */
+ #pragma omp barrier
+ } /* end of for */
+ } /* end of parallel */
+ return ((result == 0) && (nr_iterations == LOOPCOUNT));
+}
+
+/* Repeats the single-copyprivate test REPETITIONS times and returns the
+ * number of repetitions that failed. */
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  while (rep < REPETITIONS) {
+    if (test_omp_single_copyprivate() == 0)
+      failures += 1;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/single/omp_single_nowait.c b/final/runtime/test/worksharing/single/omp_single_nowait.c
new file mode 100644
index 0000000..22f8930
--- /dev/null
+++ b/final/runtime/test/worksharing/single/omp_single_nowait.c
@@ -0,0 +1,73 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/*
+ * This test will hang if the nowait is not working properly
+ *
+ * It relies on one thread skipping to the last single construct to
+ * release the threads in the first three single constructs
+ */
+/* Flag polled by wait_for_release_then_increment(); set to 1 by
+ * release_and_increment(). Reset to 0 at the start of every test run. */
+volatile int release;
+/* Number of single bodies executed so far; only updated under "omp atomic". */
+volatile int count;
+
+/*
+ * Body of each "single nowait" construct in test_omp_single_nowait(): logs
+ * the caller, busy-waits until release becomes non-zero, then atomically
+ * increments count.
+ *
+ * rank: thread number of the caller; used only in the log message.
+ */
+void wait_for_release_then_increment(int rank)
+{
+  /* The message names the construct this test uses (single); the previous
+   * text spoke of a "section", carried over from the sections test. */
+  fprintf(stderr, "Thread nr %d enters a single construct"
+      " and waits.\n", rank);
+  /* Spin until some thread executes release_and_increment(). */
+  while (release == 0);
+  #pragma omp atomic
+  count++;
+}
+
+/*
+ * Body of the final, blocking single construct in test_omp_single_nowait():
+ * logs the caller, sets release to 1 so the threads spinning in
+ * wait_for_release_then_increment() can continue, then atomically increments
+ * count.
+ *
+ * rank: thread number of the caller; used only in the log message.
+ */
+void release_and_increment(int rank)
+{
+  fprintf(stderr, "Thread nr %d sets release to 1\n", rank);
+  release = 1;
+  /* Publish the store with an explicit flush rather than relying on the
+   * volatile qualifier alone; release_and_increment() in
+   * omp_sections_nowait.c does the same. */
+  #pragma omp flush(release)
+  #pragma omp atomic
+  count++;
+}
+
+/*
+ * Runs three "single nowait" constructs followed by one blocking single in a
+ * 4-thread parallel region. The first three bodies spin until release is set,
+ * and release is only set by the body of the fourth construct, so a thread
+ * must be able to pass the nowait constructs without waiting at their end.
+ *
+ * Returns non-zero when all four single bodies have incremented count.
+ */
+int test_omp_single_nowait()
+{
+ release = 0;
+ count = 0;
+
+ #pragma omp parallel num_threads(4)
+ {
+ int rank;
+ rank = omp_get_thread_num ();
+ #pragma omp single nowait
+ {
+ wait_for_release_then_increment(rank);
+ }
+ #pragma omp single nowait
+ {
+ wait_for_release_then_increment(rank);
+ }
+ #pragma omp single nowait
+ {
+ wait_for_release_then_increment(rank);
+ }
+
+ /* Last construct: its body releases the three spinning threads. */
+ #pragma omp single
+ {
+ release_and_increment(rank);
+ }
+ }
+ // Check to make sure all four singles were executed
+ return (count==4);
+} /* end of test_omp_single_nowait */
+
+/* Repeats the single-nowait test REPETITIONS times and returns the number of
+ * repetitions that failed. */
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  while (rep < REPETITIONS) {
+    if (test_omp_single_nowait() == 0)
+      failures += 1;
+    rep++;
+  }
+  return failures;
+}
diff --git a/final/runtime/test/worksharing/single/omp_single_private.c b/final/runtime/test/worksharing/single/omp_single_private.c
new file mode 100644
index 0000000..a27f8de
--- /dev/null
+++ b/final/runtime/test/worksharing/single/omp_single_private.c
@@ -0,0 +1,57 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/* Per-thread count of single bodies executed by this thread. */
+int myit = 0;
+#pragma omp threadprivate(myit)
+/* Per-thread sum of the private nr_threads_in_single values seen in the
+ * single bodies. */
+int myresult = 0;
+#pragma omp threadprivate(myresult)
+
+/*
+ * Checks the private clause on a "single nowait" construct.
+ *
+ * Inside the single body the private copy of nr_threads_in_single is set to
+ * 0 and incremented, and the executing thread bumps its threadprivate myit.
+ * After the loop every thread adds the function-level nr_threads_in_single
+ * (initialised to 0 before the parallel region) to result and its myit to
+ * nr_iterations inside a critical section.
+ *
+ * Returns non-zero when result is 0 and the single bodies ran LOOPCOUNT times
+ * in total.
+ *
+ * NOTE(review): myresult is accumulated but never used for the verdict;
+ * confirm whether the critical section was meant to add myresult instead.
+ */
+int test_omp_single_private()
+{
+ int nr_threads_in_single;
+ int result;
+ int nr_iterations;
+ int i;
+
+ myit = 0;
+ nr_threads_in_single = 0;
+ nr_iterations = 0;
+ result = 0;
+
+ #pragma omp parallel private(i)
+ {
+ /* Reset this thread's threadprivate counters. */
+ myresult = 0;
+ myit = 0;
+ for (i = 0; i < LOOPCOUNT; i++) {
+ #pragma omp single private(nr_threads_in_single) nowait
+ {
+ /* From here on nr_threads_in_single names the construct-private copy. */
+ nr_threads_in_single = 0;
+ #pragma omp flush
+ nr_threads_in_single++;
+ #pragma omp flush
+ myit++;
+ myresult = myresult + nr_threads_in_single;
+ }
+ }
+ /* Fold the per-thread counts into the shared totals one thread at a time. */
+ #pragma omp critical
+ {
+ result += nr_threads_in_single;
+ nr_iterations += myit;
+ }
+ }
+ return ((result == 0) && (nr_iterations == LOOPCOUNT));
+} /* end of test_omp_single_private */
+
+/* Repeats the single-private test REPETITIONS times and returns the number
+ * of repetitions that failed. */
+int main()
+{
+  int failures = 0;
+  int rep = 0;
+
+  while (rep < REPETITIONS) {
+    if (test_omp_single_private() == 0)
+      failures += 1;
+    rep++;
+  }
+  return failures;
+}