Diffstat (limited to 'final/runtime/test/tasking')
-rw-r--r--  final/runtime/test/tasking/bug_36720.c                  |  36
-rw-r--r--  final/runtime/test/tasking/bug_nested_proxy_task.c      | 131
-rw-r--r--  final/runtime/test/tasking/bug_proxy_task_dep_waiting.c | 134
-rw-r--r--  final/runtime/test/tasking/bug_serial_taskgroup.c       |  16
-rw-r--r--  final/runtime/test/tasking/kmp_task_reduction_nest.cpp  | 376
-rw-r--r--  final/runtime/test/tasking/kmp_taskloop.c               | 159
-rw-r--r--  final/runtime/test/tasking/nested_parallel_tasking.c    |  32
-rw-r--r--  final/runtime/test/tasking/nested_task_creation.c       |  35
-rw-r--r--  final/runtime/test/tasking/omp_task.c                   |  52
-rw-r--r--  final/runtime/test/tasking/omp_task_final.c             |  65
-rw-r--r--  final/runtime/test/tasking/omp_task_firstprivate.c      |  51
-rw-r--r--  final/runtime/test/tasking/omp_task_if.c                |  43
-rw-r--r--  final/runtime/test/tasking/omp_task_imp_firstprivate.c  |  47
-rw-r--r--  final/runtime/test/tasking/omp_task_priority.c          |  22
-rw-r--r--  final/runtime/test/tasking/omp_task_private.c           |  53
-rw-r--r--  final/runtime/test/tasking/omp_task_shared.c            |  41
-rw-r--r--  final/runtime/test/tasking/omp_taskloop_grainsize.c     | 113
-rw-r--r--  final/runtime/test/tasking/omp_taskloop_num_tasks.c     |  71
-rw-r--r--  final/runtime/test/tasking/omp_taskwait.c               |  74
-rw-r--r--  final/runtime/test/tasking/omp_taskyield.c              |  58
20 files changed, 1609 insertions, 0 deletions
diff --git a/final/runtime/test/tasking/bug_36720.c b/final/runtime/test/tasking/bug_36720.c
new file mode 100644
index 0000000..684d675
--- /dev/null
+++ b/final/runtime/test/tasking/bug_36720.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run
+
+/*
+Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=36720
+
+Assertion failure at kmp_runtime.cpp(1715): nthreads > 0.
+OMP: Error #13: Assertion failure at kmp_runtime.cpp(1715).
+
+The assertion fails even with OMP_NUM_THREADS=1. If the second task is removed,
+everything runs to completion. If the "omp parallel for" directives are removed
+from inside the tasks, once again everything runs fine.
+*/
+
+#define N 1024
+
+int main() {
+ #pragma omp task
+ {
+ int i;
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ (void)0;
+ }
+
+ #pragma omp task
+ {
+ int i;
+ #pragma omp parallel for
+ for (i = 0; i < N; ++i)
+ (void)0;
+ }
+
+ #pragma omp taskwait
+
+ return 0;
+}
diff --git a/final/runtime/test/tasking/bug_nested_proxy_task.c b/final/runtime/test/tasking/bug_nested_proxy_task.c
new file mode 100644
index 0000000..6c00822
--- /dev/null
+++ b/final/runtime/test/tasking/bug_nested_proxy_task.c
@@ -0,0 +1,131 @@
+// RUN: %libomp-compile -lpthread && %libomp-run
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+#include <stdio.h>
+#include <omp.h>
+#include <pthread.h>
+#include "omp_my_sleep.h"
+
+/*
+ With task dependencies one can generate proxy tasks from an explicit task
+ that is being executed by a serial task team. The OpenMP runtime library did
+ not expect that and tried to free the explicit task that is the parent of a
+ proxy task still working in the background. That parent therefore still has
+ incomplete children, which triggers a debugging assertion.
+*/
+
+// Compiler-generated code (emulation)
+typedef long kmp_intptr_t;
+typedef int kmp_int32;
+
+typedef char bool;
+
+typedef struct ident {
+ kmp_int32 reserved_1; /**< might be used in Fortran; see above */
+ kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */
+ kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
+#if USE_ITT_BUILD
+ /* but currently used for storing region-specific ITT */
+ /* contextual information. */
+#endif /* USE_ITT_BUILD */
+ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
+ char const *psource; /**< String describing the source location.
+ The string is composed of semi-colon separated fields which describe the source file,
+ the function and a pair of line numbers that delimit the construct.
+ */
+} ident_t;
+
+typedef struct kmp_depend_info {
+ kmp_intptr_t base_addr;
+ size_t len;
+ struct {
+ bool in:1;
+ bool out:1;
+ } flags;
+} kmp_depend_info_t;
+
+struct kmp_task;
+typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, struct kmp_task * );
+
+typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
+ void * shareds; /**< pointer to block of pointers to shared vars */
+ kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */
+ kmp_int32 part_id; /**< part id for the task */
+} kmp_task_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+kmp_int32 __kmpc_global_thread_num ( ident_t * );
+kmp_task_t*
+__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry );
+void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
+kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
+kmp_int32
+__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
+#ifdef __cplusplus
+}
+#endif
+
+void *target(void *task)
+{
+ my_sleep( 0.1 );
+ __kmpc_proxy_task_completed_ooo((kmp_task_t*) task);
+ return NULL;
+}
+
+pthread_t target_thread;
+
+// User's code
+int task_entry(kmp_int32 gtid, kmp_task_t *task)
+{
+ pthread_create(&target_thread, NULL, &target, task);
+ return 0;
+}
+
+int main()
+{
+ int dep;
+
+#pragma omp taskgroup
+{
+/*
+ * Corresponds to:
+ #pragma omp target nowait depend(out: dep)
+ {
+ my_sleep( 0.1 );
+ }
+*/
+ kmp_depend_info_t dep_info;
+ dep_info.base_addr = (long) &dep;
+ dep_info.len = sizeof(int);
+ // out = inout per spec and runtime expects this
+ dep_info.flags.in = 1;
+ dep_info.flags.out = 1;
+
+ kmp_int32 gtid = __kmpc_global_thread_num(NULL);
+ kmp_task_t *proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry);
+ __kmpc_omp_task_with_deps(NULL,gtid,proxy_task,1,&dep_info,0,NULL);
+
+ #pragma omp task depend(in: dep)
+ {
+/*
+ * Corresponds to:
+ #pragma omp target nowait
+ {
+ my_sleep( 0.1 );
+ }
+*/
+ kmp_task_t *nested_proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry);
+ __kmpc_omp_task(NULL,gtid,nested_proxy_task);
+ }
+}
+
+ // only check that it didn't crash
+ return 0;
+}
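At the OpenMP 5.0 source level, the proxy-task pattern emulated above roughly corresponds to a task with a detach clause whose completion event is fulfilled from a plain pthread. A minimal sketch of that correspondence, assuming a compiler and runtime with detach support (illustrative only, not one of the files added by this diff):

    #include <omp.h>
    #include <pthread.h>
    #include <unistd.h>

    static omp_event_handle_t ev;   // filled in by the detach clause below

    static void *fulfill_later(void *arg) {
      (void)arg;
      usleep(100000);               // stand-in for asynchronous "target" work
      omp_fulfill_event(ev);        // completes the detached (proxy-like) task
      return NULL;
    }

    int main(void) {
      int dep = 0;
      pthread_t helper;

      #pragma omp taskgroup
      {
        // Detached task: stays incomplete until omp_fulfill_event(ev) is called.
        #pragma omp task detach(ev) depend(out: dep)
        pthread_create(&helper, NULL, fulfill_later, NULL);

        // Dependent sibling task, released once the event above is fulfilled.
        #pragma omp task depend(in: dep)
        dep = 1;
      }
      pthread_join(helper, NULL);
      return 0;
    }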
diff --git a/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c
new file mode 100644
index 0000000..e6dd895
--- /dev/null
+++ b/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c
@@ -0,0 +1,134 @@
+// RUN: %libomp-compile -lpthread && %libomp-run
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+#include <stdio.h>
+#include <omp.h>
+#include <pthread.h>
+#include "omp_my_sleep.h"
+
+/*
+ An explicit task can have a dependency on a target task. If the dependency
+ is not immediately satisfied, the runtime should not wait for it but resume
+ execution.
+*/
+
+// Compiler-generated code (emulation)
+typedef long kmp_intptr_t;
+typedef int kmp_int32;
+
+typedef char bool;
+
+typedef struct ident {
+ kmp_int32 reserved_1; /**< might be used in Fortran; see above */
+ kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */
+ kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
+#if USE_ITT_BUILD
+ /* but currently used for storing region-specific ITT */
+ /* contextual information. */
+#endif /* USE_ITT_BUILD */
+ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
+ char const *psource; /**< String describing the source location.
+ The string is composed of semi-colon separated fields which describe the source file,
+ the function and a pair of line numbers that delimit the construct.
+ */
+} ident_t;
+
+typedef struct kmp_depend_info {
+ kmp_intptr_t base_addr;
+ size_t len;
+ struct {
+ bool in:1;
+ bool out:1;
+ } flags;
+} kmp_depend_info_t;
+
+struct kmp_task;
+typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, struct kmp_task * );
+
+typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
+ void * shareds; /**< pointer to block of pointers to shared vars */
+ kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */
+ kmp_int32 part_id; /**< part id for the task */
+} kmp_task_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+kmp_int32 __kmpc_global_thread_num ( ident_t * );
+kmp_task_t*
+__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry );
+void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
+kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
+kmp_int32
+__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
+#ifdef __cplusplus
+}
+#endif
+
+void *target(void *task)
+{
+ my_sleep( 0.1 );
+ __kmpc_proxy_task_completed_ooo((kmp_task_t*) task);
+ return NULL;
+}
+
+pthread_t target_thread;
+
+// User's code
+int task_entry(kmp_int32 gtid, kmp_task_t *task)
+{
+ pthread_create(&target_thread, NULL, &target, task);
+ return 0;
+}
+
+int main()
+{
+ int dep;
+
+/*
+ * Corresponds to:
+ #pragma omp target nowait depend(out: dep)
+ {
+ my_sleep( 0.1 );
+ }
+*/
+ kmp_depend_info_t dep_info;
+ dep_info.base_addr = (long) &dep;
+ dep_info.len = sizeof(int);
+ // out = inout per spec and runtime expects this
+ dep_info.flags.in = 1;
+ dep_info.flags.out = 1;
+
+ kmp_int32 gtid = __kmpc_global_thread_num(NULL);
+ kmp_task_t *proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry);
+ __kmpc_omp_task_with_deps(NULL,gtid,proxy_task,1,&dep_info,0,NULL);
+
+ int first_task_finished = 0;
+ #pragma omp task shared(first_task_finished) depend(inout: dep)
+ {
+ first_task_finished = 1;
+ }
+
+ int second_task_finished = 0;
+ #pragma omp task shared(second_task_finished) depend(in: dep)
+ {
+ second_task_finished = 1;
+ }
+
+ // check that execution has been resumed and the runtime has not waited
+ // for the dependencies to be satisfied.
+ int error = (first_task_finished == 1);
+ error += (second_task_finished == 1);
+
+ #pragma omp taskwait
+
+ // by now all tasks should have finished
+ error += (first_task_finished != 1);
+ error += (second_task_finished != 1);
+
+ return error;
+}
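Both proxy-task tests above (and several tests below) use my_sleep() from omp_my_sleep.h to keep work pending for a fraction of a second; that header is not part of this diff. A plausible portable sketch of such a helper, under the assumption that it simply sleeps for a fractional number of seconds (the suite's actual header may differ):

    #include <time.h>

    // Sleep for the given (possibly fractional) number of seconds.
    static void my_sleep(double sleeptime) {
      struct timespec ts;
      ts.tv_sec  = (time_t)sleeptime;
      ts.tv_nsec = (long)((sleeptime - (double)ts.tv_sec) * 1.0e9);
      nanosleep(&ts, NULL);
    }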
diff --git a/final/runtime/test/tasking/bug_serial_taskgroup.c b/final/runtime/test/tasking/bug_serial_taskgroup.c
new file mode 100644
index 0000000..850bc90
--- /dev/null
+++ b/final/runtime/test/tasking/bug_serial_taskgroup.c
@@ -0,0 +1,16 @@
+// RUN: %libomp-compile-and-run
+
+/*
+ GCC failed this test because __kmp_get_gtid() instead of __kmp_entry_gtid()
+ was called in xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void).
+ __kmp_entry_gtid() initializes the runtime if that has not yet been done,
+ which __kmp_get_gtid() does not.
+ */
+
+int main()
+{
+ #pragma omp taskgroup
+ { }
+
+ return 0;
+}
diff --git a/final/runtime/test/tasking/kmp_task_reduction_nest.cpp b/final/runtime/test/tasking/kmp_task_reduction_nest.cpp
new file mode 100644
index 0000000..63dffe4
--- /dev/null
+++ b/final/runtime/test/tasking/kmp_task_reduction_nest.cpp
@@ -0,0 +1,376 @@
+// RUN: %libomp-cxx-compile-and-run
+// RUN: %libomp-cxx-compile -DFLG=1 && %libomp-run
+// GCC-5 is needed for OpenMP 4.0 support (taskgroup)
+// XFAIL: gcc-4
+#include <cstdio>
+#include <cmath>
+#include <cassert>
+#include <omp.h>
+
+// Total number of loop iterations, should be multiple of T for this test
+#define N 10000
+
+// Flag to request lazy (1) or eager (0) allocation of reduction objects
+#ifndef FLG
+#define FLG 0
+#endif
+
+/*
+ // initial user's code that corresponds to pseudo code of the test
+ #pragma omp taskgroup task_reduction(+:i,j) task_reduction(*:x)
+ {
+ for( int l = 0; l < N; ++l ) {
+ #pragma omp task firstprivate(l) in_reduction(+:i) in_reduction(*:x)
+ {
+ i += l;
+ if( l%2 )
+ x *= 1.0 / (l + 1);
+ else
+ x *= (l + 1);
+ }
+ }
+
+ #pragma omp taskgroup task_reduction(-:i,k) task_reduction(+:y)
+ {
+ for( int l = 0; l < N; ++l ) {
+ #pragma omp task firstprivate(l) in_reduction(+:j,y) \
+ in_reduction(*:x) in_reduction(-:k)
+ {
+ j += l;
+ k -= l;
+ y += (double)l;
+ if( l%2 )
+ x *= 1.0 / (l + 1);
+ else
+ x *= (l + 1);
+ }
+ #pragma omp task firstprivate(l) in_reduction(+:y) in_reduction(-:i,k)
+ {
+ i -= l;
+ k -= l;
+ y += (double)l;
+ }
+ #pragma omp task firstprivate(l) in_reduction(+:j) in_reduction(*:x)
+ {
+ j += l;
+ if( l%2 )
+ x *= 1.0 / (l + 1);
+ else
+ x *= (l + 1);
+ }
+ }
+ } // inner reduction
+
+ for( int l = 0; l < N; ++l ) {
+ #pragma omp task firstprivate(l) in_reduction(+:j)
+ j += l;
+ }
+ } // outer reduction
+*/
+
+//------------------------------------------------
+// OpenMP runtime library routines
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern void* __kmpc_task_reduction_get_th_data(int gtid, void* tg, void* item);
+extern void* __kmpc_task_reduction_init(int gtid, int num, void* data);
+extern int __kmpc_global_thread_num(void*);
+#ifdef __cplusplus
+}
+#endif
+
+//------------------------------------------------
+// Compiler-generated code
+
+typedef struct _task_red_item {
+ void *shar; // shared reduction item
+ size_t size; // size of data item
+ void *f_init; // data initialization routine
+ void *f_fini; // data finalization routine
+ void *f_comb; // data combiner routine
+ unsigned flags;
+} _task_red_item_t;
+
+// int:+ no need for init/fini callbacks, valid for subtraction
+void __red_int_add_comb(void *lhs, void *rhs) // combiner
+{ *(int*)lhs += *(int*)rhs; }
+
+// long long:+ no need for init/fini callbacks, valid for subtraction
+void __red_llong_add_comb(void *lhs, void *rhs) // combiner
+{ *(long long*)lhs += *(long long*)rhs; }
+
+// double:* no need for a fini callback
+void __red_dbl_mul_init(void *data) // initializer
+{ *(double*)data = 1.0; }
+void __red_dbl_mul_comb(void *lhs, void *rhs) // combiner
+{ *(double*)lhs *= *(double*)rhs; }
+
+// double:+ no need for init/fini callbacks
+void __red_dbl_add_comb(void *lhs, void *rhs) // combiner
+{ *(double*)lhs += *(double*)rhs; }
+
+// ==============================
+
+void calc_serial(int *pi, long long *pj, double *px, long long *pk, double *py)
+{
+ for( int l = 0; l < N; ++l ) {
+ *pi += l;
+ if( l%2 )
+ *px *= 1.0 / (l + 1);
+ else
+ *px *= (l + 1);
+ }
+ for( int l = 0; l < N; ++l ) {
+ *pj += l;
+ *pk -= l;
+ *py += (double)l;
+ if( l%2 )
+ *px *= 1.0 / (l + 1);
+ else
+ *px *= (l + 1);
+
+ *pi -= l;
+ *pk -= l;
+ *py += (double)l;
+
+ *pj += l;
+ if( l%2 )
+ *px *= 1.0 / (l + 1);
+ else
+ *px *= (l + 1);
+ }
+ for( int l = 0; l < N; ++l ) {
+ *pj += l;
+ }
+}
+
+//------------------------------------------------
+// Test case
+int main()
+{
+ int nthreads = omp_get_max_threads();
+ int err = 0;
+ void** ptrs = (void**)malloc(nthreads*sizeof(void*));
+
+ // user's code ======================================
+ // variables for serial calculations:
+ int is = 3;
+ long long js = -9999999;
+ double xs = 99999.0;
+ long long ks = 99999999;
+ double ys = -99999999.0;
+ // variables for parallel calculations:
+ int ip = 3;
+ long long jp = -9999999;
+ double xp = 99999.0;
+ long long kp = 99999999;
+ double yp = -99999999.0;
+
+ calc_serial(&is, &js, &xs, &ks, &ys);
+ // ==================================================
+ for (int i = 0; i < nthreads; ++i)
+ ptrs[i] = NULL;
+ #pragma omp parallel
+ {
+ #pragma omp single nowait
+ {
+ // outer taskgroup reduces (i,j,x)
+ #pragma omp taskgroup // task_reduction(+:i,j) task_reduction(*:x)
+ {
+ _task_red_item_t red_data[3];
+ red_data[0].shar = &ip;
+ red_data[0].size = sizeof(ip);
+ red_data[0].f_init = NULL; // RTL will zero thread-specific objects
+ red_data[0].f_fini = NULL; // no destructors needed
+ red_data[0].f_comb = (void*)&__red_int_add_comb;
+ red_data[0].flags = FLG;
+ red_data[1].shar = &jp;
+ red_data[1].size = sizeof(jp);
+ red_data[1].f_init = NULL; // RTL will zero thread-specific objects
+ red_data[1].f_fini = NULL; // no destructors needed
+ red_data[1].f_comb = (void*)&__red_llong_add_comb;
+ red_data[1].flags = FLG;
+ red_data[2].shar = &xp;
+ red_data[2].size = sizeof(xp);
+ red_data[2].f_init = (void*)&__red_dbl_mul_init;
+ red_data[2].f_fini = NULL; // no destructors needed
+ red_data[2].f_comb = (void*)&__red_dbl_mul_comb;
+ red_data[2].flags = FLG;
+ int gtid = __kmpc_global_thread_num(NULL);
+ void* tg1 = __kmpc_task_reduction_init(gtid, 3, red_data);
+
+ for( int l = 0; l < N; l += 2 ) {
+ // 2 iterations per task to get correct x value; actually any even
+ // number of iters per task will work, otherwise x loses precision
+ #pragma omp task firstprivate(l) //in_reduction(+:i) in_reduction(*:x)
+ {
+ int gtid = __kmpc_global_thread_num(NULL);
+ int *p_ip = (int*)__kmpc_task_reduction_get_th_data(gtid, tg1, &ip);
+ double *p_xp = (double*)__kmpc_task_reduction_get_th_data(
+ gtid, tg1, &xp);
+ if (!ptrs[gtid]) ptrs[gtid] = p_xp;
+
+ // user's pseudo-code ==============================
+ *p_ip += l;
+ *p_xp *= (l + 1);
+
+ *p_ip += l + 1;
+ *p_xp *= 1.0 / (l + 2);
+ // ==================================================
+ }
+ }
+ // inner taskgroup reduces (i,k,y), i is same object as in outer one
+ #pragma omp taskgroup // task_reduction(-:i,k) task_reduction(+:y)
+ {
+ _task_red_item_t red_data[3];
+ red_data[0].shar = &ip;
+ red_data[0].size = sizeof(ip);
+ red_data[0].f_init = NULL; // RTL will zero thread-specific objects
+ red_data[0].f_fini = NULL; // no destructors needed
+ red_data[0].f_comb = (void*)&__red_int_add_comb;
+ red_data[0].flags = FLG;
+ red_data[1].shar = &kp;
+ red_data[1].size = sizeof(kp);
+ red_data[1].f_init = NULL; // RTL will zero thread-specific objects
+ red_data[1].f_fini = NULL; // no destructors needed
+ red_data[1].f_comb = (void*)&__red_llong_add_comb; // same for + and -
+ red_data[1].flags = FLG;
+ red_data[2].shar = &yp;
+ red_data[2].size = sizeof(yp);
+ red_data[2].f_init = NULL; // RTL will zero thread-specific objects
+ red_data[2].f_fini = NULL; // no destructors needed
+ red_data[2].f_comb = (void*)&__red_dbl_add_comb;
+ red_data[2].flags = FLG;
+ int gtid = __kmpc_global_thread_num(NULL);
+ void* tg2 = __kmpc_task_reduction_init(gtid, 3, red_data);
+
+ for( int l = 0; l < N; l += 2 ) {
+ #pragma omp task firstprivate(l)
+ // in_reduction(+:j,y) in_reduction(*:x) in_reduction(-:k)
+ {
+ int gtid = __kmpc_global_thread_num(NULL);
+ long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data(
+ gtid, tg1, &jp);
+ long long *p_kp = (long long*)__kmpc_task_reduction_get_th_data(
+ gtid, tg2, &kp);
+ double *p_xp = (double*)__kmpc_task_reduction_get_th_data(
+ gtid, tg1, &xp);
+ double *p_yp = (double*)__kmpc_task_reduction_get_th_data(
+ gtid, tg2, &yp);
+ // user's pseudo-code ==============================
+ *p_jp += l;
+ *p_kp -= l;
+ *p_yp += (double)l;
+ *p_xp *= (l + 1);
+
+ *p_jp += l + 1;
+ *p_kp -= l + 1;
+ *p_yp += (double)(l + 1);
+ *p_xp *= 1.0 / (l + 2);
+ // =================================================
+{
+ // the following code is here just to check __kmpc_task_reduction_get_th_data:
+ int tid = omp_get_thread_num();
+ void *addr1;
+ void *addr2;
+ addr1 = __kmpc_task_reduction_get_th_data(gtid, tg1, &xp); // from shared
+ addr2 = __kmpc_task_reduction_get_th_data(gtid, tg1, addr1); // from private
+ if (addr1 != addr2) {
+ #pragma omp atomic
+ ++err;
+ printf("Wrong thread-specific addresses %d s:%p p:%p\n", tid, addr1, addr2);
+ }
+ // from neighbour w/o taskgroup (should start lookup from current tg2)
+ if (tid > 0) {
+ if (ptrs[tid-1]) {
+ addr2 = __kmpc_task_reduction_get_th_data(gtid, NULL, ptrs[tid-1]);
+ if (addr1 != addr2) {
+ #pragma omp atomic
+ ++err;
+ printf("Wrong thread-specific addresses %d s:%p n:%p\n",
+ tid, addr1, addr2);
+ }
+ }
+ } else {
+ if (ptrs[nthreads-1]) {
+ addr2 = __kmpc_task_reduction_get_th_data(gtid, NULL, ptrs[nthreads-1]);
+ if (addr1 != addr2) {
+ #pragma omp atomic
+ ++err;
+ printf("Wrong thread-specific addresses %d s:%p n:%p\n",
+ tid, addr1, addr2);
+ }
+ }
+ }
+ // ----------------------------------------------
+}
+ }
+ #pragma omp task firstprivate(l)
+ // in_reduction(+:y) in_reduction(-:i,k)
+ {
+ int gtid = __kmpc_global_thread_num(NULL);
+ int *p_ip = (int*)__kmpc_task_reduction_get_th_data(
+ gtid, tg2, &ip);
+ long long *p_kp = (long long*)__kmpc_task_reduction_get_th_data(
+ gtid, tg2, &kp);
+ double *p_yp = (double*)__kmpc_task_reduction_get_th_data(
+ gtid, tg2, &yp);
+
+ // user's pseudo-code ==============================
+ *p_ip -= l;
+ *p_kp -= l;
+ *p_yp += (double)l;
+
+ *p_ip -= l + 1;
+ *p_kp -= l + 1;
+ *p_yp += (double)(l + 1);
+ // =================================================
+ }
+ #pragma omp task firstprivate(l)
+ // in_reduction(+:j) in_reduction(*:x)
+ {
+ int gtid = __kmpc_global_thread_num(NULL);
+ long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data(
+ gtid, tg1, &jp);
+ double *p_xp = (double*)__kmpc_task_reduction_get_th_data(
+ gtid, tg1, &xp);
+ // user's pseudo-code ==============================
+ *p_jp += l;
+ *p_xp *= (l + 1);
+
+ *p_jp += l + 1;
+ *p_xp *= 1.0 / (l + 2);
+ // =================================================
+ }
+ }
+ } // inner reduction
+
+ for( int l = 0; l < N; l += 2 ) {
+ #pragma omp task firstprivate(l) // in_reduction(+:j)
+ {
+ int gtid = __kmpc_global_thread_num(NULL);
+ long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data(
+ gtid, tg1, &jp);
+ // user's pseudo-code ==============================
+ *p_jp += l;
+ *p_jp += l + 1;
+ // =================================================
+ }
+ }
+ } // outer reduction
+ } // end single
+ } // end parallel
+ // check results
+#if _DEBUG
+ printf("reduction flags = %u\n", FLG);
+#endif
+ if (ip == is && jp == js && ks == kp &&
+ fabs(xp - xs) < 0.01 && fabs(yp - ys) < 0.01)
+ printf("passed\n");
+ else
+ printf("failed,\n ser:(%d %lld %f %lld %f)\n par:(%d %lld %f %lld %f)\n",
+ is, js, xs, ks, ys,
+ ip, jp, xp, kp, yp);
+ return 0;
+}
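The emulation above hand-codes what a compiler emits for the task_reduction/in_reduction clauses. For comparison, a much smaller directive-level sketch of just one additive reduction, assuming a compiler that already implements these OpenMP 5.0 clauses (illustrative, not an additional test in this directory):

    #include <stdio.h>

    #define N 10000

    int main(void) {
      long long i = 0;
      #pragma omp parallel
      #pragma omp single
      #pragma omp taskgroup task_reduction(+: i)
      {
        for (int l = 0; l < N; ++l) {
          #pragma omp task firstprivate(l) in_reduction(+: i)
          i += l;                   // adds into a thread-specific copy of i
        }
      }                             // copies are combined when the taskgroup ends
      printf("i = %lld (expected %lld)\n", i, (long long)N * (N - 1) / 2);
      return 0;
    }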
diff --git a/final/runtime/test/tasking/kmp_taskloop.c b/final/runtime/test/tasking/kmp_taskloop.c
new file mode 100644
index 0000000..4b13793
--- /dev/null
+++ b/final/runtime/test/tasking/kmp_taskloop.c
@@ -0,0 +1,159 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+#include <stdio.h>
+#include <omp.h>
+#include "omp_my_sleep.h"
+
+#define N 4
+#define GRAIN 10
+#define STRIDE 3
+
+// globals
+int th_counter[N];
+int counter;
+
+
+// Compiler-generated code (emulation)
+typedef struct ident {
+ void* dummy;
+} ident_t;
+
+typedef struct shar {
+ int(*pth_counter)[N];
+ int *pcounter;
+ int *pj;
+} *pshareds;
+
+typedef struct task {
+ pshareds shareds;
+ int(* routine)(int,struct task*);
+ int part_id;
+// privates:
+ unsigned long long lb; // library always uses ULONG
+ unsigned long long ub;
+ int st;
+ int last;
+ int i;
+ int j;
+ int th;
+} *ptask, kmp_task_t;
+
+typedef int(* task_entry_t)( int, ptask );
+
+void
+__task_dup_entry(ptask task_dst, ptask task_src, int lastpriv)
+{
+// setup lastprivate flag
+ task_dst->last = lastpriv;
+// could be constructor calls here...
+}
+
+
+// OpenMP RTL interfaces
+typedef unsigned long long kmp_uint64;
+typedef long long kmp_int64;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void
+__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+ int nogroup, int sched, kmp_int64 grainsize, void *task_dup );
+ptask
+__kmpc_omp_task_alloc( ident_t *loc, int gtid, int flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ task_entry_t task_entry );
+void __kmpc_atomic_fixed4_add(void *id_ref, int gtid, int * lhs, int rhs);
+int __kmpc_global_thread_num(void *id_ref);
+#ifdef __cplusplus
+}
+#endif
+
+
+// User's code
+int task_entry(int gtid, ptask task)
+{
+ pshareds pshar = task->shareds;
+ for( task->i = task->lb; task->i <= (int)task->ub; task->i += task->st ) {
+ task->th = omp_get_thread_num();
+ __kmpc_atomic_fixed4_add(NULL,gtid,pshar->pcounter,1);
+ __kmpc_atomic_fixed4_add(NULL,gtid,&((*pshar->pth_counter)[task->th]),1);
+ task->j = task->i;
+ }
+ my_sleep( 0.1 ); // sleep 100 ms in order to allow other threads to steal tasks
+ if( task->last ) {
+ *(pshar->pj) = task->j; // lastprivate
+ }
+ return 0;
+}
+
+int main()
+{
+ int i, j, gtid = __kmpc_global_thread_num(NULL);
+ ptask task;
+ pshareds psh;
+ omp_set_dynamic(0);
+ counter = 0;
+ for( i=0; i<N; ++i )
+ th_counter[i] = 0;
+ #pragma omp parallel num_threads(N)
+ {
+ #pragma omp master
+ {
+ int gtid = __kmpc_global_thread_num(NULL);
+/*
+ * This is what the OpenMP runtime calls correspond to:
+ #pragma omp taskloop num_tasks(N) lastprivate(j)
+ for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE )
+ {
+ int th = omp_get_thread_num();
+ #pragma omp atomic
+ counter++;
+ #pragma omp atomic
+ th_counter[th]++;
+ j = i;
+ }
+*/
+ task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct task),sizeof(struct shar),&task_entry);
+ psh = task->shareds;
+ psh->pth_counter = &th_counter;
+ psh->pcounter = &counter;
+ psh->pj = &j;
+ task->lb = 0;
+ task->ub = N*GRAIN*STRIDE-2;
+ task->st = STRIDE;
+
+ __kmpc_taskloop(
+ NULL, // location
+ gtid, // gtid
+ task, // task structure
+ 1, // if clause value
+ &task->lb, // lower bound
+ &task->ub, // upper bound
+ STRIDE, // loop increment
+ 0, // 1 if nogroup specified
+ 2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
+ N, // schedule value (ignored for type 0)
+ (void*)&__task_dup_entry // tasks duplication routine
+ );
+ } // end master
+ } // end parallel
+// check results
+ if( j != N*GRAIN*STRIDE-STRIDE ) {
+ printf("Error in lastprivate, %d != %d\n",j,N*GRAIN*STRIDE-STRIDE);
+ return 1;
+ }
+ if( counter != N*GRAIN ) {
+ printf("Error, counter %d != %d\n",counter,N*GRAIN);
+ return 1;
+ }
+ for( i=0; i<N; ++i ) {
+ if( th_counter[i] % GRAIN ) {
+ printf("Error, th_counter[%d] = %d\n",i,th_counter[i]);
+ return 1;
+ }
+ }
+ printf("passed\n");
+ return 0;
+}
diff --git a/final/runtime/test/tasking/nested_parallel_tasking.c b/final/runtime/test/tasking/nested_parallel_tasking.c
new file mode 100644
index 0000000..4374d6e
--- /dev/null
+++ b/final/runtime/test/tasking/nested_parallel_tasking.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <omp.h>
+
+/*
+ * This test would hang when the nesting level, instead of the active level,
+ * was used to push the task state.
+ */
+
+int main()
+{
+ // If num_threads is changed to a value greater than 1, then the test passes
+ #pragma omp parallel num_threads(1)
+ {
+ #pragma omp parallel
+ printf("Hello World from thread %d\n", omp_get_thread_num());
+ }
+
+ printf("omp_num_threads: %d\n", omp_get_max_threads());
+
+ #pragma omp parallel
+ {
+ #pragma omp master
+ #pragma omp task default(none)
+ {
+ printf("%d is executing this task\n", omp_get_thread_num());
+ }
+ }
+
+ printf("pass\n");
+ return 0;
+}
diff --git a/final/runtime/test/tasking/nested_task_creation.c b/final/runtime/test/tasking/nested_task_creation.c
new file mode 100644
index 0000000..c7c25fc
--- /dev/null
+++ b/final/runtime/test/tasking/nested_task_creation.c
@@ -0,0 +1,35 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <omp.h>
+#include "omp_my_sleep.h"
+
+/*
+ * This test creates tasks that themselves create a new task.
+ * The runtime has to take care that they are correctly freed.
+ */
+
+int main()
+{
+ #pragma omp task
+ {
+ #pragma omp task
+ {
+ my_sleep( 0.1 );
+ }
+ }
+
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp single
+ #pragma omp task
+ {
+ #pragma omp task
+ {
+ my_sleep( 0.1 );
+ }
+ }
+ }
+
+ printf("pass\n");
+ return 0;
+}
diff --git a/final/runtime/test/tasking/omp_task.c b/final/runtime/test/tasking/omp_task.c
new file mode 100644
index 0000000..c534abe
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task.c
@@ -0,0 +1,52 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_task()
+{
+ int tids[NUM_TASKS];
+ int i;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ /* First we have to store the value of the loop index in a new variable
+ * that is private to each task; otherwise the index would be overwritten
+ * if executing the task takes longer than advancing the loop to its next
+ * iteration.
+ */
+ int myi;
+ myi = i;
+ #pragma omp task
+ {
+ my_sleep (SLEEPTIME);
+ tids[myi] = omp_get_thread_num();
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /*end of parallel */
+
+ /* Now we check whether more than one thread executed the tasks. */
+ for (i = 1; i < NUM_TASKS; i++) {
+ if (tids[0] != tids[i])
+ return 1;
+ }
+ return 0;
+} /* end of test_omp_task */
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_task()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
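omp_task.c and the following tests rely on omp_testsuite.h for the constants NUM_TASKS, REPETITIONS, LOOPCOUNT and SLEEPTIME, and on omp_my_sleep.h for my_sleep(); neither header is part of this diff. Illustrative stand-in definitions, under the assumption that the constants are plain macros (the real header's values may differ):

    // Hypothetical stand-ins for the constants supplied by omp_testsuite.h.
    #define NUM_TASKS   25      // number of explicit tasks generated per test
    #define REPETITIONS 10      // how often main() repeats each test function
    #define LOOPCOUNT   1000    // trip count of the inner summation loops
    #define SLEEPTIME   0.01    // seconds handed to my_sleep()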
diff --git a/final/runtime/test/tasking/omp_task_final.c b/final/runtime/test/tasking/omp_task_final.c
new file mode 100644
index 0000000..b531af6
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_final.c
@@ -0,0 +1,65 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_task_final()
+{
+ int tids[NUM_TASKS];
+ int includedtids[NUM_TASKS];
+ int i;
+ int error = 0;
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ /* First we have to store the value of the loop index in a new variable
+ * that is private to each task; otherwise the index would be overwritten
+ * if executing the task takes longer than advancing the loop to its next
+ * iteration.
+ */
+ int myi;
+ myi = i;
+
+ #pragma omp task final(i>=10)
+ {
+ tids[myi] = omp_get_thread_num();
+ /* we generate included tasks for final tasks */
+ if(myi >= 10) {
+ int included = myi;
+ #pragma omp task
+ {
+ my_sleep (SLEEPTIME);
+ includedtids[included] = omp_get_thread_num();
+ } /* end of omp included task of the final task */
+ my_sleep (SLEEPTIME);
+ } /* end of if it is a final task*/
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /*end of parallel */
+
+ /* Now we check that each final task and its included task were executed by the same thread. */
+ for (i = 10; i < NUM_TASKS; i++) {
+ if (tids[i] != includedtids[i]) {
+ error++;
+ }
+ }
+ return (error==0);
+} /* end of test_omp_task_final */
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_task_final()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
+
diff --git a/final/runtime/test/tasking/omp_task_firstprivate.c b/final/runtime/test/tasking/omp_task_firstprivate.c
new file mode 100644
index 0000000..d1f7c35
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_firstprivate.c
@@ -0,0 +1,51 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int test_omp_task_firstprivate()
+{
+ int i;
+ int sum = 1234;
+ int known_sum;
+ int result = 0; /* counts the wrong sums from tasks */
+
+ known_sum = 1234 + (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ #pragma omp task firstprivate(sum)
+ {
+ int j;
+ for (j = 0; j <= LOOPCOUNT; j++) {
+ #pragma omp flush
+ sum += j;
+ }
+
+ /* check if calculated sum was right */
+ if (sum != known_sum) {
+ #pragma omp critical
+ { result++; }
+ }
+ } /* omp task */
+ } /* for loop */
+ } /* omp single */
+ } /* omp parallel */
+ return (result == 0);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_task_firstprivate()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/tasking/omp_task_if.c b/final/runtime/test/tasking/omp_task_if.c
new file mode 100644
index 0000000..8b4728e
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_if.c
@@ -0,0 +1,43 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_task_if()
+{
+ int condition_false;
+ int count;
+ int result;
+
+ count=0;
+ condition_false = (count == 1);
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ #pragma omp task if (condition_false) shared(count, result)
+ {
+ my_sleep (SLEEPTIME);
+ #pragma omp critical
+ result = (0 == count);
+ } /* end of omp task */
+ #pragma omp critical
+ count = 1;
+ } /* end of single */
+ } /*end of parallel */
+ return result;
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_task_if()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/tasking/omp_task_imp_firstprivate.c b/final/runtime/test/tasking/omp_task_imp_firstprivate.c
new file mode 100644
index 0000000..905ab9a
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_imp_firstprivate.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Check that a variable that is firstprivate on the parallel region is implicitly firstprivate in tasks created inside it */
+int test_omp_task_imp_firstprivate()
+{
+ int i=5;
+ int k = 0;
+ int result = 0;
+ int task_result = 1;
+ #pragma omp parallel firstprivate(i)
+ {
+ #pragma omp single
+ {
+ for (k = 0; k < NUM_TASKS; k++) {
+ #pragma omp task shared(result , task_result)
+ {
+ int j;
+ //check if i is private
+ if(i != 5)
+ task_result = 0;
+ for(j = 0; j < NUM_TASKS; j++)
+ i++;
+ //this should be firstprivate implicitly
+ }
+ }
+ #pragma omp taskwait
+ result = (task_result && i==5);
+ }
+ }
+ return result;
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_task_imp_firstprivate()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/tasking/omp_task_priority.c b/final/runtime/test/tasking/omp_task_priority.c
new file mode 100644
index 0000000..7b62360
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_priority.c
@@ -0,0 +1,22 @@
+// RUN: %libomp-compile && env OMP_MAX_TASK_PRIORITY=42 %libomp-run
+// Test OMP 4.5 task priorities
+// Currently only the API function and environment variable parsing are implemented.
+// The RUN line sets the environment variable OMP_MAX_TASK_PRIORITY=42, which is checked below.
+#include <stdio.h>
+#include <omp.h>
+
+int main (void) {
+ int passed;
+
+ passed = (omp_get_max_task_priority() == 42);
+ printf("Got %d\n", omp_get_max_task_priority());
+
+ if (passed) {
+ printf("passed\n");
+ return 0;
+ }
+
+ printf("failed\n");
+ return 1;
+}
+
diff --git a/final/runtime/test/tasking/omp_task_private.c b/final/runtime/test/tasking/omp_task_private.c
new file mode 100644
index 0000000..7a93716
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_private.c
@@ -0,0 +1,53 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Check that a variable declared private in a task is indeed private to each task */
+int test_omp_task_private()
+{
+ int i;
+ int known_sum;
+ int sum = 0;
+ int result = 0; /* counts the wrong sums from tasks */
+
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ #pragma omp task private(sum) shared(result, known_sum)
+ {
+ int j;
+ //if sum is private, initialize to 0
+ sum = 0;
+ for (j = 0; j <= LOOPCOUNT; j++) {
+ #pragma omp flush
+ sum += j;
+ }
+ /* check if calculated sum was right */
+ if (sum != known_sum) {
+ #pragma omp critical
+ result++;
+ }
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /* end of parallel*/
+ return (result == 0);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_task_private()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/tasking/omp_task_shared.c b/final/runtime/test/tasking/omp_task_shared.c
new file mode 100644
index 0000000..0304026
--- /dev/null
+++ b/final/runtime/test/tasking/omp_task_shared.c
@@ -0,0 +1,41 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Check that a variable from the enclosing scope is shared among all tasks */
+int test_omp_task_imp_shared()
+{
+ int i;
+ int k = 0;
+ int result = 0;
+ i=0;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ for (k = 0; k < NUM_TASKS; k++) {
+ #pragma omp task shared(i)
+ {
+ #pragma omp atomic
+ i++;
+ //this should be shared implicitly
+ }
+ }
+ }
+ result = i;
+ return ((result == NUM_TASKS));
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_task_imp_shared()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/tasking/omp_taskloop_grainsize.c b/final/runtime/test/tasking/omp_taskloop_grainsize.c
new file mode 100644
index 0000000..0833073
--- /dev/null
+++ b/final/runtime/test/tasking/omp_taskloop_grainsize.c
@@ -0,0 +1,113 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+
+// These compilers don't support the taskloop construct
+// UNSUPPORTED: gcc-4, gcc-5, icc-16
+// GCC 6 has support for taskloops, but at least 6.3.0 is crashing on this test
+// UNSUPPORTED: gcc-6
+
+/*
+ * Test for taskloop
+ * Method: calculate how many times the iteration space is dispatched
+ * and check whether each dispatch has the requested grainsize.
+ * It is possible for two adjacent chunks to be executed by the same thread.
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#define CFDMAX_SIZE 1120
+
+int test_omp_taskloop_grainsize()
+{
+ int result = 0;
+ int i, grainsize, count, tmp_count, num_off;
+ int *tmp, *tids, *tidsArray;
+
+ tidsArray = (int *)malloc(sizeof(int) * CFDMAX_SIZE);
+ tids = tidsArray;
+
+ for (grainsize = 1; grainsize < 48; ++grainsize) {
+ fprintf(stderr, "Grainsize %d\n", grainsize);
+ count = tmp_count = num_off = 0;
+
+ for (i = 0; i < CFDMAX_SIZE; ++i) {
+ tids[i] = -1;
+ }
+
+ #pragma omp parallel shared(tids)
+ {
+ #pragma omp master
+ #pragma omp taskloop grainsize(grainsize)
+ for (i = 0; i < CFDMAX_SIZE; i++) {
+ tids[i] = omp_get_thread_num();
+ }
+ }
+
+ for (i = 0; i < CFDMAX_SIZE; ++i) {
+ if (tids[i] == -1) {
+ fprintf(stderr, " Iteration %d not touched!\n", i);
+ result++;
+ }
+ }
+
+ for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+ if (tids[i] != tids[i + 1]) {
+ count++;
+ }
+ }
+
+ tmp = (int *)malloc(sizeof(int) * (count + 1));
+ tmp[0] = 1;
+
+ for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+ if (tmp_count > count) {
+ printf("--------------------\nTestinternal Error: List too "
+ "small!!!\n--------------------\n");
+ break;
+ }
+ if (tids[i] != tids[i + 1]) {
+ tmp_count++;
+ tmp[tmp_count] = 1;
+ } else {
+ tmp[tmp_count]++;
+ }
+ }
+
+ // is the grainsize clause working?
+ int num_tasks = CFDMAX_SIZE / grainsize;
+ int multiple1 = CFDMAX_SIZE / num_tasks;
+ int multiple2 = CFDMAX_SIZE / num_tasks + 1;
+ for (i = 0; i < count; i++) {
+ // it is possible for 2 adjacent chunks to be assigned to the same thread
+ if (tmp[i] % multiple1 != 0 && tmp[i] % multiple2 != 0) {
+ num_off++;
+ }
+ }
+
+ if (num_off > 1) {
+ fprintf(stderr, " The number of bad chunks is %d\n", num_off);
+ result++;
+ } else {
+ fprintf(stderr, " Everything ok\n");
+ }
+
+ free(tmp);
+ }
+ free(tidsArray);
+ return (result==0);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for (i = 0; i < REPETITIONS; i++) {
+ if (!test_omp_taskloop_grainsize()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
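The chunk-size check in test_omp_taskloop_grainsize() accepts two lengths per grainsize because the runtime splits the iteration space as evenly as possible. For example, with CFDMAX_SIZE = 1120 and grainsize = 13, num_tasks = 1120 / 13 = 86, so chunks of 1120 / 86 = 13 or 14 iterations are both valid, and a run of iterations on one thread only counts as "bad" if its length is a multiple of neither. A small stand-alone sketch of that arithmetic (illustrative only):

    #include <stdio.h>

    #define CFDMAX_SIZE 1120

    // Print the two chunk sizes accepted for every grainsize the test uses,
    // mirroring the arithmetic in test_omp_taskloop_grainsize() above.
    int main(void) {
      for (int grainsize = 1; grainsize < 48; ++grainsize) {
        int num_tasks = CFDMAX_SIZE / grainsize;
        int multiple1 = CFDMAX_SIZE / num_tasks;
        int multiple2 = CFDMAX_SIZE / num_tasks + 1;
        printf("grainsize %2d -> num_tasks %3d, accepted chunk sizes %d or %d\n",
               grainsize, num_tasks, multiple1, multiple2);
      }
      return 0;
    }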
diff --git a/final/runtime/test/tasking/omp_taskloop_num_tasks.c b/final/runtime/test/tasking/omp_taskloop_num_tasks.c
new file mode 100644
index 0000000..7c3c704
--- /dev/null
+++ b/final/runtime/test/tasking/omp_taskloop_num_tasks.c
@@ -0,0 +1,71 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+
+// These compilers don't support the taskloop construct
+// UNSUPPORTED: gcc-4, gcc-5, icc-16
+
+/*
+ * Test for taskloop
+ * Method: calculate how many times the iteration space is dispatched
+ * and check whether the number of dispatches matches the requested number of tasks.
+ * It is possible for two adjacent chunks to be executed by the same thread.
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#define CFDMAX_SIZE 1120
+
+int test_omp_taskloop_num_tasks()
+{
+ int i;
+ int *tids;
+ int *tidsArray;
+ int count;
+ int result = 0;
+ int num_tasks;
+
+ for (num_tasks = 1; num_tasks < 120; ++num_tasks) {
+ count = 0;
+ tidsArray = (int *)malloc(sizeof(int) * CFDMAX_SIZE);
+ tids = tidsArray;
+
+ #pragma omp parallel shared(tids)
+ {
+ int i;
+ #pragma omp master
+ #pragma omp taskloop num_tasks(num_tasks)
+ for (i = 0; i < CFDMAX_SIZE; i++) {
+ tids[i] = omp_get_thread_num();
+ }
+ }
+
+ for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+ if (tids[i] != tids[i + 1]) {
+ count++;
+ }
+ }
+
+ if (count > num_tasks) {
+ fprintf(stderr, "counted too many tasks: (wanted %d, got %d)\n",
+ num_tasks, count);
+ result++;
+ }
+ }
+
+ return (result==0);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for (i = 0; i < REPETITIONS; i++) {
+ if (!test_omp_taskloop_num_tasks()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/tasking/omp_taskwait.c b/final/runtime/test/tasking/omp_taskwait.c
new file mode 100644
index 0000000..c3a0ea7
--- /dev/null
+++ b/final/runtime/test/tasking/omp_taskwait.c
@@ -0,0 +1,74 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_taskwait()
+{
+ int result1 = 0; /* Stores number of not finished tasks after the taskwait */
+ int result2 = 0; /* Stores number of wrong array elements at the end */
+ int array[NUM_TASKS];
+ int i;
+
+ /* fill array */
+ for (i = 0; i < NUM_TASKS; i++)
+ array[i] = 0;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ /* First we have to store the value of the loop index in a new variable
+ * that is private to each task; otherwise the index would be overwritten
+ * if executing the task takes longer than advancing the loop to its next
+ * iteration.
+ */
+ int myi;
+ myi = i;
+ #pragma omp task
+ {
+ my_sleep (SLEEPTIME);
+ array[myi] = 1;
+ } /* end of omp task */
+ } /* end of for */
+ #pragma omp taskwait
+ /* check if all tasks were finished */
+ for (i = 0; i < NUM_TASKS; i++)
+ if (array[i] != 1)
+ result1++;
+
+ /* generate some more tasks which now shall overwrite
+ * the values in the array */
+ for (i = 0; i < NUM_TASKS; i++) {
+ int myi;
+ myi = i;
+ #pragma omp task
+ {
+ array[myi] = 2;
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /*end of parallel */
+
+ /* final check, if all array elements contain the right values: */
+ for (i = 0; i < NUM_TASKS; i++) {
+ if (array[i] != 2)
+ result2++;
+ }
+ return ((result1 == 0) && (result2 == 0));
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_taskwait()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/tasking/omp_taskyield.c b/final/runtime/test/tasking/omp_taskyield.c
new file mode 100644
index 0000000..5bb6984
--- /dev/null
+++ b/final/runtime/test/tasking/omp_taskyield.c
@@ -0,0 +1,58 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+int test_omp_taskyield()
+{
+ int i;
+ int count = 0;
+ int start_tid[NUM_TASKS];
+ int current_tid[NUM_TASKS];
+
+ for (i=0; i< NUM_TASKS; i++) {
+ start_tid[i]=0;
+ current_tid[i]=0;
+ }
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ for (i = 0; i < NUM_TASKS; i++) {
+ int myi = i;
+ #pragma omp task untied
+ {
+ my_sleep(SLEEPTIME);
+ start_tid[myi] = omp_get_thread_num();
+ #pragma omp taskyield
+ if((start_tid[myi] %2) ==0){
+ my_sleep(SLEEPTIME);
+ current_tid[myi] = omp_get_thread_num();
+ } /*end of if*/
+ } /* end of omp task */
+ } /* end of for */
+ } /* end of single */
+ } /* end of parallel */
+ for (i=0;i<NUM_TASKS; i++) {
+ //printf("start_tid[%d]=%d, current_tid[%d]=%d\n",
+ //i, start_tid[i], i , current_tid[i]);
+ if (current_tid[i] == start_tid[i])
+ count++;
+ }
+ return (count<NUM_TASKS);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_taskyield()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}