Diffstat (limited to 'final/runtime/test/worksharing/for')
-rw-r--r--  final/runtime/test/worksharing/for/bug_set_schedule_0.c  40
-rw-r--r--  final/runtime/test/worksharing/for/kmp_doacross_check.c  62
-rw-r--r--  final/runtime/test/worksharing/for/kmp_sch_simd_guided.c  410
-rw-r--r--  final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c  221
-rw-r--r--  final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c  196
-rw-r--r--  final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c  201
-rw-r--r--  final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c  91
-rw-r--r--  final/runtime/test/worksharing/for/omp_doacross.c  60
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_bigbounds.c  70
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_collapse.c  51
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_firstprivate.c  55
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_lastprivate.c  52
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_nowait.c  77
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_ordered.c  60
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_private.c  63
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_reduction.c  339
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_schedule_auto.c  69
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c  89
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_schedule_guided.c  217
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_schedule_runtime.c  82
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_schedule_static.c  154
-rw-r--r--  final/runtime/test/worksharing/for/omp_for_schedule_static_3.c  202
-rw-r--r--  final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c  35
-rw-r--r--  final/runtime/test/worksharing/for/omp_parallel_for_if.c  42
-rw-r--r--  final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c  37
-rw-r--r--  final/runtime/test/worksharing/for/omp_parallel_for_ordered.c  64
-rw-r--r--  final/runtime/test/worksharing/for/omp_parallel_for_private.c  50
-rw-r--r--  final/runtime/test/worksharing/for/omp_parallel_for_reduction.c  266
28 files changed, 3355 insertions, 0 deletions
diff --git a/final/runtime/test/worksharing/for/bug_set_schedule_0.c b/final/runtime/test/worksharing/for/bug_set_schedule_0.c
new file mode 100644
index 0000000..889e239
--- /dev/null
+++ b/final/runtime/test/worksharing/for/bug_set_schedule_0.c
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+#include "omp_testsuite.h"
+
+/* Test that the chunk size is set to default (1) when
+ chunk size <= 0 is specified */
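+/* i.e., after omp_set_schedule(omp_sched_dynamic, 0), the schedule(runtime)
+   loop below should behave as if it were written
+       #pragma omp for schedule(dynamic, 1)
+   (illustrative restatement of the premise above, not an extra requirement) */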
+int a = 0;
+
+int test_set_schedule_0()
+{
+ int i;
+ a = 0;
+ omp_set_schedule(omp_sched_dynamic,0);
+
+ #pragma omp parallel
+ {
+ #pragma omp for schedule(runtime)
+ for(i = 0; i < 10; i++) {
+ #pragma omp atomic
+ a++;
+ if(a > 10)
+ exit(1);
+ }
+ }
+ return a==10;
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_set_schedule_0()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_doacross_check.c b/final/runtime/test/worksharing/for/kmp_doacross_check.c
new file mode 100644
index 0000000..59b61e3
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_doacross_check.c
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile-and-run
+// UNSUPPORTED: gcc
+// This test is incompatible with gcc because of the explicit call to
+// __kmpc_doacross_fini(). gcc relies on an implicit call to this function
+// when the last iteration is executed inside the GOMP_loop_*_next() functions.
+// Hence, in gcc, having the explicit call leads to __kmpc_doacross_fini()
+// being called twice.
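+//
+// For reference, the runtime-call sequence below corresponds roughly to the
+// following user-level doacross loop (illustrative sketch only; the test
+// drives the __kmpc_doacross_* entry points directly):
+//
+//   #pragma omp for ordered(1) schedule(dynamic)
+//   for (i = 1; i < N; ++i) {
+//     #pragma omp ordered depend(sink: i-1)
+//     iter[i] = iter[i-1] + 1;
+//     #pragma omp ordered depend(source)
+//   }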
+#include <stdio.h>
+
+#define N 1000
+
+struct dim {
+ long long lo; // lower
+ long long up; // upper
+ long long st; // stride
+};
+extern void __kmpc_doacross_init(void*, int, int, struct dim *);
+extern void __kmpc_doacross_wait(void*, int, long long*);
+extern void __kmpc_doacross_post(void*, int, long long*);
+extern void __kmpc_doacross_fini(void*, int);
+extern int __kmpc_global_thread_num(void*);
+
+int main()
+{
+ int i;
+ int iter[N];
+ struct dim dims;
+ for( i = 0; i < N; ++i )
+ iter[i] = 1;
+ dims.lo = 1;
+ dims.up = N-1;
+ dims.st = 1;
+ #pragma omp parallel num_threads(4)
+ {
+ int i, gtid;
+ long long vec;
+ gtid = __kmpc_global_thread_num(NULL);
+ __kmpc_doacross_init(NULL,gtid,1,&dims); // thread starts the loop
+ #pragma omp for nowait schedule(dynamic)
+ for( i = 1; i < N; ++i )
+ {
+ // runtime call corresponding to #pragma omp ordered depend(sink:i-1)
+ vec=i-1;
+ __kmpc_doacross_wait(NULL,gtid,&vec);
+ // user's code
+ iter[i] = iter[i-1] + 1;
+ // runtime call corresponding to #pragma omp ordered depend(source)
+ vec=i;
+ __kmpc_doacross_post(NULL,gtid,&vec);
+ }
+ // thread finishes the loop (should be before the loop barrier)
+ __kmpc_doacross_fini(NULL,gtid);
+ }
+ if( iter[N-1] == N ) {
+ printf("passed\n");
+ } else {
+ printf("failed %d != %d\n", iter[N-1], N);
+ return 1;
+ }
+ return 0;
+}
+
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c
new file mode 100644
index 0000000..5c6f94b
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c
@@ -0,0 +1,410 @@
+// RUN: %libomp-compile-and-run
+/*
+ Test for the 'schedule(simd:guided)' clause.
+ The compiler needs to generate dynamic dispatching and pass the schedule
+ value 46 to the OpenMP RTL. The test uses numerous loop parameter combinations.
+*/
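+/*
+  For reference, the construct this test emulates by hand is roughly:
+
+    #pragma omp for simd schedule(simd:guided, chunk)
+    for (i = lb; i <= ub; i += st)
+      ...;
+
+  which the compiler is expected to lower to __kmpc_dispatch_init_4/_8()
+  with schedule value kmp_sch_guided_simd (46), exactly as run_loop_32/64()
+  do below. (Illustrative sketch only, not part of the test.)
+*/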
+#include <stdio.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#endif
+
+// uncomment for debug diagnostics:
+//#define DEBUG
+
+#define SIMD_LEN 4
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+enum sched {
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource;
+} id;
+
+extern int __kmpc_global_thread_num(id*);
+extern void __kmpc_barrier(id*, int gtid);
+extern void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+extern void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+extern int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+extern int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+int run_loop_64(i64 loop_lb, i64 loop_ub, i64 loop_st, int loop_chunk) {
+ int err = 0;
+ static int volatile loop_sync = 0;
+ i64 lb; // Chunk lower bound
+ i64 ub; // Chunk upper bound
+ i64 st; // Chunk stride
+ int rc;
+ int tid = omp_get_thread_num();
+ int gtid = tid;
+ int last;
+#if DEBUG
+ printf("run_loop_<%d>: gtid %d, tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+ (int)sizeof(i64), gtid, tid,
+ (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk);
+#endif
+ // Don't test degenerate cases that should have been discovered by codegen
+ if (loop_st == 0)
+ return 0;
+ if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+ return 0;
+
+ __kmpc_dispatch_init_8(&loc, gtid, kmp_sch_guided_simd,
+ loop_lb, loop_ub, loop_st, loop_chunk);
+ if (tid == 0) {
+ // Let the master thread handle the chunks alone
+ int chunk; // No of current chunk
+ i64 next_lb; // Lower bound of the next chunk
+ i64 last_ub; // Upper bound of the last processed chunk
+ u64 cur; // Number of iterations in current chunk
+ u64 max; // Max allowed iterations for current chunk
+ int undersized = 0;
+
+ chunk = 0;
+ next_lb = loop_lb;
+ max = (loop_ub - loop_lb) / loop_st + 1;
+ // The first chunk can consume all iterations
+ while (__kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st)) {
+ ++ chunk;
+#if DEBUG
+ printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub);
+#endif
+ // If we got another chunk, the previous chunk was not the final one, so it must not have been undersized.
+ if (undersized) {
+ printf("Error with chunk %d\n", chunk);
+ err++;
+ }
+ // Check lower and upper bounds
+ if (lb != next_lb) {
+ printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk);
+ err++;
+ }
+ if (loop_st > 0) {
+ if (!(ub <= loop_ub)) {
+ printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk);
+ err++;
+ }
+ if (!(lb <= ub)) {
+ printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+ err++;
+ }
+ } else {
+ if (!(ub >= loop_ub)) {
+ printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk);
+ err++;
+ }
+ if (!(lb >= ub)) {
+ printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+ err++;
+ }
+ }; // if
+ // Stride should not change
+ if (!(st == loop_st)) {
+ printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk);
+ err++;
+ }
+ cur = (ub - lb) / loop_st + 1;
+ // Guided scheduling uses FP computations, so current chunk may
+ // be a bit bigger (+1) than allowed maximum
+ if (!(cur <= max + 1)) {
+ printf("Error with iter %d, %d\n", cur, max);
+ err++;
+ }
+ // Update maximum for the next chunk
+ if (cur < max)
+ max = cur;
+ next_lb = ub + loop_st;
+ last_ub = ub;
+ undersized = (cur < loop_chunk);
+ }; // while
+ // Must have at least one chunk
+ if (!(chunk > 0)) {
+ printf("Error with chunk %d\n", chunk);
+ err++;
+ }
+ // Must have the right last iteration index
+ if (loop_st > 0) {
+ if (!(last_ub <= loop_ub)) {
+ printf("Error with last1 %d, %d, ch %d\n",
+ (int)last_ub, (int)loop_ub, chunk);
+ err++;
+ }
+ if (!(last_ub + loop_st > loop_ub)) {
+ printf("Error with last2 %d, %d, %d, ch %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+ err++;
+ }
+ } else {
+ if (!(last_ub >= loop_ub)) {
+ printf("Error with last1 %d, %d, ch %d\n",
+ (int)last_ub, (int)loop_ub, chunk);
+ err++;
+ }
+ if (!(last_ub + loop_st < loop_ub)) {
+ printf("Error with last2 %d, %d, %d, ch %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+ err++;
+ }
+ }; // if
+ // Let non-master threads go
+ loop_sync = 1;
+ } else {
+ int i;
+ // Workers wait for master thread to finish, then call __kmpc_dispatch_next
+ for (i = 0; i < 1000000; ++ i) {
+ if (loop_sync != 0) {
+ break;
+ }; // if
+ }; // for i
+ while (loop_sync == 0) {
+ delay();
+ }; // while
+ // At this moment we do not have any more chunks -- all the chunks already
+ // processed by master thread
+ rc = __kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st);
+ if (rc) {
+ printf("Error return value\n");
+ err++;
+ }
+ }; // if
+
+ __kmpc_barrier(&loc, gtid);
+ if (tid == 0) {
+ loop_sync = 0; // Restore original state
+#if DEBUG
+ printf("run_loop_64(): at the end\n");
+#endif
+ }; // if
+ __kmpc_barrier(&loc, gtid);
+ return err;
+} // run_loop_64
+
+// ---------------------------------------------------------------------------
+int run_loop_32(int loop_lb, int loop_ub, int loop_st, int loop_chunk) {
+ int err = 0;
+ static int volatile loop_sync = 0;
+ int lb; // Chunk lower bound
+ int ub; // Chunk upper bound
+ int st; // Chunk stride
+ int rc;
+ int tid = omp_get_thread_num();
+ int gtid = tid;
+ int last;
+#if DEBUG
+ printf("run_loop_<%d>: gtid %d, tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+ (int)sizeof(int), gtid, tid,
+ (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk);
+#endif
+ // Don't test degenerate cases that should have been discovered by codegen
+ if (loop_st == 0)
+ return 0;
+ if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+ return 0;
+
+ __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_guided_simd,
+ loop_lb, loop_ub, loop_st, loop_chunk);
+ if (tid == 0) {
+ // Let the master thread handle the chunks alone
+ int chunk; // No of current chunk
+ int next_lb; // Lower bound of the next chunk
+ int last_ub; // Upper bound of the last processed chunk
+ u64 cur; // Number of iterations in current chunk
+ u64 max; // Max allowed iterations for current chunk
+ int undersized = 0;
+
+ chunk = 0;
+ next_lb = loop_lb;
+ max = (loop_ub - loop_lb) / loop_st + 1;
+ // The first chunk can consume all iterations
+ while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+ ++ chunk;
+#if DEBUG
+ printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub);
+#endif
+ // If we got another chunk, the previous chunk was not the final one, so it must not have been undersized.
+ if (undersized) {
+ printf("Error with chunk %d\n", chunk);
+ err++;
+ }
+ // Check lower and upper bounds
+ if (lb != next_lb) {
+ printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk);
+ err++;
+ }
+ if (loop_st > 0) {
+ if (!(ub <= loop_ub)) {
+ printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk);
+ err++;
+ }
+ if (!(lb <= ub)) {
+ printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+ err++;
+ }
+ } else {
+ if (!(ub >= loop_ub)) {
+ printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk);
+ err++;
+ }
+ if (!(lb >= ub)) {
+ printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+ err++;
+ }
+ }; // if
+ // Stride should not change
+ if (!(st == loop_st)) {
+ printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk);
+ err++;
+ }
+ cur = (ub - lb) / loop_st + 1;
+ // Guided scheduling uses FP computations, so current chunk may
+ // be a bit bigger (+1) than allowed maximum
+ if (!(cur <= max + 1)) {
+ printf("Error with iter %d, %d\n", cur, max);
+ err++;
+ }
+ // Update maximum for the next chunk
+ if (cur < max)
+ max = cur;
+ next_lb = ub + loop_st;
+ last_ub = ub;
+ undersized = (cur < loop_chunk);
+ }; // while
+ // Must have at least one chunk
+ if (!(chunk > 0)) {
+ printf("Error with chunk %d\n", chunk);
+ err++;
+ }
+ // Must have the right last iteration index
+ if (loop_st > 0) {
+ if (!(last_ub <= loop_ub)) {
+ printf("Error with last1 %d, %d, ch %d\n",
+ (int)last_ub, (int)loop_ub, chunk);
+ err++;
+ }
+ if (!(last_ub + loop_st > loop_ub)) {
+ printf("Error with last2 %d, %d, %d, ch %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+ err++;
+ }
+ } else {
+ if (!(last_ub >= loop_ub)) {
+ printf("Error with last1 %d, %d, ch %d\n",
+ (int)last_ub, (int)loop_ub, chunk);
+ err++;
+ }
+ if (!(last_ub + loop_st < loop_ub)) {
+ printf("Error with last2 %d, %d, %d, ch %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+ err++;
+ }
+ }; // if
+ // Let non-master threads go
+ loop_sync = 1;
+ } else {
+ int i;
+ // Workers wait for master thread to finish, then call __kmpc_dispatch_next
+ for (i = 0; i < 1000000; ++ i) {
+ if (loop_sync != 0) {
+ break;
+ }; // if
+ }; // for i
+ while (loop_sync == 0) {
+ delay();
+ }; // while
+ // At this moment we do not have any more chunks -- all the chunks already
+ // processed by the master thread
+ rc = __kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st);
+ if (rc) {
+ printf("Error return value\n");
+ err++;
+ }
+ }; // if
+
+ __kmpc_barrier(&loc, gtid);
+ if (tid == 0) {
+ loop_sync = 0; // Restore original state
+#if DEBUG
+ printf("run_loop<>(): at the end\n");
+#endif
+ }; // if
+ __kmpc_barrier(&loc, gtid);
+ return err;
+} // run_loop_32
+
+// ---------------------------------------------------------------------------
+int run_64(int num_th)
+{
+ int err = 0;
+#pragma omp parallel num_threads(num_th)
+ {
+ int chunk;
+ i64 st, lb, ub;
+ for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) {
+ for (st = 1; st <= 3; ++ st) {
+ for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) {
+ for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) {
+ err += run_loop_64(lb, ub, st, chunk);
+ err += run_loop_64(ub, lb, -st, chunk);
+ }; // for ub
+ }; // for lb
+ }; // for st
+ }; // for chunk
+ }
+ return err;
+} // run_64
+
+int run_32(int num_th)
+{
+ int err = 0;
+#pragma omp parallel num_threads(num_th)
+ {
+ int chunk, st, lb, ub;
+ for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) {
+ for (st = 1; st <= 3; ++ st) {
+ for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) {
+ for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) {
+ err += run_loop_32(lb, ub, st, chunk);
+ err += run_loop_32(ub, lb, -st, chunk);
+ }; // for ub
+ }; // for lb
+ }; // for st
+ }; // for chunk
+ }
+ return err;
+} // run_32
+
+// ---------------------------------------------------------------------------
+int main()
+{
+ int n, err = 0;
+ for (n = 1; n <= 4; ++ n) {
+ err += run_32(n);
+ err += run_64(n);
+ }; // for n
+ if (err)
+ printf("failed with %d errors\n", err);
+ else
+ printf("passed\n");
+ return err;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
new file mode 100644
index 0000000..bb538d1
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
@@ -0,0 +1,221 @@
+// RUN: %libomp-compile-and-run
+
+// The test checks schedule(simd:runtime)
+// in combination with omp_set_schedule()
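+//
+// For reference, the user-level form being modeled is roughly:
+//
+//   #pragma omp for simd schedule(simd:runtime)
+//   for (i = lb; i <= ub; i += st)
+//     ...;
+//
+// lowered to __kmpc_dispatch_init_4() with schedule value kmp_sch_runtime_simd
+// (47) and the simd length passed as the chunk argument, as run_loop() does
+// below. (Illustrative sketch only.)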
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched {
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource;
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*);
+ void __kmpc_barrier(id*, int gtid);
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+void
+run_loop(
+ int loop_lb, // Loop lower bound.
+ int loop_ub, // Loop upper bound.
+ int loop_st, // Loop stride.
+ int lchunk
+) {
+ static int volatile loop_sync = 0;
+ int lb; // Chunk lower bound.
+ int ub; // Chunk upper bound.
+ int st; // Chunk stride.
+ int rc;
+ int tid = omp_get_thread_num();
+ int gtid = __kmpc_global_thread_num(&loc);
+ int last;
+ int tc = (loop_ub - loop_lb) / loop_st + 1;
+ int ch;
+ int no_chunk = 0;
+ if (lchunk == 0) {
+ no_chunk = 1;
+ lchunk = 1;
+ }
+ ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+ printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+ gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+ // Don't test degenerate cases that should have been discovered by codegen.
+ if (loop_st == 0)
+ return;
+ if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+ return;
+ __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+ loop_lb, loop_ub, loop_st, SIMD_LEN);
+ {
+ // Each thread processes the chunks it gets from the dispatcher.
+ int chunk; // No of current chunk.
+ int last_ub; // Upper bound of the last processed chunk.
+ u64 cur; // Number of iterations in current chunk.
+ u64 max; // Max allowed iterations for current chunk.
+ int undersized = 0;
+ last_ub = loop_ub;
+ chunk = 0;
+ max = (loop_ub - loop_lb) / loop_st + 1;
+ // The first chunk can consume all iterations.
+ while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+ ++ chunk;
+#if _DEBUG
+ printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+ tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+ // If we got another chunk, the previous chunk was not the final one, so it must not have been undersized.
+ if (undersized)
+ printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+ if (loop_st > 0) {
+ if (!(ub <= loop_ub))
+ printf("Error with ub %d, %d, ch %d, err %d\n",
+ (int)ub, (int)loop_ub, chunk, ++err);
+ if (!(lb <= ub))
+ printf("Error with bounds %d, %d, %d, err %d\n",
+ (int)lb, (int)ub, chunk, ++err);
+ } else {
+ if (!(ub >= loop_ub))
+ printf("Error with ub %d, %d, %d, err %d\n",
+ (int)ub, (int)loop_ub, chunk, ++err);
+ if (!(lb >= ub))
+ printf("Error with bounds %d, %d, %d, err %d\n",
+ (int)lb, (int)ub, chunk, ++err);
+ }; // if
+ // Stride should not change.
+ if (!(st == loop_st))
+ printf("Error with st %d, %d, ch %d, err %d\n",
+ (int)st, (int)loop_st, chunk, ++err);
+ cur = ( ub - lb ) / loop_st + 1;
+ // Guided scheduling uses FP computations, so current chunk may
+ // be a bit bigger (+1) than allowed maximum.
+ if (!( cur <= max + 1))
+ printf("Error with iter %d, %d, err %d\n", cur, max, ++err);
+ // Update maximum for the next chunk.
+ if (last) {
+ if (!no_chunk && cur > ch)
+ printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+ (int)cur, ch, tid, ++err);
+ } else {
+ if (cur % ch)
+ printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+ chunk, (int)cur, ch, tid, ++err);
+ }
+ if (cur < max)
+ max = cur;
+ last_ub = ub;
+ undersized = (cur < ch);
+#if _DEBUG > 1
+ if (last)
+ printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+ undersized,cur,ch,tid,ub,lb,loop_st);
+#endif
+ } // while
+ // Must have the right last iteration index.
+ if (loop_st > 0) {
+ if (!(last_ub <= loop_ub))
+ printf("Error with last1 %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_ub, chunk, ++err);
+ if (last && !(last_ub + loop_st > loop_ub))
+ printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+ } else {
+ if (!(last_ub >= loop_ub))
+ printf("Error with last1 %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_ub, chunk, ++err);
+ if (last && !(last_ub + loop_st < loop_ub))
+ printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+ } // if
+ }
+ __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+int main(int argc, char *argv[])
+{
+ int chunk = 0;
+// static (no chunk)
+ omp_set_schedule(omp_sched_static,0);
+#pragma omp parallel// num_threads(num_th)
+ run_loop(0, 26, 1, chunk);
+
+// auto (chunk should be ignored)
+ omp_set_schedule(omp_sched_auto,0);
+#pragma omp parallel// num_threads(num_th)
+ run_loop(0, 26, 1, chunk);
+
+// static,1
+ chunk = 1;
+ omp_set_schedule(omp_sched_static,1);
+#pragma omp parallel// num_threads(num_th)
+ run_loop(0, 26, 1, chunk);
+
+// dynamic,1
+ omp_set_schedule(omp_sched_dynamic,1);
+#pragma omp parallel// num_threads(num_th)
+ run_loop(0, 26, 1, chunk);
+
+// guided,1
+ omp_set_schedule(omp_sched_guided,1);
+#pragma omp parallel// num_threads(num_th)
+ run_loop(0, 26, 1, chunk);
+
+// dynamic,0 - use default chunk size 1
+ omp_set_schedule(omp_sched_dynamic,0);
+#pragma omp parallel// num_threads(num_th)
+ run_loop(0, 26, 1, chunk);
+
+// guided,0 - use default chunk size 1
+ omp_set_schedule(omp_sched_guided,0);
+#pragma omp parallel// num_threads(num_th)
+ run_loop(0, 26, 1, chunk);
+
+ if (err) {
+ printf("failed, err = %d\n", err);
+ return 1;
+ } else {
+ printf("passed\n");
+ return 0;
+ }
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
new file mode 100644
index 0000000..d137831
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
@@ -0,0 +1,196 @@
+// RUN: %libomp-compile
+// RUN: env OMP_SCHEDULE=guided %libomp-run
+// RUN: env OMP_SCHEDULE=guided,1 %libomp-run 1
+// RUN: env OMP_SCHEDULE=guided,2 %libomp-run 2
+// RUN: env OMP_SCHEDULE=dynamic %libomp-run
+// RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1
+// RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2
+// RUN: env OMP_SCHEDULE=auto %libomp-run
+
+// The test checks schedule(simd:runtime)
+// in combination with OMP_SCHEDULE=guided[,chunk]
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define UBOUND 100
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched {
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource;
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*);
+ void __kmpc_barrier(id*, int gtid);
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+void
+run_loop(
+ int loop_lb, // Loop lower bound.
+ int loop_ub, // Loop upper bound.
+ int loop_st, // Loop stride.
+ int lchunk
+) {
+ static int volatile loop_sync = 0;
+ int lb; // Chunk lower bound.
+ int ub; // Chunk upper bound.
+ int st; // Chunk stride.
+ int rc;
+ int tid = omp_get_thread_num();
+ int gtid = __kmpc_global_thread_num(&loc);
+ int last;
+ int tc = (loop_ub - loop_lb) / loop_st + 1;
+ int ch;
+ int no_chunk = 0;
+ if (lchunk == 0) {
+ no_chunk = 1;
+ lchunk = 1;
+ }
+ ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+ printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+ gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+ // Don't test degenerate cases that should have been discovered by codegen.
+ if (loop_st == 0)
+ return;
+ if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+ return;
+ __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+ loop_lb, loop_ub, loop_st, SIMD_LEN);
+ {
+ // Each thread processes the chunks it gets from the dispatcher.
+ int chunk; // No of current chunk.
+ int last_ub; // Upper bound of the last processed chunk.
+ u64 cur; // Number of iterations in current chunk.
+ u64 max; // Max allowed iterations for current chunk.
+ int undersized = 0;
+ last_ub = loop_ub;
+ chunk = 0;
+ max = (loop_ub - loop_lb) / loop_st + 1;
+ // The first chunk can consume all iterations.
+ while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+ ++ chunk;
+#if _DEBUG
+ printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+ tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+ // If we got another chunk, the previous chunk was not the final one, so it must not have been undersized.
+ if (undersized)
+ printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+ if (loop_st > 0) {
+ if (!(ub <= loop_ub))
+ printf("Error with ub %d, %d, ch %d, err %d\n",
+ (int)ub, (int)loop_ub, chunk, ++err);
+ if (!(lb <= ub))
+ printf("Error with bounds %d, %d, %d, err %d\n",
+ (int)lb, (int)ub, chunk, ++err);
+ } else {
+ if (!(ub >= loop_ub))
+ printf("Error with ub %d, %d, %d, err %d\n",
+ (int)ub, (int)loop_ub, chunk, ++err);
+ if (!(lb >= ub))
+ printf("Error with bounds %d, %d, %d, err %d\n",
+ (int)lb, (int)ub, chunk, ++err);
+ }; // if
+ // Stride should not change.
+ if (!(st == loop_st))
+ printf("Error with st %d, %d, ch %d, err %d\n",
+ (int)st, (int)loop_st, chunk, ++err);
+ cur = ( ub - lb ) / loop_st + 1;
+ // Guided scheduling uses FP computations, so current chunk may
+ // be a bit bigger (+1) than allowed maximum.
+ if (!( cur <= max + 1))
+ printf("Error with iter %d, %d, err %d\n", cur, max, ++err);
+ // Update maximum for the next chunk.
+ if (!last && cur % ch)
+ printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+ chunk, (int)cur, ch, tid, ++err);
+ if (last && !no_chunk && cur > ch)
+ printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+ (int)cur, ch, tid, ++err);
+ if (cur < max)
+ max = cur;
+ last_ub = ub;
+ undersized = (cur < ch);
+#if _DEBUG > 1
+ if (last)
+ printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+ undersized,cur,ch,tid,ub,lb,loop_st);
+#endif
+ } // while
+ // Must have the right last iteration index.
+ if (loop_st > 0) {
+ if (!(last_ub <= loop_ub))
+ printf("Error with last1 %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_ub, chunk, ++err);
+ if (last && !(last_ub + loop_st > loop_ub))
+ printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+ } else {
+ if (!(last_ub >= loop_ub))
+ printf("Error with last1 %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_ub, chunk, ++err);
+ if (last && !(last_ub + loop_st < loop_ub))
+ printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+ } // if
+ }
+ __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+int main(int argc, char *argv[])
+{
+ int chunk = 0;
+ if (argc > 1) {
+ // expect chunk size as a parameter
+ chunk = atoi(argv[1]);
+ }
+#pragma omp parallel //num_threads(num_th)
+ run_loop(0, UBOUND, 1, chunk);
+ if (err) {
+ printf("failed, err = %d\n", err);
+ return 1;
+ } else {
+ printf("passed\n");
+ return 0;
+ }
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
new file mode 100644
index 0000000..4cb15d6
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
@@ -0,0 +1,201 @@
+// RUN: %libomp-compile && %libomp-run
+// RUN: %libomp-run 1 && %libomp-run 2
+
+// The test checks schedule(simd:runtime)
+// in combination with OMP_SCHEDULE=static[,chunk]
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched {
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource;
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*);
+ void __kmpc_barrier(id*, int gtid);
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+void
+run_loop(
+ int loop_lb, // Loop lower bound.
+ int loop_ub, // Loop upper bound.
+ int loop_st, // Loop stride.
+ int lchunk
+) {
+ static int volatile loop_sync = 0;
+ int lb; // Chunk lower bound.
+ int ub; // Chunk upper bound.
+ int st; // Chunk stride.
+ int rc;
+ int tid = omp_get_thread_num();
+ int gtid = __kmpc_global_thread_num(&loc);
+ int last;
+ int tc = (loop_ub - loop_lb) / loop_st + 1;
+ int ch;
+ int no_chunk = 0;
+ if (lchunk == 0) {
+ no_chunk = 1;
+ lchunk = 1;
+ }
+ ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+ printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+ gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+ // Don't test degenerate cases that should have been discovered by codegen.
+ if (loop_st == 0)
+ return;
+ if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+ return;
+ __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+ loop_lb, loop_ub, loop_st, SIMD_LEN);
+ {
+ // Each thread processes the chunks it gets from the dispatcher.
+ int chunk; // No of current chunk.
+ int last_ub; // Upper bound of the last processed chunk.
+ u64 cur; // Number of iterations in current chunk.
+ u64 max; // Max allowed iterations for current chunk.
+ int undersized = 0;
+ last_ub = loop_ub;
+ chunk = 0;
+ max = (loop_ub - loop_lb) / loop_st + 1;
+ // The first chunk can consume all iterations.
+ while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+ ++ chunk;
+#if _DEBUG
+ printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+ tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+ // If we got another chunk, the previous chunk was not the final one, so it must not have been undersized.
+ if (undersized)
+ printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+ if (loop_st > 0) {
+ if (!(ub <= loop_ub))
+ printf("Error with ub %d, %d, ch %d, err %d\n",
+ (int)ub, (int)loop_ub, chunk, ++err);
+ if (!(lb <= ub))
+ printf("Error with bounds %d, %d, %d, err %d\n",
+ (int)lb, (int)ub, chunk, ++err);
+ } else {
+ if (!(ub >= loop_ub))
+ printf("Error with ub %d, %d, %d, err %d\n",
+ (int)ub, (int)loop_ub, chunk, ++err);
+ if (!(lb >= ub))
+ printf("Error with bounds %d, %d, %d, err %d\n",
+ (int)lb, (int)ub, chunk, ++err);
+ }; // if
+ // Stride should not change.
+ if (!(st == loop_st))
+ printf("Error with st %d, %d, ch %d, err %d\n",
+ (int)st, (int)loop_st, chunk, ++err);
+ cur = ( ub - lb ) / loop_st + 1;
+ // Guided scheduling uses FP computations, so current chunk may
+ // be a bit bigger (+1) than allowed maximum.
+ if (!( cur <= max + 1))
+ printf("Error with iter %d, %d, err %d\n", cur, max, ++err);
+ // Update maximum for the next chunk.
+ if (last) {
+ if (!no_chunk && cur > ch)
+ printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+ (int)cur, ch, tid, ++err);
+ } else {
+ if (cur % ch)
+ printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+ chunk, (int)cur, ch, tid, ++err);
+ }
+ if (cur < max)
+ max = cur;
+ last_ub = ub;
+ undersized = (cur < ch);
+#if _DEBUG > 1
+ if (last)
+ printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+ undersized,cur,ch,tid,ub,lb,loop_st);
+#endif
+ } // while
+ // Must have the right last iteration index.
+ if (loop_st > 0) {
+ if (!(last_ub <= loop_ub))
+ printf("Error with last1 %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_ub, chunk, ++err);
+ if (last && !(last_ub + loop_st > loop_ub))
+ printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+ } else {
+ if (!(last_ub >= loop_ub))
+ printf("Error with last1 %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_ub, chunk, ++err);
+ if (last && !(last_ub + loop_st < loop_ub))
+ printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+ (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+ } // if
+ }
+ __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+int main(int argc, char *argv[])
+{
+ int chunk = 0;
+ if (argc > 1) {
+ char *buf = malloc(8 + strlen(argv[1]));
+ // expect chunk size as a parameter
+ chunk = atoi(argv[1]);
+ strcpy(buf,"static,");
+ strcat(buf,argv[1]);
+ seten("OMP_SCHEDULE",buf,1);
+ printf("Testing schedule(simd:%s)\n", buf);
+ free(buf);
+ } else {
+ seten("OMP_SCHEDULE","static",1);
+ printf("Testing schedule(simd:static)\n");
+ }
+#pragma omp parallel// num_threads(num_th)
+ run_loop(0, 26, 1, chunk);
+ if (err) {
+ printf("failed, err = %d\n", err);
+ return 1;
+ } else {
+ printf("passed\n");
+ return 0;
+ }
+}
diff --git a/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c b/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c
new file mode 100644
index 0000000..a6378fe
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c
@@ -0,0 +1,91 @@
+// RUN: %libomp-compile && %libomp-run 7
+// RUN: %libomp-run 0 && %libomp-run -1
+// RUN: %libomp-run 1 && %libomp-run 2 && %libomp-run 5
+// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run 7
+// RUN: %libomp-run 1 && %libomp-run 2 && %libomp-run 5
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+#define INCR 7
+#define MY_MAX 200
+#define MY_MIN -200
+#ifndef MY_SCHEDULE
+# define MY_SCHEDULE dynamic
+#endif
+
+int num_disp_buffers, num_loops;
+int a, b, a_known_value, b_known_value;
+
+int test_kmp_set_disp_num_buffers()
+{
+ int success = 1;
+ a = 0;
+ b = 0;
+ // run many small dynamic loops to stress the dispatch buffer system
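+ // (With nowait, threads may be working in different loop instances at the
+ // same time, which is what exercises having several dispatch buffers in use.)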
+ #pragma omp parallel
+ {
+ int i,j;
+ for (j = 0; j < num_loops; j++) {
+ #pragma omp for schedule(MY_SCHEDULE) nowait
+ for (i = MY_MIN; i < MY_MAX; i+=INCR) {
+ #pragma omp atomic
+ a++;
+ }
+ #pragma omp for schedule(MY_SCHEDULE) nowait
+ for (i = MY_MAX; i >= MY_MIN; i-=INCR) {
+ #pragma omp atomic
+ b++;
+ }
+ }
+ }
+ // detect failure
+ if (a != a_known_value || b != b_known_value) {
+ success = 0;
+ printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value,
+ b, b_known_value);
+ }
+ return success;
+}
+
+int main(int argc, char** argv)
+{
+ int i,j;
+ int num_failed=0;
+
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s num_disp_buffers\n", argv[0]);
+ exit(1);
+ }
+
+ // set the number of dispatch buffers
+ num_disp_buffers = atoi(argv[1]);
+ kmp_set_disp_num_buffers(num_disp_buffers);
+
+ // figure out the known values to compare with calculated result
+ a_known_value = 0;
+ b_known_value = 0;
+
+ // if specified to use bad num_disp_buffers set num_loops
+ // to something reasonable
+ if (num_disp_buffers <= 0)
+ num_loops = 10;
+ else
+ num_loops = num_disp_buffers*10;
+
+ for (j = 0; j < num_loops; j++) {
+ for (i = MY_MIN; i < MY_MAX; i+=INCR)
+ a_known_value++;
+ for (i = MY_MAX; i >= MY_MIN; i-=INCR)
+ b_known_value++;
+ }
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_kmp_set_disp_num_buffers()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_doacross.c b/final/runtime/test/worksharing/for/omp_doacross.c
new file mode 100644
index 0000000..4187112
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_doacross.c
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile-and-run
+// XFAIL: gcc-4, gcc-5, clang-3.7, clang-3.8, icc-15, icc-16
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#ifndef N
+#define N 750
+#endif
+
+int test_doacross() {
+ int i, j;
+ // Allocate and zero out the matrix
+ int *m = (int *)malloc(sizeof(int) * N * N);
+ for (i = 0; i < N; ++i) {
+ for (j = 0; j < N; ++j) {
+ m[i * N + j] = 0;
+ }
+ }
+ // Have first row and column be 0, 1, 2, 3, etc.
+ for (i = 0; i < N; ++i)
+ m[i * N] = i;
+ for (j = 0; j < N; ++j)
+ m[j] = j;
+ // Perform wavefront which results in matrix:
+ // 0 1 2 3 4
+ // 1 2 3 4 5
+ // 2 3 4 5 6
+ // 3 4 5 6 7
+ // 4 5 6 7 8
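+ // Each element ends up as m[row][col] = row + col, so the bottom-right
+ // element checked at the end should equal 2 * (N - 1).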
+ #pragma omp parallel shared(m)
+ {
+ int row, col;
+ #pragma omp for ordered(2)
+ for (row = 1; row < N; ++row) {
+ for (col = 1; col < N; ++col) {
+ #pragma omp ordered depend(sink : row - 1, col) depend(sink : row, col - 1)
+ m[row * N + col] = m[(row - 1) * N + col] + m[row * N + (col - 1)] -
+ m[(row - 1) * N + (col - 1)];
+ #pragma omp ordered depend(source)
+ }
+ }
+ }
+
+ // Check the bottom right element to see if iteration dependencies were held
+ int retval = (m[(N - 1) * N + N - 1] == 2 * (N - 1));
+ free(m);
+ return retval;
+}
+
+int main(int argc, char **argv) {
+ int i;
+ int num_failed = 0;
+ for (i = 0; i < REPETITIONS; i++) {
+ if (!test_doacross()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_bigbounds.c b/final/runtime/test/worksharing/for/omp_for_bigbounds.c
new file mode 100644
index 0000000..901d760
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_bigbounds.c
@@ -0,0 +1,70 @@
+// RUN: %libomp-compile -DMY_SCHEDULE=static && %libomp-run
+// RUN: %libomp-compile -DMY_SCHEDULE=dynamic && %libomp-run
+// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run
+
+// Only works with Intel Compiler since at least version 15.0
+// XFAIL: gcc, clang
+
+/*
+ * Test that large bounds are handled properly and calculations of
+ * loop iterations don't accidentally overflow
+ */
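+/* With the constants below, each loop spans roughly 4.1e9 values (from
+   INT_MIN/INT_MAX to +/-2000000000) in steps of 50000000, i.e. about 83
+   iterations; the span itself does not fit in a signed 32-bit int, which is
+   what the runtime's trip-count computation has to handle. */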
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+#define INCR 50000000
+#define MY_MAX 2000000000
+#define MY_MIN -2000000000
+#ifndef MY_SCHEDULE
+# define MY_SCHEDULE static
+#endif
+
+int a, b, a_known_value, b_known_value;
+
+int test_omp_for_bigbounds()
+{
+ a = 0;
+ b = 0;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for schedule(MY_SCHEDULE)
+ for (i = INT_MIN; i < MY_MAX; i+=INCR) {
+ #pragma omp atomic
+ a++;
+ }
+ #pragma omp for schedule(MY_SCHEDULE)
+ for (i = INT_MAX; i >= MY_MIN; i-=INCR) {
+ #pragma omp atomic
+ b++;
+ }
+ }
+ printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, b, b_known_value);
+ return (a == a_known_value && b == b_known_value);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ a_known_value = 0;
+ for (i = INT_MIN; i < MY_MAX; i+=INCR) {
+ a_known_value++;
+ }
+
+ b_known_value = 0;
+ for (i = INT_MAX; i >= MY_MIN; i-=INCR) {
+ b_known_value++;
+ }
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_bigbounds()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_collapse.c b/final/runtime/test/worksharing/for/omp_for_collapse.c
new file mode 100644
index 0000000..a08086d
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_collapse.c
@@ -0,0 +1,51 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Utility function to check that i is increasing monotonically
+ with each call */
+static int check_i_islarger (int i)
+{
+ static int last_i;
+ int islarger;
+ if (i==1)
+ last_i=0;
+ islarger = ((i >= last_i)&&(i - last_i<=1));
+ last_i = i;
+ return (islarger);
+}
+
+int test_omp_for_collapse()
+{
+ int is_larger = 1;
+
+ #pragma omp parallel
+ {
+ int i,j;
+ int my_islarger = 1;
+ #pragma omp for private(i,j) schedule(static,1) collapse(2) ordered
+ for (i = 1; i < 100; i++) {
+ for (j =1; j <100; j++) {
+ #pragma omp ordered
+ my_islarger = check_i_islarger(i)&&my_islarger;
+ }
+ }
+ #pragma omp critical
+ is_larger = is_larger && my_islarger;
+ }
+ return (is_larger);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_collapse()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_firstprivate.c b/final/runtime/test/worksharing/for/omp_for_firstprivate.c
new file mode 100644
index 0000000..6c4121c
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_firstprivate.c
@@ -0,0 +1,55 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int sum1;
+#pragma omp threadprivate(sum1)
+
+int test_omp_for_firstprivate()
+{
+ int sum;
+ int sum0;
+ int known_sum;
+ int threadsnum;
+
+ sum = 0;
+ sum0 = 12345;
+ sum1 = 0;
+
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ threadsnum=omp_get_num_threads();
+ }
+ /* sum0 = 0; */
+
+ int i;
+ #pragma omp for firstprivate(sum0)
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ sum0 = sum0 + i;
+ sum1 = sum0;
+ } /* end of for */
+
+ #pragma omp critical
+ {
+ sum = sum + sum1;
+ } /* end of critical */
+ } /* end of parallel */
+ known_sum = 12345 * threadsnum + (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ return (known_sum == sum);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_firstprivate()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_lastprivate.c b/final/runtime/test/worksharing/for/omp_for_lastprivate.c
new file mode 100644
index 0000000..88694b8
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_lastprivate.c
@@ -0,0 +1,52 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int sum0;
+#pragma omp threadprivate(sum0)
+
+int test_omp_for_lastprivate()
+{
+ int sum = 0;
+ int known_sum;
+ int i0;
+
+ i0 = -1;
+
+ #pragma omp parallel
+ {
+ sum0 = 0;
+ { /* Begin of orphaned block */
+ int i;
+ #pragma omp for schedule(static,7) lastprivate(i0)
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ sum0 = sum0 + i;
+ i0 = i;
+ } /* end of for */
+ } /* end of orphaned block */
+
+ #pragma omp critical
+ {
+ sum = sum + sum0;
+ } /* end of critical */
+ } /* end of parallel */
+
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ fprintf(stderr, "known_sum = %d , sum = %d\n",known_sum,sum);
+ fprintf(stderr, "LOOPCOUNT = %d , i0 = %d\n",LOOPCOUNT,i0);
+ return ((known_sum == sum) && (i0 == LOOPCOUNT));
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for (i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_lastprivate()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_nowait.c b/final/runtime/test/worksharing/for/omp_for_nowait.c
new file mode 100644
index 0000000..95a4775
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_nowait.c
@@ -0,0 +1,77 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+/*
+ * This test will hang if the nowait is not working properly.
+ *
+ * It relies on a thread skipping to the second for construct to
+ * release the threads in the first for construct.
+ *
+ * Also, we use static scheduling to guarantee that one
+ * thread will make it to the second for construct.
+ */
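+/*
+ * Each of the two loops below increments count once per iteration (4 + 4),
+ * so a successful run ends with count == 8.
+ */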
+volatile int release;
+volatile int count;
+
+void wait_for_release_then_increment(int rank)
+{
+ fprintf(stderr, "Thread nr %d enters first for construct"
+ " and waits.\n", rank);
+ while (release == 0);
+ #pragma omp atomic
+ count++;
+}
+
+void release_and_increment(int rank)
+{
+ fprintf(stderr, "Thread nr %d sets release to 1\n", rank);
+ release = 1;
+ #pragma omp atomic
+ count++;
+}
+
+int test_omp_for_nowait()
+{
+ release = 0;
+ count = 0;
+
+ #pragma omp parallel num_threads(4)
+ {
+ int rank;
+ int i;
+
+ rank = omp_get_thread_num();
+
+ #pragma omp for schedule(static) nowait
+ for (i = 0; i < 4; i++) {
+ if (i < 3)
+ wait_for_release_then_increment(rank);
+ else {
+ fprintf(stderr, "Thread nr %d enters first for and goes "
+ "immediately to the next for construct to release.\n", rank);
+ #pragma omp atomic
+ count++;
+ }
+ }
+
+ #pragma omp for schedule(static)
+ for (i = 0; i < 4; i++) {
+ release_and_increment(rank);
+ }
+ }
+ return (count==8);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_nowait()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_ordered.c b/final/runtime/test/worksharing/for/omp_for_ordered.c
new file mode 100644
index 0000000..18ac7eb
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_ordered.c
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+static int last_i = 0;
+
+/* Utility function to check that i is increasing monotonically
+ with each call */
+static int check_i_islarger (int i)
+{
+ int islarger;
+ islarger = (i > last_i);
+ last_i = i;
+ return (islarger);
+}
+
+int test_omp_for_ordered()
+{
+ int sum;
+ int is_larger = 1;
+ int known_sum;
+
+ last_i = 0;
+ sum = 0;
+
+ #pragma omp parallel
+ {
+ int i;
+ int my_islarger = 1;
+ #pragma omp for schedule(static,1) ordered
+ for (i = 1; i < 100; i++) {
+ #pragma omp ordered
+ {
+ my_islarger = check_i_islarger(i) && my_islarger;
+ sum = sum + i;
+ }
+ }
+ #pragma omp critical
+ {
+ is_larger = is_larger && my_islarger;
+ }
+ }
+
+ known_sum=(99 * 100) / 2;
+ return ((known_sum == sum) && is_larger);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_ordered()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_private.c b/final/runtime/test/worksharing/for/omp_for_private.c
new file mode 100644
index 0000000..1f537b9
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_private.c
@@ -0,0 +1,63 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/* Utility function do spend some time in a loop */
+static void do_some_work()
+{
+ int i;
+ double sum = 0;
+ for(i = 0; i < 1000; i++){
+ sum += sqrt ((double) i);
+ }
+}
+
+int sum1;
+#pragma omp threadprivate(sum1)
+
+int test_omp_for_private()
+{
+ int sum = 0;
+ int sum0;
+ int known_sum;
+
+ sum0 = 0; /* setting (global) sum0 = 0 */
+
+ #pragma omp parallel
+ {
+ sum1 = 0; /* setting sum1 in each thread to 0 */
+ { /* begin of orphaned block */
+ int i;
+ #pragma omp for private(sum0) schedule(static,1)
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ sum0 = sum1;
+ #pragma omp flush
+ sum0 = sum0 + i;
+ do_some_work ();
+ #pragma omp flush
+ sum1 = sum0;
+ }
+ } /* end of orphaned block */
+
+ #pragma omp critical
+ {
+ sum = sum + sum1;
+ } /*end of critical*/
+ } /* end of parallel*/
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ return (known_sum == sum);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_private()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_reduction.c b/final/runtime/test/worksharing/for/omp_for_reduction.c
new file mode 100644
index 0000000..28f0907
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_reduction.c
@@ -0,0 +1,339 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */
+#define MAX_FACTOR 10
+#define KNOWN_PRODUCT 3628800 /* 10! */
+
+int test_omp_for_reduction ()
+{
+ double dt;
+ int sum;
+ int diff;
+ int product = 1;
+ double dsum;
+ double dknown_sum;
+ double ddiff;
+ int logic_and;
+ int logic_or;
+ int bit_and;
+ int bit_or;
+ int exclusiv_bit_or;
+ int *logics;
+ int i;
+ int known_sum;
+ int known_product;
+ double rounding_error = 1.E-9; /* overall rounding error to be
+ ignored in the double tests */
+ double dpt;
+ int result = 0;
+ int logicsArray[LOOPCOUNT];
+
+ /* Variables for integer tests */
+ sum = 0;
+ product = 1;
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ /* Variables for double tests */
+ dt = 1. / 3.; /* base of the geometric series for the + and - tests */
+ dsum = 0.;
+ /* Variables for logic tests */
+ logics = logicsArray;
+ logic_and = 1;
+ logic_or = 0;
+ /* Variables for bit operator tests */
+ bit_and = 1;
+ bit_or = 0;
+ /* Variables for exclusive bit or */
+ exclusiv_bit_or = 0;
+
+ /************************************************************************/
+ /** Tests for integers **/
+ /************************************************************************/
+
+ /**** Testing integer addition ****/
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(+:sum)
+ for (j = 1; j <= LOOPCOUNT; j++) {
+ sum = sum + j;
+ }
+ }
+ if (known_sum != sum) {
+ result++;
+ fprintf (stderr, "Error in sum with integers: Result was %d"
+ " instead of %d.\n", sum, known_sum);
+ }
+
+ /**** Testing integer subtraction ****/
+ diff = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(-:diff)
+ for (j = 1; j <= LOOPCOUNT; j++) {
+ diff = diff - j;
+ }
+ }
+ if (diff != 0) {
+ result++;
+ fprintf (stderr, "Error in difference with integers: Result was %d"
+ " instead of 0.\n", diff);
+ }
+
+ /**** Testing integer multiplication ****/
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(*:product)
+ for (j = 1; j <= MAX_FACTOR; j++) {
+ product *= j;
+ }
+ }
+ known_product = KNOWN_PRODUCT;
+ if(known_product != product) {
+ result++;
+ fprintf (stderr,"Error in Product with integers: Result was %d"
+ " instead of %d\n",product,known_product);
+ }
+
+ /************************************************************************/
+ /** Tests for doubles **/
+ /************************************************************************/
+
+ /**** Testing double addition ****/
+ dsum = 0.;
+ dpt = 1.;
+ for (i = 0; i < DOUBLE_DIGITS; ++i) {
+ dpt *= dt;
+ }
+ dknown_sum = (1 - dpt) / (1 - dt);
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(+:dsum)
+ for (j = 0; j < DOUBLE_DIGITS; j++) {
+ dsum += pow (dt, j);
+ }
+ }
+ if (fabs (dsum - dknown_sum) > rounding_error) {
+ result++;
+ fprintf (stderr, "\nError in sum with doubles: Result was %f"
+ " instead of: %f (Difference: %E)\n",
+ dsum, dknown_sum, dsum-dknown_sum);
+ }
+
+ /**** Testing double subtraction ****/
+ ddiff = (1 - dpt) / (1 - dt);
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(-:ddiff)
+ for (j = 0; j < DOUBLE_DIGITS; ++j) {
+ ddiff -= pow (dt, j);
+ }
+ }
+ if (fabs (ddiff) > rounding_error) {
+ result++;
+ fprintf (stderr, "Error in Difference with doubles: Result was %E"
+ " instead of 0.0\n", ddiff);
+ }
+
+
+ /************************************************************************/
+ /** Tests for logical values **/
+ /************************************************************************/
+
+ /**** Testing logic and ****/
+ for (i = 0; i < LOOPCOUNT; i++) {
+ logics[i] = 1;
+ }
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(&&:logic_and)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ logic_and = (logic_and && logics[j]);
+ }
+ }
+ if(!logic_and) {
+ result++;
+ fprintf (stderr, "Error in logic AND part 1\n");
+ }
+
+ logic_and = 1;
+ logics[LOOPCOUNT / 2] = 0;
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(&&:logic_and)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ logic_and = logic_and && logics[j];
+ }
+ }
+ if(logic_and) {
+ result++;
+ fprintf (stderr, "Error in logic AND part 2\n");
+ }
+
+ /**** Testing logic or ****/
+ for (i = 0; i < LOOPCOUNT; i++) {
+ logics[i] = 0;
+ }
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(||:logic_or)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ logic_or = logic_or || logics[j];
+ }
+ }
+ if (logic_or) {
+ result++;
+ fprintf (stderr, "Error in logic OR part 1\n");
+ }
+
+ logic_or = 0;
+ logics[LOOPCOUNT / 2] = 1;
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(||:logic_or)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ logic_or = logic_or || logics[j];
+ }
+ }
+ if(!logic_or) {
+ result++;
+ fprintf (stderr, "Error in logic OR part 2\n");
+ }
+
+ /************************************************************************/
+ /** Tests for bit values **/
+ /************************************************************************/
+
+ /**** Testing bit and ****/
+ for (i = 0; i < LOOPCOUNT; ++i) {
+ logics[i] = 1;
+ }
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(&:bit_and)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ bit_and = (bit_and & logics[j]);
+ }
+ }
+ if (!bit_and) {
+ result++;
+ fprintf (stderr, "Error in BIT AND part 1\n");
+ }
+
+ bit_and = 1;
+ logics[LOOPCOUNT / 2] = 0;
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(&:bit_and)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ bit_and = bit_and & logics[j];
+ }
+ }
+ if (bit_and) {
+ result++;
+ fprintf (stderr, "Error in BIT AND part 2\n");
+ }
+
+ /**** Testing bit or ****/
+ for (i = 0; i < LOOPCOUNT; i++) {
+ logics[i] = 0;
+ }
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(|:bit_or)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ bit_or = bit_or | logics[j];
+ }
+ }
+ if (bit_or) {
+ result++;
+ fprintf (stderr, "Error in BIT OR part 1\n");
+ }
+
+ bit_or = 0;
+ logics[LOOPCOUNT / 2] = 1;
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(|:bit_or)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ bit_or = bit_or | logics[j];
+ }
+ }
+ if (!bit_or) {
+ result++;
+ fprintf (stderr, "Error in BIT OR part 2\n");
+ }
+
+ /**** Testing exclusive bit or ****/
+ for (i = 0; i < LOOPCOUNT; i++) {
+ logics[i] = 0;
+ }
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(^:exclusiv_bit_or)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[j];
+ }
+ }
+ if (exclusiv_bit_or) {
+ result++;
+ fprintf (stderr, "Error in EXCLUSIV BIT OR part 1\n");
+ }
+
+ exclusiv_bit_or = 0;
+ logics[LOOPCOUNT / 2] = 1;
+
+ #pragma omp parallel
+ {
+ int j;
+ #pragma omp for schedule(dynamic,1) reduction(^:exclusiv_bit_or)
+ for (j = 0; j < LOOPCOUNT; ++j) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[j];
+ }
+ }
+ if (!exclusiv_bit_or) {
+ result++;
+ fprintf (stderr, "Error in EXCLUSIV BIT OR part 2\n");
+ }
+
+ free (logics);
+ return (result == 0);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_reduction()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
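The floating-point reduction checks in omp_for_reduction.c above compare a term-by-term sum of powers against the closed form of a finite geometric series, sum_{j=0}^{n-1} t^j = (1 - t^n) / (1 - t). A minimal standalone sketch of that identity (not part of the patch; it only assumes a C compiler and libm):

#include <math.h>
#include <stdio.h>

int main(void)
{
    const int n = 20;            /* matches DOUBLE_DIGITS in the test */
    const double t = 1.0 / 3.0;
    double lhs = 0.0, tn = 1.0;
    int j;

    for (j = 0; j < n; j++) {
        lhs += pow(t, j);        /* term-by-term sum, as the reduction does */
        tn *= t;                 /* accumulates t^n */
    }
    /* closed form used for dknown_sum: (1 - t^n) / (1 - t) */
    printf("sum = %.15f  closed form = %.15f\n", lhs, (1.0 - tn) / (1.0 - t));
    return 0;
}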
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_auto.c b/final/runtime/test/worksharing/for/omp_for_schedule_auto.c
new file mode 100644
index 0000000..075617c
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_auto.c
@@ -0,0 +1,69 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int sum1;
+#pragma omp threadprivate(sum1)
+
+int test_omp_for_auto()
+{
+ int j;
+ int sum;
+ int sum0;
+ int known_sum;
+ int threadsnum;
+
+ sum = 0;
+ sum0 = 12345;
+
+ // array which keeps track of which threads participated in the for loop
+ // e.g., given 4 threads, [ 0 | 1 | 1 | 0 ] implies
+ // threads 0 and 3 did not, threads 1 and 2 did
+ int max_threads = omp_get_max_threads();
+ int* active_threads = (int*)malloc(sizeof(int)*max_threads);
+ for(j = 0; j < max_threads; j++)
+ active_threads[j] = 0;
+
+ #pragma omp parallel
+ {
+ int i;
+ sum1 = 0;
+ #pragma omp for firstprivate(sum0) schedule(auto)
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ active_threads[omp_get_thread_num()] = 1;
+ sum0 = sum0 + i;
+ sum1 = sum0;
+ }
+
+ #pragma omp critical
+ {
+ sum = sum + sum1;
+ }
+ }
+
+ // count the threads that participated (sum is stored in threadsnum)
+ threadsnum=0;
+ for(j = 0; j < max_threads; j++) {
+ if(active_threads[j])
+ threadsnum++;
+ }
+ free(active_threads);
+
+ known_sum = 12345 * threadsnum + (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ return (known_sum == sum);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_auto()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c b/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c
new file mode 100644
index 0000000..6d4f59b
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c
@@ -0,0 +1,89 @@
+// RUN: %libomp-compile-and-run
+/*
+ * Test for dynamic scheduling with chunk size
+ * Method: calculate how many times the iteration space is dispatched
+ * and check that each dispatch has the requested chunk size
+ * unless it is the last one.
+ * It is possible for two adjacent chunks to be assigned to the same thread.
+ * Modified by Chunhua Liao
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#define CFDMAX_SIZE 100
+const int chunk_size = 7;
+
+int test_omp_for_schedule_dynamic()
+{
+ int tid;
+ int *tids;
+ int i;
+ int tidsArray[CFDMAX_SIZE];
+ int count = 0;
+ int tmp_count = 0; /* number of dispatches */
+ int *tmp; /* stores the chunk size of each dispatch */
+ int result = 0;
+
+ tids = tidsArray;
+
+ #pragma omp parallel private(tid) shared(tids)
+ { /* begin of parallel */
+ int tid;
+ tid = omp_get_thread_num ();
+ #pragma omp for schedule(dynamic,chunk_size)
+ for (i = 0; i < CFDMAX_SIZE; i++) {
+ tids[i] = tid;
+ }
+ }
+
+ for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+ if (tids[i] != tids[i + 1]) {
+ count++;
+ }
+ }
+
+ tmp = (int *) malloc (sizeof (int) * (count + 1));
+ tmp[0] = 1;
+
+ for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+ if (tmp_count > count) {
+ printf ("--------------------\nTestinternal Error: List too small!!!\n--------------------\n"); /* Error handling */
+ break;
+ }
+ if (tids[i] != tids[i + 1]) {
+ tmp_count++;
+ tmp[tmp_count] = 1;
+ } else {
+ tmp[tmp_count]++;
+ }
+ }
+ /* is dynamic statement working? */
+ for (i = 0; i < count; i++) {
+ if ((tmp[i]%chunk_size)!=0) {
+ /* it is possible for two adjacent chunks to be assigned to the same thread */
+ result++;
+ fprintf(stderr,"The intermediate dispatch has wrong chunksize.\n");
+ /* result += ((tmp[i] / chunk_size) - 1); */
+ }
+ }
+ if ((tmp[count]%chunk_size)!=(CFDMAX_SIZE%chunk_size)) {
+ result++;
+ fprintf(stderr,"the last dispatch has wrong chunksize.\n");
+ }
+ /* for (int i=0;i<count+1;++i) printf("%d\t:=\t%d\n",i+1,tmp[i]); */
+ return (result==0);
+}
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_schedule_dynamic()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
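For reference, the bookkeeping the dynamic-schedule test above reconstructs from the tids[] array can also be derived directly: with N iterations and chunk size c, dynamic scheduling hands out ceil(N/c) chunks, each of c iterations except possibly the last. The modulus checks in the test are needed because two adjacent chunks given to the same thread are indistinguishable in tids[]. A small standalone sketch of the expected numbers (not part of the patch):

#include <stdio.h>

int main(void)
{
    const int N = 100, c = 7;             /* CFDMAX_SIZE and chunk_size above */
    int dispatches = (N + c - 1) / c;     /* ceil(N/c) chunks are handed out */
    int last = (N % c == 0) ? c : N % c;  /* size of the final, short chunk */

    printf("%d dispatches, last chunk has %d iterations\n", dispatches, last);
    return 0;
}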
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_guided.c b/final/runtime/test/worksharing/for/omp_for_schedule_guided.c
new file mode 100644
index 0000000..1ee7449
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_guided.c
@@ -0,0 +1,217 @@
+// RUN: %libomp-compile-and-run
+
+/* Test for guided scheduling
+ * Ensure threads first get chunks in an interleaved fashion,
+ * then check that the chunk sizes decrease toward a stable value.
+ * Modified by Chunhua Liao
+ * For example, 100 iterations on 2 threads, chunk size 7,
+ * one line per dispatch, 0/1 is the thread id:
+ * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24
+ * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 18
+ * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14
+ * 1 1 1 1 1 1 1 1 1 1 10
+ * 0 0 0 0 0 0 0 0 8
+ * 1 1 1 1 1 1 1 7
+ * 0 0 0 0 0 0 0 7
+ * 1 1 1 1 1 1 1 7
+ * 0 0 0 0 0 5
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+#define CFSMAX_SIZE 1000
+#define MAX_TIME 0.005
+
+#ifdef SLEEPTIME
+#undef SLEEPTIME
+#define SLEEPTIME 0.0001
+#endif
+
+int test_omp_for_schedule_guided()
+{
+ int * tids;
+ int * chunksizes;
+ int notout;
+ int maxiter;
+ int threads;
+ int i;
+ int result;
+
+ tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1));
+ maxiter = 0;
+ result = 1;
+ notout = 1;
+
+ /* Testing if enough threads are available for this check. */
+ #pragma omp parallel
+ {
+ #pragma omp single
+ {
+ threads = omp_get_num_threads();
+ }
+ }
+
+ /* ensure there are at least two threads */
+ if (threads < 2) {
+ omp_set_num_threads(2);
+ threads = 2;
+ }
+
+ /* Now the real parallel work:
+ * Each thread will start immediately with the first chunk.
+ */
+ #pragma omp parallel shared(tids,maxiter)
+ { /* begin of parallel */
+ double count;
+ int tid;
+ int j;
+
+ tid = omp_get_thread_num ();
+
+ #pragma omp for nowait schedule(guided)
+ for(j = 0; j < CFSMAX_SIZE; ++j) {
+ count = 0.;
+ #pragma omp flush(maxiter)
+ if (j > maxiter) {
+ #pragma omp critical
+ {
+ maxiter = j;
+ }
+ }
+ /*printf ("thread %d sleeping\n", tid);*/
+ #pragma omp flush(maxiter,notout)
+ while (notout && (count < MAX_TIME) && (maxiter == j)) {
+ #pragma omp flush(maxiter,notout)
+ my_sleep (SLEEPTIME);
+ count += SLEEPTIME;
+#ifdef VERBOSE
+ printf(".");
+#endif
+ }
+#ifdef VERBOSE
+ if (count > 0.) printf(" waited %lf s\n", count);
+#endif
+ /*printf ("thread %d awake\n", tid);*/
+ tids[j] = tid;
+#ifdef VERBOSE
+ printf("%d finished by %d\n",j,tid);
+#endif
+ } /* end of for */
+ notout = 0;
+ #pragma omp flush(maxiter,notout)
+ } /* end of parallel */
+
+ /*******************************************************
+ * evaluation of the values *
+ *******************************************************/
+ {
+ int determined_chunksize = 1;
+ int last_threadnr = tids[0];
+ int global_chunknr = 0;
+ int openwork = CFSMAX_SIZE;
+ int expected_chunk_size;
+ int* local_chunknr = (int*)malloc(threads * sizeof(int));
+ double c = 1;
+
+ for (i = 0; i < threads; i++)
+ local_chunknr[i] = 0;
+
+ tids[CFSMAX_SIZE] = -1;
+
+ /*
+ * determine the number of global chunks
+ */
+ // fprintf(stderr,"# global_chunknr thread local_chunknr chunksize\n");
+ for(i = 1; i <= CFSMAX_SIZE; ++i) {
+ if (last_threadnr==tids[i]) {
+ determined_chunksize++;
+ } else {
+ /* fprintf(stderr, "%d\t%d\t%d\t%d\n", global_chunknr,
+ last_threadnr, local_chunknr[last_threadnr], m); */
+ global_chunknr++;
+ local_chunknr[last_threadnr]++;
+ last_threadnr = tids[i];
+ determined_chunksize = 1;
+ }
+ }
+ /* now allocate the memory for saving the sizes of the global chunks */
+ chunksizes = (int*)malloc(global_chunknr * sizeof(int));
+
+ /*
+ * Evaluate the sizes of the global chunks
+ */
+ global_chunknr = 0;
+ determined_chunksize = 1;
+ last_threadnr = tids[0];
+ for (i = 1; i <= CFSMAX_SIZE; ++i) {
+ /* If the thread number is the same as before, increase the
+ * detected chunksize for this chunk; otherwise record the finished
+ * chunk, reset the detected chunksize to one and save the number
+ * of the next thread in last_threadnr.
+ */
+ if (last_threadnr == tids[i]) {
+ determined_chunksize++;
+ } else {
+ chunksizes[global_chunknr] = determined_chunksize;
+ global_chunknr++;
+ local_chunknr[last_threadnr]++;
+ last_threadnr = tids[i];
+ determined_chunksize = 1;
+ }
+ }
+
+#ifdef VERBOSE
+ fprintf(stderr, "found\texpected\tconstant\n");
+#endif
+
+ /* identify the constant c for the exponential
+ decrease of the chunksize */
+ expected_chunk_size = openwork / threads;
+ c = (double) chunksizes[0] / expected_chunk_size;
+
+ for (i = 0; i < global_chunknr; i++) {
+ /* calculate the new expected chunksize */
+ if (expected_chunk_size > 1)
+ expected_chunk_size = c * openwork / threads;
+#ifdef VERBOSE
+ fprintf(stderr, "%8d\t%8d\t%lf\n", chunksizes[i],
+ expected_chunk_size, c * chunksizes[i]/expected_chunk_size);
+#endif
+ /* check if chunksize is inside the rounding errors */
+ if (abs (chunksizes[i] - expected_chunk_size) >= 2) {
+ result = 0;
+#ifndef VERBOSE
+ fprintf(stderr, "Chunksize differed from expected "
+ "value: %d instead of %d\n", chunksizes[i],
+ expected_chunk_size);
+ return 0;
+#endif
+ } /* end if */
+
+#ifndef VERBOSE
+ if (expected_chunk_size - chunksizes[i] < 0)
+ fprintf(stderr, "Chunksize did not decrease: %d"
+ " instead of %d\n", chunksizes[i],expected_chunk_size);
+#endif
+
+ /* calculating the remaining amount of work */
+ openwork -= chunksizes[i];
+ }
+ }
+ return result;
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_schedule_guided()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
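The evaluation loop in the guided-schedule test above assumes each dispatch hands out roughly remaining/threads iterations, so chunk sizes decay geometrically toward 1. A standalone sketch of that idealized model (not part of the patch; the real runtime may round differently and enforces a minimum chunk size):

#include <stdio.h>

int main(void)
{
    int remaining = 1000;   /* CFSMAX_SIZE in the test */
    int nthreads = 2;       /* assumed team size */

    while (remaining > 0) {
        int chunk = remaining / nthreads;  /* idealized guided dispatch */
        if (chunk < 1)
            chunk = 1;                     /* minimum chunk size */
        printf("%d ", chunk);
        remaining -= chunk;
    }
    printf("\n");
    return 0;
}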
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c b/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c
new file mode 100644
index 0000000..b957fc3
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c
@@ -0,0 +1,82 @@
+// RUN: %libomp-compile
+// RUN: env OMP_SCHEDULE=static %libomp-run 1 0
+// RUN: env OMP_SCHEDULE=static,10 %libomp-run 1 10
+// RUN: env OMP_SCHEDULE=dynamic %libomp-run 2 1
+// RUN: env OMP_SCHEDULE=dynamic,11 %libomp-run 2 11
+// RUN: env OMP_SCHEDULE=guided %libomp-run 3 1
+// RUN: env OMP_SCHEDULE=guided,12 %libomp-run 3 12
+// RUN: env OMP_SCHEDULE=auto %libomp-run 4 1
+// RUN: env OMP_SCHEDULE=trapezoidal %libomp-run 101 1
+// RUN: env OMP_SCHEDULE=trapezoidal,13 %libomp-run 101 13
+// RUN: env OMP_SCHEDULE=static_steal %libomp-run 102 1
+// RUN: env OMP_SCHEDULE=static_steal,14 %libomp-run 102 14
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int sum;
+char* correct_kind_string;
+omp_sched_t correct_kind;
+int correct_chunk_size;
+
+int test_omp_for_runtime()
+{
+ int sum;
+ int known_sum;
+ int chunk_size;
+ int error;
+ omp_sched_t kind;
+
+ sum = 0;
+ error = 0;
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ omp_get_schedule(&kind, &chunk_size);
+
+ printf("omp_get_schedule() returns: Schedule = %d, Chunk Size = %d\n",
+ kind, chunk_size);
+ if (kind != correct_kind) {
+ printf("kind(%d) != correct_kind(%d)\n", kind, correct_kind);
+ error = 1;
+ }
+ if (chunk_size != correct_chunk_size) {
+ printf("chunk_size(%d) != correct_chunk_size(%d)\n", chunk_size,
+ correct_chunk_size);
+ error = 1;
+ }
+
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for schedule(runtime)
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ #pragma omp critical
+ sum+=i;
+ }
+ }
+ if (known_sum != sum) {
+ printf("Known Sum = %d, Calculated Sum = %d\n", known_sum, sum);
+ error = 1;
+ }
+ return !error;
+}
+
+int main(int argc, char** argv)
+{
+ int i;
+ int num_failed=0;
+ if (argc != 3) {
+ fprintf(stderr, "usage: %s schedule_kind chunk_size\n", argv[0]);
+ fprintf(stderr, " Run with envirable OMP_SCHEDULE=kind[,chunk_size]\n");
+ return 1;
+ }
+ correct_kind = atoi(argv[1]);
+ correct_chunk_size = atoi(argv[2]);
+
+ for (i = 0; i < REPETITIONS; i++) {
+ if (!test_omp_for_runtime()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
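The RUN lines above drive schedule(runtime) through the OMP_SCHEDULE environment variable; the same schedule can also be queried and set programmatically. A minimal standalone sketch (not part of the patch):

#include <stdio.h>
#include <omp.h>

int main(void)
{
    omp_sched_t kind;
    int chunk, i, sum = 0;

    /* equivalent to running with OMP_SCHEDULE=dynamic,11 in the environment */
    omp_set_schedule(omp_sched_dynamic, 11);
    omp_get_schedule(&kind, &chunk);
    printf("kind = %d, chunk = %d\n", (int)kind, chunk);

    #pragma omp parallel for schedule(runtime) reduction(+:sum)
    for (i = 0; i < 100; i++)
        sum += i;               /* dispatched dynamically, 11 iterations at a time */
    printf("sum = %d\n", sum);  /* 4950 */
    return 0;
}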
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_static.c b/final/runtime/test/worksharing/for/omp_for_schedule_static.c
new file mode 100644
index 0000000..f46a544
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_static.c
@@ -0,0 +1,154 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+#define CFSMAX_SIZE 1000
+#define MAX_TIME 0.01
+
+#ifdef SLEEPTIME
+#undef SLEEPTIME
+#define SLEEPTIME 0.0005
+#endif
+
+int test_omp_for_schedule_static()
+{
+ int threads;
+ int i,lasttid;
+ int * tids;
+ int notout;
+ int maxiter;
+ int chunk_size;
+ int counter = 0;
+ int tmp_count=1;
+ int lastthreadsstarttid = -1;
+ int result = 1;
+
+ chunk_size = 7;
+ tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1));
+ notout = 1;
+ maxiter = 0;
+
+ #pragma omp parallel shared(tids,counter)
+ { /* begin of parallel*/
+ #pragma omp single
+ {
+ threads = omp_get_num_threads ();
+ } /* end of single */
+ } /* end of parallel */
+
+ if (threads < 2) {
+ omp_set_num_threads(2);
+ threads = 2;
+ }
+ fprintf (stderr,"Using an internal count of %d\nUsing a specified"
+ " chunksize of %d\n", CFSMAX_SIZE, chunk_size);
+ tids[CFSMAX_SIZE] = -1; /* setting endflag */
+ #pragma omp parallel shared(tids)
+ { /* begin of parallel */
+ double count;
+ int tid;
+ int j;
+
+ tid = omp_get_thread_num ();
+
+ #pragma omp for nowait schedule(static,chunk_size)
+ for(j = 0; j < CFSMAX_SIZE; ++j) {
+ count = 0.;
+ #pragma omp flush(maxiter)
+ if (j > maxiter) {
+ #pragma omp critical
+ {
+ maxiter = j;
+ }
+ }
+ /*printf ("thread %d sleeping\n", tid);*/
+ while (notout && (count < MAX_TIME) && (maxiter == j)) {
+ #pragma omp flush(maxiter,notout)
+ my_sleep (SLEEPTIME);
+ count += SLEEPTIME;
+ printf(".");
+ }
+#ifdef VERBOSE
+ if (count > 0.) printf(" waited %lf s\n", count);
+#endif
+ /*printf ("thread %d awake\n", tid);*/
+ tids[j] = tid;
+#ifdef VERBOSE
+ printf("%d finished by %d\n",j,tid);
+#endif
+ } /* end of for */
+ notout = 0;
+ #pragma omp flush(maxiter,notout)
+ } /* end of parallel */
+
+ /**** analysing the data in array tids ****/
+
+ lasttid = tids[0];
+ tmp_count = 0;
+
+ for (i = 0; i < CFSMAX_SIZE + 1; ++i) {
+ /* If the work was done by the same thread increase tmp_count by one. */
+ if (tids[i] == lasttid) {
+ tmp_count++;
+#ifdef VERBOSE
+ fprintf (stderr, "%d: %d \n", i, tids[i]);
+#endif
+ continue;
+ }
+
+ /* Check whether the next thread has the right thread number. Finding
+ * thread number -1 means the end has been reached.
+ */
+ if (tids[i] == (lasttid + 1) % threads || tids[i] == -1) {
+ /* checking for the right chunk size */
+ if (tmp_count == chunk_size) {
+ tmp_count = 1;
+ lasttid = tids[i];
+#ifdef VERBOSE
+ fprintf (stderr, "OK\n");
+#endif
+ } else {
+ /* If the chunk size was wrong, check if the end was reached */
+ if (tids[i] == -1) {
+ if (i == CFSMAX_SIZE) {
+ fprintf (stderr, "Last thread had chunk size %d\n",
+ tmp_count);
+ break;
+ } else {
+ fprintf (stderr, "ERROR: Last thread (thread with"
+ " number -1) was found before the end.\n");
+ result = 0;
+ }
+ } else {
+ fprintf (stderr, "ERROR: chunk size was %d. (assigned"
+ " was %d)\n", tmp_count, chunk_size);
+ result = 0;
+ }
+ }
+ } else {
+ fprintf(stderr, "ERROR: Found thread with number %d (should be"
+ " inbetween 0 and %d).", tids[i], threads - 1);
+ result = 0;
+ }
+#ifdef VERBOSE
+ fprintf (stderr, "%d: %d \n", i, tids[i]);
+#endif
+ }
+
+ return result;
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_schedule_static()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
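The analysis loop in the static-schedule test above checks the round-robin chunk assignment implied by schedule(static,chunk): iteration i is expected to run on thread (i / chunk) % nthreads. A standalone sketch of that expectation (not part of the patch; the team size is captured inside the region rather than assumed):

#include <stdio.h>
#include <omp.h>

int main(void)
{
    enum { N = 40, CHUNK = 7 };
    int owner[N], i, nthreads = 1, ok = 1;

    #pragma omp parallel
    {
        int j;
        #pragma omp single
        nthreads = omp_get_num_threads();
        #pragma omp for schedule(static, CHUNK)
        for (j = 0; j < N; j++)
            owner[j] = omp_get_thread_num();     /* record who ran iteration j */
    }

    for (i = 0; i < N; i++)
        if (owner[i] != (i / CHUNK) % nthreads)  /* expected round-robin owner */
            ok = 0;
    printf("round-robin mapping %s\n", ok ? "matches" : "differs");
    return 0;
}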
diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c b/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c
new file mode 100644
index 0000000..922f27a
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c
@@ -0,0 +1,202 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+#include "omp_my_sleep.h"
+
+#define CFSMAX_SIZE 1000
+#define MAX_TIME 0.01
+
+#ifdef SLEEPTIME
+#undef SLEEPTIME
+#define SLEEPTIME 0.0005
+#endif
+
+#define VERBOSE 0
+
+int test_omp_for_schedule_static_3()
+{
+ int threads;
+ int i,lasttid;
+
+ int * tids;
+ int * tids2;
+ int notout;
+ int maxiter;
+ int chunk_size;
+
+ int counter = 0;
+ int tmp_count=1;
+ int lastthreadsstarttid = -1;
+ int result = 1;
+ chunk_size = 7;
+
+ tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1));
+ notout = 1;
+ maxiter = 0;
+
+ #pragma omp parallel shared(tids,counter)
+ { /* begin of parallel*/
+ #pragma omp single
+ {
+ threads = omp_get_num_threads ();
+ } /* end of single */
+ } /* end of parallel */
+
+ /* Ensure that at least two threads are created */
+ if (threads < 2) {
+ omp_set_num_threads(2);
+ threads = 2;
+ }
+ fprintf (stderr,"Using an internal count of %d\nUsing a"
+ " specified chunksize of %d\n", CFSMAX_SIZE, chunk_size);
+ tids[CFSMAX_SIZE] = -1; /* setting endflag */
+
+ #pragma omp parallel shared(tids)
+ { /* begin of parallel */
+ double count;
+ int tid;
+ int j;
+
+ tid = omp_get_thread_num ();
+
+ #pragma omp for nowait schedule(static,chunk_size)
+ for(j = 0; j < CFSMAX_SIZE; ++j) {
+ count = 0.;
+ #pragma omp flush(maxiter)
+ if (j > maxiter) {
+ #pragma omp critical
+ {
+ maxiter = j;
+ }
+ }
+ /*printf ("thread %d sleeping\n", tid);*/
+ while (notout && (count < MAX_TIME) && (maxiter == j)) {
+ #pragma omp flush(maxiter,notout)
+ my_sleep (SLEEPTIME);
+ count += SLEEPTIME;
+ printf(".");
+ }
+#ifdef VERBOSE
+ if (count > 0.) printf(" waited %lf s\n", count);
+#endif
+ /*printf ("thread %d awake\n", tid);*/
+ tids[j] = tid;
+#ifdef VERBOSE
+ printf("%d finished by %d\n",j,tid);
+#endif
+ } /* end of omp parallel for */
+
+ notout = 0;
+ #pragma omp flush(maxiter,notout)
+ } /* end of parallel */
+
+ /**** analysing the data in array tids ****/
+
+ lasttid = tids[0];
+ tmp_count = 0;
+
+ for (i = 0; i < CFSMAX_SIZE + 1; ++i) {
+ /* If the work was done by the same thread
+ increase tmp_count by one. */
+ if (tids[i] == lasttid) {
+ tmp_count++;
+#ifdef VERBOSE
+ fprintf (stderr, "%d: %d \n", i, tids[i]);
+#endif
+ continue;
+ }
+
+ /* Check whether the next thread has the right thread number.
+ * Finding thread number -1 means the end has been reached.
+ */
+ if (tids[i] == (lasttid + 1) % threads || tids[i] == -1) {
+ /* checking for the right chunk size */
+ if (tmp_count == chunk_size) {
+ tmp_count = 1;
+ lasttid = tids[i];
+#ifdef VERBOSE
+ fprintf (stderr, "OK\n");
+#endif
+ } else {
+ /* If the chunk size was wrong, check if the end was reached */
+ if (tids[i] == -1) {
+ if (i == CFSMAX_SIZE) {
+ fprintf (stderr, "Last thread had chunk size %d\n",
+ tmp_count);
+ break;
+ } else {
+ fprintf (stderr, "ERROR: Last thread (thread with"
+ " number -1) was found before the end.\n");
+ result = 0;
+ }
+ } else {
+ fprintf (stderr, "ERROR: chunk size was %d. (assigned"
+ " was %d)\n", tmp_count, chunk_size);
+ result = 0;
+ }
+ }
+ } else {
+ fprintf(stderr, "ERROR: Found thread with number %d (should be"
+ " inbetween 0 and %d).", tids[i], threads - 1);
+ result = 0;
+ }
+#ifdef VERBOSE
+ fprintf (stderr, "%d: %d \n", i, tids[i]);
+#endif
+ }
+
+ /* Now we check whether several loop regions in one parallel region have
+ * the same logical assignment of chunks to threads. We use the nowait
+ * clause to increase the probability of provoking an error. */
+
+ /* First we allocate some more memory */
+ free (tids);
+ tids = (int *) malloc (sizeof (int) * LOOPCOUNT);
+ tids2 = (int *) malloc (sizeof (int) * LOOPCOUNT);
+
+ #pragma omp parallel
+ {
+ {
+ int n;
+ #pragma omp for schedule(static) nowait
+ for (n = 0; n < LOOPCOUNT; n++) {
+ if (LOOPCOUNT == n + 1 )
+ my_sleep(SLEEPTIME);
+
+ tids[n] = omp_get_thread_num();
+ }
+ }
+ {
+ int m;
+ #pragma omp for schedule(static) nowait
+ for (m = 1; m <= LOOPCOUNT; m++) {
+ tids2[m-1] = omp_get_thread_num();
+ }
+ }
+ }
+
+ for (i = 0; i < LOOPCOUNT; i++)
+ if (tids[i] != tids2[i]) {
+ fprintf (stderr, "Chunk no. %d was assigned once to thread %d and"
+ " later to thread %d.\n", i, tids[i],tids2[i]);
+ result = 0;
+ }
+
+ free (tids);
+ free (tids2);
+ return result;
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for (i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_schedule_static_3()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
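The last part of omp_for_schedule_static_3.c above relies on the OpenMP guarantee that two worksharing loops with schedule(static), the same iteration count and the same chunk size, inside the same parallel region, map iterations to threads identically; that is what makes nowait safe between them. A standalone sketch of the pattern (not part of the patch):

#include <stdio.h>
#include <omp.h>

int main(void)
{
    enum { N = 64 };
    int a[N], b[N], i, ok = 1;

    #pragma omp parallel
    {
        int j;
        #pragma omp for schedule(static) nowait
        for (j = 0; j < N; j++)
            a[j] = j * j;           /* produced by whichever thread owns j */

        #pragma omp for schedule(static)
        for (j = 0; j < N; j++)
            b[j] = a[j];            /* the same thread owns j again, so a[j] is ready */
    }

    for (i = 0; i < N; i++)
        if (b[i] != i * i)
            ok = 0;
    printf("static loop pairing %s\n", ok ? "held" : "was violated");
    return 0;
}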
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c b/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c
new file mode 100644
index 0000000..3b3bf7d
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c
@@ -0,0 +1,35 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+int test_omp_parallel_for_firstprivate()
+{
+ int sum ;
+ int i2;
+ int i;
+ int known_sum;
+
+ sum=0;
+ i2=3;
+
+ #pragma omp parallel for reduction(+:sum) private(i) firstprivate(i2)
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ sum = sum + (i + i2);
+ }
+
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2 + i2 * LOOPCOUNT;
+ return (known_sum == sum);
+} /* end of check_parallel_for_firstprivate */
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_parallel_for_firstprivate()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_if.c b/final/runtime/test/worksharing/for/omp_parallel_for_if.c
new file mode 100644
index 0000000..57fe498
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_if.c
@@ -0,0 +1,42 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+int test_omp_parallel_for_if()
+{
+ int known_sum;
+ int num_threads;
+ int sum, sum2;
+ int i;
+ int control;
+
+ control = 0;
+ num_threads=0;
+ sum = 0;
+ sum2 = 0;
+
+ #pragma omp parallel for private(i) if (control==1)
+ for (i=0; i <= LOOPCOUNT; i++) {
+ num_threads = omp_get_num_threads();
+ sum = sum + i;
+ }
+
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ fprintf(stderr, "Number of threads determined by"
+ " omp_get_num_threads: %d\n", num_threads);
+ return (known_sum == sum && num_threads == 1);
+} /* end of check_parallel_for_if */
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_parallel_for_if()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
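The if clause exercised above (with control == 1 always false, so the loop runs on a single thread) is more typically used to keep small workloads serial. A standalone sketch with a hypothetical trip-count threshold (not part of the patch):

#include <stdio.h>
#include <omp.h>

static double sum_squares(int n)
{
    double s = 0.0;
    int i;
    /* fork a team only when the trip count makes it worthwhile;
       the 10000 threshold is a made-up example value */
    #pragma omp parallel for reduction(+:s) if(n > 10000)
    for (i = 0; i < n; i++)
        s += (double)i * i;
    return s;
}

int main(void)
{
    printf("small: %.0f\n", sum_squares(100));     /* runs on one thread */
    printf("large: %.0f\n", sum_squares(100000));  /* runs on the full team */
    return 0;
}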
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c b/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c
new file mode 100644
index 0000000..a53cfb2
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c
@@ -0,0 +1,37 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+int test_omp_parallel_for_lastprivate()
+{
+ int sum;
+ int i;
+ int i0;
+ int known_sum;
+
+ sum =0;
+ i0 = -1;
+
+ #pragma omp parallel for reduction(+:sum) \
+ schedule(static,7) private(i) lastprivate(i0)
+ for (i = 1; i <= LOOPCOUNT; i++) {
+ sum = sum + i;
+ i0 = i;
+ } /* end of parallel for */
+
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ return ((known_sum == sum) && (i0 == LOOPCOUNT));
+} /* end of check_parallel_for_lastprivate */
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_parallel_for_lastprivate()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c b/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c
new file mode 100644
index 0000000..5fef460
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c
@@ -0,0 +1,64 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+static int last_i = 0;
+
+int i;
+#pragma omp threadprivate(i)
+
+/* Variable ii is used to avoid problems with a threadprivate variable used as a loop
+ * index. See test omp_threadprivate_for.
+ */
+static int ii;
+#pragma omp threadprivate(ii)
+
+/*!
+ Utility function: returns true if the passed argument is larger than
+ the argument of the last call of this function.
+ */
+static int check_i_islarger2(int i)
+{
+ int islarger;
+ islarger = (i > last_i);
+ last_i = i;
+ return (islarger);
+}
+
+int test_omp_parallel_for_ordered()
+{
+ int sum;
+ int is_larger;
+ int known_sum;
+ int i;
+
+ sum = 0;
+ is_larger = 1;
+ last_i = 0;
+ #pragma omp parallel for schedule(static,1) private(i) ordered
+ for (i = 1; i < 100; i++) {
+ ii = i;
+ #pragma omp ordered
+ {
+ is_larger = check_i_islarger2 (ii) && is_larger;
+ sum = sum + ii;
+ }
+ }
+ known_sum = (99 * 100) / 2;
+ fprintf (stderr," known_sum = %d , sum = %d \n", known_sum, sum);
+ fprintf (stderr," is_larger = %d\n", is_larger);
+ return (known_sum == sum) && is_larger;
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_parallel_for_ordered()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
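The ordered test above depends on the ordered construct executing its block in iteration order, which is what lets check_i_islarger2() observe strictly increasing arguments. A minimal standalone sketch of that behaviour (not part of the patch):

#include <stdio.h>
#include <omp.h>

int main(void)
{
    int i;
    /* the loop directive needs the ordered clause for the
       ordered region inside it to be allowed */
    #pragma omp parallel for ordered schedule(static,1)
    for (i = 0; i < 8; i++) {
        #pragma omp ordered
        printf("%d ", i);   /* always prints 0 1 2 3 4 5 6 7 in order */
    }
    printf("\n");
    return 0;
}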
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_private.c b/final/runtime/test/worksharing/for/omp_parallel_for_private.c
new file mode 100644
index 0000000..1231d36
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_private.c
@@ -0,0 +1,50 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+/*! Utility function to spend some time in a loop */
+static void do_some_work (void)
+{
+ int i;
+ double sum = 0;
+ for(i = 0; i < 1000; i++){
+ sum += sqrt (i);
+ }
+}
+
+int test_omp_parallel_for_private()
+{
+ int sum;
+ int i;
+ int i2;
+ int known_sum;
+
+ sum =0;
+ i2=0;
+
+ #pragma omp parallel for reduction(+:sum) schedule(static,1) private(i) private(i2)
+ for (i=1;i<=LOOPCOUNT;i++)
+ {
+ i2 = i;
+ #pragma omp flush
+ do_some_work ();
+ #pragma omp flush
+ sum = sum + i2;
+ } /*end of for*/
+ known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2;
+ return (known_sum == sum);
+} /* end of check_parallel_for_private */
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_parallel_for_private()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c b/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c
new file mode 100644
index 0000000..118d730
--- /dev/null
+++ b/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c
@@ -0,0 +1,266 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <math.h>
+#include "omp_testsuite.h"
+
+#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */
+#define MAX_FACTOR 10
+#define KNOWN_PRODUCT 3628800 /* 10! */
+
+int test_omp_parallel_for_reduction()
+{
+ int sum;
+ int known_sum;
+ double dsum;
+ double dknown_sum;
+ double dt=0.5; /* base of geometric row for + and - test*/
+ double rounding_error= 1.E-9;
+ int diff;
+ double ddiff;
+ int product;
+ int known_product;
+ int logic_and;
+ int logic_or;
+ int bit_and;
+ int bit_or;
+ int exclusiv_bit_or;
+ int logics[LOOPCOUNT];
+ int i;
+ double dpt;
+ int result;
+
+ sum =0;
+ dsum=0;
+ dt = 1./3.;
+ result = 0;
+ product = 1;
+ logic_and=1;
+ logic_or=0;
+ bit_and=1;
+ bit_or=0;
+ exclusiv_bit_or=0;
+
+ /* Tests for integers */
+ known_sum = (LOOPCOUNT*(LOOPCOUNT+1))/2;
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:sum)
+ for (i=1;i<=LOOPCOUNT;i++) {
+ sum=sum+i;
+ }
+ if(known_sum!=sum) {
+ result++;
+ fprintf(stderr,"Error in sum with integers: Result was %d"
+ " instead of %d\n",sum,known_sum);
+ }
+
+ diff = (LOOPCOUNT*(LOOPCOUNT+1))/2;
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:diff)
+ for (i=1;i<=LOOPCOUNT;++i) {
+ diff=diff-i;
+ }
+ if(diff != 0) {
+ result++;
+ fprintf(stderr,"Error in difference with integers: Result was %d"
+ " instead of 0.\n",diff);
+ }
+
+ /* Tests for doubles */
+ dsum=0;
+ dpt=1;
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dpt*=dt;
+ }
+ dknown_sum = (1-dpt)/(1-dt);
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:dsum)
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dsum += pow(dt,i);
+ }
+ if( fabs(dsum-dknown_sum) > rounding_error ) {
+ result++;
+ fprintf(stderr,"Error in sum with doubles: Result was %f"
+ " instead of %f (Difference: %E)\n",
+ dsum, dknown_sum, dsum-dknown_sum);
+ }
+
+ dpt=1;
+
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ dpt*=dt;
+ }
+ fprintf(stderr,"\n");
+ ddiff = (1-dpt)/(1-dt);
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:ddiff)
+ for (i=0;i<DOUBLE_DIGITS;++i) {
+ ddiff -= pow(dt,i);
+ }
+ if( fabs(ddiff) > rounding_error) {
+ result++;
+ fprintf(stderr,"Error in Difference with doubles: Result was %E"
+ " instead of 0.0\n",ddiff);
+ }
+
+ /* Tests for integers */
+ #pragma omp parallel for schedule(dynamic,1) private(i) reduction(*:product)
+ for(i=1;i<=MAX_FACTOR;i++) {
+ product *= i;
+ }
+ known_product = KNOWN_PRODUCT;
+ if(known_product != product) {
+ result++;
+ fprintf(stderr,"Error in Product with integers: Result was %d"
+ " instead of %d\n\n",product,known_product);
+ }
+
+ /* Tests for logic AND */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=1;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(&&:logic_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_and = (logic_and && logics[i]);
+ }
+ if(!logic_and) {
+ result++;
+ fprintf(stderr,"Error in logic AND part 1.\n");
+ }
+
+ logic_and = 1;
+ logics[LOOPCOUNT/2]=0;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(&&:logic_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_and = logic_and && logics[i];
+ }
+ if(logic_and) {
+ result++;
+ fprintf(stderr,"Error in logic AND part 2.\n");
+ }
+
+ /* Tests for logic OR */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(||:logic_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_or = logic_or || logics[i];
+ }
+ if(logic_or) {
+ result++;
+ fprintf(stderr,"Error in logic OR part 1.\n");
+ }
+ logic_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(||:logic_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ logic_or = logic_or || logics[i];
+ }
+ if(!logic_or) {
+ result++;
+ fprintf(stderr,"Error in logic OR part 2.\n");
+ }
+
+ /* Tests for bitwise AND */
+ for(i=0;i<LOOPCOUNT;++i) {
+ logics[i]=1;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(&:bit_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_and = (bit_and & logics[i]);
+ }
+ if(!bit_and) {
+ result++;
+ fprintf(stderr,"Error in BIT AND part 1.\n");
+ }
+
+ bit_and = 1;
+ logics[LOOPCOUNT/2]=0;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(&:bit_and)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_and = bit_and & logics[i];
+ }
+ if(bit_and) {
+ result++;
+ fprintf(stderr,"Error in BIT AND part 2.\n");
+ }
+
+ /* Tests for bitwise OR */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(|:bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ if(bit_or) {
+ result++;
+ fprintf(stderr,"Error in BIT OR part 1\n");
+ }
+ bit_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(|:bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ bit_or = bit_or | logics[i];
+ }
+ if(!bit_or) {
+ result++;
+ fprintf(stderr,"Error in BIT OR part 2\n");
+ }
+
+ /* Tests for bitwise XOR */
+ for(i=0;i<LOOPCOUNT;i++) {
+ logics[i]=0;
+ }
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(^:exclusiv_bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ if(exclusiv_bit_or) {
+ result++;
+ fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n");
+ }
+
+ exclusiv_bit_or = 0;
+ logics[LOOPCOUNT/2]=1;
+
+ #pragma omp parallel for schedule(dynamic,1) private(i) \
+ reduction(^:exclusiv_bit_or)
+ for(i=0;i<LOOPCOUNT;++i) {
+ exclusiv_bit_or = exclusiv_bit_or ^ logics[i];
+ }
+ if(!exclusiv_bit_or) {
+ result++;
+ fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n");
+ }
+
+ /*printf("\nResult:%d\n",result);*/
+ return (result==0);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_parallel_for_reduction()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}