diff options
Diffstat (limited to 'final/runtime/test/worksharing')
41 files changed, 5141 insertions, 0 deletions
diff --git a/final/runtime/test/worksharing/for/bug_set_schedule_0.c b/final/runtime/test/worksharing/for/bug_set_schedule_0.c
new file mode 100644
index 0000000..889e239
--- /dev/null
+++ b/final/runtime/test/worksharing/for/bug_set_schedule_0.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+#include "omp_testsuite.h"
+
+/* Test that the chunk size is set to default (1) when
+   chunk size <= 0 is specified */
+int a = 0;
+
+/* Runs a 10-iteration schedule(runtime) loop after requesting a dynamic
+   schedule with chunk size 0; returns 1 iff exactly 10 iterations ran. */
+int test_set_schedule_0()
+{
+  int i;
+  a = 0;
+  omp_set_schedule(omp_sched_dynamic,0);
+
+  #pragma omp parallel
+  {
+    #pragma omp for schedule(runtime)
+    for(i = 0; i < 10; i++) {
+      int seen;
+      /* Capture the counter atomically: a plain read of 'a' would race
+         with the atomic increments performed by the other threads. */
+      #pragma omp atomic capture
+      seen = ++a;
+      if(seen > 10)
+        exit(1);
+    }
+  }
+  return a==10;
+}
+
+/* Repeats the check REPETITIONS times; exit status is the failure count. */
+int main()
+{
+  int i;
+  int num_failed=0;
+
+  for(i = 0; i < REPETITIONS; i++) {
+    if(!test_set_schedule_0()) {
+      num_failed++;
+    }
+  }
+  return num_failed;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_doacross_check.c b/final/runtime/test/worksharing/for/kmp_doacross_check.c
new file mode 100644
index 0000000..4eea328
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_doacross_check.c
@@ -0,0 +1,63 @@
+// RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
+// UNSUPPORTED: gcc
+// This test is incompatible with gcc because of the explicit call to
+// __kmpc_doacross_fini(). gcc relies on an implicit call to this function
+// when the last iteration is executed inside the GOMP_loop_*_next() functions.
+// Hence, in gcc, having the explicit call leads to __kmpc_doacross_fini()
+// being called twice.
+#include <stdio.h>
+
+#define N 1000
+
+// Bounds of one loop dimension, as handed to __kmpc_doacross_init().
+struct dim {
+  long long lo; // lower
+  long long up; // upper
+  long long st; // stride
+};
+// Runtime entry points this test calls by hand; declared here directly.
+extern void __kmpc_doacross_init(void*, int, int, struct dim *);
+extern void __kmpc_doacross_wait(void*, int, long long*);
+extern void __kmpc_doacross_post(void*, int, long long*);
+extern void __kmpc_doacross_fini(void*, int);
+extern int __kmpc_global_thread_num(void*);
+
+// Builds the chain iter[k] = iter[k-1] + 1 under hand-written doacross
+// synchronization; the test passes when iter[N-1] reaches N.
+int main()
+{
+  int k;
+  int iter[N];
+  struct dim dims = {1, N-1, 1}; // lo, up, st of the single loop dimension
+  for( k = 0; k < N; ++k )
+    iter[k] = 1;
+  #pragma omp parallel num_threads(4)
+  {
+    int i;
+    long long vec;
+    int gtid = __kmpc_global_thread_num(NULL);
+    __kmpc_doacross_init(NULL,gtid,1,&dims); // thread starts the loop
+    #pragma omp for nowait schedule(dynamic)
+    for( i = 1; i < N; ++i )
+    {
+      // runtime call corresponding to #pragma omp ordered depend(sink:i-1)
+      vec=i-1;
+      __kmpc_doacross_wait(NULL,gtid,&vec);
+      // user's code
+      iter[i] = iter[i-1] + 1;
+      // runtime call corresponding to #pragma omp ordered depend(source)
+      vec=i;
+      __kmpc_doacross_post(NULL,gtid,&vec);
+    }
+    // thread finishes the loop (should be before the loop barrier)
+    __kmpc_doacross_fini(NULL,gtid);
+  }
+  if( iter[N-1] != N ) {
+    printf("failed %d != %d\n", iter[N-1], N);
+    return 1;
+  }
+  printf("passed\n");
+  return 0;
+}
+
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c
new file mode 100644
index 0000000..6cf5d2f
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c
@@ -0,0 +1,411 @@
+// RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
+/*
+  Test for the 'schedule(simd:guided)' clause.
+  Compiler needs to generate a dynamic dispatching and pass the schedule
+  value 46 to the OpenMP RTL. Test uses numerous loop parameter combinations.
+*/
+#include <stdio.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1)
+#else
+#include <unistd.h>
+#define delay() usleep(10)
+#endif
+
+// uncomment for debug diagnostics:
+//#define DEBUG
+
+#define SIMD_LEN 4
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+enum sched {
+  kmp_sch_static_balanced_chunked = 45,
+  kmp_sch_guided_simd = 46,
+  kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+
+extern int __kmpc_global_thread_num(id*);
+extern void __kmpc_barrier(id*, int gtid);
+extern void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+extern void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+extern int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+extern int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+// Checks every chunk dispatched for one 64-bit loop; returns the error count.
+int run_loop_64(i64 loop_lb, i64 loop_ub, i64 loop_st, int loop_chunk) {
+  int err = 0;
+  static int volatile loop_sync = 0;
+  i64 lb;   // Chunk lower bound
+  i64 ub;   // Chunk upper bound
+  i64 st;   // Chunk stride
+  int tid = omp_get_thread_num();
+  int gtid = tid;
+  int last;
+#ifdef DEBUG
+  printf("run_loop_<%d>(gtid=%d, tid=%d, lb=%d, ub=%d, st=%d, ch=%d)\n",
+         (int)sizeof(i64), gtid, tid,
+         (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen
+  if (loop_st == 0)
+    return 0;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return 0;
+
+  __kmpc_dispatch_init_8(&loc, gtid, kmp_sch_guided_simd,
+                         loop_lb, loop_ub, loop_st, loop_chunk);
+  if (tid == 0) {
+    // Let the master thread handle the chunks alone
+    int chunk;      // No of current chunk
+    i64 next_lb;    // Lower bound of the next chunk
+    i64 last_ub = loop_ub; // Upper bound of the last processed chunk
+    u64 cur;        // Number of iterations in current chunk
+    u64 max;        // Max allowed iterations for current chunk
+    int undersized = 0;
+
+    chunk = 0;
+    next_lb = loop_lb;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations
+    while (__kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#ifdef DEBUG
+      printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub);
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized
+      if (undersized) {
+        printf("Error with chunk %d\n", chunk);
+        err++;
+      }
+      // Check lower and upper bounds
+      if (lb != next_lb) {
+        printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk);
+        err++;
+      }
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub)) {
+          printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb <= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      } else {
+        if (!(ub >= loop_ub)) {
+          printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb >= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      }; // if
+      // Stride should not change
+      if (!(st == loop_st)) {
+        printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk);
+        err++;
+      }
+      cur = (ub - lb) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum
+      if (!(cur <= max + 1)) {
+        printf("Error with iter %llu, %llu\n", cur, max);
+        err++;
+      }
+      // Update maximum for the next chunk
+      if (cur < max)
+        max = cur;
+      next_lb = ub + loop_st;
+      last_ub = ub;
+      undersized = (cur < loop_chunk);
+    }; // while
+    // Must have at least one chunk
+    if (!(chunk > 0)) {
+      printf("Error with chunk %d\n", chunk);
+      err++;
+    }
+    // Must have the right last iteration index
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st > loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    } else {
+      if (!(last_ub >= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st < loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    }; // if
+    // Let non-master threads go
+    loop_sync = 1;
+  } else {
+    int i;
+    // Workers wait for master thread to finish, then call __kmpc_dispatch_next
+    for (i = 0; i < 1000000; ++ i) {
+      if (loop_sync != 0) {
+        break;
+      }; // if
+    }; // for i
+    while (loop_sync == 0) {
+      delay();
+    }; // while
+    // At this moment we do not have any more chunks -- all the chunks already
+    // processed by the master thread
+    int rc = __kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st);
+    if (rc) {
+      printf("Error return value\n");
+      err++;
+    }
+  }; // if
+
+  __kmpc_barrier(&loc, gtid);
+  if (tid == 0) {
+    loop_sync = 0;    // Restore original state
+#ifdef DEBUG
+    printf("run_loop_64(): at the end\n");
+#endif
+  }; // if
+  __kmpc_barrier(&loc, gtid);
+  return err;
+} // run_loop_64
+
+// ---------------------------------------------------------------------------
+// Checks every chunk dispatched for one 32-bit loop; returns the error count.
+int run_loop_32(int loop_lb, int loop_ub, int loop_st, int loop_chunk) {
+  int err = 0;
+  static int volatile loop_sync = 0;
+  int lb;   // Chunk lower bound
+  int ub;   // Chunk upper bound
+  int st;   // Chunk stride
+  int tid = omp_get_thread_num();
+  int gtid = tid;
+  int last;
+#ifdef DEBUG
+  printf("run_loop_<%d>(gtid=%d, tid=%d, lb=%d, ub=%d, st=%d, ch=%d)\n",
+         (int)sizeof(int), gtid, tid,
+         (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen
+  if (loop_st == 0)
+    return 0;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return 0;
+
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_guided_simd,
+                         loop_lb, loop_ub, loop_st, loop_chunk);
+  if (tid == 0) {
+    // Let the master thread handle the chunks alone
+    int chunk;      // No of current chunk
+    int next_lb;    // Lower bound of the next chunk
+    int last_ub = loop_ub; // Upper bound of the last processed chunk
+    u64 cur;        // Number of iterations in current chunk
+    u64 max;        // Max allowed iterations for current chunk
+    int undersized = 0;
+
+    chunk = 0;
+    next_lb = loop_lb;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#ifdef DEBUG
+      printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub);
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized
+      if (undersized) {
+        printf("Error with chunk %d\n", chunk);
+        err++;
+      }
+      // Check lower and upper bounds
+      if (lb != next_lb) {
+        printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk);
+        err++;
+      }
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub)) {
+          printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb <= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      } else {
+        if (!(ub >= loop_ub)) {
+          printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb >= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      }; // if
+      // Stride should not change
+      if (!(st == loop_st)) {
+        printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk);
+        err++;
+      }
+      cur = (ub - lb) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum
+      if (!(cur <= max + 1)) {
+        printf("Error with iter %llu, %llu\n", cur, max);
+        err++;
+      }
+      // Update maximum for the next chunk
+      if (cur < max)
+        max = cur;
+      next_lb = ub + loop_st;
+      last_ub = ub;
+      undersized = (cur < loop_chunk);
+    }; // while
+    // Must have at least one chunk
+    if (!(chunk > 0)) {
+      printf("Error with chunk %d\n", chunk);
+      err++;
+    }
+    // Must have the right last iteration index
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st > loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    } else {
+      if (!(last_ub >= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st < loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    }; // if
+    // Let non-master threads go
+    loop_sync = 1;
+  } else {
+    int i;
+    // Workers wait for master thread to finish, then call __kmpc_dispatch_next
+    for (i = 0; i < 1000000; ++ i) {
+      if (loop_sync != 0) {
+        break;
+      }; // if
+    }; // for i
+    while (loop_sync == 0) {
+      delay();
+    }; // while
+    // At this moment we do not have any more chunks -- all the chunks already
+    // processed by the master thread
+    int rc = __kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st);
+    if (rc) {
+      printf("Error return value\n");
+      err++;
+    }
+  }; // if
+
+  __kmpc_barrier(&loc, gtid);
+  if (tid == 0) {
+    loop_sync = 0;    // Restore original state
+#ifdef DEBUG
+    printf("run_loop<>(): at the end\n");
+#endif
+  }; // if
+  __kmpc_barrier(&loc, gtid);
+  return err;
+} // run_loop_32
+
+// ---------------------------------------------------------------------------
+int run_64(int num_th)
+{
+  int err = 0;
+#pragma omp parallel num_threads(num_th) reduction(+:err)
+  {
+    int chunk;
+    i64 st, lb, ub;
+    for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) {
+      for (st = 1; st <= 3; ++ st) {
+        for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) {
+          for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) {
+            err += run_loop_64(lb, ub, st, chunk);
+            err += run_loop_64(ub, lb, -st, chunk);
+          }; // for ub
+        }; // for lb
+      }; // for st
+    }; // for chunk
+  }
+  return err;
+} // run_64
+
+int run_32(int num_th)
+{
+  int err = 0;
+#pragma omp parallel num_threads(num_th) reduction(+:err)
+  {
+    int chunk, st, lb, ub;
+    for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) {
+      for (st = 1; st <= 3; ++ st) {
+        for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) {
+          for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) {
+            err += run_loop_32(lb, ub, st, chunk);
+            err += run_loop_32(ub, lb, -st, chunk);
+          }; // for ub
+        }; // for lb
+      }; // for st
+    }; // for chunk
+  }
+  return err;
+} // run_32
+
+// ---------------------------------------------------------------------------
+int main()
+{
+  int n, err = 0;
+  for (n = 1; n <= 4; ++ n) {
+    err += run_32(n);
+    err += run_64(n);
+  }; // for n
+  if (err)
+    printf("failed with %d errors\n", err);
+  else
+    printf("passed\n");
+  return err != 0;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
new file mode 100644
index 0000000..8b5f34a
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
@@ -0,0 +1,234 @@
+// RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
+
+// The test checks schedule(simd:runtime)
+// in combination with omp_set_schedule()
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched {
+  kmp_sch_static_balanced_chunked = 45,
+  kmp_sch_guided_simd = 46,
+  kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+  int __kmpc_global_thread_num(id*);
+  void __kmpc_barrier(id*, int gtid);
+  void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+  void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+  int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+  int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// Atomically bumps the shared error counter and returns its new value, so
+// failures reported concurrently by several threads are all counted.
+static int next_err(void) {
+  int n;
+#pragma omp atomic capture
+  n = ++err;
+  return n;
+}
+
+// ---------------------------------------------------------------------------
+// Called by every thread of a parallel region: dispatches the loop
+// [loop_lb, loop_ub] with stride loop_st through kmp_sch_runtime_simd and
+// validates each chunk this thread receives. lchunk is the chunk size the
+// caller expects (0 = no chunk given); failures are printed and counted in err.
+void
+run_loop(
+    int loop_lb,   // Loop lower bound.
+    int loop_ub,   // Loop upper bound.
+    int loop_st,   // Loop stride.
+    int lchunk
+) {
+  int lb;   // Chunk lower bound.
+  int ub;   // Chunk upper bound.
+  int st;   // Chunk stride.
+  int nthreads = omp_get_num_threads();
+  int tid = omp_get_thread_num();
+  int gtid = __kmpc_global_thread_num(&loc);
+  int last = 0;   // Stays 0 when this thread is never handed a chunk.
+  int ch;
+  int no_chunk = 0;
+  if (lchunk == 0) {
+    no_chunk = 1;
+    lchunk = 1;
+  }
+  ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen.
+  if (loop_st == 0)
+    return;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return;
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+                         loop_lb, loop_ub, loop_st, SIMD_LEN);
+  {
+    // Every calling thread validates the chunks it receives.
+    int chunk;      // No of current chunk.
+    int last_ub;    // Upper bound of the last processed chunk.
+    u64 cur;        // Number of iterations in current chunk.
+    u64 max;        // Max allowed iterations for current chunk.
+    int undersized = 0;
+    last_ub = loop_ub;
+    chunk = 0;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations.
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#if _DEBUG
+      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized.
+      if (undersized)
+        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, next_err());
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub))
+          printf("Error with ub %d, %d, ch %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, next_err());
+        if (!(lb <= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, next_err());
+      } else {
+        if (!(ub >= loop_ub))
+          printf("Error with ub %d, %d, %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, next_err());
+        if (!(lb >= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, next_err());
+      }; // if
+      // Stride should not change.
+      if (!(st == loop_st))
+        printf("Error with st %d, %d, ch %d, err %d\n",
+               (int)st, (int)loop_st, chunk, next_err());
+      cur = ( ub - lb ) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum.
+      if (!( cur <= max + 1))
+        printf("Error with iter %llu, %llu, err %d\n", cur, max, next_err());
+      // Non-final chunks must be a multiple of ch; the final one is size-checked.
+      if (last) {
+        if (!no_chunk && cur > ch && nthreads > 1)
+          printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+                 (int)cur, ch, tid, next_err());
+      } else {
+        if (cur % ch)
+          printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+                 chunk, (int)cur, ch, tid, next_err());
+      }
+      // Update maximum for the next chunk.
+      if (cur < max)
+        max = cur;
+      last_ub = ub;
+      undersized = (cur < ch);
+#if _DEBUG > 1
+      if (last)
+        printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+               undersized,(int)cur,ch,tid,ub,lb,loop_st);
+#endif
+    } // while
+    // Must have the right last iteration index.
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, next_err());
+      if (last && !(last_ub + loop_st > loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, next_err());
+    } else {
+      if (!(last_ub >= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, next_err());
+      if (last && !(last_ub + loop_st < loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, next_err());
+    } // if
+  }
+  __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+int main(int argc, char *argv[])
+{
+  int chunk = 0;
+// static (no chunk)
+  omp_set_schedule(omp_sched_static,0);
+#pragma omp parallel// num_threads(num_th)
+  run_loop(0, 26, 1, chunk);
+
+// auto (chunk should be ignored)
+  omp_set_schedule(omp_sched_auto,0);
+#pragma omp parallel// num_threads(num_th)
+  run_loop(0, 26, 1, chunk);
+
+// static,1
+  chunk = 1;
+  omp_set_schedule(omp_sched_static,1);
+#pragma omp parallel// num_threads(num_th)
+  run_loop(0, 26, 1, chunk);
+
+// dynamic,1
+  omp_set_schedule(omp_sched_dynamic,1);
+#pragma omp parallel// num_threads(num_th)
+  run_loop(0, 26, 1, chunk);
+
+// guided,1
+  omp_set_schedule(omp_sched_guided,1);
+#pragma omp parallel// num_threads(num_th)
+  run_loop(0, 26, 1, chunk);
+
+// dynamic,0 - use default chunk size 1
+  omp_set_schedule(omp_sched_dynamic,0);
+#pragma omp parallel// num_threads(num_th)
+  run_loop(0, 26, 1, chunk);
+
+// guided,0 - use default chunk size 1
+  omp_set_schedule(omp_sched_guided,0);
+#pragma omp parallel// num_threads(num_th)
+  run_loop(0, 26, 1, chunk);
+
+  if (err) {
+    printf("failed, err = %d\n", err);
+    return 1;
+  } else {
+    printf("passed\n");
+    return 0;
+  }
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
new file mode 100644
index 0000000..142e9b3
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
@@ -0,0 +1,198 @@
+// RUN: %libomp-compile
+// RUN: env OMP_SCHEDULE=guided    %libomp-run
+// RUN: env OMP_SCHEDULE=guided,1  %libomp-run 1
+// RUN: env OMP_SCHEDULE=guided,2  %libomp-run 2
+// RUN: env OMP_SCHEDULE=dynamic   %libomp-run
+// RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1
+// RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2
+// RUN: env OMP_SCHEDULE=auto      %libomp-run
+// REQUIRES: openmp-4.5
+
+// The test checks schedule(simd:runtime)
+// in combination with OMP_SCHEDULE=guided[,chunk]
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define UBOUND 100
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched {
+  kmp_sch_static_balanced_chunked = 45,
+  kmp_sch_guided_simd = 46,
+  kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+  int __kmpc_global_thread_num(id*);
+  void __kmpc_barrier(id*, int gtid);
+  void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+  void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+  int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+  int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// Atomically bumps the shared error counter and returns its new value.
+static int next_err(void) {
+  int n;
+#pragma omp atomic capture
+  n = ++err;
+  return n;
+}
+
+// ---------------------------------------------------------------------------
+// Run by every thread of the team: validates the chunks this thread receives
+// for loop [loop_lb, loop_ub] (stride loop_st); lchunk is the expected chunk.
+void run_loop(int loop_lb, int loop_ub, int loop_st, int lchunk) {
+  int lb;   // Chunk lower bound.
+  int ub;   // Chunk upper bound.
+  int st;   // Chunk stride.
+  int nthreads = omp_get_num_threads();
+  int tid = omp_get_thread_num();
+  int gtid = __kmpc_global_thread_num(&loc);
+  int last = 0;   // Stays 0 when this thread is never handed a chunk.
+  int ch;
+  int no_chunk = 0;
+  if (lchunk == 0) {
+    no_chunk = 1;
+    lchunk = 1;
+  }
+  ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen.
+  if (loop_st == 0)
+    return;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return;
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+                         loop_lb, loop_ub, loop_st, SIMD_LEN);
+  {
+    // Every calling thread validates the chunks it receives.
+    int chunk;      // No of current chunk.
+    int last_ub;    // Upper bound of the last processed chunk.
+    u64 cur;        // Number of iterations in current chunk.
+    u64 max;        // Max allowed iterations for current chunk.
+    int undersized = 0;
+    last_ub = loop_ub;
+    chunk = 0;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations.
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#if _DEBUG
+      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized.
+      if (undersized)
+        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, next_err());
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub))
+          printf("Error with ub %d, %d, ch %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, next_err());
+        if (!(lb <= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, next_err());
+      } else {
+        if (!(ub >= loop_ub))
+          printf("Error with ub %d, %d, %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, next_err());
+        if (!(lb >= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, next_err());
+      }; // if
+      // Stride should not change.
+      if (!(st == loop_st))
+        printf("Error with st %d, %d, ch %d, err %d\n",
+               (int)st, (int)loop_st, chunk, next_err());
+      cur = ( ub - lb ) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum.
+      if (!( cur <= max + 1))
+        printf("Error with iter %llu, %llu, err %d\n", cur, max, next_err());
+      // Non-final chunks must be a multiple of ch; the final one is size-checked.
+      if (!last && cur % ch)
+        printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+               chunk, (int)cur, ch, tid, next_err());
+      if (last && !no_chunk && cur > ch && nthreads > 1)
+        printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+               (int)cur, ch, tid, next_err());
+      if (cur < max)
+        max = cur;
+      last_ub = ub;
+      undersized = (cur < ch);
+#if _DEBUG > 1
+      if (last)
+        printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+               undersized,(int)cur,ch,tid,ub,lb,loop_st);
+#endif
+    } // while
+    // Must have the right last iteration index.
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, next_err());
+      if (last && !(last_ub + loop_st > loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, next_err());
+    } else {
+      if (!(last_ub >= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, next_err());
+      if (last && !(last_ub + loop_st < loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, next_err());
+    } // if
+  }
+  __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+int main(int argc, char *argv[])
+{
+  int chunk = 0;
+  if (argc > 1) {
+    // expect chunk size as a parameter
+    chunk = atoi(argv[1]);
+  }
+#pragma omp parallel //num_threads(num_th)
+  run_loop(0, UBOUND, 1, chunk);
+  if (err) {
+    printf("failed, err = %d\n", err);
+    return 1;
+  }
+  printf("passed\n");
+  return 0;
+}
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
new file mode 100644
index 0000000..e2c878f
--- /dev/null
+++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
@@ -0,0 +1,204 @@
+// RUN: %libomp-compile && %libomp-run
+// RUN: %libomp-run 1 && %libomp-run 2
+// REQUIRES: openmp-4.5
+
+// The test checks schedule(simd:runtime)
+// in combination with OMP_SCHEDULE=static[,chunk]
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched { + kmp_sch_static_balanced_chunked = 45, + kmp_sch_guided_simd = 46, + kmp_sch_runtime_simd = 47, +}; +typedef unsigned u32; +typedef long long i64; +typedef unsigned long long u64; +typedef struct { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +#ifdef __cplusplus +extern "C" { +#endif + int __kmpc_global_thread_num(id*); + void __kmpc_barrier(id*, int gtid); + void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); + void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); + int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); + int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); +#ifdef __cplusplus +} // extern "C" +#endif +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; + +// --------------------------------------------------------------------------- +void +run_loop( + int loop_lb, // Loop lower bound. + int loop_ub, // Loop upper bound. + int loop_st, // Loop stride. + int lchunk +) { + static int volatile loop_sync = 0; + int lb; // Chunk lower bound. + int ub; // Chunk upper bound. + int st; // Chunk stride. + int rc; + int nthreads = omp_get_num_threads(); + int tid = omp_get_thread_num(); + int gtid = __kmpc_global_thread_num(&loc); + int last; + int tc = (loop_ub - loop_lb) / loop_st + 1; + int ch; + int no_chunk = 0; + if (lchunk == 0) { + no_chunk = 1; + lchunk = 1; + } + ch = lchunk * SIMD_LEN; +#if _DEBUG > 1 + printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n", + gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen. + if (loop_st == 0) + return; + if (loop_st > 0 ? 
loop_lb > loop_ub : loop_lb < loop_ub) + return; + __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, + loop_lb, loop_ub, loop_st, SIMD_LEN); + { + // Let the master thread handle the chunks alone. + int chunk; // No of current chunk. + int last_ub; // Upper bound of the last processed chunk. + u64 cur; // Number of interations in current chunk. + u64 max; // Max allowed iterations for current chunk. + int undersized = 0; + last_ub = loop_ub; + chunk = 0; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations. + while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if _DEBUG + printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n", + tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); +#endif + // Check if previous chunk (it is not the final chunk) is undersized. + if (undersized) + printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err); + if (loop_st > 0) { + if (!(ub <= loop_ub)) + printf("Error with ub %d, %d, ch %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb <= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + } else { + if (!(ub >= loop_ub)) + printf("Error with ub %d, %d, %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb >= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + }; // if + // Stride should not change. + if (!(st == loop_st)) + printf("Error with st %d, %d, ch %d, err %d\n", + (int)st, (int)loop_st, chunk, ++err); + cur = ( ub - lb ) / loop_st + 1; + // Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum. + if (!( cur <= max + 1)) + printf("Error with iter %d, %d, err %d\n", cur, max, ++err); + // Update maximum for the next chunk. 
+ if (last) { + if (!no_chunk && cur > ch && nthreads > 1) + printf("Error: too big last chunk %d (%d), tid %d, err %d\n", + (int)cur, ch, tid, ++err); + } else { + if (cur % ch) + printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", + chunk, (int)cur, ch, tid, ++err); + } + if (cur < max) + max = cur; + last_ub = ub; + undersized = (cur < ch); +#if _DEBUG > 1 + if (last) + printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n", + undersized,cur,ch,tid,ub,lb,loop_st); +#endif + } // while + // Must have the right last iteration index. + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st > loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } else { + if (!(last_ub >= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st < loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } // if + } + __kmpc_barrier(&loc, gtid); +} // run_loop + +int main(int argc, char *argv[]) +{ + int chunk = 0; + if (argc > 1) { + char *buf = malloc(8 + strlen(argv[1])); + // expect chunk size as a parameter + chunk = atoi(argv[1]); + strcpy(buf,"static,"); + strcat(buf,argv[1]); + seten("OMP_SCHEDULE",buf,1); + printf("Testing schedule(simd:%s)\n", buf); + free(buf); + } else { + seten("OMP_SCHEDULE","static",1); + printf("Testing schedule(simd:static)\n"); + } +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + if (err) { + printf("failed, err = %d\n", err); + return 1; + } else { + printf("passed\n"); + return 0; + } +} diff --git a/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c b/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c new file mode 100644 index 
0000000..a6378fe --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c @@ -0,0 +1,91 @@ +// RUN: %libomp-compile && %libomp-run 7 +// RUN: %libomp-run 0 && %libomp-run -1 +// RUN: %libomp-run 1 && %libomp-run 2 && %libomp-run 5 +// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run 7 +// RUN: %libomp-run 1 && %libomp-run 2 && %libomp-run 5 +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include <limits.h> +#include "omp_testsuite.h" + +#define INCR 7 +#define MY_MAX 200 +#define MY_MIN -200 +#ifndef MY_SCHEDULE +# define MY_SCHEDULE dynamic +#endif + +int num_disp_buffers, num_loops; +int a, b, a_known_value, b_known_value; + +int test_kmp_set_disp_num_buffers() +{ + int success = 1; + a = 0; + b = 0; + // run many small dynamic loops to stress the dispatch buffer system + #pragma omp parallel + { + int i,j; + for (j = 0; j < num_loops; j++) { + #pragma omp for schedule(MY_SCHEDULE) nowait + for (i = MY_MIN; i < MY_MAX; i+=INCR) { + #pragma omp atomic + a++; + } + #pragma omp for schedule(MY_SCHEDULE) nowait + for (i = MY_MAX; i >= MY_MIN; i-=INCR) { + #pragma omp atomic + b++; + } + } + } + // detect failure + if (a != a_known_value || b != b_known_value) { + success = 0; + printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, + b, b_known_value); + } + return success; +} + +int main(int argc, char** argv) +{ + int i,j; + int num_failed=0; + + if (argc != 2) { + fprintf(stderr, "usage: %s num_disp_buffers\n", argv[0]); + exit(1); + } + + // set the number of dispatch buffers + num_disp_buffers = atoi(argv[1]); + kmp_set_disp_num_buffers(num_disp_buffers); + + // figure out the known values to compare with calculated result + a_known_value = 0; + b_known_value = 0; + + // if specified to use bad num_disp_buffers set num_loops + // to something reasonable + if (num_disp_buffers <= 0) + num_loops = 10; + else + num_loops = num_disp_buffers*10; + + for (j = 0; j < num_loops; j++) { + for (i = MY_MIN; i < 
MY_MAX; i+=INCR) + a_known_value++; + for (i = MY_MAX; i >= MY_MIN; i-=INCR) + b_known_value++; + } + + for(i = 0; i < REPETITIONS; i++) { + if(!test_kmp_set_disp_num_buffers()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_doacross.c b/final/runtime/test/worksharing/for/omp_doacross.c new file mode 100644 index 0000000..32e8e82 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_doacross.c @@ -0,0 +1,63 @@ +// RUN: %libomp-compile-and-run +// REQUIRES: openmp-4.5 +// XFAIL: gcc-4, gcc-5, clang-3.7, clang-3.8, icc-15, icc-16 +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +#ifndef N +#define N 750 +#endif + +int test_doacross() { + int i, j; + // Allocate and zero out the matrix + int *m = (int *)malloc(sizeof(int) * N * N); + for (i = 0; i < N; ++i) { + for (j = 0; j < N; ++j) { + m[i * N + j] = 0; + } + } + // Have first row and column be 0, 1, 2, 3, etc. + for (i = 0; i < N; ++i) + m[i * N] = i; + for (j = 0; j < N; ++j) + m[j] = j; + // Perform wavefront which results in matrix: + // 0 1 2 3 4 + // 1 2 3 4 5 + // 2 3 4 5 6 + // 3 4 5 6 7 + // 4 5 6 7 8 + #pragma omp parallel shared(m) + { + int row, col; + #pragma omp for ordered(2) + for (row = 1; row < N; ++row) { + for (col = 1; col < N; ++col) { + #pragma omp ordered depend(sink : row - 1, col) depend(sink : row, col - 1) + m[row * N + col] = m[(row - 1) * N + col] + m[row * N + (col - 1)] - + m[(row - 1) * N + (col - 1)]; + #pragma omp ordered depend(source) + } + } + } + + // Check the bottom right element to see if iteration dependencies were held + int retval = (m[(N - 1) * N + N - 1] == 2 * (N - 1)); + free(m); + return retval; +} + +int main(int argc, char **argv) { + int i; + int num_failed = 0; + if (omp_get_max_threads() < 2) + omp_set_num_threads(4); + for (i = 0; i < REPETITIONS; i++) { + if (!test_doacross()) { + num_failed++; + } + } + return num_failed; +} diff --git 
a/final/runtime/test/worksharing/for/omp_for_bigbounds.c b/final/runtime/test/worksharing/for/omp_for_bigbounds.c new file mode 100644 index 0000000..901d760 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_bigbounds.c @@ -0,0 +1,70 @@ +// RUN: %libomp-compile -DMY_SCHEDULE=static && %libomp-run +// RUN: %libomp-compile -DMY_SCHEDULE=dynamic && %libomp-run +// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run + +// Only works with Intel Compiler since at least version 15.0 +// XFAIL: gcc, clang + +/* + * Test that large bounds are handled properly and calculations of + * loop iterations don't accidently overflow + */ +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include <limits.h> +#include "omp_testsuite.h" + +#define INCR 50000000 +#define MY_MAX 2000000000 +#define MY_MIN -2000000000 +#ifndef MY_SCHEDULE +# define MY_SCHEDULE static +#endif + +int a, b, a_known_value, b_known_value; + +int test_omp_for_bigbounds() +{ + a = 0; + b = 0; + #pragma omp parallel + { + int i; + #pragma omp for schedule(MY_SCHEDULE) + for (i = INT_MIN; i < MY_MAX; i+=INCR) { + #pragma omp atomic + a++; + } + #pragma omp for schedule(MY_SCHEDULE) + for (i = INT_MAX; i >= MY_MIN; i-=INCR) { + #pragma omp atomic + b++; + } + } + printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, b, b_known_value); + return (a == a_known_value && b == b_known_value); +} + +int main() +{ + int i; + int num_failed=0; + + a_known_value = 0; + for (i = INT_MIN; i < MY_MAX; i+=INCR) { + a_known_value++; + } + + b_known_value = 0; + for (i = INT_MAX; i >= MY_MIN; i-=INCR) { + b_known_value++; + } + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_bigbounds()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_collapse.c b/final/runtime/test/worksharing/for/omp_for_collapse.c new file mode 100644 index 0000000..a08086d --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_collapse.c 
@@ -0,0 +1,51 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/* Utility function to check that i is increasing monotonically + with each call */ +static int check_i_islarger (int i) +{ + static int last_i; + int islarger; + if (i==1) + last_i=0; + islarger = ((i >= last_i)&&(i - last_i<=1)); + last_i = i; + return (islarger); +} + +int test_omp_for_collapse() +{ + int is_larger = 1; + + #pragma omp parallel + { + int i,j; + int my_islarger = 1; + #pragma omp for private(i,j) schedule(static,1) collapse(2) ordered + for (i = 1; i < 100; i++) { + for (j =1; j <100; j++) { + #pragma omp ordered + my_islarger = check_i_islarger(i)&&my_islarger; + } + } + #pragma omp critical + is_larger = is_larger && my_islarger; + } + return (is_larger); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_collapse()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_firstprivate.c b/final/runtime/test/worksharing/for/omp_for_firstprivate.c new file mode 100644 index 0000000..6c4121c --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_firstprivate.c @@ -0,0 +1,55 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int sum1; +#pragma omp threadprivate(sum1) + +int test_omp_for_firstprivate() +{ + int sum; + int sum0; + int known_sum; + int threadsnum; + + sum = 0; + sum0 = 12345; + sum1 = 0; + + #pragma omp parallel + { + #pragma omp single + { + threadsnum=omp_get_num_threads(); + } + /* sum0 = 0; */ + + int i; + #pragma omp for firstprivate(sum0) + for (i = 1; i <= LOOPCOUNT; i++) { + sum0 = sum0 + i; + sum1 = sum0; + } /* end of for */ + + #pragma omp critical + { + sum = sum + sum1; + } /* end of critical */ + } /* end of parallel */ + known_sum = 12345* threadsnum+ (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return (known_sum == sum); +} + +int main() +{ + 
int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_lastprivate.c b/final/runtime/test/worksharing/for/omp_for_lastprivate.c new file mode 100644 index 0000000..88694b8 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_lastprivate.c @@ -0,0 +1,52 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int sum0; +#pragma omp threadprivate(sum0) + +int test_omp_for_lastprivate() +{ + int sum = 0; + int known_sum; + int i0; + + i0 = -1; + + #pragma omp parallel + { + sum0 = 0; + { /* Begin of orphaned block */ + int i; + #pragma omp for schedule(static,7) lastprivate(i0) + for (i = 1; i <= LOOPCOUNT; i++) { + sum0 = sum0 + i; + i0 = i; + } /* end of for */ + } /* end of orphaned block */ + + #pragma omp critical + { + sum = sum + sum0; + } /* end of critical */ + } /* end of parallel */ + + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + fprintf(stderr, "known_sum = %d , sum = %d\n",known_sum,sum); + fprintf(stderr, "LOOPCOUNT = %d , i0 = %d\n",LOOPCOUNT,i0); + return ((known_sum == sum) && (i0 == LOOPCOUNT)); +} + +int main() +{ + int i; + int num_failed=0; + + for (i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_lastprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_nowait.c b/final/runtime/test/worksharing/for/omp_for_nowait.c new file mode 100644 index 0000000..95a4775 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_nowait.c @@ -0,0 +1,77 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +/* + * This test will hang if the nowait is not working properly. + * + * It relies on a thread skipping to the second for construct to + * release the threads in the first for construct. 
+ * + * Also, we use static scheduling to guarantee that one + * thread will make it to the second for construct. + */ +volatile int release; +volatile int count; + +void wait_for_release_then_increment(int rank) +{ + fprintf(stderr, "Thread nr %d enters first for construct" + " and waits.\n", rank); + while (release == 0); + #pragma omp atomic + count++; +} + +void release_and_increment(int rank) +{ + fprintf(stderr, "Thread nr %d sets release to 1\n", rank); + release = 1; + #pragma omp atomic + count++; +} + +int test_omp_for_nowait() +{ + release = 0; + count = 0; + + #pragma omp parallel num_threads(4) + { + int rank; + int i; + + rank = omp_get_thread_num(); + + #pragma omp for schedule(static) nowait + for (i = 0; i < 4; i++) { + if (i < 3) + wait_for_release_then_increment(rank); + else { + fprintf(stderr, "Thread nr %d enters first for and goes " + "immediately to the next for construct to release.\n", rank); + #pragma omp atomic + count++; + } + } + + #pragma omp for schedule(static) + for (i = 0; i < 4; i++) { + release_and_increment(rank); + } + } + return (count==8); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_nowait()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_ordered.c b/final/runtime/test/worksharing/for/omp_for_ordered.c new file mode 100644 index 0000000..18ac7eb --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_ordered.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +static int last_i = 0; + +/* Utility function to check that i is increasing monotonically + with each call */ +static int check_i_islarger (int i) +{ + int islarger; + islarger = (i > last_i); + last_i = i; + return (islarger); +} + +int test_omp_for_ordered() +{ + int sum; + int is_larger = 1; + int known_sum; + + last_i = 0; + sum = 0; + + #pragma omp parallel + { + int i; 
+ int my_islarger = 1; + #pragma omp for schedule(static,1) ordered + for (i = 1; i < 100; i++) { + #pragma omp ordered + { + my_islarger = check_i_islarger(i) && my_islarger; + sum = sum + i; + } + } + #pragma omp critical + { + is_larger = is_larger && my_islarger; + } + } + + known_sum=(99 * 100) / 2; + return ((known_sum == sum) && is_larger); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_ordered()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_private.c b/final/runtime/test/worksharing/for/omp_for_private.c new file mode 100644 index 0000000..1f537b9 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_private.c @@ -0,0 +1,63 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/* Utility function do spend some time in a loop */ +static void do_some_work() +{ + int i; + double sum = 0; + for(i = 0; i < 1000; i++){ + sum += sqrt ((double) i); + } +} + +int sum1; +#pragma omp threadprivate(sum1) + +int test_omp_for_private() +{ + int sum = 0; + int sum0; + int known_sum; + + sum0 = 0; /* setting (global) sum0 = 0 */ + + #pragma omp parallel + { + sum1 = 0; /* setting sum1 in each thread to 0 */ + { /* begin of orphaned block */ + int i; + #pragma omp for private(sum0) schedule(static,1) + for (i = 1; i <= LOOPCOUNT; i++) { + sum0 = sum1; + #pragma omp flush + sum0 = sum0 + i; + do_some_work (); + #pragma omp flush + sum1 = sum0; + } + } /* end of orphaned block */ + + #pragma omp critical + { + sum = sum + sum1; + } /*end of critical*/ + } /* end of parallel*/ + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_reduction.c 
b/final/runtime/test/worksharing/for/omp_for_reduction.c new file mode 100644 index 0000000..28f0907 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_reduction.c @@ -0,0 +1,339 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include "omp_testsuite.h" + +#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */ +#define MAX_FACTOR 10 +#define KNOWN_PRODUCT 3628800 /* 10! */ + +int test_omp_for_reduction () +{ + double dt; + int sum; + int diff; + int product = 1; + double dsum; + double dknown_sum; + double ddiff; + int logic_and; + int logic_or; + int bit_and; + int bit_or; + int exclusiv_bit_or; + int *logics; + int i; + int known_sum; + int known_product; + double rounding_error = 1.E-9; /* over all rounding error to be + ignored in the double tests */ + double dpt; + int result = 0; + int logicsArray[LOOPCOUNT]; + + /* Variables for integer tests */ + sum = 0; + product = 1; + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + /* variabels for double tests */ + dt = 1. 
/ 3.; /* base of geometric row for + and - test*/ + dsum = 0.; + /* Variabeles for logic tests */ + logics = logicsArray; + logic_and = 1; + logic_or = 0; + /* Variabeles for bit operators tests */ + bit_and = 1; + bit_or = 0; + /* Variables for exclusiv bit or */ + exclusiv_bit_or = 0; + + /************************************************************************/ + /** Tests for integers **/ + /************************************************************************/ + + /**** Testing integer addition ****/ + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(+:sum) + for (j = 1; j <= LOOPCOUNT; j++) { + sum = sum + j; + } + } + if (known_sum != sum) { + result++; + fprintf (stderr, "Error in sum with integers: Result was %d" + " instead of %d.\n", sum, known_sum); + } + + /**** Testing integer subtracton ****/ + diff = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(-:diff) + for (j = 1; j <= LOOPCOUNT; j++) { + diff = diff - j; + } + } + if (diff != 0) { + result++; + fprintf (stderr, "Error in difference with integers: Result was %d" + " instead of 0.\n", diff); + } + + /**** Testing integer multiplication ****/ + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(*:product) + for (j = 1; j <= MAX_FACTOR; j++) { + product *= j; + } + } + known_product = KNOWN_PRODUCT; + if(known_product != product) { + result++; + fprintf (stderr,"Error in Product with integers: Result was %d" + " instead of %d\n",product,known_product); + } + + /************************************************************************/ + /** Tests for doubles **/ + /************************************************************************/ + + /**** Testing double addition ****/ + dsum = 0.; + dpt = 1.; + for (i = 0; i < DOUBLE_DIGITS; ++i) { + dpt *= dt; + } + dknown_sum = (1 - dpt) / (1 - dt); + #pragma omp parallel + { + int j; + #pragma omp for 
schedule(dynamic,1) reduction(+:dsum) + for (j = 0; j < DOUBLE_DIGITS; j++) { + dsum += pow (dt, j); + } + } + if (fabs (dsum - dknown_sum) > rounding_error) { + result++; + fprintf (stderr, "\nError in sum with doubles: Result was %f" + " instead of: %f (Difference: %E)\n", + dsum, dknown_sum, dsum-dknown_sum); + } + + /**** Testing double subtraction ****/ + ddiff = (1 - dpt) / (1 - dt); + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(-:ddiff) + for (j = 0; j < DOUBLE_DIGITS; ++j) { + ddiff -= pow (dt, j); + } + } + if (fabs (ddiff) > rounding_error) { + result++; + fprintf (stderr, "Error in Difference with doubles: Result was %E" + " instead of 0.0\n", ddiff); + } + + + /************************************************************************/ + /** Tests for logical values **/ + /************************************************************************/ + + /**** Testing logic and ****/ + for (i = 0; i < LOOPCOUNT; i++) { + logics[i] = 1; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(&&:logic_and) + for (j = 0; j < LOOPCOUNT; ++j) { + logic_and = (logic_and && logics[j]); + } + } + if(!logic_and) { + result++; + fprintf (stderr, "Error in logic AND part 1\n"); + } + + logic_and = 1; + logics[LOOPCOUNT / 2] = 0; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(&&:logic_and) + for (j = 0; j < LOOPCOUNT; ++j) { + logic_and = logic_and && logics[j]; + } + } + if(logic_and) { + result++; + fprintf (stderr, "Error in logic AND part 2\n"); + } + + /**** Testing logic or ****/ + for (i = 0; i < LOOPCOUNT; i++) { + logics[i] = 0; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(||:logic_or) + for (j = 0; j < LOOPCOUNT; ++j) { + logic_or = logic_or || logics[j]; + } + } + if (logic_or) { + result++; + fprintf (stderr, "Error in logic OR part 1\n"); + } + + logic_or = 0; + logics[LOOPCOUNT / 2] = 1; + + #pragma omp 
parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(||:logic_or) + for (j = 0; j < LOOPCOUNT; ++j) { + logic_or = logic_or || logics[j]; + } + } + if(!logic_or) { + result++; + fprintf (stderr, "Error in logic OR part 2\n"); + } + + /************************************************************************/ + /** Tests for bit values **/ + /************************************************************************/ + + /**** Testing bit and ****/ + for (i = 0; i < LOOPCOUNT; ++i) { + logics[i] = 1; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(&:bit_and) + for (j = 0; j < LOOPCOUNT; ++j) { + bit_and = (bit_and & logics[j]); + } + } + if (!bit_and) { + result++; + fprintf (stderr, "Error in BIT AND part 1\n"); + } + + bit_and = 1; + logics[LOOPCOUNT / 2] = 0; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(&:bit_and) + for (j = 0; j < LOOPCOUNT; ++j) { + bit_and = bit_and & logics[j]; + } + } + if (bit_and) { + result++; + fprintf (stderr, "Error in BIT AND part 2\n"); + } + + /**** Testing bit or ****/ + for (i = 0; i < LOOPCOUNT; i++) { + logics[i] = 0; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(|:bit_or) + for (j = 0; j < LOOPCOUNT; ++j) { + bit_or = bit_or | logics[j]; + } + } + if (bit_or) { + result++; + fprintf (stderr, "Error in BIT OR part 1\n"); + } + + bit_or = 0; + logics[LOOPCOUNT / 2] = 1; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(|:bit_or) + for (j = 0; j < LOOPCOUNT; ++j) { + bit_or = bit_or | logics[j]; + } + } + if (!bit_or) { + result++; + fprintf (stderr, "Error in BIT OR part 2\n"); + } + + /**** Testing exclusive bit or ****/ + for (i = 0; i < LOOPCOUNT; i++) { + logics[i] = 0; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(^:exclusiv_bit_or) + for (j = 0; j < LOOPCOUNT; ++j) { + exclusiv_bit_or = exclusiv_bit_or ^ 
logics[j]; + } + } + if (exclusiv_bit_or) { + result++; + fprintf (stderr, "Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[LOOPCOUNT / 2] = 1; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(^:exclusiv_bit_or) + for (j = 0; j < LOOPCOUNT; ++j) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[j]; + } + } + if (!exclusiv_bit_or) { + result++; + fprintf (stderr, "Error in EXCLUSIV BIT OR part 2\n"); + } + + return (result == 0); + free (logics); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_auto.c b/final/runtime/test/worksharing/for/omp_for_schedule_auto.c new file mode 100644 index 0000000..075617c --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_auto.c @@ -0,0 +1,69 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include "omp_testsuite.h" + +int sum1; +#pragma omp threadprivate(sum1) + +int test_omp_for_auto() +{ + int j; + int sum; + int sum0; + int known_sum; + int threadsnum; + + sum = 0; + sum0 = 12345; + + // array which keeps track of which threads participated in the for loop + // e.g., given 4 threads, [ 0 | 1 | 1 | 0 ] implies + // threads 0 and 3 did not, threads 1 and 2 did + int max_threads = omp_get_max_threads(); + int* active_threads = (int*)malloc(sizeof(int)*max_threads); + for(j = 0; j < max_threads; j++) + active_threads[j] = 0; + + #pragma omp parallel + { + int i; + sum1 = 0; + #pragma omp for firstprivate(sum0) schedule(auto) + for (i = 1; i <= LOOPCOUNT; i++) { + active_threads[omp_get_thread_num()] = 1; + sum0 = sum0 + i; + sum1 = sum0; + } + + #pragma omp critical + { + sum = sum + sum1; + } + } + + // count the threads that participated (sum is stored in threadsnum) + threadsnum=0; + for(j = 0; j < max_threads; j++) { + 
if(active_threads[j]) + threadsnum++; + } + free(active_threads); + + known_sum = 12345 * threadsnum + (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_auto()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c b/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c new file mode 100644 index 0000000..6d4f59b --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c @@ -0,0 +1,89 @@ +// RUN: %libomp-compile-and-run +/* + * Test for dynamic scheduling with chunk size + * Method: caculate how many times the iteration space is dispatched + * and judge if each dispatch has the requested chunk size + * unless it is the last one. + * It is possible for two adjacent chunks are assigned to the same thread + * Modified by Chunhua Liao + */ +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +#define CFDMAX_SIZE 100 +const int chunk_size = 7; + +int test_omp_for_schedule_dynamic() +{ + int tid; + int *tids; + int i; + int tidsArray[CFDMAX_SIZE]; + int count = 0; + int tmp_count = 0; /*dispatch times*/ + int *tmp; /*store chunk size for each dispatch*/ + int result = 0; + + tids = tidsArray; + + #pragma omp parallel private(tid) shared(tids) + { /* begin of parallel */ + int tid; + tid = omp_get_thread_num (); + #pragma omp for schedule(dynamic,chunk_size) + for (i = 0; i < CFDMAX_SIZE; i++) { + tids[i] = tid; + } + } + + for (i = 0; i < CFDMAX_SIZE - 1; ++i) { + if (tids[i] != tids[i + 1]) { + count++; + } + } + + tmp = (int *) malloc (sizeof (int) * (count + 1)); + tmp[0] = 1; + + for (i = 0; i < CFDMAX_SIZE - 1; ++i) { + if (tmp_count > count) { + printf ("--------------------\nTestinternal Error: List too small!!!\n--------------------\n"); /* Error handling */ + break; + } + if (tids[i] != tids[i + 1]) { + 
tmp_count++; + tmp[tmp_count] = 1; + } else { + tmp[tmp_count]++; + } + } + /* is dynamic statement working? */ + for (i = 0; i < count; i++) { + if ((tmp[i]%chunk_size)!=0) { + /* it is possible for 2 adjacent chunks assigned to a same thread */ + result++; + fprintf(stderr,"The intermediate dispatch has wrong chunksize.\n"); + /* result += ((tmp[i] / chunk_size) - 1); */ + } + } + if ((tmp[count]%chunk_size)!=(CFDMAX_SIZE%chunk_size)) { + result++; + fprintf(stderr,"the last dispatch has wrong chunksize.\n"); + } + /* for (int i=0;i<count+1;++i) printf("%d\t:=\t%d\n",i+1,tmp[i]); */ + return (result==0); +} +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_schedule_dynamic()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_guided.c b/final/runtime/test/worksharing/for/omp_for_schedule_guided.c new file mode 100644 index 0000000..1ee7449 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_guided.c @@ -0,0 +1,217 @@ +// RUN: %libomp-compile-and-run + +/* Test for guided scheduling + * Ensure threads get chunks interleavely first + * Then judge the chunk sizes are decreasing to a stable value + * Modified by Chunhua Liao + * For example, 100 iteration on 2 threads, chunksize 7 + * one line for each dispatch, 0/1 means thread id + * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24 + * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 18 + * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 + * 1 1 1 1 1 1 1 1 1 1 10 + * 0 0 0 0 0 0 0 0 8 + * 1 1 1 1 1 1 1 7 + * 0 0 0 0 0 0 0 7 + * 1 1 1 1 1 1 1 7 + * 0 0 0 0 0 5 +*/ +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +#define CFSMAX_SIZE 1000 +#define MAX_TIME 0.005 + +#ifdef SLEEPTIME +#undef SLEEPTIME +#define SLEEPTIME 0.0001 +#endif + +int test_omp_for_schedule_guided() +{ + int * tids; + int * chunksizes; + int notout; + int maxiter; + int threads; + int i; + 
int result; + + tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1)); + maxiter = 0; + result = 1; + notout = 1; + + /* Testing if enough threads are available for this check. */ + #pragma omp parallel + { + #pragma omp single + { + threads = omp_get_num_threads(); + } + } + + /* ensure there are at least two threads */ + if (threads < 2) { + omp_set_num_threads(2); + threads = 2; + } + + /* Now the real parallel work: + * Each thread will start immediately with the first chunk. + */ + #pragma omp parallel shared(tids,maxiter) + { /* begin of parallel */ + double count; + int tid; + int j; + + tid = omp_get_thread_num (); + + #pragma omp for nowait schedule(guided) + for(j = 0; j < CFSMAX_SIZE; ++j) { + count = 0.; + #pragma omp flush(maxiter) + if (j > maxiter) { + #pragma omp critical + { + maxiter = j; + } + } + /*printf ("thread %d sleeping\n", tid);*/ + #pragma omp flush(maxiter,notout) + while (notout && (count < MAX_TIME) && (maxiter == j)) { + #pragma omp flush(maxiter,notout) + my_sleep (SLEEPTIME); + count += SLEEPTIME; +#ifdef VERBOSE + printf("."); +#endif + } +#ifdef VERBOSE + if (count > 0.) 
printf(" waited %lf s\n", count); +#endif + /*printf ("thread %d awake\n", tid);*/ + tids[j] = tid; +#ifdef VERBOSE + printf("%d finished by %d\n",j,tid); +#endif + } /* end of for */ + notout = 0; + #pragma omp flush(maxiter,notout) + } /* end of parallel */ + + /******************************************************* + * evaluation of the values * + *******************************************************/ + { + int determined_chunksize = 1; + int last_threadnr = tids[0]; + int global_chunknr = 0; + int openwork = CFSMAX_SIZE; + int expected_chunk_size; + int* local_chunknr = (int*)malloc(threads * sizeof(int)); + double c = 1; + + for (i = 0; i < threads; i++) + local_chunknr[i] = 0; + + tids[CFSMAX_SIZE] = -1; + + /* + * determine the number of global chunks + */ + // fprintf(stderr,"# global_chunknr thread local_chunknr chunksize\n"); + for(i = 1; i <= CFSMAX_SIZE; ++i) { + if (last_threadnr==tids[i]) { + determined_chunksize++; + } else { + /* fprintf(stderr, "%d\t%d\t%d\t%d\n", global_chunknr, + last_threadnr, local_chunknr[last_threadnr], m); */ + global_chunknr++; + local_chunknr[last_threadnr]++; + last_threadnr = tids[i]; + determined_chunksize = 1; + } + } + /* now allocate the memory for saving the sizes of the global chunks */ + chunksizes = (int*)malloc(global_chunknr * sizeof(int)); + + /* + * Evaluate the sizes of the global chunks + */ + global_chunknr = 0; + determined_chunksize = 1; + last_threadnr = tids[0]; + for (i = 1; i <= CFSMAX_SIZE; ++i) { + /* If the threadnumber was the same as before increase the + * detected chunksize for this chunk otherwise set the detected + * chunksize again to one and save the number of the next + * thread in last_threadnr. 
+ */ + if (last_threadnr == tids[i]) { + determined_chunksize++; + } else { + chunksizes[global_chunknr] = determined_chunksize; + global_chunknr++; + local_chunknr[last_threadnr]++; + last_threadnr = tids[i]; + determined_chunksize = 1; + } + } + +#ifdef VERBOSE + fprintf(stderr, "found\texpected\tconstant\n"); +#endif + + /* identify the constant c for the exponential + decrease of the chunksize */ + expected_chunk_size = openwork / threads; + c = (double) chunksizes[0] / expected_chunk_size; + + for (i = 0; i < global_chunknr; i++) { + /* calculate the new expected chunksize */ + if (expected_chunk_size > 1) + expected_chunk_size = c * openwork / threads; +#ifdef VERBOSE + fprintf(stderr, "%8d\t%8d\t%lf\n", chunksizes[i], + expected_chunk_size, c * chunksizes[i]/expected_chunk_size); +#endif + /* check if chunksize is inside the rounding errors */ + if (abs (chunksizes[i] - expected_chunk_size) >= 2) { + result = 0; +#ifndef VERBOSE + fprintf(stderr, "Chunksize differed from expected " + "value: %d instead of %d\n", chunksizes[i], + expected_chunk_size); + return 0; +#endif + } /* end if */ + +#ifndef VERBOSE + if (expected_chunk_size - chunksizes[i] < 0) + fprintf(stderr, "Chunksize did not decrease: %d" + " instead of %d\n", chunksizes[i],expected_chunk_size); +#endif + + /* calculating the remaining amount of work */ + openwork -= chunksizes[i]; + } + } + return result; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_schedule_guided()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c b/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c new file mode 100644 index 0000000..b957fc3 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c @@ -0,0 +1,82 @@ +// RUN: %libomp-compile +// RUN: env OMP_SCHEDULE=static %libomp-run 1 0 +// RUN: env OMP_SCHEDULE=static,10 %libomp-run 1 10 +// RUN: env 
OMP_SCHEDULE=dynamic %libomp-run 2 1 +// RUN: env OMP_SCHEDULE=dynamic,11 %libomp-run 2 11 +// RUN: env OMP_SCHEDULE=guided %libomp-run 3 1 +// RUN: env OMP_SCHEDULE=guided,12 %libomp-run 3 12 +// RUN: env OMP_SCHEDULE=auto %libomp-run 4 1 +// RUN: env OMP_SCHEDULE=trapezoidal %libomp-run 101 1 +// RUN: env OMP_SCHEDULE=trapezoidal,13 %libomp-run 101 13 +// RUN: env OMP_SCHEDULE=static_steal %libomp-run 102 1 +// RUN: env OMP_SCHEDULE=static_steal,14 %libomp-run 102 14 +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include "omp_testsuite.h" + +int sum; +char* correct_kind_string; +omp_sched_t correct_kind; +int correct_chunk_size; + +int test_omp_for_runtime() +{ + int sum; + int known_sum; + int chunk_size; + int error; + omp_sched_t kind; + + sum = 0; + error = 0; + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + omp_get_schedule(&kind, &chunk_size); + + printf("omp_get_schedule() returns: Schedule = %d, Chunk Size = %d\n", + kind, chunk_size); + if (kind != correct_kind) { + printf("kind(%d) != correct_kind(%d)\n", kind, correct_kind); + error = 1; + } + if (chunk_size != correct_chunk_size) { + printf("chunk_size(%d) != correct_chunk_size(%d)\n", chunk_size, + correct_chunk_size); + error = 1; + } + + #pragma omp parallel + { + int i; + #pragma omp for schedule(runtime) + for (i = 1; i <= LOOPCOUNT; i++) { + #pragma omp critical + sum+=i; + } + } + if (known_sum != sum) { + printf("Known Sum = %d, Calculated Sum = %d\n", known_sum, sum); + error = 1; + } + return !error; +} + +int main(int argc, char** argv) +{ + int i; + int num_failed=0; + if (argc != 3) { + fprintf(stderr, "usage: %s schedule_kind chunk_size\n", argv[0]); + fprintf(stderr, " Run with envirable OMP_SCHEDULE=kind[,chunk_size]\n"); + return 1; + } + correct_kind = atoi(argv[1]); + correct_chunk_size = atoi(argv[2]); + + for (i = 0; i < REPETITIONS; i++) { + if (!test_omp_for_runtime()) { + num_failed++; + } + } + return num_failed; +} diff --git 
a/final/runtime/test/worksharing/for/omp_for_schedule_static.c b/final/runtime/test/worksharing/for/omp_for_schedule_static.c new file mode 100644 index 0000000..f46a544 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_static.c @@ -0,0 +1,154 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +#define CFSMAX_SIZE 1000 +#define MAX_TIME 0.01 + +#ifdef SLEEPTIME +#undef SLEEPTIME +#define SLEEPTIME 0.0005 +#endif + +int test_omp_for_schedule_static() +{ + int threads; + int i,lasttid; + int * tids; + int notout; + int maxiter; + int chunk_size; + int counter = 0; + int tmp_count=1; + int lastthreadsstarttid = -1; + int result = 1; + + chunk_size = 7; + tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1)); + notout = 1; + maxiter = 0; + + #pragma omp parallel shared(tids,counter) + { /* begin of parallel*/ + #pragma omp single + { + threads = omp_get_num_threads (); + } /* end of single */ + } /* end of parallel */ + + if (threads < 2) { + omp_set_num_threads(2); + threads = 2; + } + fprintf (stderr,"Using an internal count of %d\nUsing a specified" + " chunksize of %d\n", CFSMAX_SIZE, chunk_size); + tids[CFSMAX_SIZE] = -1; /* setting endflag */ + #pragma omp parallel shared(tids) + { /* begin of parallel */ + double count; + int tid; + int j; + + tid = omp_get_thread_num (); + + #pragma omp for nowait schedule(static,chunk_size) + for(j = 0; j < CFSMAX_SIZE; ++j) { + count = 0.; + #pragma omp flush(maxiter) + if (j > maxiter) { + #pragma omp critical + { + maxiter = j; + } + } + /*printf ("thread %d sleeping\n", tid);*/ + while (notout && (count < MAX_TIME) && (maxiter == j)) { + #pragma omp flush(maxiter,notout) + my_sleep (SLEEPTIME); + count += SLEEPTIME; + printf("."); + } +#ifdef VERBOSE + if (count > 0.) 
printf(" waited %lf s\n", count); +#endif + /*printf ("thread %d awake\n", tid);*/ + tids[j] = tid; +#ifdef VERBOSE + printf("%d finished by %d\n",j,tid); +#endif + } /* end of for */ + notout = 0; + #pragma omp flush(maxiter,notout) + } /* end of parallel */ + + /**** analysing the data in array tids ****/ + + lasttid = tids[0]; + tmp_count = 0; + + for (i = 0; i < CFSMAX_SIZE + 1; ++i) { + /* If the work was done by the same thread increase tmp_count by one. */ + if (tids[i] == lasttid) { + tmp_count++; +#ifdef VERBOSE + fprintf (stderr, "%d: %d \n", i, tids[i]); +#endif + continue; + } + + /* Check if the next thread had has the right thread number. When finding + * threadnumber -1 the end should be reached. + */ + if (tids[i] == (lasttid + 1) % threads || tids[i] == -1) { + /* checking for the right chunk size */ + if (tmp_count == chunk_size) { + tmp_count = 1; + lasttid = tids[i]; +#ifdef VERBOSE + fprintf (stderr, "OK\n"); +#endif + } else { + /* If the chunk size was wrong, check if the end was reached */ + if (tids[i] == -1) { + if (i == CFSMAX_SIZE) { + fprintf (stderr, "Last thread had chunk size %d\n", + tmp_count); + break; + } else { + fprintf (stderr, "ERROR: Last thread (thread with" + " number -1) was found before the end.\n"); + result = 0; + } + } else { + fprintf (stderr, "ERROR: chunk size was %d. 
(assigned" + " was %d)\n", tmp_count, chunk_size); + result = 0; + } + } + } else { + fprintf(stderr, "ERROR: Found thread with number %d (should be" + " inbetween 0 and %d).", tids[i], threads - 1); + result = 0; + } +#ifdef VERBOSE + fprintf (stderr, "%d: %d \n", i, tids[i]); +#endif + } + + return result; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_schedule_static()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c b/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c new file mode 100644 index 0000000..922f27a --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c @@ -0,0 +1,202 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +#define CFSMAX_SIZE 1000 +#define MAX_TIME 0.01 + +#ifdef SLEEPTIME +#undef SLEEPTIME +#define SLEEPTIME 0.0005 +#endif + +#define VERBOSE 0 + +int test_omp_for_schedule_static_3() +{ + int threads; + int i,lasttid; + + int * tids; + int * tids2; + int notout; + int maxiter; + int chunk_size; + + int counter = 0; + int tmp_count=1; + int lastthreadsstarttid = -1; + int result = 1; + chunk_size = 7; + + tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1)); + notout = 1; + maxiter = 0; + + #pragma omp parallel shared(tids,counter) + { /* begin of parallel*/ + #pragma omp single + { + threads = omp_get_num_threads (); + } /* end of single */ + } /* end of parallel */ + + /* Ensure that at least two threads are created */ + if (threads < 2) { + omp_set_num_threads(2); + threads = 2; + } + fprintf (stderr,"Using an internal count of %d\nUsing a" + " specified chunksize of %d\n", CFSMAX_SIZE, chunk_size); + tids[CFSMAX_SIZE] = -1; /* setting endflag */ + + #pragma omp parallel shared(tids) + { /* begin of parallel */ + double count; + int tid; + int j; + + tid = omp_get_thread_num 
(); + + #pragma omp for nowait schedule(static,chunk_size) + for(j = 0; j < CFSMAX_SIZE; ++j) { + count = 0.; + #pragma omp flush(maxiter) + if (j > maxiter) { + #pragma omp critical + { + maxiter = j; + } + } + /*printf ("thread %d sleeping\n", tid);*/ + while (notout && (count < MAX_TIME) && (maxiter == j)) { + #pragma omp flush(maxiter,notout) + my_sleep (SLEEPTIME); + count += SLEEPTIME; + printf("."); + } +#ifdef VERBOSE + if (count > 0.) printf(" waited %lf s\n", count); +#endif + /*printf ("thread %d awake\n", tid);*/ + tids[j] = tid; +#ifdef VERBOSE + printf("%d finished by %d\n",j,tid); +#endif + } /* end of omp parallel for */ + + notout = 0; + #pragma omp flush(maxiter,notout) + } /* end of parallel */ + + /**** analysing the data in array tids ****/ + + lasttid = tids[0]; + tmp_count = 0; + + for (i = 0; i < CFSMAX_SIZE + 1; ++i) { + /* If the work was done by the same thread + increase tmp_count by one. */ + if (tids[i] == lasttid) { + tmp_count++; +#ifdef VERBOSE + fprintf (stderr, "%d: %d \n", i, tids[i]); +#endif + continue; + } + + /* Check if the next thread had has the right thread number. + * When finding threadnumber -1 the end should be reached. + */ + if (tids[i] == (lasttid + 1) % threads || tids[i] == -1) { + /* checking for the right chunk size */ + if (tmp_count == chunk_size) { + tmp_count = 1; + lasttid = tids[i]; +#ifdef VERBOSE + fprintf (stderr, "OK\n"); +#endif + } else { + /* If the chunk size was wrong, check if the end was reached */ + if (tids[i] == -1) { + if (i == CFSMAX_SIZE) { + fprintf (stderr, "Last thread had chunk size %d\n", + tmp_count); + break; + } else { + fprintf (stderr, "ERROR: Last thread (thread with" + " number -1) was found before the end.\n"); + result = 0; + } + } else { + fprintf (stderr, "ERROR: chunk size was %d. 
(assigned" + " was %d)\n", tmp_count, chunk_size); + result = 0; + } + } + } else { + fprintf(stderr, "ERROR: Found thread with number %d (should be" + " inbetween 0 and %d).", tids[i], threads - 1); + result = 0; + } +#ifdef VERBOSE + fprintf (stderr, "%d: %d \n", i, tids[i]); +#endif + } + + /* Now we check if several loop regions in one parallel region have the + * same logical assignement of chunks to threads. We use the nowait + * clause to increase the probability to get an error. */ + + /* First we allocate some more memmory */ + free (tids); + tids = (int *) malloc (sizeof (int) * LOOPCOUNT); + tids2 = (int *) malloc (sizeof (int) * LOOPCOUNT); + + #pragma omp parallel + { + { + int n; + #pragma omp for schedule(static) nowait + for (n = 0; n < LOOPCOUNT; n++) { + if (LOOPCOUNT == n + 1 ) + my_sleep(SLEEPTIME); + + tids[n] = omp_get_thread_num(); + } + } + { + int m; + #pragma omp for schedule(static) nowait + for (m = 1; m <= LOOPCOUNT; m++) { + tids2[m-1] = omp_get_thread_num(); + } + } + } + + for (i = 0; i < LOOPCOUNT; i++) + if (tids[i] != tids2[i]) { + fprintf (stderr, "Chunk no. 
%d was assigned once to thread %d and" + " later to thread %d.\n", i, tids[i],tids2[i]); + result = 0; + } + + free (tids); + free (tids2); + return result; +} + +int main() +{ + int i; + int num_failed=0; + + for (i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_schedule_static_3()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c b/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c new file mode 100644 index 0000000..3b3bf7d --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c @@ -0,0 +1,35 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_for_firstprivate() +{ + int sum ; + int i2; + int i; + int known_sum; + + sum=0; + i2=3; + + #pragma omp parallel for reduction(+:sum) private(i) firstprivate(i2) + for (i = 1; i <= LOOPCOUNT; i++) { + sum = sum + (i + i2); + } + + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2 + i2 * LOOPCOUNT; + return (known_sum == sum); +} /* end of check_parallel_for_fistprivate */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_if.c b/final/runtime/test/worksharing/for/omp_parallel_for_if.c new file mode 100644 index 0000000..57fe498 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_if.c @@ -0,0 +1,42 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int test_omp_parallel_for_if() +{ + int known_sum; + int num_threads; + int sum, sum2; + int i; + int control; + + control = 0; + num_threads=0; + sum = 0; + sum2 = 0; + + #pragma omp parallel for private(i) if (control==1) + for (i=0; i <= LOOPCOUNT; i++) { + num_threads = omp_get_num_threads(); + sum = sum + i; + } + + known_sum = 
(LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + fprintf(stderr, "Number of threads determined by" + " omp_get_num_threads: %d\n", num_threads); + return (known_sum == sum && num_threads == 1); +} /* end of check_parallel_for_private */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_if()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c b/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c new file mode 100644 index 0000000..a53cfb2 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c @@ -0,0 +1,37 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_for_lastprivate() +{ + int sum; + int i; + int i0; + int known_sum; + + sum =0; + i0 = -1; + + #pragma omp parallel for reduction(+:sum) \ + schedule(static,7) private(i) lastprivate(i0) + for (i = 1; i <= LOOPCOUNT; i++) { + sum = sum + i; + i0 = i; + } /* end of parallel for */ + + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return ((known_sum == sum) && (i0 == LOOPCOUNT)); +} /* end of check_parallel_for_lastprivate */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_lastprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c b/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c new file mode 100644 index 0000000..5fef460 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c @@ -0,0 +1,64 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +static int last_i = 0; + +int i; +#pragma omp threadprivate(i) + +/* Variable ii is used to avoid problems with a threadprivate variable used as a loop + * index. See test omp_threadprivate_for. 
+ */ +static int ii; +#pragma omp threadprivate(ii) + +/*! + Utility function: returns true if the passed argument is larger than + the argument of the last call of this function. + */ +static int check_i_islarger2(int i) +{ + int islarger; + islarger = (i > last_i); + last_i = i; + return (islarger); +} + +int test_omp_parallel_for_ordered() +{ + int sum; + int is_larger; + int known_sum; + int i; + + sum = 0; + is_larger = 1; + last_i = 0; + #pragma omp parallel for schedule(static,1) private(i) ordered + for (i = 1; i < 100; i++) { + ii = i; + #pragma omp ordered + { + is_larger = check_i_islarger2 (ii) && is_larger; + sum = sum + ii; + } + } + known_sum = (99 * 100) / 2; + fprintf (stderr," known_sum = %d , sum = %d \n", known_sum, sum); + fprintf (stderr," is_larger = %d\n", is_larger); + return (known_sum == sum) && is_larger; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_ordered()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_private.c b/final/runtime/test/worksharing/for/omp_parallel_for_private.c new file mode 100644 index 0000000..1231d36 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_private.c @@ -0,0 +1,50 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/*! 
Utility function to spend some time in a loop */ +static void do_some_work (void) +{ + int i; + double sum = 0; + for(i = 0; i < 1000; i++){ + sum += sqrt (i); + } +} + +int test_omp_parallel_for_private() +{ + int sum; + int i; + int i2; + int known_sum; + + sum =0; + i2=0; + + #pragma omp parallel for reduction(+:sum) schedule(static,1) private(i) private(i2) + for (i=1;i<=LOOPCOUNT;i++) + { + i2 = i; + #pragma omp flush + do_some_work (); + #pragma omp flush + sum = sum + i2; + } /*end of for*/ + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return (known_sum == sum); +} /* end of check_parallel_for_private */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c b/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c new file mode 100644 index 0000000..118d730 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c @@ -0,0 +1,266 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */ +#define MAX_FACTOR 10 +#define KNOWN_PRODUCT 3628800 /* 10! 
*/ + +int test_omp_parallel_for_reduction() +{ + int sum; + int known_sum; + double dsum; + double dknown_sum; + double dt=0.5; /* base of geometric row for + and - test*/ + double rounding_error= 1.E-9; + int diff; + double ddiff; + int product; + int known_product; + int logic_and; + int logic_or; + int bit_and; + int bit_or; + int exclusiv_bit_or; + int logics[LOOPCOUNT]; + int i; + double dpt; + int result; + + sum =0; + dsum=0; + dt = 1./3.; + result = 0; + product = 1; + logic_and=1; + logic_or=0; + bit_and=1; + bit_or=0; + exclusiv_bit_or=0; + + /* Tests for integers */ + known_sum = (LOOPCOUNT*(LOOPCOUNT+1))/2; + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:sum) + for (i=1;i<=LOOPCOUNT;i++) { + sum=sum+i; + } + if(known_sum!=sum) { + result++; + fprintf(stderr,"Error in sum with integers: Result was %d" + " instead of %d\n",sum,known_sum); + } + + diff = (LOOPCOUNT*(LOOPCOUNT+1))/2; + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:diff) + for (i=1;i<=LOOPCOUNT;++i) { + diff=diff-i; + } + if(diff != 0) { + result++; + fprintf(stderr,"Error in difference with integers: Result was %d" + " instead of 0.\n",diff); + } + + /* Tests for doubles */ + dsum=0; + dpt=1; + for (i=0;i<DOUBLE_DIGITS;++i) { + dpt*=dt; + } + dknown_sum = (1-dpt)/(1-dt); + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:dsum) + for (i=0;i<DOUBLE_DIGITS;++i) { + dsum += pow(dt,i); + } + if( fabs(dsum-dknown_sum) > rounding_error ) { + result++; + fprintf(stderr,"Error in sum with doubles: Result was %f" + " instead of %f (Difference: %E)\n", + dsum, dknown_sum, dsum-dknown_sum); + } + + dpt=1; + + for (i=0;i<DOUBLE_DIGITS;++i) { + dpt*=dt; + } + fprintf(stderr,"\n"); + ddiff = (1-dpt)/(1-dt); + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:ddiff) + for (i=0;i<DOUBLE_DIGITS;++i) { + ddiff -= pow(dt,i); + } + if( fabs(ddiff) > rounding_error) { + result++; + fprintf(stderr,"Error in Difference with doubles: 
Result was %E" + " instead of 0.0\n",ddiff); + } + + /* Tests for integers */ + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(*:product) + for(i=1;i<=MAX_FACTOR;i++) { + product *= i; + } + known_product = KNOWN_PRODUCT; + if(known_product != product) { + result++; + fprintf(stderr,"Error in Product with integers: Result was %d" + " instead of %d\n\n",product,known_product); + } + + /* Tests for logic AND */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=1; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(&&:logic_and) + for(i=0;i<LOOPCOUNT;++i) { + logic_and = (logic_and && logics[i]); + } + if(!logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 1.\n"); + } + + logic_and = 1; + logics[LOOPCOUNT/2]=0; + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(&&:logic_and) + for(i=0;i<LOOPCOUNT;++i) { + logic_and = logic_and && logics[i]; + } + if(logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 2.\n"); + } + + /* Tests for logic OR */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(||:logic_or) + for(i=0;i<LOOPCOUNT;++i) { + logic_or = logic_or || logics[i]; + } + if(logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 1.\n"); + } + logic_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(||:logic_or) + for(i=0;i<LOOPCOUNT;++i) { + logic_or = logic_or || logics[i]; + } + if(!logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 2.\n"); + } + + /* Tests for bitwise AND */ + for(i=0;i<LOOPCOUNT;++i) { + logics[i]=1; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(&:bit_and) + for(i=0;i<LOOPCOUNT;++i) { + bit_and = (bit_and & logics[i]); + } + if(!bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 1.\n"); + } + + bit_and = 1; + logics[LOOPCOUNT/2]=0; + + #pragma omp parallel for 
schedule(dynamic,1) private(i) \ + reduction(&:bit_and) + for(i=0;i<LOOPCOUNT;++i) { + bit_and = bit_and & logics[i]; + } + if(bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 2.\n"); + } + + /* Tests for bitwise OR */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(|:bit_or) + for(i=0;i<LOOPCOUNT;++i) { + bit_or = bit_or | logics[i]; + } + if(bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 1\n"); + } + bit_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(|:bit_or) + for(i=0;i<LOOPCOUNT;++i) { + bit_or = bit_or | logics[i]; + } + if(!bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 2\n"); + } + + /* Tests for bitwise XOR */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(^:exclusiv_bit_or) + for(i=0;i<LOOPCOUNT;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + if(exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(^:exclusiv_bit_or) + for(i=0;i<LOOPCOUNT;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + if(!exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n"); + } + + /*printf("\nResult:%d\n",result);*/ + return (result==0); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c new file mode 100644 index 0000000..1780fab --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c @@ 
/*
 * Test for the firstprivate clause on "parallel sections".
 *
 * addend is set to 11 before the construct and made firstprivate, so the
 * copy seen inside every section has to carry that value.  Each of the
 * three sections adds its copy to the shared total (initially 7) inside a
 * critical region.
 *
 * Returns 1 when the total equals 11*3+7, else 0.
 */
int test_omp_parallel_sections_firstprivate()
{
  int total = 7;
  int addend = 11;
  int expected = 11*3+7;

  #pragma omp parallel sections firstprivate(addend)
  {
    #pragma omp section
    {
      #pragma omp critical
      {
        total += addend;
      }
    }
    #pragma omp section
    {
      #pragma omp critical
      {
        total += addend;
      }
    }
    #pragma omp section
    {
      #pragma omp critical
      {
        total += addend;
      }
    }
  }

  return (expected == total);
}
/*
 * Test for the private clause on "parallel sections".
 *
 * The three sections sum the ranges 1..399, 400..699 and 700..999 into the
 * private accumulator partial (indexed by the private k) and then add that
 * partial result to the shared total (initially 7) inside a critical
 * region.
 *
 * Returns 1 when the total equals 999*1000/2+7, else 0.
 */
int test_omp_parallel_sections_private()
{
  int total = 7;
  int partial = 0;
  int k;
  int expected;

  #pragma omp parallel sections private(partial, k)
  {
    #pragma omp section
    {
      partial = 0;
      for (k = 1; k < 400; k++) {
        partial += k;
      }
      #pragma omp critical
      {
        total += partial;
      }
    }
    #pragma omp section
    {
      partial = 0;
      for (k = 400; k < 700; k++) {
        partial += k;
      }
      #pragma omp critical
      {
        total += partial;
      }
    }
    #pragma omp section
    {
      partial = 0;
      for (k = 700; k < 1000; k++) {
        partial += k;
      }
      #pragma omp critical
      {
        total += partial;
      }
    }
  }

  expected = (999*1000)/2+7;
  return (expected == total);
}
+ int known_product; + int logic_and; + int bit_and; + int logic_or; + int bit_or; + int exclusiv_bit_or; + int logics[1000]; + int i; + int result; + + sum = 7; + dsum=0; + product =1; + dpt = 1; + logic_and=1; + bit_and=1; + logic_or=0; + bit_or=0; + exclusiv_bit_or=0; + result =0; + /* int my_islarger;*/ + /*int is_larger=1;*/ + + // Test summation of integers + known_sum = (999*1000)/2+7; + #pragma omp parallel sections private(i) reduction(+:sum) + { + #pragma omp section + { + for (i=1;i<300;i++) { + sum=sum+i; + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + sum=sum+i; + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + sum=sum+i; + } + } + } + if(known_sum!=sum) { + result++; + fprintf(stderr,"Error in sum with integers: Result was %d" + " instead of %d.\n",sum, known_sum); + } + + // Test differences of integers + diff = (999*1000)/2; + #pragma omp parallel sections private(i) reduction(-:diff) + { + #pragma omp section + { + for (i=1;i<300;i++) { + diff=diff-i; + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + diff=diff-i; + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + diff=diff-i; + } + } + } + if(diff != 0) { + result++; + fprintf(stderr,"Error in Difference with integers: Result was %d" + " instead of 0.\n",diff); + } + + // Test summation of doubles + for (i=0;i<20;++i) { + dpt*=dt; + } + dknown_sum = (1-dpt)/(1-dt); + #pragma omp parallel sections private(i) reduction(+:dsum) + { + #pragma omp section + { + for (i=0;i<6;++i) { + dsum += pow(dt,i); + } + } + #pragma omp section + { + for (i=6;i<12;++i) { + dsum += pow(dt,i); + } + } + #pragma omp section + { + for (i=12;i<20;++i) { + dsum += pow(dt,i); + } + } + } + if( fabs(dsum-dknown_sum) > rounding_error ) { + result++; + fprintf(stderr,"Error in sum with doubles: Result was %f" + " instead of %f (Difference: %E)\n", + dsum, dknown_sum, dsum-dknown_sum); + } + + // Test differences of doubles + dpt=1; + for (i=0;i<20;++i) { + dpt*=dt; + } + 
fprintf(stderr,"\n"); + ddiff = (1-dpt)/(1-dt); + #pragma omp parallel sections private(i) reduction(-:ddiff) + { + #pragma omp section + { + for (i=0;i<6;++i) { + ddiff -= pow(dt,i); + } + } + #pragma omp section + { + for (i=6;i<12;++i) { + ddiff -= pow(dt,i); + } + } + #pragma omp section + { + for (i=12;i<20;++i) { + ddiff -= pow(dt,i); + } + } + } + if( fabs(ddiff) > rounding_error) { + result++; + fprintf(stderr,"Error in Difference with doubles: Result was %E" + " instead of 0.0\n",ddiff); + } + + // Test product of integers + known_product = 3628800; + #pragma omp parallel sections private(i) reduction(*:product) + { + #pragma omp section + { + for(i=1;i<3;i++) { + product *= i; + } + } + #pragma omp section + { + for(i=3;i<7;i++) { + product *= i; + } + } + #pragma omp section + { + for(i=7;i<11;i++) { + product *= i; + } + } + } + if(known_product != product) { + result++; + fprintf(stderr,"Error in Product with integers: Result was %d" + " instead of %d\n",product,known_product); + } + + // Test logical AND + for(i=0;i<1000;i++) { + logics[i]=1; + } + + #pragma omp parallel sections private(i) reduction(&&:logic_and) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_and = (logic_and && logics[i]); + } + } + } + if(!logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 1\n"); + } + logic_and = 1; + logics[501] = 0; + + #pragma omp parallel sections private(i) reduction(&&:logic_and) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_and = (logic_and && logics[i]); + } + } + } + if(logic_and) { + result++; + 
fprintf(stderr,"Error in logic AND part 2"); + } + + // Test logical OR + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel sections private(i) reduction(||:logic_or) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_or = (logic_or || logics[i]); + } + } + } + if(logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 1\n"); + } + + logic_or = 0; + logics[501]=1; + + #pragma omp parallel sections private(i) reduction(||:logic_or) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_or = (logic_or || logics[i]); + } + } + } + if(!logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 2\n"); + } + + // Test bitwise AND + for(i=0;i<1000;++i) { + logics[i]=1; + } + + #pragma omp parallel sections private(i) reduction(&:bit_and) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_and = (bit_and & logics[i]); + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_and = (bit_and & logics[i]); + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_and = (bit_and & logics[i]); + } + } + } + if(!bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 1\n"); + } + + bit_and = 1; + logics[501]=0; + + #pragma omp parallel sections private(i) reduction(&:bit_and) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_and = bit_and & logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_and = bit_and & logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_and = bit_and & logics[i]; + } + } + } + if(bit_and) { + result++; + fprintf(stderr,"Error in BIT AND 
part 2"); + } + + // Test bitwise OR + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel sections private(i) reduction(|:bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_or = bit_or | logics[i]; + } + } + } + if(bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 1\n"); + } + bit_or = 0; + logics[501]=1; + + #pragma omp parallel sections private(i) reduction(|:bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_or = bit_or | logics[i]; + } + } + } + if(!bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 2\n"); + } + + // Test bitwise XOR + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel sections private(i) reduction(^:exclusiv_bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + } + if(exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[501]=1; + + #pragma omp parallel sections private(i) reduction(^:exclusiv_bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + } + if(!exclusiv_bit_or) { + 
result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n"); + } + + /*printf("\nResult:%d\n",result);*/ + return (result==0); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_sections_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_section_firstprivate.c b/final/runtime/test/worksharing/sections/omp_section_firstprivate.c new file mode 100644 index 0000000..5526475 --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_section_firstprivate.c @@ -0,0 +1,55 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_section_firstprivate() +{ + int sum; + int sum0; + int known_sum; + + sum0 = 11; + sum = 7; + #pragma omp parallel + { + #pragma omp sections firstprivate(sum0) + { + #pragma omp section + { + #pragma omp critical + { + sum = sum + sum0; + } + } + #pragma omp section + { + #pragma omp critical + { + sum = sum + sum0; + } + } + #pragma omp section + { + #pragma omp critical + { + sum = sum + sum0; + } + } + } + } + known_sum = 11 * 3 + 7; + return (known_sum == sum); +} /* end of check_section_firstprivate*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_section_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_section_lastprivate.c b/final/runtime/test/worksharing/sections/omp_section_lastprivate.c new file mode 100644 index 0000000..0dbbea9 --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_section_lastprivate.c @@ -0,0 +1,76 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_section_lastprivate() +{ + int i0 = -1; + int sum = 0; + int i; + int sum0 = 0; + int known_sum; + + i0 = -1; + sum = 0; + + #pragma omp parallel + { + #pragma omp sections lastprivate(i0) private(i,sum0) + { + 
#pragma omp section + { + sum0 = 0; + for (i = 1; i < 400; i++) + { + sum0 = sum0 + i; + i0 = i; + } + #pragma omp critical + { + sum = sum + sum0; + } /*end of critical*/ + } /* end of section */ + #pragma omp section + { + sum0 = 0; + for(i = 400; i < 700; i++) + { + sum0 = sum0 + i; + i0 = i; + } + #pragma omp critical + { + sum = sum + sum0; + } /*end of critical*/ + } + #pragma omp section + { + sum0 = 0; + for(i = 700; i < 1000; i++) + { + sum0 = sum0 + i; + i0 = i; + } + #pragma omp critical + { + sum = sum + sum0; + } /*end of critical*/ + } /* end of section */ + } /* end of sections*/ + } /* end of parallel*/ + known_sum = (999 * 1000) / 2; + return ((known_sum == sum) && (i0 == 999) ); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_section_lastprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_section_private.c b/final/runtime/test/worksharing/sections/omp_section_private.c new file mode 100644 index 0000000..bf2a30d --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_section_private.c @@ -0,0 +1,66 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_section_private() +{ + int sum; + int sum0; + int i; + int known_sum; + + sum = 7; + sum0 = 0; + + #pragma omp parallel + { + #pragma omp sections private(sum0,i) + { + #pragma omp section + { + sum0 = 0; + for (i = 1; i < 400; i++) + sum0 = sum0 + i; + #pragma omp critical + { + sum = sum + sum0; + } + } + #pragma omp section + { + sum0 = 0; + for (i = 400; i < 700; i++) + sum0 = sum0 + i; + #pragma omp critical + { + sum = sum + sum0; + } + } + #pragma omp section + { + sum0 = 0; + for (i = 700; i < 1000; i++) + sum0 = sum0 + i; + #pragma omp critical + { + sum = sum + sum0; + } + } + } /*end of sections*/ + } /* end of parallel */ + known_sum = (999 * 1000) / 2 + 7; + return (known_sum == sum); +} /* end of 
check_section_private*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_section_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_sections_nowait.c b/final/runtime/test/worksharing/sections/omp_sections_nowait.c new file mode 100644 index 0000000..caff254 --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_sections_nowait.c @@ -0,0 +1,104 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +/* + * This test will hang if the nowait is not working properly + * + * It relies on a thread skipping to the second sections construct to + * release the threads in the first sections construct + * + * Also, since scheduling of sections is implementation defined, it is + * necessary to have all four sections in the second sections construct + * release the threads since we can't guarantee which section a single thread + * will execute. 
+ */ +volatile int release; +volatile int count; + +void wait_for_release_then_increment(int rank) +{ + fprintf(stderr, "Thread nr %d enters first section" + " and waits.\n", rank); + while (release == 0); + #pragma omp atomic + count++; +} + +void release_and_increment(int rank) +{ + fprintf(stderr, "Thread nr %d sets release to 1\n", rank); + release = 1; + #pragma omp flush(release) + #pragma omp atomic + count++; +} + +int test_omp_sections_nowait() +{ + release = 0; + count = 0; + + #pragma omp parallel num_threads(4) + { + int rank; + rank = omp_get_thread_num (); + #pragma omp sections nowait + { + #pragma omp section + { + wait_for_release_then_increment(rank); + } + #pragma omp section + { + wait_for_release_then_increment(rank); + } + #pragma omp section + { + wait_for_release_then_increment(rank); + } + #pragma omp section + { + fprintf(stderr, "Thread nr %d enters first sections and goes " + "immediately to next sections construct to release.\n", rank); + #pragma omp atomic + count++; + } + } + /* Begin of second sections environment */ + #pragma omp sections + { + #pragma omp section + { + release_and_increment(rank); + } + #pragma omp section + { + release_and_increment(rank); + } + #pragma omp section + { + release_and_increment(rank); + } + #pragma omp section + { + release_and_increment(rank); + } + } + } + // Check to make sure all eight sections were executed + return (count==8); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_sections_nowait()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_sections_reduction.c b/final/runtime/test/worksharing/sections/omp_sections_reduction.c new file mode 100644 index 0000000..1fdb5ec --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_sections_reduction.c @@ -0,0 +1,543 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int 
test_omp_sections_reduction() +{ + int sum; + int known_sum; + double dpt,dsum; + double dknown_sum; + double dt=0.5; /* base of geometric row for + and - test*/ + double rounding_error= 1.E-9; + int diff; + double ddiff; + int product; + int known_product; + int logic_and; + int bit_and; + int logic_or; + int bit_or; + int exclusiv_bit_or; + int logics[1000]; + int i; + int result; + /* int my_islarger; */ + /*int is_larger=1;*/ + sum =7; + dpt =1; + dsum=0; + product =1; + logic_and=1; + bit_and=1; + logic_or=0; + bit_or=0; + exclusiv_bit_or=0; + result = 0; + dt = 1./3.; + + known_sum = (999*1000)/2+7; + #pragma omp parallel + { + #pragma omp sections private(i) reduction(+:sum) + { + #pragma omp section + { + for (i=1;i<300;i++) { + sum=sum+i; + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + sum=sum+i; + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + sum=sum+i; + } + } + } + } + if(known_sum!=sum) { + ++result; + fprintf(stderr,"Error in sum with integers: Result was %d" + " instead of %d\n", sum,known_sum); + } + + diff = (999*1000)/2; + #pragma omp parallel + { + #pragma omp sections private(i) reduction(-:diff) + { + #pragma omp section + { + for (i=1;i<300;i++) { + diff=diff-i; + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + diff=diff-i; + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + diff=diff-i; + } + } + } + } + if(diff != 0) { + result++; + fprintf(stderr,"Error in Difference with integers: Result was %d" + " instead of 0.\n",diff); + } + + for (i=0;i<20;++i) { + dpt*=dt; + } + dknown_sum = (1-dpt)/(1-dt); + #pragma omp parallel + { + #pragma omp sections private(i) reduction(+:dsum) + { + #pragma omp section + { + for (i=0;i<6;++i) { + dsum += pow(dt,i); + } + } + #pragma omp section + { + for (i=6;i<12;++i) { + dsum += pow(dt,i); + } + } + #pragma omp section + { + for (i=12;i<20;++i) { + dsum += pow(dt,i); + } + } + } + } + if( fabs(dsum-dknown_sum) > rounding_error ) { + result++; + 
fprintf(stderr,"Error in sum with doubles: Result was %f" + " instead of %f (Difference: %E)\n", + dsum, dknown_sum, dsum-dknown_sum); + } + + dpt=1; + for (i=0;i<20;++i) { + dpt*=dt; + } + fprintf(stderr,"\n"); + ddiff = (1-dpt)/(1-dt); + #pragma omp parallel + { + #pragma omp sections private(i) reduction(-:ddiff) + { + #pragma omp section + { + for (i=0;i<6;++i) { + ddiff -= pow(dt,i); + } + } + #pragma omp section + { + for (i=6;i<12;++i) { + ddiff -= pow(dt,i); + } + } + #pragma omp section + { + for (i=12;i<20;++i) { + ddiff -= pow(dt,i); + } + } + } + } + + if(fabs(ddiff) > rounding_error) { + result++; + fprintf(stderr,"Error in Difference with doubles: Result was %E" + " instead of 0.0\n",ddiff); + } + + known_product = 3628800; + #pragma omp parallel + { + #pragma omp sections private(i) reduction(*:product) + { + #pragma omp section + { + for(i=1;i<3;i++) { + product *= i; + } + } + #pragma omp section + { + for(i=3;i<7;i++) { + product *= i; + } + } + #pragma omp section + { + for(i=7;i<11;i++) { + product *= i; + } + } + } + } + if(known_product != product) { + result++; + fprintf(stderr,"Error in Product with integers: Result was %d" + " instead of %d\n",product,known_product); + } + + for(i=0;i<1000;i++) { + logics[i]=1; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(&&:logic_and) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_and = (logic_and && logics[i]); + } + } + } + } + if(!logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 1\n"); + } + + logic_and = 1; + logics[501] = 0; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(&&:logic_and) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + 
for (i=300;i<700;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_and = (logic_and && logics[i]); + } + } + } + } + if(logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 2\n"); + } + + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(||:logic_or) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_or = (logic_or || logics[i]); + } + } + } + } + if(logic_or) { + result++; + fprintf(stderr,"\nError in logic OR part 1\n"); + } + + logic_or = 0; + logics[501]=1; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(||:logic_or) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_or = (logic_or || logics[i]); + } + } + } + } + if(!logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 2\n"); + } + + for(i=0;i<1000;++i) { + logics[i]=1; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(&:bit_and) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_and = (bit_and & logics[i]); + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_and = (bit_and & logics[i]); + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_and = (bit_and & logics[i]); + } + } + } + } + if(!bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 1\n"); + } + + bit_and = 1; + logics[501]=0; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(&:bit_and) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_and = bit_and & logics[i]; + 
} + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_and = bit_and & logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_and = bit_and & logics[i]; + } + } + } + } + if(bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 2\n"); + } + + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(|:bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_or = bit_or | logics[i]; + } + } + } + } + if(bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 1\n"); + } + bit_or = 0; + logics[501]=1; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(|:bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_or = bit_or | logics[i]; + } + } + } + } + if(!bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 2\n"); + } + + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(^:exclusiv_bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + } + } + if(exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[501]=1; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(^:exclusiv_bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + 
exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + } + } + if(!exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n"); + } + + /*printf("\nResult:%d\n",result);*/ + return (result==0); +} +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_sections_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/single/omp_single.c b/final/runtime/test/worksharing/single/omp_single.c new file mode 100644 index 0000000..4963579 --- /dev/null +++ b/final/runtime/test/worksharing/single/omp_single.c @@ -0,0 +1,44 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_single() +{ + int nr_threads_in_single; + int result; + int nr_iterations; + int i; + + nr_threads_in_single = 0; + result = 0; + nr_iterations = 0; + + #pragma omp parallel private(i) + { + for (i = 0; i < LOOPCOUNT; i++) { + #pragma omp single + { + #pragma omp flush + nr_threads_in_single++; + #pragma omp flush + nr_iterations++; + nr_threads_in_single--; + result = result + nr_threads_in_single; + } + } + } + return ((result == 0) && (nr_iterations == LOOPCOUNT)); +} /* end of check_single*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_single()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/single/omp_single_copyprivate.c b/final/runtime/test/worksharing/single/omp_single_copyprivate.c new file mode 100644 index 0000000..2fece5c --- /dev/null +++ b/final/runtime/test/worksharing/single/omp_single_copyprivate.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile-and-run +#include "omp_testsuite.h" + +#define DEBUG_TEST 0 + +int j; 
+#pragma omp threadprivate(j) + +int test_omp_single_copyprivate() +{ + int result; + int nr_iterations; + + result = 0; + nr_iterations = 0; + #pragma omp parallel num_threads(4) + { + int i; + for (i = 0; i < LOOPCOUNT; i++) + { +#if DEBUG_TEST + int thread; + thread = omp_get_thread_num (); +#endif + #pragma omp single copyprivate(j) + { + nr_iterations++; + j = i; +#if DEBUG_TEST + printf ("thread %d assigns, j = %d, i = %d\n", thread, j, i); +#endif + } +#if DEBUG_TEST + #pragma omp barrier +#endif + #pragma omp critical + { +#if DEBUG_TEST + printf ("thread = %d, j = %d, i = %d\n", thread, j, i); +#endif + result = result + j - i; + } + #pragma omp barrier + } /* end of for */ + } /* end of parallel */ + return ((result == 0) && (nr_iterations == LOOPCOUNT)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_single_copyprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/single/omp_single_nowait.c b/final/runtime/test/worksharing/single/omp_single_nowait.c new file mode 100644 index 0000000..22f8930 --- /dev/null +++ b/final/runtime/test/worksharing/single/omp_single_nowait.c @@ -0,0 +1,73 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +/* + * This test will hang if the nowait is not working properly + * + * It relies on a one thread skipping to the last single construct to + * release the threads in the first three single constructs + */ +volatile int release; +volatile int count; + +void wait_for_release_then_increment(int rank) +{ + fprintf(stderr, "Thread nr %d enters first section" + " and waits.\n", rank); + while (release == 0); + #pragma omp atomic + count++; +} + +void release_and_increment(int rank) +{ + fprintf(stderr, "Thread nr %d sets release to 1\n", rank); + release = 1; + #pragma omp atomic + count++; +} + +int test_omp_single_nowait() +{ + release = 0; + count = 0; + + #pragma omp parallel 
num_threads(4) + { + int rank; + rank = omp_get_thread_num (); + #pragma omp single nowait + { + wait_for_release_then_increment(rank); + } + #pragma omp single nowait + { + wait_for_release_then_increment(rank); + } + #pragma omp single nowait + { + wait_for_release_then_increment(rank); + } + + #pragma omp single + { + release_and_increment(rank); + } + } + // Check to make sure all four singles were executed + return (count==4); +} /* end of check_single_nowait*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_single_nowait()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/single/omp_single_private.c b/final/runtime/test/worksharing/single/omp_single_private.c new file mode 100644 index 0000000..a27f8de --- /dev/null +++ b/final/runtime/test/worksharing/single/omp_single_private.c @@ -0,0 +1,57 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int myit = 0; +#pragma omp threadprivate(myit) +int myresult = 0; +#pragma omp threadprivate(myresult) + +int test_omp_single_private() +{ + int nr_threads_in_single; + int result; + int nr_iterations; + int i; + + myit = 0; + nr_threads_in_single = 0; + nr_iterations = 0; + result = 0; + + #pragma omp parallel private(i) + { + myresult = 0; + myit = 0; + for (i = 0; i < LOOPCOUNT; i++) { + #pragma omp single private(nr_threads_in_single) nowait + { + nr_threads_in_single = 0; + #pragma omp flush + nr_threads_in_single++; + #pragma omp flush + myit++; + myresult = myresult + nr_threads_in_single; + } + } + #pragma omp critical + { + result += nr_threads_in_single; + nr_iterations += myit; + } + } + return ((result == 0) && (nr_iterations == LOOPCOUNT)); +} /* end of check_single private */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_single_private()) { + num_failed++; + } + } + return num_failed; +} |