diff options
Diffstat (limited to 'final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c')
-rw-r--r-- | final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c new file mode 100644 index 0000000..d137831 --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c @@ -0,0 +1,196 @@ +// RUN: %libomp-compile +// RUN: env OMP_SCHEDULE=guided %libomp-run +// RUN: env OMP_SCHEDULE=guided,1 %libomp-run 1 +// RUN: env OMP_SCHEDULE=guided,2 %libomp-run 2 +// RUN: env OMP_SCHEDULE=dynamic %libomp-run +// RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1 +// RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2 +// RUN: env OMP_SCHEDULE=auto %libomp-run + +// The test checks schedule(simd:runtime) +// in combination with OMP_SCHEDULE=guided[,chunk] +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +#if defined(WIN32) || defined(_WIN32) +#include <windows.h> +#define delay() Sleep(1); +#define seten(a,b,c) _putenv_s((a),(b)) +#else +#include <unistd.h> +#define delay() usleep(10); +#define seten(a,b,c) setenv((a),(b),(c)) +#endif + +#define UBOUND 100 +#define SIMD_LEN 4 +int err = 0; + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL. +enum sched { + kmp_sch_static_balanced_chunked = 45, + kmp_sch_guided_simd = 46, + kmp_sch_runtime_simd = 47, +}; +typedef unsigned u32; +typedef long long i64; +typedef unsigned long long u64; +typedef struct { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +#ifdef __cplusplus +extern "C" { +#endif + int __kmpc_global_thread_num(id*); + void __kmpc_barrier(id*, int gtid); + void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); + void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); + int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); + int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); +#ifdef __cplusplus +} // extern "C" +#endif +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; + +// --------------------------------------------------------------------------- +void +run_loop( + int loop_lb, // Loop lower bound. + int loop_ub, // Loop upper bound. + int loop_st, // Loop stride. + int lchunk +) { + static int volatile loop_sync = 0; + int lb; // Chunk lower bound. + int ub; // Chunk upper bound. + int st; // Chunk stride. + int rc; + int tid = omp_get_thread_num(); + int gtid = __kmpc_global_thread_num(&loc); + int last; + int tc = (loop_ub - loop_lb) / loop_st + 1; + int ch; + int no_chunk = 0; + if (lchunk == 0) { + no_chunk = 1; + lchunk = 1; + } + ch = lchunk * SIMD_LEN; +#if _DEBUG > 1 + printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n", + gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen. + if (loop_st == 0) + return; + if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) + return; + __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, + loop_lb, loop_ub, loop_st, SIMD_LEN); + { + // Let the master thread handle the chunks alone. + int chunk; // No of current chunk. + int last_ub; // Upper bound of the last processed chunk. + u64 cur; // Number of interations in current chunk. + u64 max; // Max allowed iterations for current chunk. + int undersized = 0; + last_ub = loop_ub; + chunk = 0; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations. + while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if _DEBUG + printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n", + tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); +#endif + // Check if previous chunk (it is not the final chunk) is undersized. + if (undersized) + printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err); + if (loop_st > 0) { + if (!(ub <= loop_ub)) + printf("Error with ub %d, %d, ch %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb <= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + } else { + if (!(ub >= loop_ub)) + printf("Error with ub %d, %d, %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb >= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + }; // if + // Stride should not change. + if (!(st == loop_st)) + printf("Error with st %d, %d, ch %d, err %d\n", + (int)st, (int)loop_st, chunk, ++err); + cur = ( ub - lb ) / loop_st + 1; + // Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum. + if (!( cur <= max + 1)) + printf("Error with iter %d, %d, err %d\n", cur, max, ++err); + // Update maximum for the next chunk. + if (!last && cur % ch) + printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", + chunk, (int)cur, ch, tid, ++err); + if (last && !no_chunk && cur > ch) + printf("Error: too big last chunk %d (%d), tid %d, err %d\n", + (int)cur, ch, tid, ++err); + if (cur < max) + max = cur; + last_ub = ub; + undersized = (cur < ch); +#if _DEBUG > 1 + if (last) + printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n", + undersized,cur,ch,tid,ub,lb,loop_st); +#endif + } // while + // Must have the right last iteration index. + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st > loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } else { + if (!(last_ub >= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st < loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } // if + } + __kmpc_barrier(&loc, gtid); +} // run_loop + +int main(int argc, char *argv[]) +{ + int chunk = 0; + if (argc > 1) { + // expect chunk size as a parameter + chunk = atoi(argv[1]); + } +#pragma omp parallel //num_threads(num_th) + run_loop(0, UBOUND, 1, chunk); + if (err) { + printf("failed, err = %d\n", err); + return 1; + } else { + printf("passed\n"); + return 0; + } +} |