diff options
Diffstat (limited to 'final/runtime/test/worksharing/for/kmp_sch_simd_guided.c')
-rw-r--r-- | final/runtime/test/worksharing/for/kmp_sch_simd_guided.c | 410 |
1 files changed, 410 insertions, 0 deletions
diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c new file mode 100644 index 0000000..5c6f94b --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c @@ -0,0 +1,410 @@ +// RUN: %libomp-compile-and-run +/* + Test for the 'schedule(simd:guided)' clause. + Compiler needs to generate a dynamic dispatching and pass the schedule + value 46 to the OpenMP RTL. Test uses numerous loop parameter combinations. +*/ +#include <stdio.h> +#include <omp.h> + +#if defined(WIN32) || defined(_WIN32) +#include <windows.h> +#define delay() Sleep(1); +#else +#include <unistd.h> +#define delay() usleep(10); +#endif + +// uncomment for debug diagnostics: +//#define DEBUG + +#define SIMD_LEN 4 + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL +enum sched { + kmp_sch_static_balanced_chunked = 45, + kmp_sch_guided_simd = 46, + kmp_sch_runtime_simd = 47, +}; +typedef unsigned u32; +typedef long long i64; +typedef unsigned long long u64; +typedef struct { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +extern int __kmpc_global_thread_num(id*); +extern void __kmpc_barrier(id*, int gtid); +extern void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); +extern void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); +extern int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); +extern int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; + +// --------------------------------------------------------------------------- +int run_loop_64(i64 loop_lb, i64 loop_ub, i64 loop_st, int loop_chunk) { + int err = 0; + static int volatile loop_sync = 0; + i64 lb; // Chunk lower bound + i64 ub; // Chunk upper bound + i64 st; // Chunk stride + int rc; + int tid = omp_get_thread_num(); + int gtid = tid; + int last; +#if DEBUG + printf("run_loop_<%d>(lb=%d, ub=%d, st=%d, ch=%d)\n", + (int)sizeof(i64), gtid, tid, + (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen + if (loop_st == 0) + return 0; + if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) + return 0; + + __kmpc_dispatch_init_8(&loc, gtid, kmp_sch_guided_simd, + loop_lb, loop_ub, loop_st, loop_chunk); + if (tid == 0) { + // Let the master thread handle the chunks alone + int chunk; // No of current chunk + i64 next_lb; // Lower bound of the next chunk + i64 last_ub; // Upper bound of the last processed chunk + u64 cur; // Number of interations in current chunk + u64 max; // Max allowed iterations for current chunk + int undersized = 0; + + chunk = 0; + next_lb = loop_lb; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations + while (__kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if DEBUG + printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub); +#endif + // Check if previous chunk (it is not the final chunk) is undersized + if (undersized) { + printf("Error with chunk %d\n", chunk); + err++; + } + // Check lower and upper bounds + if (lb != next_lb) { + printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk); + err++; + } + if (loop_st > 0) { + if (!(ub <= loop_ub)) { + printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk); + err++; + } + if (!(lb <= ub)) { + printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); + err++; + } + } else { + if (!(ub >= loop_ub)) { + printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk); + err++; + } + if (!(lb >= ub)) { + printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); + err++; + } + }; // if + // Stride should not change + if (!(st == loop_st)) { + printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk); + err++; + } + cur = (ub - lb) / loop_st + 1; + // Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum + if (!(cur <= max + 1)) { + printf("Error with iter %d, %d\n", cur, max); + err++; + } + // Update maximum for the next chunk + if (cur < max) + max = cur; + next_lb = ub + loop_st; + last_ub = ub; + undersized = (cur < loop_chunk); + }; // while + // Must have at least one chunk + if (!(chunk > 0)) { + printf("Error with chunk %d\n", chunk); + err++; + } + // Must have the right last iteration index + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) { + printf("Error with last1 %d, %d, ch %d\n", + (int)last_ub, (int)loop_ub, chunk); + err++; + } + if (!(last_ub + loop_st > loop_ub)) { + printf("Error with last2 %d, %d, %d, ch %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk); + err++; + } + } else { + if (!(last_ub >= loop_ub)) { + printf("Error with last1 %d, %d, ch %d\n", + (int)last_ub, (int)loop_ub, chunk); + err++; + } + if (!(last_ub + loop_st < loop_ub)) { + printf("Error with last2 %d, %d, %d, ch %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk); + err++; + } + }; // if + // Let non-master threads go + loop_sync = 1; + } else { + int i; + // Workers wait for master thread to finish, then call __kmpc_dispatch_next + for (i = 0; i < 1000000; ++ i) { + if (loop_sync != 0) { + break; + }; // if + }; // for i + while (loop_sync == 0) { + delay(); + }; // while + // At this moment we do not have any more chunks -- all the chunks already + // processed by master thread + rc = __kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st); + if (rc) { + printf("Error return value\n"); + err++; + } + }; // if + + __kmpc_barrier(&loc, gtid); + if (tid == 0) { + loop_sync = 0; // Restore original state +#if DEBUG + printf("run_loop_64(): at the end\n"); +#endif + }; // if + __kmpc_barrier(&loc, gtid); + return err; +} // run_loop + +// --------------------------------------------------------------------------- +int run_loop_32(int loop_lb, int loop_ub, int loop_st, int loop_chunk) { + int err = 0; + static int volatile loop_sync = 0; + int lb; // Chunk lower bound + int ub; // Chunk upper bound + int st; // Chunk stride + int rc; + int tid = omp_get_thread_num(); + int gtid = tid; + int last; +#if DEBUG + printf("run_loop_<%d>(lb=%d, ub=%d, st=%d, ch=%d)\n", + (int)sizeof(int), gtid, tid, + (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen + if (loop_st == 0) + return 0; + if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) + return 0; + + __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_guided_simd, + loop_lb, loop_ub, loop_st, loop_chunk); + if (tid == 0) { + // Let the master thread handle the chunks alone + int chunk; // No of current chunk + int next_lb; // Lower bound of the next chunk + int last_ub; // Upper bound of the last processed chunk + u64 cur; // Number of interations in current chunk + u64 max; // Max allowed iterations for current chunk + int undersized = 0; + + chunk = 0; + next_lb = loop_lb; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations + while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if DEBUG + printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub); +#endif + // Check if previous chunk (it is not the final chunk) is undersized + if (undersized) { + printf("Error with chunk %d\n", chunk); + err++; + } + // Check lower and upper bounds + if (lb != next_lb) { + printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk); + err++; + } + if (loop_st > 0) { + if (!(ub <= loop_ub)) { + printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk); + err++; + } + if (!(lb <= ub)) { + printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); + err++; + } + } else { + if (!(ub >= loop_ub)) { + printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk); + err++; + } + if (!(lb >= ub)) { + printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); + err++; + } + }; // if + // Stride should not change + if (!(st == loop_st)) { + printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk); + err++; + } + cur = (ub - lb) / loop_st + 1; + // Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum + if (!(cur <= max + 1)) { + printf("Error with iter %d, %d\n", cur, max); + err++; + } + // Update maximum for the next chunk + if (cur < max) + max = cur; + next_lb = ub + loop_st; + last_ub = ub; + undersized = (cur < loop_chunk); + }; // while + // Must have at least one chunk + if (!(chunk > 0)) { + printf("Error with chunk %d\n", chunk); + err++; + } + // Must have the right last iteration index + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) { + printf("Error with last1 %d, %d, ch %d\n", + (int)last_ub, (int)loop_ub, chunk); + err++; + } + if (!(last_ub + loop_st > loop_ub)) { + printf("Error with last2 %d, %d, %d, ch %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk); + err++; + } + } else { + if (!(last_ub >= loop_ub)) { + printf("Error with last1 %d, %d, ch %d\n", + (int)last_ub, (int)loop_ub, chunk); + err++; + } + if (!(last_ub + loop_st < loop_ub)) { + printf("Error with last2 %d, %d, %d, ch %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk); + err++; + } + }; // if + // Let non-master threads go + loop_sync = 1; + } else { + int i; + // Workers wait for master thread to finish, then call __kmpc_dispatch_next + for (i = 0; i < 1000000; ++ i) { + if (loop_sync != 0) { + break; + }; // if + }; // for i + while (loop_sync == 0) { + delay(); + }; // while + // At this moment we do not have any more chunks -- all the chunks already + // processed by the master thread + rc = __kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st); + if (rc) { + printf("Error return value\n"); + err++; + } + }; // if + + __kmpc_barrier(&loc, gtid); + if (tid == 0) { + loop_sync = 0; // Restore original state +#if DEBUG + printf("run_loop<>(): at the end\n"); +#endif + }; // if + __kmpc_barrier(&loc, gtid); + return err; +} // run_loop + +// --------------------------------------------------------------------------- +int run_64(int num_th) +{ + int err = 0; +#pragma omp parallel num_threads(num_th) + { + int chunk; + i64 st, lb, ub; + for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) { + for (st = 1; st <= 3; ++ st) { + for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) { + for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) { + err += run_loop_64(lb, ub, st, chunk); + err += run_loop_64(ub, lb, -st, chunk); + }; // for ub + }; // for lb + }; // for st + }; // for chunk + } + return err; +} // run_all + +int run_32(int num_th) +{ + int err = 0; +#pragma omp parallel num_threads(num_th) + { + int chunk, st, lb, ub; + for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) { + for (st = 1; st <= 3; ++ st) { + for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) { + for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) { + err += run_loop_32(lb, ub, st, chunk); + err += run_loop_32(ub, lb, -st, chunk); + }; // for ub + }; // for lb + }; // for st + }; // for chunk + } + return err; +} // run_all + +// --------------------------------------------------------------------------- +int main() +{ + int n, err = 0; + for (n = 1; n <= 4; ++ n) { + err += run_32(n); + err += run_64(n); + }; // for n + if (err) + printf("failed with %d errors\n", err); + else + printf("passed\n"); + return err; +} |