diff options
Diffstat (limited to 'final/runtime/test')
181 files changed, 15167 insertions, 0 deletions
diff --git a/final/runtime/test/CMakeLists.txt b/final/runtime/test/CMakeLists.txt new file mode 100644 index 0000000..8f367c5 --- /dev/null +++ b/final/runtime/test/CMakeLists.txt @@ -0,0 +1,37 @@ +# CMakeLists.txt file for unit testing OpenMP host runtime library. +include(CheckFunctionExists) +include(CheckLibraryExists) + +# Some tests use math functions +check_library_exists(m sqrt "" LIBOMP_HAVE_LIBM) +# When using libgcc, -latomic may be needed for atomics +# (but when using compiler-rt, the atomics will be built-in) +# Note: we can not check for __atomic_load because clang treats it +# as special built-in and that breaks CMake checks +check_function_exists(__atomic_load_1 LIBOMP_HAVE_BUILTIN_ATOMIC) +if(NOT LIBOMP_HAVE_BUILTIN_ATOMIC) + check_library_exists(atomic __atomic_load_1 "" LIBOMP_HAVE_LIBATOMIC) +else() + # not needed + set(LIBOMP_HAVE_LIBATOMIC 0) +endif() + +macro(pythonize_bool var) + if (${var}) + set(${var} True) + else() + set(${var} False) + endif() +endmacro() + +pythonize_bool(LIBOMP_USE_HWLOC) +pythonize_bool(LIBOMP_OMPT_SUPPORT) +pythonize_bool(LIBOMP_OMPT_OPTIONAL) +pythonize_bool(LIBOMP_HAVE_LIBM) +pythonize_bool(LIBOMP_HAVE_LIBATOMIC) + +add_openmp_testsuite(check-libomp "Running libomp tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omp) + +# Configure the lit.site.cfg.in file +set(AUTO_GEN_COMMENT "## Autogenerated by libomp configuration.\n# Do not edit!") +configure_file(lit.site.cfg.in lit.site.cfg @ONLY) diff --git a/final/runtime/test/api/has_openmp.c b/final/runtime/test/api/has_openmp.c new file mode 100644 index 0000000..da95f59 --- /dev/null +++ b/final/runtime/test/api/has_openmp.c @@ -0,0 +1,23 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +int test_has_openmp() +{ + int rvalue = 0; +#ifdef _OPENMP + rvalue = 1; +#endif + return (rvalue); +} + +int main() +{ + int i; + int num_failed=0; + if(!test_has_openmp()) { + num_failed++; + } + return num_failed; +} diff 
--git a/final/runtime/test/api/kmp_aligned_malloc.c b/final/runtime/test/api/kmp_aligned_malloc.c new file mode 100644 index 0000000..5302fec --- /dev/null +++ b/final/runtime/test/api/kmp_aligned_malloc.c @@ -0,0 +1,62 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdint.h> +#include <omp.h> +#include "omp_testsuite.h" + +int alignments[] = {64, 128, 256, 512, 1024, 2048, 4096}; + +unsigned aligned_by(uint64_t addr) { + uint64_t alignment = 1; + while((addr & (alignment-1)) == 0) { + alignment <<= 1; + } + return (alignment >> 1); +} + +int test_kmp_aligned_malloc() +{ + int err = 0; + #pragma omp parallel shared(err) + { + int i; + int* ptr; + uint64_t addr; + int tid = omp_get_thread_num(); + + for(i = 0; i < sizeof(alignments)/sizeof(int); i++) { + int alignment = alignments[i]; + // allocate 64 bytes with 64-byte alignment + // allocate 128 bytes with 128-byte alignment, etc. + ptr = (int*)kmp_aligned_malloc(alignment, alignment); + addr = (uint64_t)ptr; + if(addr & (alignment-1)) { + printf("thread %d: addr = %p (aligned to %u bytes) but expected " + " alignment = %d\n", tid, ptr, aligned_by(addr), alignment); + err = 1; + } + kmp_free(ptr); + } + + ptr = kmp_aligned_malloc(128, 127); + if (ptr != NULL) { + printf("thread %d: kmp_aligned_malloc() didn't return NULL when " + "alignment was not power of 2\n", tid); + err = 1; + } + } /* end of parallel */ + return !err; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_kmp_aligned_malloc()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/api/kmp_set_defaults_lock_bug.c b/final/runtime/test/api/kmp_set_defaults_lock_bug.c new file mode 100644 index 0000000..73a7afb --- /dev/null +++ b/final/runtime/test/api/kmp_set_defaults_lock_bug.c @@ -0,0 +1,53 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" +/* The bug occurs if the lock table is reallocated after + kmp_set_defaults() 
is called. If the table is reallocated, + then the lock will not point to a valid lock object after the + kmp_set_defaults() call.*/ +omp_lock_t lock; + +int test_kmp_set_defaults_lock_bug() +{ + /* checks that omp_get_num_threads is equal to the number of + threads */ + int nthreads_lib; + int nthreads = 0; + + nthreads_lib = -1; + + #pragma omp parallel + { + omp_set_lock(&lock); + nthreads++; + omp_unset_lock(&lock); + #pragma omp single + { + nthreads_lib = omp_get_num_threads (); + } /* end of single */ + } /* end of parallel */ + kmp_set_defaults("OMP_NUM_THREADS"); + #pragma omp parallel + { + omp_set_lock(&lock); + nthreads++; + omp_unset_lock(&lock); + } /* end of parallel */ + + return (nthreads == 2*nthreads_lib); +} + +int main() +{ + int i; + int num_failed=0; + omp_init_lock(&lock); + + for(i = 0; i < REPETITIONS; i++) { + if(!test_kmp_set_defaults_lock_bug()) { + num_failed++; + } + } + omp_destroy_lock(&lock); + return num_failed; +} diff --git a/final/runtime/test/api/omp_get_num_threads.c b/final/runtime/test/api/omp_get_num_threads.c new file mode 100644 index 0000000..daf286d --- /dev/null +++ b/final/runtime/test/api/omp_get_num_threads.c @@ -0,0 +1,39 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_get_num_threads() +{ + /* checks that omp_get_num_threads is equal to the number of + threads */ + int nthreads_lib; + int nthreads = 0; + + nthreads_lib = -1; + + #pragma omp parallel + { + #pragma omp critical + { + nthreads++; + } /* end of critical */ + #pragma omp single + { + nthreads_lib = omp_get_num_threads (); + } /* end of single */ + } /* end of parallel */ + return (nthreads == nthreads_lib); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_get_num_threads()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/api/omp_get_wtick.c b/final/runtime/test/api/omp_get_wtick.c new file mode 100644 index 
0000000..8b35226 --- /dev/null +++ b/final/runtime/test/api/omp_get_wtick.c @@ -0,0 +1,24 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_get_wtick() +{ + double tick; + tick = -1.; + tick = omp_get_wtick (); + return ((tick > 0.0) && (tick < 0.01)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_get_wtick()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/api/omp_get_wtime.c b/final/runtime/test/api/omp_get_wtime.c new file mode 100644 index 0000000..b309440 --- /dev/null +++ b/final/runtime/test/api/omp_get_wtime.c @@ -0,0 +1,33 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +int test_omp_get_wtime() +{ + double start; + double end; + double measured_time; + double wait_time = 5.0; + start = 0; + end = 0; + start = omp_get_wtime(); + my_sleep (wait_time); + end = omp_get_wtime(); + measured_time = end-start; + return ((measured_time > 0.97 * wait_time) && (measured_time < 1.03 * wait_time)) ; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_get_wtime()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/api/omp_in_parallel.c b/final/runtime/test/api/omp_in_parallel.c new file mode 100644 index 0000000..d09313e --- /dev/null +++ b/final/runtime/test/api/omp_in_parallel.c @@ -0,0 +1,39 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +/* + * Checks that false is returned when called from serial region + * and true is returned when called within parallel region. 
+ */ +int test_omp_in_parallel() +{ + int serial; + int isparallel; + + serial = 1; + isparallel = 0; + serial = omp_in_parallel(); + + #pragma omp parallel + { + #pragma omp single + { + isparallel = omp_in_parallel(); + } + } + return (!(serial) && isparallel); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_in_parallel()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/atomic/omp_atomic.c b/final/runtime/test/atomic/omp_atomic.c new file mode 100644 index 0000000..7cdd30d --- /dev/null +++ b/final/runtime/test/atomic/omp_atomic.c @@ -0,0 +1,366 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */ +#define MAX_FACTOR 10 +#define KNOWN_PRODUCT 3628800 /* 10! */ + +int test_omp_atomic() +{ + int sum; + int diff; + double dsum = 0; + double dt = 0.5; /* base of geometric row for + and - test*/ + double ddiff; + int product; + int x; + int *logics; + int bit_and = 1; + int bit_or = 0; + int exclusiv_bit_or = 0; + int j; + int known_sum; + int known_diff; + int known_product; + int result = 0; + int logic_and = 1; + int logic_or = 0; + double dknown_sum; + double rounding_error = 1.E-9; + double dpt, div; + int logicsArray[LOOPCOUNT]; + logics = logicsArray; + + sum = 0; + diff = 0; + product = 1; + + // sum of integers test + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 1; i <= LOOPCOUNT; i++) { + #pragma omp atomic + sum += i; + } + + } + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + if (known_sum != sum) + { + fprintf(stderr, + "Error in sum with integers: Result was %d instead of %d.\n", + sum, known_sum); + result++; + } + + // difference of integers test + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < LOOPCOUNT; i++) { + #pragma omp atomic + diff -= i; + } + } + known_diff = ((LOOPCOUNT - 1) * LOOPCOUNT) / 2 * -1; + if (diff != 
known_diff) + { + fprintf (stderr, + "Error in difference with integers: Result was %d instead of 0.\n", + diff); + result++; + } + + // sum of doubles test + dsum = 0; + dpt = 1; + for (j = 0; j < DOUBLE_DIGITS; ++j) { + dpt *= dt; + } + dknown_sum = (1 - dpt) / (1 -dt); + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < DOUBLE_DIGITS; ++i) { + #pragma omp atomic + dsum += pow (dt, i); + } + } + if (dsum != dknown_sum && (fabs (dsum - dknown_sum) > rounding_error)) { + fprintf (stderr, "Error in sum with doubles: Result was %f" + " instead of: %f (Difference: %E)\n", + dsum, dknown_sum, dsum - dknown_sum); + result++; + } + + // difference of doubles test + dpt = 1; + for (j = 0; j < DOUBLE_DIGITS; ++j) { + dpt *= dt; + } + ddiff = (1 - dpt) / (1 - dt); + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < DOUBLE_DIGITS; ++i) { + #pragma omp atomic + ddiff -= pow (dt, i); + } + } + if (fabs (ddiff) > rounding_error) { + fprintf (stderr, + "Error in difference with doubles: Result was %E instead of 0.0\n", + ddiff); + result++; + } + + // product of integers test + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 1; i <= MAX_FACTOR; i++) { + #pragma omp atomic + product *= i; + } + } + known_product = KNOWN_PRODUCT; + if (known_product != product) { + fprintf (stderr, + "Error in product with integers: Result was %d instead of %d\n", + product, known_product); + result++; + } + + // division of integers test + product = KNOWN_PRODUCT; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 1; i <= MAX_FACTOR; ++i) { + #pragma omp atomic + product /= i; + } + } + if (product != 1) { + fprintf (stderr, + "Error in product division with integers: Result was %d" + " instead of 1\n", + product); + result++; + } + + // division of doubles test + div = 5.0E+5; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 1; i <= MAX_FACTOR; i++) { + #pragma omp atomic + div /= i; + } + } + if 
(fabs(div-0.137787) >= 1.0E-4 ) { + result++; + fprintf (stderr, "Error in division with double: Result was %f" + " instead of 0.137787\n", div); + } + + // ++ test + x = 0; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < LOOPCOUNT; ++i) { + #pragma omp atomic + x++; + } + } + if (x != LOOPCOUNT) { + result++; + fprintf (stderr, "Error in ++\n"); + } + + // -- test + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < LOOPCOUNT; ++i) { + #pragma omp atomic + x--; + } + } + if (x != 0) { + result++; + fprintf (stderr, "Error in --\n"); + } + + // bit-and test part 1 + for (j = 0; j < LOOPCOUNT; ++j) { + logics[j] = 1; + } + bit_and = 1; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < LOOPCOUNT; ++i) { + #pragma omp atomic + bit_and &= logics[i]; + } + } + if (!bit_and) { + result++; + fprintf (stderr, "Error in BIT AND part 1\n"); + } + + // bit-and test part 2 + bit_and = 1; + logics[LOOPCOUNT / 2] = 0; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < LOOPCOUNT; ++i) { + #pragma omp atomic + bit_and &= logics[i]; + } + } + if (bit_and) { + result++; + fprintf (stderr, "Error in BIT AND part 2\n"); + } + + // bit-or test part 1 + for (j = 0; j < LOOPCOUNT; j++) { + logics[j] = 0; + } + bit_or = 0; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < LOOPCOUNT; ++i) { + #pragma omp atomic + bit_or |= logics[i]; + } + } + if (bit_or) { + result++; + fprintf (stderr, "Error in BIT OR part 1\n"); + } + + // bit-or test part 2 + bit_or = 0; + logics[LOOPCOUNT / 2] = 1; + #pragma omp parallel + { + + int i; + #pragma omp for + for (i = 0; i < LOOPCOUNT; ++i) { + #pragma omp atomic + bit_or |= logics[i]; + } + } + if (!bit_or) { + result++; + fprintf (stderr, "Error in BIT OR part 2\n"); + } + + // bit-xor test part 1 + for (j = 0; j < LOOPCOUNT; j++) { + logics[j] = 0; + } + exclusiv_bit_or = 0; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < 
LOOPCOUNT; ++i) { + #pragma omp atomic + exclusiv_bit_or ^= logics[i]; + } + } + if (exclusiv_bit_or) { + result++; + fprintf (stderr, "Error in EXCLUSIV BIT OR part 1\n"); + } + + // bit-xor test part 2 + exclusiv_bit_or = 0; + logics[LOOPCOUNT / 2] = 1; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < LOOPCOUNT; ++i) { + #pragma omp atomic + exclusiv_bit_or ^= logics[i]; + } + + } + if (!exclusiv_bit_or) { + result++; + fprintf (stderr, "Error in EXCLUSIV BIT OR part 2\n"); + } + + // left shift test + x = 1; + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < 10; ++i) { + #pragma omp atomic + x <<= 1; + } + + } + if ( x != 1024) { + result++; + fprintf (stderr, "Error in <<\n"); + x = 1024; + } + + // right shift test + #pragma omp parallel + { + int i; + #pragma omp for + for (i = 0; i < 10; ++i) { + #pragma omp atomic + x >>= 1; + } + } + if (x != 1) { + result++; + fprintf (stderr, "Error in >>\n"); + } + + return (result == 0); +} // test_omp_atomic() + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_atomic()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/barrier/omp_barrier.c b/final/runtime/test/barrier/omp_barrier.c new file mode 100644 index 0000000..a3fb060 --- /dev/null +++ b/final/runtime/test/barrier/omp_barrier.c @@ -0,0 +1,44 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +int test_omp_barrier() +{ + int result1; + int result2; + result1 = 0; + result2 = 0; + + #pragma omp parallel + { + int rank; + rank = omp_get_thread_num (); + if (rank ==1) { + my_sleep(((double)SLEEPTIME)/REPETITIONS); // give 1 sec to whole test + result2 = 3; + } + #pragma omp barrier + if (rank == 2) { + result1 = result2; + } + } + return (result1 == 3); +} + +int main() +{ + int i; + int num_failed=0; + +#ifdef _OPENMP + omp_set_dynamic(0); // prevent runtime to change number of 
threads + omp_set_num_threads(4); // the test expects at least 3 threads + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_barrier()) { + num_failed++; + } + } +#endif + return num_failed; +} diff --git a/final/runtime/test/critical/omp_critical.c b/final/runtime/test/critical/omp_critical.c new file mode 100644 index 0000000..e07dbcb --- /dev/null +++ b/final/runtime/test/critical/omp_critical.c @@ -0,0 +1,37 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_critical() +{ + int sum; + int known_sum; + + sum=0; + #pragma omp parallel + { + int mysum=0; + int i; + #pragma omp for + for (i = 0; i < 1000; i++) + mysum = mysum + i; + + #pragma omp critical + sum = mysum +sum; + } + known_sum = 999 * 1000 / 2; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_critical()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/env/kmp_aff_disable_hwloc.c b/final/runtime/test/env/kmp_aff_disable_hwloc.c new file mode 100644 index 0000000..5f848ac --- /dev/null +++ b/final/runtime/test/env/kmp_aff_disable_hwloc.c @@ -0,0 +1,21 @@ +// RUN: %libomp-compile && env KMP_AFFINITY=disabled KMP_TOPOLOGY_METHOD=hwloc %libomp-run +// REQUIRES: hwloc +#include <stdio.h> +#include <stdlib.h> + +// Test will assert() without fix +int test_affinity_disabled_plus_hwloc() { + #pragma omp parallel + {} + return 1; +} + +int main(int argc, char **argv) { + int i, j; + int failed = 0; + + if (!test_affinity_disabled_plus_hwloc()) { + failed = 1; + } + return failed; +} diff --git a/final/runtime/test/env/kmp_set_dispatch_buf.c b/final/runtime/test/env/kmp_set_dispatch_buf.c new file mode 100644 index 0000000..49eb7b5 --- /dev/null +++ b/final/runtime/test/env/kmp_set_dispatch_buf.c @@ -0,0 +1,76 @@ +// RUN: %libomp-compile && env KMP_DISP_NUM_BUFFERS=0 %libomp-run +// RUN: env KMP_DISP_NUM_BUFFERS=1 %libomp-run && env 
KMP_DISP_NUM_BUFFERS=3 %libomp-run +// RUN: env KMP_DISP_NUM_BUFFERS=4 %libomp-run && env KMP_DISP_NUM_BUFFERS=7 %libomp-run +// RUN: %libomp-compile -DMY_SCHEDULE=guided && env KMP_DISP_NUM_BUFFERS=1 %libomp-run +// RUN: env KMP_DISP_NUM_BUFFERS=3 %libomp-run && env KMP_DISP_NUM_BUFFERS=4 %libomp-run +// RUN: env KMP_DISP_NUM_BUFFERS=7 %libomp-run +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include <limits.h> +#include "omp_testsuite.h" + +#define INCR 7 +#define MY_MAX 200 +#define MY_MIN -200 +#define NUM_LOOPS 100 +#ifndef MY_SCHEDULE +# define MY_SCHEDULE dynamic +#endif + +int a, b, a_known_value, b_known_value; + +int test_kmp_set_disp_num_buffers() +{ + int success = 1; + a = 0; + b = 0; + // run many small dynamic loops to stress the dispatch buffer system + #pragma omp parallel + { + int i,j; + for (j = 0; j < NUM_LOOPS; j++) { + #pragma omp for schedule(MY_SCHEDULE) nowait + for (i = MY_MIN; i < MY_MAX; i+=INCR) { + #pragma omp atomic + a++; + } + #pragma omp for schedule(MY_SCHEDULE) nowait + for (i = MY_MAX; i >= MY_MIN; i-=INCR) { + #pragma omp atomic + b++; + } + } + } + // detect failure + if (a != a_known_value || b != b_known_value) { + success = 0; + printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, + b, b_known_value); + } + return success; +} + +int main(int argc, char** argv) +{ + int i,j; + int num_failed=0; + + // figure out the known values to compare with calculated result + a_known_value = 0; + b_known_value = 0; + + for (j = 0; j < NUM_LOOPS; j++) { + for (i = MY_MIN; i < MY_MAX; i+=INCR) + a_known_value++; + for (i = MY_MAX; i >= MY_MIN; i-=INCR) + b_known_value++; + } + + for(i = 0; i < REPETITIONS; i++) { + if(!test_kmp_set_disp_num_buffers()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/env/omp_thread_limit.c b/final/runtime/test/env/omp_thread_limit.c new file mode 100644 index 0000000..800edc4 --- /dev/null +++ b/final/runtime/test/env/omp_thread_limit.c 
@@ -0,0 +1,82 @@ +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=4 %libomp-run 4 +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=7 %libomp-run 7 +// +// OMP_THREAD_LIMIT=N should imply that no more than N threads are active in +// a contention group +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include "omp_testsuite.h" + +int failed = 0; + +void usage() { + fprintf(stderr, "usage: omp_thread_limit <n>\n"); +} + +void verify(const char* file_name, int line_number, int team_size) { + int num_threads = omp_get_num_threads(); + if (team_size != num_threads) { +#pragma omp critical(A) + { + char label[256]; + snprintf(label, sizeof(label), "%s:%d", file_name, line_number); + failed = 1; + printf("failed: %s: team_size(%d) != omp_get_num_threads(%d)\n", + label, team_size, num_threads); + } + } +} + +int main(int argc, char** argv) +{ + int cl_thread_limit; + + if (argc != 2) { + usage(); + return 1; + } + cl_thread_limit = atoi(argv[1]); + + omp_set_dynamic(0); + if (omp_get_thread_limit() != cl_thread_limit) { + fprintf(stderr, "omp_get_thread_limit failed with %d, should be%d\n", + omp_get_thread_limit(), cl_thread_limit); + return 1; + } + else if (omp_get_max_threads() > cl_thread_limit) { +#if _OPENMP + int team_size = cl_thread_limit; +#else + int team_size = 1; +#endif + omp_set_num_threads(19); + verify(__FILE__, __LINE__, 1); +#pragma omp parallel + { + verify(__FILE__, __LINE__, team_size); + verify(__FILE__, __LINE__, team_size); + } + verify(__FILE__, __LINE__, 1); + + omp_set_nested(1); +#pragma omp parallel num_threads(3) + { + verify(__FILE__, __LINE__, 3); +#pragma omp master +#pragma omp parallel num_threads(21) + { + verify(__FILE__, __LINE__, team_size-2); + verify(__FILE__, __LINE__, team_size-2); + } + } + verify(__FILE__, __LINE__, 1); + + return failed; + } else { + fprintf(stderr, "This test is not applicable for max num_threads='%d'\n", + omp_get_max_threads()); + return 0; + } + +} diff --git 
a/final/runtime/test/env/omp_wait_policy.c b/final/runtime/test/env/omp_wait_policy.c new file mode 100644 index 0000000..b260ce4 --- /dev/null +++ b/final/runtime/test/env/omp_wait_policy.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile && env OMP_WAIT_POLICY=active %libomp-run active +// RUN: %libomp-compile && env OMP_WAIT_POLICY=passive %libomp-run passive +// +// OMP_WAIT_POLICY=active should imply blocktime == INT_MAX +// i.e., threads spin-wait forever +// OMP_WAIT_POLICY=passive should imply blocktime == 0 +// i.e., threads immediately sleep +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include "omp_testsuite.h" + +void usage() { + fprintf(stderr, "usage: omp_wait_policy active|passive\n"); +} + +int main(int argc, char** argv) +{ + int blocktime, retval=1; + const char* env_var_value; + + if (argc != 2) { + usage(); + return 1; + } + + blocktime = kmp_get_blocktime(); + + env_var_value = argv[1]; + if (!strcmp(env_var_value, "active")) { + retval = (blocktime != INT_MAX); + } else if (!strcmp(env_var_value, "passive")) { + retval = (blocktime != 0); + } else { + usage(); + retval = 1; + } + + return retval; +} diff --git a/final/runtime/test/flush/omp_flush.c b/final/runtime/test/flush/omp_flush.c new file mode 100644 index 0000000..3fd3cdf --- /dev/null +++ b/final/runtime/test/flush/omp_flush.c @@ -0,0 +1,45 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +int test_omp_flush() +{ + int result1; + int result2; + int dummy; + + result1 = 0; + result2 = 0; + + #pragma omp parallel + { + int rank; + rank = omp_get_thread_num (); + #pragma omp barrier + if (rank == 1) { + result2 = 3; + #pragma omp flush (result2) + dummy = result2; + } + if (rank == 0) { + my_sleep(SLEEPTIME); + #pragma omp flush (result2) + result1 = result2; + } + } /* end of parallel */ + return ((result1 == result2) && (result2 == dummy) && (result2 == 3)); +} + +int main() +{ + int i; + int num_failed=0; + + 
for (i = 0; i < REPETITIONS; i++) { + if(!test_omp_flush()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/lit.cfg b/final/runtime/test/lit.cfg new file mode 100644 index 0000000..e4561eb --- /dev/null +++ b/final/runtime/test/lit.cfg @@ -0,0 +1,130 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +# Configuration file for the 'lit' test runner. + +import os +import re +import subprocess +import lit.formats + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +def append_dynamic_library_path(path): + if config.operating_system == 'Windows': + name = 'PATH' + sep = ';' + elif config.operating_system == 'Darwin': + name = 'DYLD_LIBRARY_PATH' + sep = ':' + else: + name = 'LD_LIBRARY_PATH' + sep = ':' + if name in config.environment: + config.environment[name] = path + sep + config.environment[name] + else: + config.environment[name] = path + +# name: The name of this test suite. +config.name = 'libomp' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.c', '.cpp'] + +# test_source_root: The root path where tests are located. 
+config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root object directory where output is placed +config.test_exec_root = config.libomp_obj_root + +# test format +config.test_format = lit.formats.ShTest() + +# compiler flags +config.test_flags = " -I " + config.test_source_root + \ + " -I " + config.omp_header_directory + \ + " -L " + config.library_dir + \ + " " + config.test_extra_flags + +# extra libraries +libs = "" +if config.has_libm: + libs += " -lm" +if config.has_libatomic: + libs += " -latomic" + +# Allow XFAIL to work +config.target_triple = [ ] +for feature in config.test_compiler_features: + config.available_features.add(feature) + +# Setup environment to find dynamic library at runtime +append_dynamic_library_path(config.library_dir) +if config.using_hwloc: + append_dynamic_library_path(config.hwloc_library_dir) + config.available_features.add('hwloc') + +# Rpath modifications for Darwin +if config.operating_system == 'Darwin': + config.test_flags += " -Wl,-rpath," + config.library_dir + if config.using_hwloc: + config.test_flags += " -Wl,-rpath," + config.hwloc_library_dir + +# Find the SDK on Darwin +if config.operating_system == 'Darwin': + cmd = subprocess.Popen(['xcrun', '--show-sdk-path'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = cmd.communicate() + out = out.strip() + res = cmd.wait() + if res == 0 and out: + config.test_flags += " -isysroot " + out + +# Disable OMPT tests if FileCheck was not found +if config.has_ompt and config.test_filecheck == "": + lit_config.note("Not testing OMPT because FileCheck was not found") + config.has_ompt = False + +if config.has_ompt: + config.available_features.add("ompt") + # for callback.h + config.test_flags += " -I " + config.test_source_root + "/ompt" + +if 'Linux' in config.operating_system: + config.available_features.add("linux") + +# to run with icc INTEL_LICENSE_FILE must be set +if 'INTEL_LICENSE_FILE' in os.environ: + 
config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE'] + + +# substitutions +config.substitutions.append(("%libomp-compile-and-run", \ + "%libomp-compile && %libomp-run")) +config.substitutions.append(("%libomp-cxx-compile-and-run", \ + "%libomp-cxx-compile && %libomp-run")) +config.substitutions.append(("%libomp-cxx-compile", \ + "%clangXX %openmp_flags %flags -std=c++11 %s -o %t" + libs)) +config.substitutions.append(("%libomp-compile", \ + "%clang %openmp_flags %flags %s -o %t" + libs)) +config.substitutions.append(("%libomp-run", "%t")) +config.substitutions.append(("%clangXX", config.test_cxx_compiler)) +config.substitutions.append(("%clang", config.test_c_compiler)) +config.substitutions.append(("%openmp_flags", config.test_openmp_flags)) +config.substitutions.append(("%flags", config.test_flags)) + +if config.has_ompt: + config.substitutions.append(("FileCheck", config.test_filecheck)) + config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable")) + if config.operating_system == 'Windows': + # No such environment variable on Windows. + config.substitutions.append(("%preload-tool", "true ||")) + config.substitutions.append(("%no-as-needed-flag", "-Wl,--no-as-needed")) + elif config.operating_system == 'Darwin': + config.substitutions.append(("%preload-tool", "env DYLD_INSERT_LIBRARIES=%T/tool.so")) + # No such linker flag on Darwin. 
+ config.substitutions.append(("%no-as-needed-flag", "")) + else: + config.substitutions.append(("%preload-tool", "env LD_PRELOAD=%T/tool.so")) + config.substitutions.append(("%no-as-needed-flag", "-Wl,--no-as-needed")) diff --git a/final/runtime/test/lit.site.cfg.in b/final/runtime/test/lit.site.cfg.in new file mode 100644 index 0000000..c2825ee --- /dev/null +++ b/final/runtime/test/lit.site.cfg.in @@ -0,0 +1,20 @@ +@AUTO_GEN_COMMENT@ + +config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@" +config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@" +config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@ +config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@" +config.test_openmp_flags = "@OPENMP_TEST_OPENMP_FLAGS@" +config.test_extra_flags = "@OPENMP_TEST_FLAGS@" +config.libomp_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" +config.library_dir = "@LIBOMP_LIBRARY_DIR@" +config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src" +config.operating_system = "@CMAKE_SYSTEM_NAME@" +config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@" +config.using_hwloc = @LIBOMP_USE_HWLOC@ +config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@ +config.has_libm = @LIBOMP_HAVE_LIBM@ +config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@ + +# Let the main config do the real work. +lit_config.load_config(config, "@LIBOMP_BASE_DIR@/test/lit.cfg") diff --git a/final/runtime/test/lock/omp_init_lock.c b/final/runtime/test/lock/omp_init_lock.c new file mode 100644 index 0000000..24b60d1 --- /dev/null +++ b/final/runtime/test/lock/omp_init_lock.c @@ -0,0 +1,42 @@ +// RUN: %libomp-compile-and-run +#include "omp_testsuite.h" +#include <stdio.h> + +// This should be slightly less than KMP_I_LOCK_CHUNK, which is 1024 +#define LOCKS_PER_ITER 1000 +#define ITERATIONS (REPETITIONS + 1) + +// This tests concurrently using locks on one thread while initializing new +// ones on another thread. This exercises the global lock pool. 
+int test_omp_init_lock() { + int i; + omp_lock_t lcks[ITERATIONS * LOCKS_PER_ITER]; +#pragma omp parallel for schedule(static) num_threads(NUM_TASKS) + for (i = 0; i < ITERATIONS; i++) { + int j; + omp_lock_t *my_lcks = &lcks[i * LOCKS_PER_ITER]; + for (j = 0; j < LOCKS_PER_ITER; j++) { + omp_init_lock(&my_lcks[j]); + } + for (j = 0; j < LOCKS_PER_ITER * 100; j++) { + omp_set_lock(&my_lcks[j % LOCKS_PER_ITER]); + omp_unset_lock(&my_lcks[j % LOCKS_PER_ITER]); + } + } + // Wait until all repititions are done. The test is exercising growth of + // the global lock pool, which does not shrink when no locks are allocated. + { + int j; + for (j = 0; j < ITERATIONS * LOCKS_PER_ITER; j++) { + omp_destroy_lock(&lcks[j]); + } + } + + return 0; +} + +int main() { + // No use repeating this test, since it's exercising a private global pool + // which is not reset between test iterations. + return test_omp_init_lock(); +} diff --git a/final/runtime/test/lock/omp_lock.c b/final/runtime/test/lock/omp_lock.c new file mode 100644 index 0000000..1301f27 --- /dev/null +++ b/final/runtime/test/lock/omp_lock.c @@ -0,0 +1,47 @@ +// RUN: %libomp-compile-and-run +// RUN: env KMP_LOCK_KIND=tas KMP_SPIN_BACKOFF_PARAMS=2048,200 %libomp-run +// RUN: env KMP_LOCK_KIND=futex %libomp-run +#include <stdio.h> +#include "omp_testsuite.h" + +omp_lock_t lck; + +int test_omp_lock() +{ + int nr_threads_in_single = 0; + int result = 0; + int nr_iterations = 0; + int i; + + omp_init_lock(&lck); + #pragma omp parallel shared(lck) + { + #pragma omp for + for(i = 0; i < LOOPCOUNT; i++) { + omp_set_lock(&lck); + #pragma omp flush + nr_threads_in_single++; + #pragma omp flush + nr_iterations++; + nr_threads_in_single--; + result = result + nr_threads_in_single; + omp_unset_lock(&lck); + } + } + omp_destroy_lock(&lck); + + return ((result == 0) && (nr_iterations == LOOPCOUNT)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_lock()) { + num_failed++; + } + 
} + return num_failed; +} diff --git a/final/runtime/test/lock/omp_nest_lock.c b/final/runtime/test/lock/omp_nest_lock.c new file mode 100644 index 0000000..33d7c6a --- /dev/null +++ b/final/runtime/test/lock/omp_nest_lock.c @@ -0,0 +1,45 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +omp_nest_lock_t lck; + +int test_omp_nest_lock() +{ + int nr_threads_in_single = 0; + int result = 0; + int nr_iterations = 0; + int i; + + omp_init_nest_lock(&lck); + #pragma omp parallel shared(lck) + { + #pragma omp for + for(i = 0; i < LOOPCOUNT; i++) { + omp_set_nest_lock(&lck); + #pragma omp flush + nr_threads_in_single++; + #pragma omp flush + nr_iterations++; + nr_threads_in_single--; + result = result + nr_threads_in_single; + omp_unset_nest_lock(&lck); + } + } + omp_destroy_nest_lock(&lck); + + return ((result == 0) && (nr_iterations == LOOPCOUNT)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_nest_lock()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/lock/omp_test_lock.c b/final/runtime/test/lock/omp_test_lock.c new file mode 100644 index 0000000..c512055 --- /dev/null +++ b/final/runtime/test/lock/omp_test_lock.c @@ -0,0 +1,47 @@ +// RUN: %libomp-compile-and-run +// RUN: env KMP_LOCK_KIND=tas %libomp-run +// RUN: env KMP_LOCK_KIND=futex %libomp-run +#include <stdio.h> +#include "omp_testsuite.h" + +omp_lock_t lck; + +int test_omp_test_lock() +{ + int nr_threads_in_single = 0; + int result = 0; + int nr_iterations = 0; + int i; + + omp_init_lock (&lck); + #pragma omp parallel shared(lck) + { + #pragma omp for + for (i = 0; i < LOOPCOUNT; i++) { + while (!omp_test_lock (&lck)) + {}; + #pragma omp flush + nr_threads_in_single++; + #pragma omp flush + nr_iterations++; + nr_threads_in_single--; + result = result + nr_threads_in_single; + omp_unset_lock (&lck); + } + } + omp_destroy_lock(&lck); + return ((result == 0) && (nr_iterations == 
LOOPCOUNT)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_test_lock()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/lock/omp_test_nest_lock.c b/final/runtime/test/lock/omp_test_nest_lock.c new file mode 100644 index 0000000..2fa6fd2 --- /dev/null +++ b/final/runtime/test/lock/omp_test_nest_lock.c @@ -0,0 +1,47 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +static omp_nest_lock_t lck; + +int test_omp_test_nest_lock() +{ + int nr_threads_in_single = 0; + int result = 0; + int nr_iterations = 0; + int i; + + omp_init_nest_lock (&lck); + #pragma omp parallel shared(lck) + { + #pragma omp for + for (i = 0; i < LOOPCOUNT; i++) + { + /*omp_set_lock(&lck);*/ + while(!omp_test_nest_lock (&lck)) + {}; + #pragma omp flush + nr_threads_in_single++; + #pragma omp flush + nr_iterations++; + nr_threads_in_single--; + result = result + nr_threads_in_single; + omp_unset_nest_lock (&lck); + } + } + omp_destroy_nest_lock (&lck); + return ((result == 0) && (nr_iterations == LOOPCOUNT)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_test_nest_lock()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/master/omp_master.c b/final/runtime/test/master/omp_master.c new file mode 100644 index 0000000..1cc7f9e --- /dev/null +++ b/final/runtime/test/master/omp_master.c @@ -0,0 +1,38 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_master() +{ + int nthreads; + int executing_thread; + + nthreads = 0; + executing_thread = -1; + + #pragma omp parallel + { + #pragma omp master + { + #pragma omp critical + { + nthreads++; + } + executing_thread = omp_get_thread_num(); + } /* end of master*/ + } /* end of parallel*/ + return ((nthreads == 1) && (executing_thread == 0)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 
0; i < REPETITIONS; i++) { + if(!test_omp_master()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/master/omp_master_3.c b/final/runtime/test/master/omp_master_3.c new file mode 100644 index 0000000..2e9fdf8 --- /dev/null +++ b/final/runtime/test/master/omp_master_3.c @@ -0,0 +1,44 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_master_3() +{ + int nthreads; + int executing_thread; + int tid_result = 0; /* counts up the number of wrong thread no. for + the master thread. (Must be 0) */ + nthreads = 0; + executing_thread = -1; + + #pragma omp parallel + { + #pragma omp master + { + int tid = omp_get_thread_num(); + if (tid != 0) { + #pragma omp critical + { tid_result++; } + } + #pragma omp critical + { + nthreads++; + } + executing_thread = omp_get_thread_num (); + } /* end of master*/ + } /* end of parallel*/ + return ((nthreads == 1) && (executing_thread == 0) && (tid_result == 0)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_master_3()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/misc_bugs/cancellation_for_sections.c b/final/runtime/test/misc_bugs/cancellation_for_sections.c new file mode 100644 index 0000000..07a61cb --- /dev/null +++ b/final/runtime/test/misc_bugs/cancellation_for_sections.c @@ -0,0 +1,64 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run +// XFAIL: gcc +// Clang had a bug until version 4.0.1 which resulted in a hang. +// UNSUPPORTED: clang-3, clang-4.0.0 + +// Regression test for a bug in cancellation to cover effect of `#pragma omp cancel` +// in a loop construct, on sections construct. +// Pass condition: Cancellation status from `for` does not persist +// to `sections`. 
+ +#include <stdio.h> +#include <omp.h> + +int result[2] = {0, 0}; + +void cq416850_for_sections() { + + unsigned i; + // 1) loop + #pragma omp for + for (i = 0; i < 1; i++) { + result[0] = 1; + #pragma omp cancel for + result[0] = 2; + } + +// printf("thread %d: result[0] = %d, result[1] = %d \n", omp_get_thread_num(), result[0], result[1]); + + + // 2) sections + #pragma omp sections + { + #pragma omp section + { + result[1] = 1; + #pragma omp cancellation point sections + result[1] = 2; + } + } +} + +int main(void) { + if(!omp_get_cancellation()) { + printf("Cancellation not enabled!\n"); + return 2; + } + + #pragma omp parallel num_threads(4) + { + cq416850_for_sections(); + } + + if (result[0] != 1 || result[1] != 2) { + printf("Incorrect values. " + "result[0] = %d (expected 1), " + "result[1] = %d (expected 2).\n", + result[0], result[1]); + printf("FAILED\n"); + return 1; + } + + printf("PASSED\n"); + return 0; +} diff --git a/final/runtime/test/misc_bugs/many-microtask-args.c b/final/runtime/test/misc_bugs/many-microtask-args.c new file mode 100644 index 0000000..d644515 --- /dev/null +++ b/final/runtime/test/misc_bugs/many-microtask-args.c @@ -0,0 +1,39 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> + +int main() +{ + + int i; + int i1 = 0; + int i2 = 1; + int i3 = 2; + int i4 = 3; + int i5 = 4; + int i6 = 6; + int i7 = 7; + int i8 = 8; + int i9 = 9; + int i10 = 10; + int i11 = 11; + int i12 = 12; + int i13 = 13; + int i14 = 14; + int i15 = 15; + int i16 = 16; + + int r = 0; + #pragma omp parallel for firstprivate(i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16) reduction(+:r) + for (i = 0; i < i16; i++) { + r += i + i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10 + i11 + i12 + i13 + i14 + i15 + i16; + } + + int rf = 2216; + if (r != rf) { + fprintf(stderr, "r should be %d but instead equals %d\n", rf, r); + return 1; + } + + return 0; +} + diff --git a/final/runtime/test/misc_bugs/omp_foreign_thread_team_reuse.c 
b/final/runtime/test/misc_bugs/omp_foreign_thread_team_reuse.c new file mode 100644 index 0000000..4d70d47 --- /dev/null +++ b/final/runtime/test/misc_bugs/omp_foreign_thread_team_reuse.c @@ -0,0 +1,81 @@ +// RUN: %libomp-compile -lpthread && %libomp-run +#include <stdio.h> +#include "omp_testsuite.h" + +#define NUM_THREADS 10 + +/* + After hot teams were enabled by default, the library started using levels + kept in the team structure. The levels are broken in case foreign thread + exits and puts its team into the pool which is then re-used by another foreign + thread. The broken behavior observed is when printing the levels for each + new team, one gets 1, 2, 1, 2, 1, 2, etc. This makes the library believe that + every other team is nested which is incorrect. What is wanted is for the + levels to be 1, 1, 1, etc. +*/ + +int a = 0; +int level; + +typedef struct thread_arg_t { + int iterations; +} thread_arg_t; + +void* thread_function(void* arg) { + int i; + thread_arg_t* targ = (thread_arg_t*)arg; + int iterations = targ->iterations; + #pragma omp parallel private(i) + { + // level should always be 1 + #pragma omp single + level = omp_get_level(); + + #pragma omp for + for(i = 0; i < iterations; i++) { + #pragma omp atomic + a++; + } + } +} + +int test_omp_team_reuse() +{ + int i; + int success = 1; + pthread_t thread[NUM_THREADS]; + thread_arg_t thread_arg[NUM_THREADS]; + // launch NUM_THREADS threads, one at a time to perform thread_function() + for(i = 0; i < NUM_THREADS; i++) { + thread_arg[i].iterations = i + 1; + pthread_create(thread+i, NULL, thread_function, thread_arg+i); + pthread_join(*(thread+i), NULL); + // level read in thread_function()'s parallel region should be 1 + if(level != 1) { + fprintf(stderr, "error: for pthread %d level should be 1 but " + "instead equals %d\n", i, level); + success = 0; + } + } + // make sure the for loop works + int known_sum = (NUM_THREADS * (NUM_THREADS+1)) / 2; + if(a != known_sum) { + fprintf(stderr, "a should be 
%d but instead equals %d\n", known_sum, a); + success = 0; + } + return success; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + a = 0; + if(!test_omp_team_reuse()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/misc_bugs/teams-no-par.c b/final/runtime/test/misc_bugs/teams-no-par.c new file mode 100644 index 0000000..0ef8d9a --- /dev/null +++ b/final/runtime/test/misc_bugs/teams-no-par.c @@ -0,0 +1,64 @@ +// RUN: %libomp-compile-and-run +// +// The test checks the teams construct pseudocode executed on host +// + +#include <stdio.h> +#include <omp.h> + +#ifndef N_TEAMS +#define N_TEAMS 4 +#endif +#ifndef N_THR +#define N_THR 3 +#endif + +static int err = 0; + +// Internal library staff to emulate compiler's code generation: +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} ident_t; + +static ident_t dummy_loc = {0, 2, 0, 0, ";dummyFile;dummyFunc;0;0;;"}; + +int __kmpc_global_thread_num(void*); +void __kmpc_push_num_teams(ident_t const*, int, int, int); +void __kmpc_fork_teams(ident_t const*, int argc, void *microtask, ...); + +#ifdef __cplusplus +} +#endif + +// Outlined entry point: +void foo(int *gtid, int *tid, int *nt) +{ // start "serial" execution by master threads of each team + if ( nt ) { + printf(" team %d, param %d\n", omp_get_team_num(), *nt); + } else { + printf("ERROR: teams before parallel: gtid, tid: %d %d, bad pointer: %p\n", *gtid, *tid, nt); + err++; + return; + } +} + +int main() +{ + int nt = 4; + int th = __kmpc_global_thread_num(NULL); // registers initial thread + __kmpc_push_num_teams(&dummy_loc, th, N_TEAMS, N_THR); + __kmpc_fork_teams(&dummy_loc, 1, &foo, &nt); // pass 1 shared parameter "nt" + if (err) + printf("failed with %d errors\n",err); + else + printf("passed\n"); + return err; +} diff --git a/final/runtime/test/misc_bugs/teams-reduction.c 
b/final/runtime/test/misc_bugs/teams-reduction.c new file mode 100644 index 0000000..6d7cd11 --- /dev/null +++ b/final/runtime/test/misc_bugs/teams-reduction.c @@ -0,0 +1,68 @@ +// RUN: %libomp-compile-and-run +// +// The test checks the teams construct with reduction executed on the host. +// + +#include <stdio.h> +#include <omp.h> + +#include <stdint.h> + +#ifndef N_TEAMS +#define N_TEAMS 4 +#endif +#ifndef N_THR +#define N_THR 3 +#endif + +// Internal library stuff to emulate compiler's code generation: +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int32_t reserved_1; + int32_t flags; + int32_t reserved_2; + int32_t reserved_3; + char const *psource; +} ident_t; + +static ident_t dummy_loc = {0, 2, 0, 0, ";dummyFile;dummyFunc;0;0;;"}; + +typedef union { + // The global will be used as pointer, so we need to make sure that the + // compiler correctly aligns the global... + void *ptr; + int32_t data[8]; +} kmp_critical_name; +kmp_critical_name crit; + +int32_t __kmpc_global_thread_num(ident_t *); +void __kmpc_push_num_teams(ident_t *, int32_t global_tid, int32_t num_teams, + int32_t num_threads); +void __kmpc_fork_teams(ident_t *, int32_t argc, void *microtask, ...); +int32_t __kmpc_reduce(ident_t *, int32_t global_tid, int32_t num_vars, + size_t reduce_size, void *reduce_data, void *reduce_func, + kmp_critical_name *lck); +void __kmpc_end_reduce(ident_t *, int32_t global_tid, kmp_critical_name *lck); + +#ifdef __cplusplus +} +#endif + +// Outlined entry point: +void outlined(int32_t *gtid, int32_t *tid) { + int32_t ret = __kmpc_reduce(&dummy_loc, *gtid, 0, 0, NULL, NULL, &crit); + __kmpc_end_reduce(&dummy_loc, *gtid, &crit); +} + +int main() { + int32_t th = __kmpc_global_thread_num(NULL); // registers initial thread + __kmpc_push_num_teams(&dummy_loc, th, N_TEAMS, N_THR); + __kmpc_fork_teams(&dummy_loc, 0, &outlined); + + // Test did not hang -> passed! 
+ printf("passed\n"); + return 0; +} diff --git a/final/runtime/test/omp_my_sleep.h b/final/runtime/test/omp_my_sleep.h new file mode 100644 index 0000000..138d930 --- /dev/null +++ b/final/runtime/test/omp_my_sleep.h @@ -0,0 +1,33 @@ +#ifndef MY_SLEEP_H +#define MY_SLEEP_H + +/*! Utility function to have a sleep function with better resolution and + * which only stops one thread. */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <time.h> + +#if defined(_WIN32) +# include <windows.h> +// Windows version of my_sleep() function +static void my_sleep(double sleeptime) { + DWORD ms = (DWORD) (sleeptime * 1000.0); + Sleep(ms); +} + + +#else // _WIN32 + +// Unices version of my_sleep() function +static void my_sleep(double sleeptime) { + struct timespec ts; + ts.tv_sec = (time_t)sleeptime; + ts.tv_nsec = (long)((sleeptime - (double)ts.tv_sec) * 1E9); + nanosleep(&ts, NULL); +} + +#endif // _WIN32 + +#endif // MY_SLEEP_H diff --git a/final/runtime/test/omp_testsuite.h b/final/runtime/test/omp_testsuite.h new file mode 100644 index 0000000..eef5470 --- /dev/null +++ b/final/runtime/test/omp_testsuite.h @@ -0,0 +1,79 @@ +/* Global headerfile of the OpenMP Testsuite */ + +#ifndef OMP_TESTSUITE_H +#define OMP_TESTSUITE_H + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +/* General */ +/**********************************************************/ +#define LOOPCOUNT 1000 /* Number of iterations to slit amongst threads */ +#define REPETITIONS 10 /* Number of times to run each test */ + +/* following times are in seconds */ +#define SLEEPTIME 1 + +/* Definitions for tasks */ +/**********************************************************/ +#define NUM_TASKS 25 +#define MAX_TASKS_PER_THREAD 5 + +#ifdef _WIN32 +// Windows versions of pthread_create() and pthread_join() +# include <windows.h> +typedef HANDLE pthread_t; + +// encapsulates the information about a pthread-callable function +struct thread_func_info_t { + void* (*start_routine)(void*); 
+ void* arg; +}; + +// call the void* start_routine(void*); +static DWORD __thread_func_wrapper(LPVOID lpParameter) { + struct thread_func_info_t* function_information; + function_information = (struct thread_func_info_t*)lpParameter; + function_information->start_routine(function_information->arg); + free(function_information); + return 0; +} + +// attr is ignored +static int pthread_create(pthread_t *thread, void *attr, + void *(*start_routine) (void *), void *arg) { + pthread_t pthread; + struct thread_func_info_t* info; + info = (struct thread_func_info_t*)malloc(sizeof(struct thread_func_info_t)); + info->start_routine = start_routine; + info->arg = arg; + pthread = CreateThread(NULL, 0, __thread_func_wrapper, info, 0, NULL); + if (pthread == NULL) { + fprintf(stderr, "CreateThread() failed: Error #%u.\n", GetLastError()); + exit(1); + } + *thread = pthread; + return 0; +} +// retval is ignored for now +static int pthread_join(pthread_t thread, void **retval) { + int rc; + rc = WaitForSingleObject(thread, INFINITE); + if (rc == WAIT_FAILED) { + fprintf(stderr, "WaitForSingleObject() failed: Error #%u.\n", + GetLastError()); + exit(1); + } + rc = CloseHandle(thread); + if (rc == 0) { + fprintf(stderr, "CloseHandle() failed: Error #%u.\n", GetLastError()); + exit(1); + } + return 0; +} +#else +# include <pthread.h> +#endif + +#endif diff --git a/final/runtime/test/ompt/callback.h b/final/runtime/test/ompt/callback.h new file mode 100755 index 0000000..f1191ad --- /dev/null +++ b/final/runtime/test/ompt/callback.h @@ -0,0 +1,764 @@ +#ifndef _BSD_SOURCE +#define _BSD_SOURCE +#endif +#define _DEFAULT_SOURCE +#include <stdio.h> +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif +#include <inttypes.h> +#include <omp.h> +#include <ompt.h> +#include "ompt-signal.h" + +// Used to detect architecture +#include "../../src/kmp_platform.h" + +static const char* ompt_thread_type_t_values[] = { + NULL, + "ompt_thread_initial", + "ompt_thread_worker", + 
"ompt_thread_other" +}; + +static const char* ompt_task_status_t_values[] = { + NULL, + "ompt_task_complete", + "ompt_task_yield", + "ompt_task_cancel", + "ompt_task_others" +}; +static const char* ompt_cancel_flag_t_values[] = { + "ompt_cancel_parallel", + "ompt_cancel_sections", + "ompt_cancel_do", + "ompt_cancel_taskgroup", + "ompt_cancel_activated", + "ompt_cancel_detected", + "ompt_cancel_discarded_task" +}; + +static void format_task_type(int type, char *buffer) { + char *progress = buffer; + if (type & ompt_task_initial) + progress += sprintf(progress, "ompt_task_initial"); + if (type & ompt_task_implicit) + progress += sprintf(progress, "ompt_task_implicit"); + if (type & ompt_task_explicit) + progress += sprintf(progress, "ompt_task_explicit"); + if (type & ompt_task_target) + progress += sprintf(progress, "ompt_task_target"); + if (type & ompt_task_undeferred) + progress += sprintf(progress, "|ompt_task_undeferred"); + if (type & ompt_task_untied) + progress += sprintf(progress, "|ompt_task_untied"); + if (type & ompt_task_final) + progress += sprintf(progress, "|ompt_task_final"); + if (type & ompt_task_mergeable) + progress += sprintf(progress, "|ompt_task_mergeable"); + if (type & ompt_task_merged) + progress += sprintf(progress, "|ompt_task_merged"); +} + +static ompt_set_callback_t ompt_set_callback; +static ompt_get_callback_t ompt_get_callback; +static ompt_get_state_t ompt_get_state; +static ompt_get_task_info_t ompt_get_task_info; +static ompt_get_thread_data_t ompt_get_thread_data; +static ompt_get_parallel_info_t ompt_get_parallel_info; +static ompt_get_unique_id_t ompt_get_unique_id; +static ompt_get_num_procs_t ompt_get_num_procs; +static ompt_get_num_places_t ompt_get_num_places; +static ompt_get_place_proc_ids_t ompt_get_place_proc_ids; +static ompt_get_place_num_t ompt_get_place_num; +static ompt_get_partition_place_nums_t ompt_get_partition_place_nums; +static ompt_get_proc_id_t ompt_get_proc_id; +static ompt_enumerate_states_t 
ompt_enumerate_states; +static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls; + +static void print_ids(int level) +{ + int task_type, thread_num; + omp_frame_t *frame; + ompt_data_t *task_parallel_data; + ompt_data_t *task_data; + int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame, + &task_parallel_data, &thread_num); + char buffer[2048]; + format_task_type(task_type, buffer); + if (frame) + printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 + ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, " + "task_type=%s=%d, thread_num=%d\n", + ompt_get_thread_data()->value, level, + exists_task ? task_parallel_data->value : 0, + exists_task ? task_data->value : 0, frame->exit_frame, + frame->enter_frame, buffer, task_type, thread_num); +} + +#define get_frame_address(level) __builtin_frame_address(level) + +#define print_frame(level) \ + printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", \ + ompt_get_thread_data()->value, level, get_frame_address(level)) + +// clang (version 5.0 and above) adds an intermediate function call with debug flag (-g) +#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN) + #if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5 + #define print_frame_from_outlined_fn(level) print_frame(level+1) + #else + #define print_frame_from_outlined_fn(level) print_frame(level) + #endif + + #if defined(__clang__) && __clang_major__ >= 5 + #warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information." + #warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!" + #endif +#endif + +// This macro helps to define a label at the current position that can be used +// to get the current address in the code. 
+// +// For print_current_address(): +// To reliably determine the offset between the address of the label and the +// actual return address, we insert a NOP instruction as a jump target as the +// compiler would otherwise insert an instruction that we can't control. The +// instruction length is target dependent and is explained below. +// +// (The empty block between "#pragma omp ..." and the __asm__ statement is a +// workaround for a bug in the Intel Compiler.) +#define define_ompt_label(id) \ + {} \ + __asm__("nop"); \ +ompt_label_##id: + +// This macro helps to get the address of a label that is inserted by the above +// macro define_ompt_label(). The address is obtained with a GNU extension +// (&&label) that has been tested with gcc, clang and icc. +#define get_ompt_label_address(id) (&& ompt_label_##id) + +// This macro prints the exact address that a previously called runtime function +// returns to. +#define print_current_address(id) \ + define_ompt_label(id) \ + print_possible_return_addresses(get_ompt_label_address(id)) + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +// On X86 the NOP instruction is 1 byte long. In addition, the comiler inserts +// a MOV instruction for non-void runtime functions which is 3 bytes long. +#define print_possible_return_addresses(addr) \ + printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \ + ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4) +#elif KMP_ARCH_PPC64 +// On Power the NOP instruction is 4 bytes long. In addition, the compiler +// inserts an LD instruction which accounts for another 4 bytes. In contrast to +// X86 this instruction is always there, even for void runtime functions. +#define print_possible_return_addresses(addr) \ + printf("%" PRIu64 ": current_address=%p\n", ompt_get_thread_data()->value, \ + ((char *)addr) - 8) +#elif KMP_ARCH_AARCH64 +// On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted +// store instruction (another 4 bytes long). 
+#define print_possible_return_addresses(addr) \ + printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \ + ((char *)addr) - 4, ((char *)addr) - 8) +#else +#error Unsupported target architecture, cannot determine address offset! +#endif + + +// This macro performs a somewhat similar job to print_current_address(), except +// that it discards a certain number of nibbles from the address and only prints +// the most significant bits / nibbles. This can be used for cases where the +// return address can only be approximated. +// +// To account for overflows (ie the most significant bits / nibbles have just +// changed as we are a few bytes above the relevant power of two) the addresses +// of the "current" and of the "previous block" are printed. +#define print_fuzzy_address(id) \ + define_ompt_label(id) \ + print_fuzzy_address_blocks(get_ompt_label_address(id)) + +// If you change this define you need to adapt all capture patterns in the tests +// to include or discard the new number of nibbles! 
+#define FUZZY_ADDRESS_DISCARD_NIBBLES 2 +#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4)) +#define print_fuzzy_address_blocks(addr) \ + printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 \ + " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n", \ + ompt_get_thread_data()->value, \ + ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES - 1, \ + ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES, \ + ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 1, \ + ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 2, addr) + +static void +on_ompt_callback_mutex_acquire( + ompt_mutex_kind_t kind, + unsigned int hint, + unsigned int impl, + omp_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_atomic: + printf("%" PRIu64 ": ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_mutex_acquired( + ompt_mutex_kind_t kind, + omp_wait_id_t wait_id, + const void 
*codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_acquired_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_atomic: + printf("%" PRIu64 ": ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_acquired_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_mutex_released( + ompt_mutex_kind_t kind, + omp_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_release_nest_lock_last: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_release_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_atomic: + printf("%" PRIu64 ": ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, 
codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_nest_lock( + ompt_scope_endpoint_t endpoint, + omp_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_release_nest_lock_prev: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + } +} + +static void +on_ompt_callback_sync_region( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + print_ids(0); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } + break; + case ompt_scope_end: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_taskwait_end: parallel_id=%" PRIu64 ", 
task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } + break; + } +} + +static void +on_ompt_callback_sync_region_wait( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } + break; + case ompt_scope_end: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_wait_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, 
codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } + break; + } +} + +static void +on_ompt_callback_flush( + ompt_data_t *thread_data, + const void *codeptr_ra) +{ + printf("%" PRIu64 ": ompt_event_flush: codeptr_ra=%p\n", thread_data->value, codeptr_ra); +} + +static void +on_ompt_callback_cancel( + ompt_data_t *task_data, + int flags, + const void *codeptr_ra) +{ + const char* first_flag_value; + const char* second_flag_value; + if(flags & ompt_cancel_parallel) + first_flag_value = ompt_cancel_flag_t_values[0]; + else if(flags & ompt_cancel_sections) + first_flag_value = ompt_cancel_flag_t_values[1]; + else if(flags & ompt_cancel_do) + first_flag_value = ompt_cancel_flag_t_values[2]; + else if(flags & ompt_cancel_taskgroup) + first_flag_value = ompt_cancel_flag_t_values[3]; + + if(flags & ompt_cancel_activated) + second_flag_value = ompt_cancel_flag_t_values[4]; + else if(flags & ompt_cancel_detected) + second_flag_value = ompt_cancel_flag_t_values[5]; + else if(flags & ompt_cancel_discarded_task) + second_flag_value = ompt_cancel_flag_t_values[6]; + + printf("%" PRIu64 ": ompt_event_cancel: task_data=%" PRIu64 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, task_data->value, first_flag_value, second_flag_value, flags, codeptr_ra); +} + +static void +on_ompt_callback_idle( + ompt_scope_endpoint_t endpoint) +{ + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_idle_begin:\n", ompt_get_thread_data()->value); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_idle_end:\n", ompt_get_thread_data()->value); + break; + } +} + +static void +on_ompt_callback_implicit_task( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + 
unsigned int team_size, + unsigned int thread_num) +{ + switch(endpoint) + { + case ompt_scope_begin: + if(task_data->ptr) + printf("%s\n", "0: task_data initially not null"); + task_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num); + break; + } +} + +static void +on_ompt_callback_lock_init( + ompt_mutex_kind_t kind, + unsigned int hint, + unsigned int impl, + omp_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_lock_destroy( + ompt_mutex_kind_t kind, + omp_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_destroy_nest_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + default: + break; + } +} + +static void 
+on_ompt_callback_work( + ompt_work_type_t wstype, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + uint64_t count, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + switch(wstype) + { + case ompt_work_loop: + printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_sections: + printf("%" PRIu64 ": ompt_event_sections_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_executor: + printf("%" PRIu64 ": ompt_event_single_in_block_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_other: + printf("%" PRIu64 ": ompt_event_single_others_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_workshare: + //impl + break; + case ompt_work_distribute: + printf("%" PRIu64 ": ompt_event_distribute_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_taskloop: + //impl + printf("%" PRIu64 ": ompt_event_taskloop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + } + break; + case ompt_scope_end: + switch(wstype) + { + case 
ompt_work_loop: + printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_sections: + printf("%" PRIu64 ": ompt_event_sections_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_executor: + printf("%" PRIu64 ": ompt_event_single_in_block_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_other: + printf("%" PRIu64 ": ompt_event_single_others_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_workshare: + //impl + break; + case ompt_work_distribute: + printf("%" PRIu64 ": ompt_event_distribute_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_taskloop: + //impl + printf("%" PRIu64 ": ompt_event_taskloop_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + } + break; + } +} + +static void +on_ompt_callback_master( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_master_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, 
parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_master_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } +} + +static void +on_ompt_callback_parallel_begin( + ompt_data_t *encountering_task_data, + const omp_frame_t *encountering_task_frame, + ompt_data_t* parallel_data, + uint32_t requested_team_size, + ompt_invoker_t invoker, + const void *codeptr_ra) +{ + if(parallel_data->ptr) + printf("0: parallel_data initially not null\n"); + parallel_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, encountering_task_data->value, encountering_task_frame->exit_frame, encountering_task_frame->enter_frame, parallel_data->value, requested_team_size, codeptr_ra, invoker); +} + +static void +on_ompt_callback_parallel_end( + ompt_data_t *parallel_data, + ompt_data_t *encountering_task_data, + ompt_invoker_t invoker, + const void *codeptr_ra) +{ + printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, encountering_task_data->value, invoker, codeptr_ra); +} + +static void +on_ompt_callback_task_create( + ompt_data_t *encountering_task_data, + const omp_frame_t *encountering_task_frame, + ompt_data_t* new_task_data, + int type, + int has_dependences, + const void *codeptr_ra) +{ + if(new_task_data->ptr) + printf("0: new_task_data initially not null\n"); + new_task_data->value = ompt_get_unique_id(); + char buffer[2048]; + + format_task_type(type, buffer); + + //there is no parallel_begin callback for implicit parallel region + //thus it is 
initialized in initial task + if(type & ompt_task_initial) + { + ompt_data_t *parallel_data; + ompt_get_parallel_info(0, &parallel_data, NULL); + if(parallel_data->ptr) + printf("%s\n", "0: parallel_data initially not null"); + parallel_data->value = ompt_get_unique_id(); + } + + printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame : NULL, encountering_task_frame ? encountering_task_frame->enter_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no"); +} + +static void +on_ompt_callback_task_schedule( + ompt_data_t *first_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *second_task_data) +{ + printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, ompt_task_status_t_values[prior_task_status], prior_task_status); + if(prior_task_status == ompt_task_complete) + { + printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value); + } +} + +static void +on_ompt_callback_task_dependences( + ompt_data_t *task_data, + const ompt_task_dependence_t *deps, + int ndeps) +{ + printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n", ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps); +} + +static void +on_ompt_callback_task_dependence( + ompt_data_t *first_task_data, + ompt_data_t *second_task_data) +{ + printf("%" PRIu64 ": ompt_event_task_dependence_pair: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, 
first_task_data->value, second_task_data->value); +} + +static void +on_ompt_callback_thread_begin( + ompt_thread_type_t thread_type, + ompt_data_t *thread_data) +{ + if(thread_data->ptr) + printf("%s\n", "0: thread_data initially not null"); + thread_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ompt_thread_type_t_values[thread_type], thread_type, thread_data->value); +} + +static void +on_ompt_callback_thread_end( + ompt_data_t *thread_data) +{ + printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, thread_data->value); +} + +static int +on_ompt_callback_control_tool( + uint64_t command, + uint64_t modifier, + void *arg, + const void *codeptr_ra) +{ + omp_frame_t* omptTaskFrame; + ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL); + printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_frame, omptTaskFrame->enter_frame); + return 0; //success +} + +#define register_callback_t(name, type) \ +do{ \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \ + ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ +}while(0) + +#define register_callback(name) register_callback_t(name, name##_t) + +int ompt_initialize( + ompt_function_lookup_t lookup, + ompt_data_t *tool_data) +{ + ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); + ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback"); + ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state"); + ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info"); + ompt_get_thread_data = (ompt_get_thread_data_t) 
lookup("ompt_get_thread_data"); + ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info"); + ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id"); + + ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs"); + ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places"); + ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids"); + ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num"); + ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums"); + ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id"); + ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states"); + ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls"); + + register_callback(ompt_callback_mutex_acquire); + register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t); + register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t); + register_callback(ompt_callback_nest_lock); + register_callback(ompt_callback_sync_region); + register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t); + register_callback(ompt_callback_control_tool); + register_callback(ompt_callback_flush); + register_callback(ompt_callback_cancel); + register_callback(ompt_callback_idle); + register_callback(ompt_callback_implicit_task); + register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t); + register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t); + register_callback(ompt_callback_work); + register_callback(ompt_callback_master); + register_callback(ompt_callback_parallel_begin); + register_callback(ompt_callback_parallel_end); + register_callback(ompt_callback_task_create); + register_callback(ompt_callback_task_schedule); + register_callback(ompt_callback_task_dependences); + 
register_callback(ompt_callback_task_dependence); + register_callback(ompt_callback_thread_begin); + register_callback(ompt_callback_thread_end); + printf("0: NULL_POINTER=%p\n", (void*)NULL); + return 1; //success +} + +void ompt_finalize(ompt_data_t *tool_data) +{ + printf("0: ompt_event_runtime_shutdown\n"); +} + +ompt_start_tool_result_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) +{ + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; + return &ompt_start_tool_result; +} diff --git a/final/runtime/test/ompt/cancel/cancel_parallel.c b/final/runtime/test/ompt/cancel/cancel_parallel.c new file mode 100644 index 0000000..b03239d --- /dev/null +++ b/final/runtime/test/ompt/cancel/cancel_parallel.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// Current GOMP interface implementation does not support cancellation +// XFAIL: gcc + +#include "callback.h" +#include "omp.h" + +int main() { + #pragma omp parallel num_threads(2) + { + if (omp_get_thread_num() == 0) { + print_fuzzy_address_blocks(get_ompt_label_address(1)); + #pragma omp cancel parallel + define_ompt_label(1); + // We cannot print at this location because the parallel region is cancelled! + } else { + delay(100); + print_fuzzy_address_blocks(get_ompt_label_address(2)); + #pragma omp cancellation point parallel + define_ompt_label(2); + // We cannot print at this location because the parallel region is cancelled! + } + } + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_activated=17, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-DAG: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_detected=33, codeptr_ra=[[OTHER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-DAG: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[OTHER_RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/cancel/cancel_taskgroup.c b/final/runtime/test/ompt/cancel/cancel_taskgroup.c new file mode 100644 index 0000000..803fa97 --- /dev/null +++ b/final/runtime/test/ompt/cancel/cancel_taskgroup.c @@ -0,0 +1,89 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: clang-3, clang-4.0.0 +// Current GOMP interface implementation does not support cancellation; icc 16 has a bug +// XFAIL: gcc, icc-16 + +#include "callback.h" +#include <unistd.h> +#include <stdio.h> + +int main() +{ + int condition=0; + #pragma omp parallel num_threads(2) + {} + + print_frame(0); + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp taskgroup + { + #pragma omp task shared(condition) + { + printf("start execute task 1\n"); + OMPT_SIGNAL(condition); + 
OMPT_WAIT(condition,2); + #pragma omp cancellation point taskgroup + printf("end execute task 1\n"); + } + #pragma omp task shared(condition) + { + printf("start execute task 2\n"); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + #pragma omp cancellation point taskgroup + printf("end execute task 2\n"); + } + #pragma omp task shared(condition) + { + printf("start execute task 3\n"); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + #pragma omp cancellation point taskgroup + printf("end execute task 3\n"); + } + #pragma omp task if(0) shared(condition) + { + printf("start execute task 4\n"); + OMPT_WAIT(condition,1); + #pragma omp cancel taskgroup + printf("end execute task 4\n"); + } + OMPT_SIGNAL(condition); + } + } + #pragma omp barrier + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + // 
CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[CANCEL_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[CANCEL_TASK_ID]], flags=ompt_cancel_taskgroup|ompt_cancel_activated=24, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[CANCEL_TASK_ID]], second_task_id=[[PARENT_TASK_ID]], prior_task_status=ompt_task_cancel=3 + + // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]] + // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_detected=40, codeptr_ra={{0x[0-f]*}} + + return 0; +} diff --git a/final/runtime/test/ompt/cancel/cancel_worksharing.c b/final/runtime/test/ompt/cancel/cancel_worksharing.c new file mode 100644 index 0000000..db3b168 --- /dev/null +++ b/final/runtime/test/ompt/cancel/cancel_worksharing.c @@ -0,0 +1,67 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true 
%libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// Current GOMP interface implementation does not support cancellation; icc 16 does not distinguish between sections and loops +// XFAIL: gcc, icc-16 + +#include "callback.h" +#include <unistd.h> + +int main() +{ + int condition=0; + #pragma omp parallel num_threads(2) + { + int x = 0; + int i; + #pragma omp for + for(i = 0; i < 2; i++) + { + if(i == 0) + { + x++; + OMPT_SIGNAL(condition); + #pragma omp cancel for + } + else + { + x++; + OMPT_WAIT(condition,1); + delay(10000); + #pragma omp cancellation point for + } + } + } + #pragma omp parallel num_threads(2) + { + #pragma omp sections + { + #pragma omp section + { + OMPT_SIGNAL(condition); + #pragma omp cancel sections + } + #pragma omp section + { + OMPT_WAIT(condition,2); + delay(10000); + #pragma omp cancellation point sections + } + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + + // cancel for and sections + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_activated=20, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_activated=18, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_detected=36, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: 
task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_detected=34, codeptr_ra={{0x[0-f]*}} + + return 0; +} diff --git a/final/runtime/test/ompt/loadtool/tool_available/tool_available.c b/final/runtime/test/ompt/loadtool/tool_available/tool_available.c new file mode 100644 index 0000000..fbbdadd --- /dev/null +++ b/final/runtime/test/ompt/loadtool/tool_available/tool_available.c @@ -0,0 +1,74 @@ +// The OpenMP standard defines 3 ways of providing ompt_start_tool: +// 1. "statically-linking the tool’s definition of ompt_start_tool into an OpenMP application" +// RUN: %libomp-compile -DCODE -DTOOL && %libomp-run | FileCheck %s + +// Note: We should compile the tool without -fopenmp as other tools developer +// would do. Otherwise this test may pass for the wrong reasons on Darwin. +// RUN: %clang %flags -DTOOL -shared -fPIC %s -o %T/tool.so +// 2. "introducing a dynamically-linked library that includes the tool’s definition of ompt_start_tool into the application’s address space" +// 2.1 Link with tool during compilation +// RUN: %libomp-compile -DCODE %no-as-needed-flag %T/tool.so && %libomp-run | FileCheck %s +// 2.2 Link with tool during compilation, but AFTER the runtime +// RUN: %libomp-compile -DCODE -lomp %no-as-needed-flag %T/tool.so && %libomp-run | FileCheck %s +// 2.3 Inject tool via the dynamic loader +// RUN: %libomp-compile -DCODE && %preload-tool %libomp-run | FileCheck %s + +// 3. "providing the name of a dynamically-linked library appropriate for the architecture and operating system used by the application in the tool-libraries-var ICV" +// RUN: %libomp-compile -DCODE && env OMP_TOOL_LIBRARIES=%T/tool.so %libomp-run | FileCheck %s + +// REQUIRES: ompt + +/* + * This file contains code for an OMPT shared library tool to be + * loaded and the code for the OpenMP executable. 
+ * -DTOOL enables the code for the tool during compilation + * -DCODE enables the code for the executable during compilation + */ + +#ifdef CODE +#include "omp.h" + +int main() +{ + #pragma omp parallel num_threads(2) + { + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}0: ompt_event_runtime_shutdown + + return 0; +} + +#endif /* CODE */ + +#ifdef TOOL + +#include <stdio.h> +#include <ompt.h> + +int ompt_initialize( + ompt_function_lookup_t lookup, + ompt_data_t* tool_data) +{ + printf("0: NULL_POINTER=%p\n", (void*)NULL); + return 1; //success +} + +void ompt_finalize(ompt_data_t* tool_data) +{ + printf("0: ompt_event_runtime_shutdown\n"); +} + +ompt_start_tool_result_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) +{ + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#endif /* TOOL */ diff --git a/final/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c b/final/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c new file mode 100644 index 0000000..a6fe8e9 --- /dev/null +++ b/final/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c @@ -0,0 +1,104 @@ +// RUN: %clang %flags -shared -fPIC %s -o %T/first_tool.so +// RUN: %clang %flags -DTOOL -DSECOND_TOOL -shared -fPIC %s -o %T/second_tool.so +// RUN: %clang %flags -DTOOL -DTHIRD_TOOL -shared -fPIC %s -o %T/third_tool.so +// RUN: %libomp-compile -DCODE && env OMP_TOOL_LIBRARIES=%T/non_existing_file.so:%T/first_tool.so:%T/second_tool.so:%T/third_tool.so %libomp-run | FileCheck %s + +// REQUIRES: ompt + +/* + * This file contains code for three OMPT shared library tool to be + * loaded and the code for the OpenMP executable. 
+ * No option enables code for the first shared library + * (without an implementation of ompt_start_tool) during compilation + * -DTOOL -DSECOND_TOOL enables the code for the second tool during compilation + * -DTOOL -DTHIRD_TOOL enables the code for the third tool during compilation + * -DCODE enables the code for the executable during compilation + */ + +#ifdef CODE +#include "stdio.h" +#include "omp.h" +#include "ompt.h" + +int main() +{ + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + int result = omp_control_tool(omp_control_tool_start, 0, NULL); + printf("0: control_tool()=%d\n", result); + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback + + // CHECK: {{^}}0: Do not initialize tool + + // CHECK: {{^}}0: Do initialize tool + // CHECK: {{^}}0: Tool initialized + // CHECK: {{^}}0: ompt_event_thread_begin + // CHECK-DAG: {{^}}0: ompt_event_thread_begin + // CHECK-DAG: {{^}}0: control_tool()=-1 + // CHECK: {{^}}0: Tool finalized + + + return 0; +} + +#endif /* CODE */ + +#ifdef TOOL + +#include <ompt.h> +#include "stdio.h" + +#ifdef SECOND_TOOL +// The second tool has an implementation of ompt_start_tool that returns NULL +ompt_start_tool_result_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) +{ + printf("0: Do not initialize tool\n"); + return NULL; +} +#elif defined(THIRD_TOOL) +// The third tool has an implementation of ompt_start_tool that returns a +// pointer to a valid instance of ompt_start_tool_result_t + +static void +on_ompt_callback_thread_begin( + ompt_thread_type_t thread_type, + ompt_data_t *thread_data) +{ + printf("0: ompt_event_thread_begin\n"); +} + +int ompt_initialize( + ompt_function_lookup_t lookup, + ompt_data_t *tool_data) +{ + ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); + ompt_set_callback(ompt_callback_thread_begin, (ompt_callback_t)on_ompt_callback_thread_begin); + 
printf("0: Tool initialized\n"); + return 1; +} + +void ompt_finalize(ompt_data_t *tool_data) +{ + printf("0: Tool finalized\n"); +} + +ompt_start_tool_result_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) +{ + printf("0: Do initialize tool\n"); + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#endif + +#endif /* TOOL */ diff --git a/final/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c b/final/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c new file mode 100644 index 0000000..b0d3f2b --- /dev/null +++ b/final/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c @@ -0,0 +1,69 @@ +// The OpenMP standard defines 3 ways of providing ompt_start_tool: +// 1. "statically-linking the tool’s definition of ompt_start_tool into an OpenMP application" +// RUN: %libomp-compile -DCODE -DTOOL && %libomp-run | FileCheck %s + +// Note: We should compile the tool without -fopenmp as other tools developer +// would do. Otherwise this test may pass for the wrong reasons on Darwin. +// RUN: %clang %flags -DTOOL -shared -fPIC %s -o %T/tool.so +// 2. "introducing a dynamically-linked library that includes the tool’s definition of ompt_start_tool into the application’s address space" +// 2.1 Link with tool during compilation +// RUN: %libomp-compile -DCODE %no-as-needed-flag %T/tool.so && %libomp-run | FileCheck %s +// 2.2 Link with tool during compilation, but AFTER the runtime +// RUN: %libomp-compile -DCODE -lomp %no-as-needed-flag %T/tool.so && %libomp-run | FileCheck %s +// 2.3 Inject tool via the dynamic loader +// RUN: %libomp-compile -DCODE && %preload-tool %libomp-run | FileCheck %s + +// 3. 
"providing the name of a dynamically-linked library appropriate for the architecture and operating system used by the application in the tool-libraries-var ICV" +// RUN: %libomp-compile -DCODE && env OMP_TOOL_LIBRARIES=%T/tool.so %libomp-run | FileCheck %s + +// REQUIRES: ompt + +/* + * This file contains code for an OMPT shared library tool to be + * loaded and the code for the OpenMP executable. + * -DTOOL enables the code for the tool during compilation + * -DCODE enables the code for the executable during compilation + */ + +#ifdef CODE +#include "stdio.h" +#include "omp.h" +#include "ompt.h" + +int main() +{ + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + int result = omp_control_tool(omp_control_tool_start, 0, NULL); + printf("0: control_tool()=%d\n", result); + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback + + // CHECK: {{^}}0: Do not initialize tool + // CHECK: {{^}}0: control_tool()=-2 + + + return 0; +} + +#endif /* CODE */ + +#ifdef TOOL + +#include <ompt.h> +#include "stdio.h" + +ompt_start_tool_result_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) +{ + printf("0: Do not initialize tool\n"); + return NULL; +} +#endif /* TOOL */ diff --git a/final/runtime/test/ompt/misc/api_calls_from_other_thread.cpp b/final/runtime/test/ompt/misc/api_calls_from_other_thread.cpp new file mode 100644 index 0000000..470d7cd --- /dev/null +++ b/final/runtime/test/ompt/misc/api_calls_from_other_thread.cpp @@ -0,0 +1,92 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s +// REQUIRES: ompt, linux + +#include <thread> +#include "callback.h" + +void f() { + ompt_data_t *tdata = ompt_get_thread_data(); + uint64_t tvalue = tdata ? 
tdata->value : 0; + + printf("%" PRIu64 ": ompt_get_num_places()=%d\n", tvalue, + ompt_get_num_places()); + + printf("%" PRIu64 ": ompt_get_place_proc_ids()=%d\n", tvalue, + ompt_get_place_proc_ids(0, 0, NULL)); + + printf("%" PRIu64 ": ompt_get_place_num()=%d\n", tvalue, + ompt_get_place_num()); + + printf("%" PRIu64 ": ompt_get_partition_place_nums()=%d\n", tvalue, + ompt_get_partition_place_nums(0, NULL)); + + printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", tvalue, ompt_get_proc_id()); + + printf("%" PRIu64 ": ompt_get_num_procs()=%d\n", tvalue, + ompt_get_num_procs()); + + ompt_callback_t callback; + printf("%" PRIu64 ": ompt_get_callback()=%d\n", tvalue, + ompt_get_callback(ompt_callback_thread_begin, &callback)); + + printf("%" PRIu64 ": ompt_get_state()=%d\n", tvalue, ompt_get_state(NULL)); + + int state = omp_state_undefined; + const char *state_name; + printf("%" PRIu64 ": ompt_enumerate_states()=%d\n", tvalue, + ompt_enumerate_states(state, &state, &state_name)); + + int impl = ompt_mutex_impl_unknown; + const char *impl_name; + printf("%" PRIu64 ": ompt_enumerate_mutex_impls()=%d\n", tvalue, + ompt_enumerate_mutex_impls(impl, &impl, &impl_name)); + + printf("%" PRIu64 ": ompt_get_thread_data()=%p\n", tvalue, + ompt_get_thread_data()); + + printf("%" PRIu64 ": ompt_get_parallel_info()=%d\n", tvalue, + ompt_get_parallel_info(0, NULL, NULL)); + + printf("%" PRIu64 ": ompt_get_task_info()=%d\n", tvalue, + ompt_get_task_info(0, NULL, NULL, NULL, NULL, NULL)); +} + +int main() { +#pragma omp parallel num_threads(1) + {} + + std::thread t1(f); + t1.join(); + + // Check if libomp supports the callbacks for this test. 
+ + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_get_num_places()={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids()={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=-1 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_partition_place_nums()=0 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=-1 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_procs()={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_callback()=1 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_state()=0 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_enumerate_states()=1 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_enumerate_mutex_impls()=1 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_thread_data()=[[NULL]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_parallel_info()=0 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_task_info()=0 + + return 0; +} diff --git a/final/runtime/test/ompt/misc/api_calls_misc.c b/final/runtime/test/ompt/misc/api_calls_misc.c new file mode 100644 index 0000000..d567b1b --- /dev/null +++ b/final/runtime/test/ompt/misc/api_calls_misc.c @@ -0,0 +1,72 @@ +// RUN: %libomp-compile && %libomp-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() { +#pragma omp parallel num_threads(1) + { + // ompt_get_callback() + ompt_callback_t callback; + ompt_get_callback(ompt_callback_thread_begin, &callback); + printf("%" PRIu64 ": &on_ompt_callback_thread_begin=%p\n", + ompt_get_thread_data()->value, &on_ompt_callback_thread_begin); + printf("%" PRIu64 ": ompt_get_callback() result=%p\n", + ompt_get_thread_data()->value, callback); + + // ompt_get_state() + printf("%" PRIu64 ": ompt_get_state()=%d\n", ompt_get_thread_data()->value, + ompt_get_state(NULL)); + + // ompt_enumerate_states() + int state = omp_state_undefined; + const char *state_name; + int steps = 0; + while (ompt_enumerate_states(state, &state, &state_name) && steps < 1000) { + steps++; + if (!state_name) + printf("%" PRIu64 ": state_name is 
NULL\n", + ompt_get_thread_data()->value); + } + if (steps >= 1000) { + // enumeration did not end after 1000 steps + printf("%" PRIu64 ": states enumeration did not end\n", + ompt_get_thread_data()->value); + } + + // ompt_enumerate_mutex_impls() + int impl = ompt_mutex_impl_unknown; + const char *impl_name; + steps = 0; + while (ompt_enumerate_mutex_impls(impl, &impl, &impl_name) && + steps < 1000) { + steps++; + if (!impl_name) + printf("%" PRIu64 ": impl_name is NULL\n", + ompt_get_thread_data()->value); + } + if (steps >= 1000) { + // enumeration did not end after 1000 steps + printf("%" PRIu64 ": mutex_impls enumeration did not end\n", + ompt_get_thread_data()->value); + } + } + + // Check if libomp supports the callbacks for this test. + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: &on_ompt_callback_thread_begin + // CHECK-SAME: =[[FUNCTION_POINTER:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_get_callback() result=[[FUNCTION_POINTER]] + + // CHECK: {{^}}[[THREAD_ID]]: ompt_get_state()=1 + + // CHECK-NOT: {{^}}[[THREAD_ID]]: state_name is NULL + // CHECK-NOT: {{^}}[[THREAD_ID]]: states enumeration did not end + + // CHECK-NOT: {{^}}[[THREAD_ID]]: impl_name is NULL + // CHECK-NOT: {{^}}[[THREAD_ID]]: mutex_impls enumeration did not end + + return 0; +} diff --git a/final/runtime/test/ompt/misc/api_calls_places.c b/final/runtime/test/ompt/misc/api_calls_places.c new file mode 100644 index 0000000..ad338a7 --- /dev/null +++ b/final/runtime/test/ompt/misc/api_calls_places.c @@ -0,0 +1,88 @@ +// RUN: %libomp-compile && env OMP_PLACES=cores %libomp-run | FileCheck %s +// REQUIRES: ompt, linux +#include "callback.h" +#include <omp.h> +#define __USE_GNU +#include <sched.h> +#undef __USE_GNU + +void print_list(char *function_name, int size, int list[]) { + printf("%" PRIu64 ": %s(0)=(%d", ompt_get_thread_data()->value, function_name, + list[0]); + int i; + for (i = 1; i < size; i++) { + printf(",%d", list[i]); + } + 
printf(")\n"); +} + +int main() { +#pragma omp parallel num_threads(1) + { + printf("%" PRIu64 ": omp_get_num_places()=%d\n", + ompt_get_thread_data()->value, omp_get_num_places()); + printf("%" PRIu64 ": ompt_get_num_places()=%d\n", + ompt_get_thread_data()->value, ompt_get_num_places()); + + int omp_ids_size = omp_get_place_num_procs(0); + int omp_ids[omp_ids_size]; + omp_get_place_proc_ids(0, omp_ids); + print_list("omp_get_place_proc_ids", omp_ids_size, omp_ids); + int ompt_ids_size = ompt_get_place_proc_ids(0, 0, NULL); + int ompt_ids[ompt_ids_size]; + ompt_get_place_proc_ids(0, ompt_ids_size, ompt_ids); + print_list("ompt_get_place_proc_ids", ompt_ids_size, ompt_ids); + + printf("%" PRIu64 ": omp_get_place_num()=%d\n", + ompt_get_thread_data()->value, omp_get_place_num()); + printf("%" PRIu64 ": ompt_get_place_num()=%d\n", + ompt_get_thread_data()->value, ompt_get_place_num()); + + int omp_nums_size = omp_get_partition_num_places(); + int omp_nums[omp_nums_size]; + omp_get_partition_place_nums(omp_nums); + print_list("omp_get_partition_place_nums", omp_nums_size, omp_nums); + int ompt_nums_size = ompt_get_partition_place_nums(0, NULL); + int ompt_nums[ompt_nums_size]; + ompt_get_partition_place_nums(ompt_nums_size, ompt_nums); + print_list("ompt_get_partition_place_nums", ompt_nums_size, ompt_nums); + + printf("%" PRIu64 ": sched_getcpu()=%d\n", ompt_get_thread_data()->value, + sched_getcpu()); + printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", + ompt_get_thread_data()->value, ompt_get_proc_id()); + + printf("%" PRIu64 ": omp_get_num_procs()=%d\n", + ompt_get_thread_data()->value, omp_get_num_procs()); + printf("%" PRIu64 ": ompt_get_num_procs()=%d\n", + ompt_get_thread_data()->value, ompt_get_num_procs()); + } + + // Check if libomp supports the callbacks for this test. 
+ + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: omp_get_num_places + // CHECK-SAME: ()=[[NUM_PLACES:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_places()=[[NUM_PLACES]] + + // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_proc_ids + // CHECK-SAME: (0)=([[PROC_IDS:[0-9\,]+]]) + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids(0)=([[PROC_IDS]]) + + // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_num()=[[PLACE_NUM:[-]?[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=[[PLACE_NUM]] + + // CHECK: {{^}}[[MASTER_ID]]: omp_get_partition_place_nums + // CHECK-SAME: (0)=([[PARTITION_PLACE_NUMS:[0-9\,]+]]) + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_partition_place_nums + // CHECK-SAME: (0)=([[PARTITION_PLACE_NUMS]]) + + // CHECK: {{^}}[[MASTER_ID]]: sched_getcpu()=[[CPU_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=[[CPU_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: omp_get_num_procs()=[[NUM_PROCS:[-]?[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_procs()=[[NUM_PROCS]] + + return 0; +} diff --git a/final/runtime/test/ompt/misc/control_tool.c b/final/runtime/test/ompt/misc/control_tool.c new file mode 100644 index 0000000..2c59666 --- /dev/null +++ b/final/runtime/test/ompt/misc/control_tool.c @@ -0,0 +1,29 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN +#include "callback.h" +#include <omp.h> + +int main() +{ + #pragma omp parallel num_threads(1) + { + print_frame_from_outlined_fn(1); + print_frame(0); + omp_control_tool(omp_control_tool_flush, 1, NULL); + print_current_address(0); + } + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_control_tool' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address({{.}})=[[EXIT_FRAME:0x[0-f]*]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/misc/control_tool_no_ompt_support.c b/final/runtime/test/ompt/misc/control_tool_no_ompt_support.c new file mode 100644 index 0000000..ee64da0 --- /dev/null +++ b/final/runtime/test/ompt/misc/control_tool_no_ompt_support.c @@ -0,0 +1,12 @@ +// RUN: %libomp-compile-and-run +#include <omp.h> + +int main() +{ + #pragma omp parallel num_threads(1) + { + omp_control_tool(omp_control_tool_flush, 1, NULL); + } + + return 0; +} diff --git a/final/runtime/test/ompt/misc/idle.c b/final/runtime/test/ompt/misc/idle.c new file mode 100644 index 0000000..7413c32 --- /dev/null +++ b/final/runtime/test/ompt/misc/idle.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(3) + { + #pragma omp atomic + x++; + } + #pragma omp parallel num_threads(2) + { + #pragma omp atomic + x++; + } + + + printf("x=%d\n", x); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_idle_begin: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_idle_end: + + return 0; +} diff --git a/final/runtime/test/ompt/misc/interoperability.cpp b/final/runtime/test/ompt/misc/interoperability.cpp new file mode 100644 index 0000000..102e6de --- /dev/null +++ b/final/runtime/test/ompt/misc/interoperability.cpp @@ -0,0 +1,115 @@ +// RUN: %libomp-cxx-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt + +#include <iostream> +#include <thread> +#include <alloca.h> + +#include "callback.h" +#include "omp.h" + +int condition = 0; + +void f() { + // Call OpenMP API function to force initialization of OMPT. + // (omp_get_thread_num() does not work because it just returns 0 if the + // runtime isn't initialized yet...) + omp_get_num_threads(); + + // Call alloca() to force availability of frame pointer + void *p = alloca(0); + + OMPT_SIGNAL(condition); + // Wait for both initial threads to arrive that will eventually become the + // master threads in the following parallel region. + OMPT_WAIT(condition, 2); + +#pragma omp parallel num_threads(2) + { + // Wait for all threads to arrive so that no worker thread can be reused... + OMPT_SIGNAL(condition); + OMPT_WAIT(condition, 6); + } +} + +int main() { + std::thread t1(f); + std::thread t2(f); + t1.join(); + t2.join(); +} + +// Check if libomp supports the callbacks for this test. 
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// first master thread +// CHECK: {{^}}[[MASTER_ID_1:[0-9]+]]: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID_1]] + +// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_task_create: parent_task_id=0 +// CHECK-SAME: parent_task_frame.exit=[[NULL]] +// CHECK-SAME: parent_task_frame.reenter=[[NULL]] +// CHECK-SAME: new_task_id=[[PARENT_TASK_ID_1:[0-9]+]] +// CHECK-SAME: codeptr_ra=[[NULL]], task_type=ompt_task_initial=1 +// CHECK-SAME: has_dependences=no + +// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID_1]] +// CHECK-SAME: parent_task_frame.exit=[[NULL]] +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}} +// CHECK-SAME: parallel_id=[[PARALLEL_ID_1:[0-9]+]], requested_team_size=2 +// CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, invoker={{.*}} + +// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_parallel_end: +// CHECK-SAME: parallel_id=[[PARALLEL_ID_1]], task_id=[[PARENT_TASK_ID_1]] +// CHECK-SAME: invoker={{[0-9]+}} + +// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[MASTER_ID_1]] + +// second master thread +// CHECK: {{^}}[[MASTER_ID_2:[0-9]+]]: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID_2]] + +// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_task_create: parent_task_id=0 +// CHECK-SAME: parent_task_frame.exit=[[NULL]] +// CHECK-SAME: parent_task_frame.reenter=[[NULL]] +// 
CHECK-SAME: new_task_id=[[PARENT_TASK_ID_2:[0-9]+]] +// CHECK-SAME: codeptr_ra=[[NULL]], task_type=ompt_task_initial=1 +// CHECK-SAME: has_dependences=no + +// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID_2]] +// CHECK-SAME: parent_task_frame.exit=[[NULL]] +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}} +// CHECK-SAME: parallel_id=[[PARALLEL_ID_2:[0-9]+]] +// CHECK-SAME: requested_team_size=2, codeptr_ra=0x{{[0-f]+}} +// CHECK-SAME: invoker={{.*}} + +// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_parallel_end: +// CHECK-SAME: parallel_id=[[PARALLEL_ID_2]], task_id=[[PARENT_TASK_ID_2]] +// CHECK-SAME: invoker={{[0-9]+}} + +// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[MASTER_ID_2]] + +// first worker thread +// CHECK: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID_1]] + +// CHECK: {{^}}[[THREAD_ID_1]]: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[THREAD_ID_1]] + +// second worker thread +// CHECK: {{^}}[[THREAD_ID_2:[0-9]+]]: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID_2]] + +// CHECK: {{^}}[[THREAD_ID_2]]: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[THREAD_ID_2]] diff --git a/final/runtime/test/ompt/misc/threads.c b/final/runtime/test/ompt/misc/threads.c new file mode 100644 index 0000000..4a0fc6f --- /dev/null +++ b/final/runtime/test/ompt/misc/threads.c @@ -0,0 +1,34 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() { + int x = 0; +#pragma omp parallel num_threads(4) + { +#pragma omp atomic + x++; + } + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: + // CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // CHECK: {{^}}[[MASTER_ID]]: 
ompt_event_thread_end: + // CHECK-SAME: thread_id=[[MASTER_ID]] + // CHECK: {{^}}[[WORKER_ID1:[0-9]+]]: ompt_event_thread_begin: + // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID1]] + // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_thread_end: + // CHECK-SAME: thread_id=[[WORKER_ID1]] + // CHECK: {{^}}[[WORKER_ID2:[0-9]+]]: ompt_event_thread_begin: + // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID2]] + // CHECK: {{^}}[[WORKER_ID2]]: ompt_event_thread_end: + // CHECK-SAME: thread_id=[[WORKER_ID2]] + // CHECK: {{^}}[[WORKER_ID3:[0-9]+]]: ompt_event_thread_begin: + // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID3]] + // CHECK: {{^}}[[WORKER_ID3]]: ompt_event_thread_end: + // CHECK-SAME: thread_id=[[WORKER_ID3]] + + return 0; +} diff --git a/final/runtime/test/ompt/misc/threads_nested.c b/final/runtime/test/ompt/misc/threads_nested.c new file mode 100644 index 0000000..0d38dcf --- /dev/null +++ b/final/runtime/test/ompt/misc/threads_nested.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() { + + int condition = 0; + int x = 0; + omp_set_nested(1); +#pragma omp parallel num_threads(2) + { +#pragma omp parallel num_threads(2) + { + OMPT_SIGNAL(condition); + OMPT_WAIT(condition, 4); + } + } + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: + // CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_thread_end: + // CHECK-SAME: thread_id=[[MASTER_ID]] + // CHECK: {{^}}[[WORKER_ID1:[0-9]+]]: ompt_event_thread_begin: + // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID1]] + // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_thread_end: + // CHECK-SAME: thread_id=[[WORKER_ID1]] + // CHECK: {{^}}[[WORKER_ID2:[0-9]+]]: ompt_event_thread_begin: + // CHECK-SAME: 
thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID2]] + // CHECK: {{^}}[[WORKER_ID2]]: ompt_event_thread_end: + // CHECK-SAME: thread_id=[[WORKER_ID2]] + // CHECK: {{^}}[[WORKER_ID3:[0-9]+]]: ompt_event_thread_begin: + // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[WORKER_ID3]] + // CHECK: {{^}}[[WORKER_ID3]]: ompt_event_thread_end: + // CHECK-SAME: thread_id=[[WORKER_ID3]] + + return 0; +} diff --git a/final/runtime/test/ompt/misc/unset_callback.c b/final/runtime/test/ompt/misc/unset_callback.c new file mode 100644 index 0000000..9074ad3 --- /dev/null +++ b/final/runtime/test/ompt/misc/unset_callback.c @@ -0,0 +1,29 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() +{ + #pragma omp parallel num_threads(1) + { + + } + ompt_set_callback(ompt_callback_parallel_begin, NULL); + #pragma omp parallel num_threads(1) + { + + } + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_parallel_begin: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: + // CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: + + return 0; +} diff --git a/final/runtime/test/ompt/ompt-signal.h b/final/runtime/test/ompt/ompt-signal.h new file mode 100644 index 0000000..b5c28cf --- /dev/null +++ b/final/runtime/test/ompt/ompt-signal.h @@ -0,0 +1,31 @@ +#if defined(WIN32) || defined(_WIN32) +#include <windows.h> +#define delay() Sleep(1); +#else +#include <unistd.h> +#define delay(t) usleep(t); +#endif + +// These functions are used to provide a signal-wait mechanism to enforce expected scheduling for the test cases. +// Conditional variable (s) needs to be shared! 
Initialize to 0 + +#define OMPT_SIGNAL(s) ompt_signal(&s) +//inline +void ompt_signal(int* s) +{ + #pragma omp atomic + (*s)++; +} + +#define OMPT_WAIT(s,v) ompt_wait(&s,v) +// wait for s >= v +//inline +void ompt_wait(int *s, int v) +{ + int wait=0; + do{ + delay(10); + #pragma omp atomic read + wait = (*s); + }while(wait<v); +} diff --git a/final/runtime/test/ompt/parallel/dynamic_enough_threads.c b/final/runtime/test/ompt/parallel/dynamic_enough_threads.c new file mode 100644 index 0000000..4c340ba --- /dev/null +++ b/final/runtime/test/ompt/parallel/dynamic_enough_threads.c @@ -0,0 +1,43 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" + +int main() +{ + omp_set_dynamic(1); + + #pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + + //team-size of 1-4 is expected + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: 
parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[1-4]}} + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/dynamic_not_enough_threads.c b/final/runtime/test/ompt/parallel/dynamic_not_enough_threads.c new file mode 100644 index 0000000..f3a6e17 --- /dev/null +++ b/final/runtime/test/ompt/parallel/dynamic_not_enough_threads.c @@ -0,0 +1,43 @@ +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" + +int main() +{ + omp_set_dynamic(1); + + #pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + + //team-size of 1-4 is expected + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[1-4]}} + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/max_active_levels_serialized.c b/final/runtime/test/ompt/parallel/max_active_levels_serialized.c new file mode 100644 index 0000000..bbe73ef --- /dev/null +++ 
b/final/runtime/test/ompt/parallel/max_active_levels_serialized.c @@ -0,0 +1,73 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() +{ + omp_set_nested(1); + omp_set_max_active_levels(1); + + #pragma omp parallel num_threads(2) + { + print_ids(0); + print_ids(1); + #pragma omp parallel num_threads(2) + { + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // 
THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: 
{{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/nested.c b/final/runtime/test/ompt/parallel/nested.c new file mode 100644 index 0000000..035529c --- /dev/null +++ b/final/runtime/test/ompt/parallel/nested.c @@ -0,0 +1,298 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN +#include "callback.h" +#include <omp.h> +#include <unistd.h> + +int main() +{ + int condition=0; + omp_set_nested(1); + print_frame(0); + + #pragma omp parallel num_threads(4) + { + print_frame_from_outlined_fn(1); + print_ids(0); + 
print_ids(1); + print_frame(0); + + //get all implicit task events before starting nested: + #pragma omp barrier + + #pragma omp parallel num_threads(4) + { + print_frame_from_outlined_fn(1); + print_ids(0); + print_ids(1); + print_ids(2); + print_frame(0); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,16); + #pragma omp barrier + print_fuzzy_address(1); + print_ids(0); + } + print_fuzzy_address(2); + print_ids(0); + } + print_fuzzy_address(3); + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
+ + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + + // THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // nested parallel masters + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], 
exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[NESTED_EXIT:0x[0-f]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // explicit barrier + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: 
parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] + // implicit barrier + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // implicit barrier + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], 
task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: 
ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: 
{{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, 
task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // nested parallel worker threads + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // can't reliably tell which parallel region is the parent... + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/nested_lwt.c b/final/runtime/test/ompt/parallel/nested_lwt.c new file mode 100644 index 0000000..8348376 --- /dev/null +++ b/final/runtime/test/ompt/parallel/nested_lwt.c @@ -0,0 +1,334 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +// 
UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> +#include <unistd.h> + +int main() +{ + omp_set_nested(1); + int condition = 0; + + #pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + //get all implicit task events before starting nested: + #pragma omp barrier + #pragma omp parallel num_threads(1) + { + print_ids(0); + print_ids(1); + print_ids(2); + //get all implicit task events before starting nested: + #pragma omp barrier + #pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + print_ids(2); + print_ids(3); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,16); + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + } + print_fuzzy_address(3); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: 
{{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // nested parallel masters + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], 
task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: 
parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: 
{{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], 
parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] + // THREADS-NOT: 
{{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], 
reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: 
parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], 
parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], 
reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // nested parallel worker threads + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: 
task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // can't reliably tell which parallel region is the parent... 
+ // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/nested_serialized.c b/final/runtime/test/ompt/parallel/nested_serialized.c new file mode 100644 index 0000000..f87b8f4 --- /dev/null +++ b/final/runtime/test/ompt/parallel/nested_serialized.c @@ -0,0 +1,128 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() +{ + omp_set_nested(0); + + #pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + #pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
+ + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] 
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, 
parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, 
invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], 
task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/nested_thread_num.c b/final/runtime/test/ompt/parallel/nested_thread_num.c new file mode 100644 index 0000000..e952f80 --- /dev/null +++ b/final/runtime/test/ompt/parallel/nested_thread_num.c @@ -0,0 +1,357 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN +#include "callback.h" +#include <omp.h> +#include <unistd.h> + +int main() { + int condition = 0; + omp_set_nested(1); + print_frame(0); + +#pragma omp parallel num_threads(2) + { + print_frame_from_outlined_fn(1); + print_ids(0); + print_ids(1); + print_frame(0); + +// get all implicit task events before starting nested: +#pragma omp barrier + +#pragma omp parallel num_threads(2) + { + 
print_frame_from_outlined_fn(1); + print_ids(0); + print_ids(1); + print_ids(2); + print_frame(0); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition, 4); +#pragma omp barrier + print_fuzzy_address(1); + print_ids(0); + } + print_fuzzy_address(2); + print_ids(0); + } + print_fuzzy_address(3); + + return 0; +} +// Check if libomp supports the callbacks for this test. +// CHECK-NOT: {{^}}0: Could not register callback + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// make sure initial data pointers are null +// CHECK-NOT: 0: parallel_data initially not null +// CHECK-NOT: 0: task_data initially not null +// CHECK-NOT: 0: thread_data initially not null + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], +// CHECK-SAME: parent_task_frame.exit=[[NULL]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], +// CHECK-SAME: requested_team_size=2, +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// CHECK-SAME: invoker=[[PARALLEL_INVOKER:[0-9]+]] + +// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: +// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: + +// Note that we cannot ensure that the worker threads have already called +// barrier_end and implicit_task_end before parallel_end! 
+ +// CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: + + +// CHECK: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] +// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + +// THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]] +// THREADS: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] +// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: +// THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], +// THREADS-SAME: parent_task_frame.exit=[[NULL]], +// THREADS-SAME: parent_task_frame.reenter=[[MAIN_REENTER]], +// THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, +// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// THREADS-SAME: invoker=[[PARALLEL_INVOKER:[0-9]+]] + +// nested parallel masters +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: +// THREADS-SAME: parallel_id=[[PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]], +// THREADS-SAME: team_size=2, thread_num=0 + +// THREADS: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] + +// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], +// THREADS-SAME: reenter_frame=[[NULL]], +// THREADS-SAME: thread_num=0 + +// THREADS: {{^}}[[MASTER_ID]]: task level 1: +// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], +// THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], +// THREADS-SAME: reenter_frame=[[MAIN_REENTER]] + +// THREADS: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: +// THREADS-SAME: parent_task_id=[[IMPLICIT_TASK_ID]], +// THREADS-SAME: parent_task_frame.exit=[[EXIT]], +// THREADS-SAME: parent_task_frame.reenter=[[REENTER]], +// THREADS-SAME: 
parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], +// THREADS-SAME: requested_team_size=2, +// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// THREADS-SAME: invoker=[[PARALLEL_INVOKER]] + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// THREADS-SAME: thread_num=0 + +// THREADS: __builtin_frame_address({{.}})=[[NESTED_EXIT:0x[0-f]+]] + +// THREADS: {{^}}[[MASTER_ID]]: task level 0: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]], +// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]], +// THREADS-SAME: thread_num=0 + +// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], +// THREADS-SAME: reenter_frame=[[REENTER]] + +// THREADS: {{^}}[[MASTER_ID]]: task level 2: +// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]], +// THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], +// THREADS-SAME: reenter_frame=[[MAIN_REENTER]] + +// THREADS: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]] + +// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end +// explicit barrier + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]], +// THREADS-SAME: codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + +// THREADS: {{^}}[[MASTER_ID]]: task level 0: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]], +// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]] + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]] + +// 
THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]] + +// THREADS: {{^}}[[MASTER_ID]]: task level 0: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]], +// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] + +// implicit barrier +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]], +// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + +// THREADS: {{^}}[[MASTER_ID]]: task level 0: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]], +// THREADS-SAME: exit_frame=[[NULL]], reenter_frame=[[NULL]] + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]], +// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], +// THREADS-SAME: invoker=[[PARALLEL_INVOKER]], +// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + +// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + +// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + +// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], +// THREADS-SAME: reenter_frame=[[NULL]] + +// implicit barrier +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: +// THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], +// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + +// THREADS: 
{{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], +// THREADS-SAME: reenter_frame=[[NULL]] + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], +// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: +// THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], +// THREADS-SAME: invoker=[[PARALLEL_INVOKER]], +// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + +// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + +// Worker of first nesting level + +// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: +// THREADS-SAME: parallel_id=[[PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// THREADS-SAME: thread_num=[[OUTER_THREADNUM:[0-9]+]] + +// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], +// THREADS-SAME: thread_num=[[OUTER_THREADNUM]] + +// THREADS: {{^}}[[THREAD_ID]]: task level 1: +// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]], +// THREADS-SAME: task_id=[[PARENT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: +// THREADS-SAME: parent_task_id=[[IMPLICIT_TASK_ID]], +// THREADS-SAME: parent_task_frame.exit={{0x[0-f]+}}, +// THREADS-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, +// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, +// THREADS-SAME: invoker=[[PARALLEL_INVOKER]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: 
task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// THREADS-SAME: thread_num=[[INNER_THREADNUM:[0-9]+]] + +// THREADS: {{^}}[[THREAD_ID]]: task level 0: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]], +// THREADS-SAME: thread_num=[[INNER_THREADNUM]] + +// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], +// THREADS-SAME: thread_num=[[OUTER_THREADNUM]] + +// THREADS: {{^}}[[THREAD_ID]]: task level 2: +// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]], +// THREADS-SAME: task_id=[[PARENT_TASK_ID]] + +// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + +// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: +// THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + +// nested parallel worker threads + +// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], +// 
THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] +// THREADS-SAME: thread_num=[[THREADNUM:[0-9]+]] + +// THREADS: {{^}}[[THREAD_ID]]: task level 0: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]] +// THREADS-SAME: thread_num=[[THREADNUM]] + +// can't reliably tell which parallel region is the parent... + +// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, +// THREADS-SAME: task_id={{[0-9]+}} +// THREADS-SAME: thread_num={{[01]}} + +// THREADS: {{^}}[[THREAD_ID]]: task level 2: +// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]], +// THREADS-SAME: task_id=[[PARENT_TASK_ID]] +// THREADS-SAME: thread_num=0 + +// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + +// other nested parallel worker threads + +// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] +// THREADS-SAME: thread_num=[[THREADNUM:[0-9]+]] + +// THREADS: {{^}}[[THREAD_ID]]: task level 0: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]] +// THREADS-SAME: thread_num=[[THREADNUM]] + +// can't reliably tell which parallel region is the parent... 
+ +// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, +// THREADS-SAME: task_id={{[0-9]+}} +// THREADS-SAME: thread_num={{[01]}} + +// THREADS: {{^}}[[THREAD_ID]]: task level 2: +// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]], +// THREADS-SAME: task_id=[[PARENT_TASK_ID]] +// THREADS-SAME: thread_num=0 + +// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: +// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], +// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + +// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: +// THREADS-SAME: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + diff --git a/final/runtime/test/ompt/parallel/no_thread_num_clause.c b/final/runtime/test/ompt/parallel/no_thread_num_clause.c new file mode 100644 index 0000000..e23d89a --- /dev/null +++ b/final/runtime/test/ompt/parallel/no_thread_num_clause.c @@ -0,0 +1,95 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +#include "callback.h" + +int main() +{ + omp_set_num_threads(4); + #pragma omp parallel + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
+ + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=281474976710658, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}} + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], 
task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // 
THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/normal.c b/final/runtime/test/ompt/parallel/normal.c new file mode 100644 index 0000000..2cc9ce1 --- /dev/null +++ b/final/runtime/test/ompt/parallel/normal.c @@ -0,0 +1,132 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads \ +// RUN: | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +#include "callback.h" + +int main() { +#pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // Only check callback names, arguments are verified in THREADS below. + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + + // Note that we cannot ensure that the worker threads have already called + // barrier_end and implicit_task_end before parallel_end! + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin + // THREADS-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin + // THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]] + // THREADS-SAME: parent_task_frame.exit=[[NULL]] + // THREADS-SAME: parent_task_frame.reenter={{0x[0-f]+}} + // THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4 + // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]] + // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0 + // THREADS-SAME: 
parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1 + // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]] + // THREADS-SAME: task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end + // parallel_id is 0 because the region ended in the barrier! + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin + // THREADS-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]] + // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0 + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1 + // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]] + // THREADS-SAME: task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end + // parallel_id is 0 because the region ended in the barrier! 
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin + // THREADS-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]] + // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0 + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1 + // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]] + // THREADS-SAME: task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end + // parallel_id is 0 because the region ended in the barrier! 
+ // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin + // THREADS-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]] + // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0 + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1 + // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]] + // THREADS-SAME: task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end + // parallel_id is 0 because the region ended in the barrier! + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/not_enough_threads.c b/final/runtime/test/ompt/parallel/not_enough_threads.c new file mode 100644 index 0000000..8a0469a --- /dev/null +++ b/final/runtime/test/ompt/parallel/not_enough_threads.c @@ -0,0 +1,90 @@ +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | %sort-threads \ +// RUN: | FileCheck --check-prefix=THREADS %s + +// REQUIRES: ompt + +#include "callback.h" + +int main() { +#pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback + + // Make sure initial data pointers are null. + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // Only check callback names, arguments are verified in THREADS below. + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + + // Note that we cannot ensure that the worker threads have already called + // barrier_end and implicit_task_end before parallel_end! + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin + // THREADS-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin + // THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]] + // THREADS-SAME: parent_task_frame.exit=[[NULL]] + // THREADS-SAME: parent_task_frame.reenter={{0x[0-f]+}} + // THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4 + // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]] + // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0 + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1 + // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]] + // THREADS-SAME: task_id=[[PARENT_TASK_ID]] + + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: 
ompt_event_barrier_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // parallel_id is 0 because the region ended in the barrier! + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin + // THREADS-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]] + // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0 + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1 + // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]] + // THREADS-SAME: task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + // THREADS-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // parallel_id is 0 because the region ended in the barrier! 
+ // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/parallel_if0.c b/final/runtime/test/ompt/parallel/parallel_if0.c new file mode 100644 index 0000000..f5c4454 --- /dev/null +++ b/final/runtime/test/ompt/parallel/parallel_if0.c @@ -0,0 +1,76 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" + +int main() +{ +// print_frame(0); + #pragma omp parallel if(0) + { +// print_frame(1); + print_ids(0); + print_ids(1); +// print_frame(0); + #pragma omp parallel if(0) + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); +// print_frame(0); + #pragma omp task + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + print_ids(3); + } + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // 
CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]] + + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], 
task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/parallel/serialized.c b/final/runtime/test/ompt/parallel/serialized.c new file mode 100644 index 0000000..e7a9207 --- /dev/null +++ b/final/runtime/test/ompt/parallel/serialized.c @@ -0,0 +1,77 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" + +int main() +{ +// print_frame(0); + #pragma omp parallel num_threads(1) + { +// print_frame(1); + print_ids(0); + print_ids(1); +// print_frame(0); + #pragma omp parallel num_threads(1) + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); +// print_frame(0); + #pragma omp task + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + print_ids(3); + } + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[OUTER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[INNER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] 
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]] + + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[INNER_RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[INNER_RETURN_ADDRESS]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: 
ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[OUTER_RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[OUTER_RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/barrier/explicit.c b/final/runtime/test/ompt/synchronization/barrier/explicit.c new file mode 100644 index 0000000..d60acd6 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/barrier/explicit.c @@ -0,0 +1,58 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + #pragma omp atomic + x++; + + #pragma omp barrier + print_current_address(); + + #pragma omp atomic + x++; + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread explicit barrier + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + 
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + + + // worker thread explicit barrier + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/barrier/for_loop.c b/final/runtime/test/ompt/synchronization/barrier/for_loop.c new file mode 100644 index 0000000..5259447 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/barrier/for_loop.c @@ -0,0 +1,56 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, 
gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() +{ + int y[] = {0,1,2,3}; + + #pragma omp parallel num_threads(2) + { + //implicit barrier at end of for loop + int i; + #pragma omp for + for (i = 0; i < 4; i++) + { + y[i]++; + } + print_current_address(); + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at loop end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread implicit barrier at loop end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: 
{{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // worker thread implicit barrier after parallel + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/barrier/for_simd.c b/final/runtime/test/ompt/synchronization/barrier/for_simd.c new file mode 100644 index 0000000..351b2c2 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/barrier/for_simd.c @@ -0,0 +1,33 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// XFAIL: gcc-4 + +#include "callback.h" +#include <omp.h> + +int main() +{ + int y[] = {0,1,2,3}; + + int i; + #pragma omp for simd + for (i = 0; i < 4; i++) + { + y[i]++; + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at simd loop end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/barrier/implicit_task_data.c b/final/runtime/test/ompt/synchronization/barrier/implicit_task_data.c new file mode 100644 index 0000000..0824b47 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/barrier/implicit_task_data.c @@ -0,0 +1,150 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt + +// This test checks that values stored in task_data in a barrier_begin event +// are still present in the corresponding barrier_end event. +// Therefore, callback implementations different from the ones in callback.h are necessary. 
+// This is a test for an issue reported in +// https://github.com/OpenMPToolsInterface/LLVM-openmp/issues/39 + +#define _BSD_SOURCE +#include <stdio.h> +#include <unistd.h> +#include <inttypes.h> +#include <omp.h> +#include <ompt.h> + +static const char* ompt_thread_type_t_values[] = { + NULL, + "ompt_thread_initial", + "ompt_thread_worker", + "ompt_thread_other" +}; + +static ompt_get_unique_id_t ompt_get_unique_id; +static ompt_get_thread_data_t ompt_get_thread_data; + +int main() +{ + #pragma omp parallel num_threads(4) + { + #pragma omp master + { + sleep(1); + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id=0, task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}} + + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id=0, task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]] + + return 0; +} + +static void +on_ompt_callback_thread_begin( + 
ompt_thread_type_t thread_type, + ompt_data_t *thread_data) +{ + if(thread_data->ptr) + printf("%s\n", "0: thread_data initially not null"); + thread_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ompt_thread_type_t_values[thread_type], thread_type, thread_data->value); +} + +static void +on_ompt_callback_sync_region( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + task_data->value = ompt_get_unique_id(); + if(kind == ompt_sync_region_barrier) + printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_scope_end: + if(kind == ompt_sync_region_barrier) + printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } +} + +static void +on_ompt_callback_sync_region_wait( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + if(kind == ompt_sync_region_barrier) + printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_scope_end: + if(kind == ompt_sync_region_barrier) + printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; 
+ } +} + +#define register_callback_t(name, type) \ +do{ \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \ + ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ +}while(0) + +#define register_callback(name) register_callback_t(name, name##_t) + +int ompt_initialize( + ompt_function_lookup_t lookup, + ompt_data_t *tool_data) +{ + ompt_set_callback_t ompt_set_callback; + ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); + ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id"); + ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data"); + register_callback(ompt_callback_sync_region); + register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t); + register_callback(ompt_callback_thread_begin); + printf("0: NULL_POINTER=%p\n", (void*)NULL); + return 1; //success +} + +void ompt_finalize(ompt_data_t *tool_data) +{ + printf("0: ompt_event_runtime_shutdown\n"); +} + +ompt_start_tool_result_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) +{ + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; + return &ompt_start_tool_result; +} diff --git a/final/runtime/test/ompt/synchronization/barrier/parallel_region.c b/final/runtime/test/ompt/synchronization/barrier/parallel_region.c new file mode 100644 index 0000000..ea0a23f --- /dev/null +++ b/final/runtime/test/ompt/synchronization/barrier/parallel_region.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() +{ + int x = 0; + + //implicit barrier at end of a parallel region + #pragma omp parallel num_threads(2) + { + #pragma omp atomic + x++; + } + print_fuzzy_address(); + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/barrier/sections.c b/final/runtime/test/ompt/synchronization/barrier/sections.c new file mode 100644 index 0000000..4e1dfdd --- /dev/null +++ b/final/runtime/test/ompt/synchronization/barrier/sections.c @@ -0,0 +1,63 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + //implicit 
barrier after sections with nowait but with lastprivates + //implicit barrier at end of sections + #pragma omp sections + { + #pragma omp section + { + #pragma omp atomic + x++; + } + + #pragma omp section + { + #pragma omp atomic + x++; + } + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at sections end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread implicit barrier at sections end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: 
{{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/barrier/single.c b/final/runtime/test/ompt/synchronization/barrier/single.c new file mode 100644 index 0000000..8ba8b52 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/barrier/single.c @@ -0,0 +1,61 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + //implicit barrier at end of single + #pragma omp single + { + x++; + } + print_fuzzy_address(); + //critical section to avoid merge of two barriers into one + #pragma omp critical + { + x++; + } + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at single end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread implicit barrier at single end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, 
codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/critical.c b/final/runtime/test/ompt/synchronization/critical.c new file mode 100644 index 0000000..ed982b7 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/critical.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() +{ + #pragma omp critical + { + print_current_address(1); + print_ids(0); + } + print_current_address(2); + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_critical: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/flush.c b/final/runtime/test/ompt/synchronization/flush.c new file mode 100644 index 0000000..287d035 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/flush.c @@ -0,0 +1,30 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// GCC generates code that does not call the runtime for the flush construct +// XFAIL: gcc + +#include "callback.h" +#include <omp.h> + +int main() { +#pragma omp parallel num_threads(2) + { + int tid = omp_get_thread_num(); + +#pragma omp flush + print_current_address(1); + } + + return 0; +} +// Check if libomp supports the callbacks for this test. 
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_flush' + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_flush: +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] +// CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] +// +// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_flush: +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] +// CHECK: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] diff --git a/final/runtime/test/ompt/synchronization/lock.c b/final/runtime/test/ompt/synchronization/lock.c new file mode 100644 index 0000000..eae1575 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/lock.c @@ -0,0 +1,44 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() +{ + //need to use an OpenMP construct so that OMPT will be initalized + #pragma omp parallel num_threads(1) + print_ids(0); + + omp_lock_t lock; + printf("%" PRIu64 ": &lock: %" PRIu64 "\n", ompt_get_thread_data()->value, (uint64_t) &lock); + omp_init_lock(&lock); + print_fuzzy_address(1); + omp_set_lock(&lock); + print_fuzzy_address(2); + omp_unset_lock(&lock); + print_fuzzy_address(3); + omp_destroy_lock(&lock); + print_fuzzy_address(4); + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: &lock: [[WAIT_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_init_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/master.c b/final/runtime/test/ompt/synchronization/master.c new file mode 100644 index 0000000..8cc2d46 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/master.c @@ -0,0 +1,38 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// GCC generates code that does not call the runtime for the master construct +// XFAIL: gcc + +#include "callback.h" +#include <omp.h> + +int main() { + int x = 0; +#pragma omp parallel num_threads(2) + { 
+#pragma omp master + { + print_fuzzy_address(1); + x++; + } + print_current_address(2); + } + + printf("%" PRIu64 ": x=%d\n", ompt_get_thread_data()->value, x); + + return 0; +} + +// Check if libomp supports the callbacks for this test. +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} +// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_master_end: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS_END:0x[0-f]+]] +// CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS_END]] diff --git a/final/runtime/test/ompt/synchronization/nest_lock.c b/final/runtime/test/ompt/synchronization/nest_lock.c new file mode 100644 index 0000000..c83ceaf --- /dev/null +++ b/final/runtime/test/ompt/synchronization/nest_lock.c @@ -0,0 +1,52 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() +{ + //need to use an OpenMP construct so that OMPT will be initalized + #pragma omp parallel num_threads(1) + print_ids(0); + + omp_nest_lock_t nest_lock; + printf("%" PRIu64 ": &nest_lock: %lli\n", ompt_get_thread_data()->value, (long long) &nest_lock); + omp_init_nest_lock(&nest_lock); + print_fuzzy_address(1); + omp_set_nest_lock(&nest_lock); + print_fuzzy_address(2); + omp_set_nest_lock(&nest_lock); + print_fuzzy_address(3); + omp_unset_nest_lock(&nest_lock); + print_fuzzy_address(4); + omp_unset_nest_lock(&nest_lock); + print_fuzzy_address(5); + omp_destroy_nest_lock(&nest_lock); + print_fuzzy_address(6); + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: 
fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/ordered.c b/final/runtime/test/ompt/synchronization/ordered.c new file mode 100644 index 0000000..14284a4 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/ordered.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() +{ + #pragma omp ordered + { + print_current_address(1); + print_ids(0); + } + print_current_address(2); + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_ordered: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/taskgroup.c b/final/runtime/test/ompt/synchronization/taskgroup.c new file mode 100644 index 0000000..7309c0a --- /dev/null +++ b/final/runtime/test/ompt/synchronization/taskgroup.c @@ -0,0 +1,49 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 + +#include "callback.h" +#include 
<unistd.h> +#include <stdio.h> + +int main() +{ + int condition=0; + int x=0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp taskgroup + { + print_current_address(1); + #pragma omp task + { + #pragma omp atomic + x++; + } + } + print_current_address(2); + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskgroup_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/taskwait.c b/final/runtime/test/ompt/synchronization/taskwait.c new file mode 100644 index 0000000..c431024 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/taskwait.c @@ -0,0 +1,36 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> 
+ +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp task + { + x++; + } + #pragma omp taskwait + print_current_address(1); + } + } + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: ompt_event_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/test_lock.c b/final/runtime/test/ompt/synchronization/test_lock.c new file mode 100644 index 0000000..d24e4d6 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/test_lock.c @@ -0,0 +1,54 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include <omp.h> + +int main() +{ + omp_lock_t lock; + omp_init_lock(&lock); + print_fuzzy_address(1); + + omp_test_lock(&lock); + print_fuzzy_address(2); + omp_unset_lock(&lock); + print_fuzzy_address(3); + + omp_set_lock(&lock); + print_fuzzy_address(4); + omp_test_lock(&lock); + print_fuzzy_address(5); + omp_unset_lock(&lock); + print_fuzzy_address(6); + + omp_destroy_lock(&lock); + print_fuzzy_address(7); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: 
ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/test_nest_lock.c b/final/runtime/test/ompt/synchronization/test_nest_lock.c new file mode 100644 index 0000000..ad02d32 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/test_nest_lock.c @@ -0,0 +1,42 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include <omp.h> + +int main() +{ + omp_nest_lock_t nest_lock; + omp_init_nest_lock(&nest_lock); + + omp_test_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + + omp_set_nest_lock(&nest_lock); + omp_test_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + + omp_destroy_nest_lock(&nest_lock); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: 
ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + + return 0; +} diff --git a/final/runtime/test/ompt/synchronization/test_nest_lock_parallel.c b/final/runtime/test/ompt/synchronization/test_nest_lock_parallel.c new file mode 100644 index 0000000..e9240f7 --- /dev/null +++ b/final/runtime/test/ompt/synchronization/test_nest_lock_parallel.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() +{ + omp_nest_lock_t nest_lock; + omp_init_nest_lock(&nest_lock); + + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + omp_set_nest_lock(&nest_lock); + print_fuzzy_address(1); + } + #pragma omp barrier + omp_test_nest_lock(&nest_lock); //should fail for non-master + print_fuzzy_address(2); + #pragma omp barrier + #pragma omp master + { + omp_unset_nest_lock(&nest_lock); + print_fuzzy_address(3); + omp_unset_nest_lock(&nest_lock); + print_fuzzy_address(4); + } + } + + omp_destroy_nest_lock(&nest_lock); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NEXT: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]] + // CHECK-NEXT: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} diff 
--git a/final/runtime/test/ompt/tasks/dependences.c b/final/runtime/test/ompt/tasks/dependences.c new file mode 100644 index 0000000..57b61f9 --- /dev/null +++ b/final/runtime/test/ompt/tasks/dependences.c @@ -0,0 +1,61 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 + +#include "callback.h" +#include <omp.h> +#include <math.h> +#include <unistd.h> + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + print_ids(0); + #pragma omp task depend(out:x) + { + x++; + delay(100); + } + print_fuzzy_address(1); + print_ids(0); + + #pragma omp task depend(in:x) + { + x = -1; + } + print_ids(0); + } + } + + x++; + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1 + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: 
{{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + + + return 0; +} diff --git a/final/runtime/test/ompt/tasks/explicit_task.c b/final/runtime/test/ompt/tasks/explicit_task.c new file mode 100644 index 0000000..01fb3f8 --- /dev/null +++ b/final/runtime/test/ompt/tasks/explicit_task.c @@ -0,0 +1,102 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN +#include "callback.h" +#include <omp.h> + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame_from_outlined_fn(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + OMPT_WAIT(condition,1); + print_ids(0); + } + #pragma omp barrier + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- 
ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // explicit barrier after master + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: 
{{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // this is expected to come earlier and at MASTER: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} diff --git a/final/runtime/test/ompt/tasks/serialized.c b/final/runtime/test/ompt/tasks/serialized.c new file mode 100644 index 
0000000..12a0281 --- /dev/null +++ b/final/runtime/test/ompt/tasks/serialized.c @@ -0,0 +1,154 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN +#include "callback.h" +#include <omp.h> +#include <math.h> + +int main() { + omp_set_nested(0); + print_frame(0); +#pragma omp parallel num_threads(2) + { + print_frame_from_outlined_fn(1); + print_ids(0); + print_ids(1); + print_frame(0); +#pragma omp master + { + print_ids(0); + void *creator_frame = get_frame_address(0); + int t = (int)sin(0.1); +#pragma omp task if (t) + { + void *task_frame = get_frame_address(0); + if (creator_frame == task_frame) { + // Assume this code was inlined which the compiler is allowed to do. + print_frame(0); + } else { + // The exit frame must be our parent! + print_frame_from_outlined_fn(1); + } + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + print_ids(0); + } + print_ids(0); + } + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create + // CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]] + // CHECK-SAME: parent_task_frame.reenter=[[NULL]] + // CHECK-SAME: new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK-SAME: task_type=ompt_task_initial=1, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0) + // CHECK-SAME: =[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin + // CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]] + // CHECK-SAME: parent_task_frame.exit=[[NULL]] + // CHECK-SAME: parent_task_frame.reenter=[[MAIN_REENTER]] + // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2 + // CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}} + + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address + // CHECK-SAME: =[[EXIT:0x[0-f]+]] + + // CHECK: {{^}}[[MASTER_ID]]: task level 0 + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[NULL]] + + // CHECK: {{^}}[[MASTER_ID]]: task level 1 + // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]] + // CHECK-SAME: task_id=[[PARENT_TASK_ID]], + // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]] + // CHECK-SAME: parent_task_frame.exit=[[EXIT]] + // CHECK-SAME: parent_task_frame.reenter=[[REENTER]] + // CHECK-SAME: 
new_task_id=[[TASK_ID:[0-9]+]] + // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: + // CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address + // CHECK-SAME: =[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0 + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]] + // CHECK-SAME: exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + + // CHECK: {{^}}[[MASTER_ID]]: task level 1 + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + + // CHECK: {{^}}[[MASTER_ID]]: task level 2 + // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]] + // CHECK-SAME: task_id=[[PARENT_TASK_ID]] + // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule + // CHECK-SAME: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // CHECK: {{^}}[[MASTER_ID]]: task level 0 + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[NULL]] + + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0 + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end + // parallel_id is 0 because the region ended in the barrier! 
+ // CHECK-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // CHECK-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address + // CHECK-SAME: =[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0 + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1 + // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]] + // CHECK-SAME: task_id=[[PARENT_TASK_ID]] + // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0 + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[NULL]] + // parallel_id is 0 because the region ended in the barrier! 
+ // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end + // CHECK-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // CHECK-SAME: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/tasks/task_in_joinbarrier.c b/final/runtime/test/ompt/tasks/task_in_joinbarrier.c new file mode 100644 index 0000000..25b57a9 --- /dev/null +++ b/final/runtime/test/ompt/tasks/task_in_joinbarrier.c @@ -0,0 +1,91 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN +#include "callback.h" +#include <omp.h> + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame_from_outlined_fn(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + } + OMPT_WAIT(condition,1); + print_ids(0); + } + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- 
ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // implicit barrier parallel + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: 
{{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} diff --git a/final/runtime/test/ompt/tasks/task_types.c b/final/runtime/test/ompt/tasks/task_types.c new file mode 100644 index 0000000..40ceb2d --- /dev/null +++ b/final/runtime/test/ompt/tasks/task_types.c @@ -0,0 +1,222 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> +#include <math.h> + +int main() { + //initialize the OpenMP runtime + omp_get_num_threads(); + + // initial task + print_ids(0); + + int x; +// implicit task +#pragma omp parallel num_threads(1) + { + print_ids(0); + x++; + } + +#pragma omp parallel num_threads(2) + { +// explicit task +#pragma omp single +#pragma omp task + { + print_ids(0); + x++; + } +// explicit task with undeferred +#pragma omp single +#pragma omp task if (0) + { + print_ids(0); + x++; + } + +// explicit task with untied +#pragma omp single +#pragma omp task untied + { + // Output of thread_id is needed to know on which thread task is executed + printf("%" PRIu64 ": explicit_untied\n", ompt_get_thread_data()->value); 
+ print_ids(0); + print_frame(1); + x++; +#pragma omp taskyield + printf("%" PRIu64 ": explicit_untied(2)\n", + ompt_get_thread_data()->value); + print_ids(0); + print_frame(1); + x++; +#pragma omp taskwait + printf("%" PRIu64 ": explicit_untied(3)\n", + ompt_get_thread_data()->value); + print_ids(0); + print_frame(1); + x++; + } +// explicit task with final +#pragma omp single +#pragma omp task final(1) + { + print_ids(0); + x++; +// nested explicit task with final and undeferred +#pragma omp task + { + print_ids(0); + x++; + } + } + + // Mergeable task test deactivated for now + // explicit task with mergeable + /* + #pragma omp task mergeable if((int)sin(0)) + { + print_ids(0); + x++; + } + */ + + // TODO: merged task + } + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0 + // CHECK-SAME: parent_task_frame.exit=[[NULL]] + // CHECK-SAME: parent_task_frame.reenter=[[NULL]] + // CHECK-SAME: new_task_id=[[INITIAL_TASK_ID:[0-9]+]], codeptr_ra=[[NULL]] + // CHECK-SAME: task_type=ompt_task_initial=1, has_dependences=no + + // CHECK-NOT: 0: parallel_data initially not null + + // initial task + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id={{[0-9]+}} + // CHECK-SAME: task_id=[[INITIAL_TASK_ID]], exit_frame=[[NULL]] + // CHECK-SAME: reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_initial=1, thread_num=0 + + // implicit task + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id={{[0-9]+}} + // CHECK-SAME: task_id={{[0-9]+}}, exit_frame={{0x[0-f]+}} + // CHECK-SAME: reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_implicit|ompt_task_undeferred=134217730 + // CHECK-SAME: thread_num=0 + + // explicit task + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}} + // CHECK-SAME: 
parent_task_frame.exit={{0x[0-f]+}} + // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}} + // CHECK-SAME: new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] + // CHECK-SAME: codeptr_ra={{0x[0-f]+}} + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK-SAME: has_dependences=no + + // CHECK: [[THREAD_ID_1:[0-9]+]]: ompt_event_task_schedule: + // CHECK-SAME: second_task_id=[[EXPLICIT_TASK_ID]] + + // CHECK: [[THREAD_ID_1]]: task level 0: parallel_id=[[PARALLEL_ID:[0-9]+]] + // CHECK-SAME: task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}} + // CHECK-SAME: reenter_frame=[[NULL]], task_type=ompt_task_explicit=4 + // CHECK-SAME: thread_num={{[01]}} + + // explicit task with undeferred + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}} + // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}} + // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}} + // CHECK-SAME: new_task_id=[[EXPLICIT_UNDEFERRED_TASK_ID:[0-9]+]] + // CHECK-SAME: codeptr_ra={{0x[0-f]+}} + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + // CHECK-SAME: has_dependences=no + + // CHECK: [[THREAD_ID_2:[0-9]+]]: ompt_event_task_schedule: + // CHECK-SAME: second_task_id=[[EXPLICIT_UNDEFERRED_TASK_ID]] + + // CHECK: [[THREAD_ID_2]]: task level 0: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[EXPLICIT_UNDEFERRED_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + // CHECK-SAME: thread_num={{[01]}} + + // explicit task with untied + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}} + // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}} + // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}} + // CHECK-SAME: new_task_id=[[EXPLICIT_UNTIED_TASK_ID:[0-9]+]] + // CHECK-SAME: codeptr_ra={{0x[0-f]+}} + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460 + // CHECK-SAME: has_dependences=no + + // Here the thread_id cannot be 
taken from a schedule event as there + // may be multiple of those + // CHECK: [[THREAD_ID_3:[0-9]+]]: explicit_untied + // CHECK: [[THREAD_ID_3]]: task level 0: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460 + // CHECK-SAME: thread_num={{[01]}} + + // after taskyield + // CHECK: [[THREAD_ID_3_2:[0-9]+]]: explicit_untied(2) + // CHECK: [[THREAD_ID_3_2]]: task level 0: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460 + // CHECK-SAME: thread_num={{[01]}} + + // after taskwait + // CHECK: [[THREAD_ID_3_3:[0-9]+]]: explicit_untied(3) + // CHECK: [[THREAD_ID_3_3]]: task level 0: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460 + // CHECK-SAME: thread_num={{[01]}} + + // explicit task with final + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}} + // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}} + // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}} + // CHECK-SAME: new_task_id=[[EXPLICIT_FINAL_TASK_ID:[0-9]+]] + // CHECK-SAME: codeptr_ra={{0x[0-f]+}} + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_final=536870916 + // CHECK-SAME: has_dependences=no + + // CHECK: [[THREAD_ID_4:[0-9]+]]: ompt_event_task_schedule: + // CHECK-SAME: second_task_id=[[EXPLICIT_FINAL_TASK_ID]] + + // CHECK: [[THREAD_ID_4]]: task level 0: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[EXPLICIT_FINAL_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_final=536870916 + // 
CHECK-SAME: thread_num={{[01]}} + + // nested explicit task with final and undeferred + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}} + // CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}} + // CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}} + // CHECK-SAME: new_task_id=[[NESTED_FINAL_UNDEFERRED_TASK_ID:[0-9]+]] + // CHECK-SAME: codeptr_ra={{0x[0-f]+}} + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred + // CHECK-SAME:|ompt_task_final=671088644 + // CHECK-SAME: has_dependences=no + + // CHECK: [[THREAD_ID_5:[0-9]+]]: ompt_event_task_schedule: + // CHECK-SAME: second_task_id=[[NESTED_FINAL_UNDEFERRED_TASK_ID]] + + // CHECK: [[THREAD_ID_5]]: task level 0: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[NESTED_FINAL_UNDEFERRED_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred + // CHECK-SAME:|ompt_task_final=671088644 + // CHECK-SAME: thread_num={{[01]}} + + return 0; +} diff --git a/final/runtime/test/ompt/tasks/task_types_serialized.c b/final/runtime/test/ompt/tasks/task_types_serialized.c new file mode 100644 index 0000000..7726f5b --- /dev/null +++ b/final/runtime/test/ompt/tasks/task_types_serialized.c @@ -0,0 +1,113 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include <omp.h> + +__attribute__ ((noinline)) // workaround for bug in icc +void print_task_type(int id) +{ + #pragma omp critical + { + int task_type; + char buffer[2048]; + ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL); + format_task_type(task_type, buffer); + printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type); + } +}; + +int main() +{ + //initial task + print_task_type(0); + + int x; + //implicit task + #pragma omp parallel num_threads(1) + { + print_task_type(1); + x++; + } + + #pragma omp parallel num_threads(1) + #pragma omp master + { + //explicit 
task + #pragma omp task + { + print_task_type(2); + x++; + } + + //explicit task with undeferred + #pragma omp task if(0) + { + print_task_type(3); + x++; + } + + //explicit task with untied + #pragma omp task untied + { + print_task_type(4); + x++; + } + + //explicit task with final + #pragma omp task final(1) + { + print_task_type(5); + x++; + //nested explicit task with final and undeferred + #pragma omp task + { + print_task_type(6); + x++; + } + } + +/* + //TODO:not working + //explicit task with mergeable + #pragma omp task mergeable + { + print_task_type(7); + x++; + } +*/ + + //TODO: merged task + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1 + // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + // CHECK: {{^[0-9]+}}: 
ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188, has_dependences=no + // CHECK: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no + // CHECK: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no + // CHECK: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644 + + // ___CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // ___CHECK: {{^[0-9]+}}: id=7 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + return 0; +} diff --git a/final/runtime/test/ompt/tasks/taskloop.c b/final/runtime/test/ompt/tasks/taskloop.c new file mode 100644 index 0000000..59a47bf --- /dev/null +++ b/final/runtime/test/ompt/tasks/taskloop.c @@ -0,0 +1,81 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | FileCheck --check-prefix=TASKS %s +// REQUIRES: ompt + +// These compilers don't support the taskloop 
construct +// UNSUPPORTED: gcc-4, gcc-5, icc-16 +// GCC 6 has support for taskloops, but at least 6.3.0 is crashing on this test +// UNSUPPORTED: gcc-6 + +#include "callback.h" +#include <omp.h> + +int main() { + unsigned int i, x; + +#pragma omp parallel num_threads(2) + { +#pragma omp barrier + +#pragma omp master +#pragma omp taskloop + for (i = 0; i < 5; i += 3) { + x++; + } + } + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: + // CHECK-SAME: parent_task_id={{[0-9]+}} + // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]] + // CHECK-SAME: requested_team_size=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1:[0-9]+]] + // CHECK-SAME: team_size=2, thread_num=0 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: new_task_id=[[TASK_ID1:[0-9]+]] + // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: new_task_id=[[TASK_ID2:[0-9]+]] + // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK-NOT: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: count=2 + // CHECK-DAG: {{^}}[[MASTER_ID]]: 
ompt_event_wait_taskgroup_begin: + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0 + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]], team_size=2, thread_num=0 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + + // TASKS: ompt_event_task_create:{{.*}} new_task_id={{[0-9]+}} + // TASKS-SAME: task_type=ompt_task_initial + // TASKS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskloop_begin: + // TASKS: ompt_event_task_create:{{.*}} new_task_id=[[TASK_ID1:[0-9]+]] + // TASKS-SAME: task_type=ompt_task_explicit + // TASKS-DAG: ompt_event_task_create:{{.*}} new_task_id=[[TASK_ID2:[0-9]+]] + // Schedule events: + // TASKS-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID1]] + // TASKS-DAG: {{^.*}}first_task_id=[[TASK_ID1]], second_task_id={{[0-9]+}} + // TASKS-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID2]] + // TASKS-DAG: {{^.*}}first_task_id=[[TASK_ID2]], second_task_id={{[0-9]+}} + // TASKS-NOT: ompt_event_task_schedule + + return 0; +} diff --git a/final/runtime/test/ompt/tasks/taskyield.c b/final/runtime/test/ompt/tasks/taskyield.c new file mode 100644 index 0000000..56a4697 --- /dev/null +++ b/final/runtime/test/ompt/tasks/taskyield.c @@ -0,0 +1,62 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// Current GOMP interface implements taskyield as stub +// XFAIL: gcc + +#include "callback.h" +#include <omp.h> +#include <unistd.h> + +int main() +{ + int condition=0, x=0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + } + OMPT_WAIT(condition,1); + 
#pragma omp task shared(x) + { + x++; + } + printf("%" PRIu64 ": before yield\n", ompt_get_thread_data()->value); + #pragma omp taskyield + printf("%" PRIu64 ": after yield\n", ompt_get_thread_data()->value); + OMPT_SIGNAL(condition); + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[0-9]+}}, thread_num={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[WORKER_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[MAIN_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[MAIN_TASK]], prior_task_status=ompt_task_yield=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[MAIN_TASK]], second_task_id=[[IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[WORKER_TASK]], 
second_task_id={{[0-9]+}}, prior_task_status=ompt_task_complete=1 + + + + + + return 0; +} diff --git a/final/runtime/test/ompt/tasks/untied_task.c b/final/runtime/test/ompt/tasks/untied_task.c new file mode 100644 index 0000000..e68fa26 --- /dev/null +++ b/final/runtime/test/ompt/tasks/untied_task.c @@ -0,0 +1,108 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN +#include "callback.h" +#include <omp.h> + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame_from_outlined_fn(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task untied shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + #pragma omp task if(0) + { + print_ids(0); + print_ids(1); + print_ids(2); + } + print_ids(0); + print_ids(1); + print_ids(2); + } + OMPT_WAIT(condition,1); + print_ids(0); + } + #pragma omp barrier + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- 
ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // explicit barrier after master + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: 
{{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // this is expected to come earlier and at MASTER: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} diff --git a/final/runtime/test/ompt/worksharing/for/auto.c b/final/runtime/test/ompt/worksharing/for/auto.c new file mode 100644 index 0000000..17d26f5 --- /dev/null +++ 
b/final/runtime/test/ompt/worksharing/for/auto.c @@ -0,0 +1,7 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h +// REQUIRES: ompt +// GCC doesn't call runtime for auto = static schedule +// XFAIL: gcc + +#define SCHEDULE auto +#include "base.h" diff --git a/final/runtime/test/ompt/worksharing/for/auto_serialized.c b/final/runtime/test/ompt/worksharing/for/auto_serialized.c new file mode 100644 index 0000000..f756166 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/auto_serialized.c @@ -0,0 +1,7 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h +// REQUIRES: ompt +// GCC doesn't call runtime for auto = static schedule +// XFAIL: gcc + +#define SCHEDULE auto +#include "base_serialized.h" diff --git a/final/runtime/test/ompt/worksharing/for/auto_split.c b/final/runtime/test/ompt/worksharing/for/auto_split.c new file mode 100644 index 0000000..d82e3fd --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/auto_split.c @@ -0,0 +1,8 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt +// GCC doesn't call runtime for auto = static schedule +// XFAIL: gcc + +#define SCHEDULE auto +#include "base_split.h" diff --git a/final/runtime/test/ompt/worksharing/for/base.h b/final/runtime/test/ompt/worksharing/for/base.h new file mode 100644 index 0000000..8a496d9 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/base.h @@ -0,0 +1,43 @@ +#include "callback.h" +#include <omp.h> + +int main() +{ + unsigned int i; + + #pragma omp parallel for num_threads(4) schedule(SCHEDULE) + for (i = 0; i < 4; i++) { + } + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra= + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra= + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra= + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], 
task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra= + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/worksharing/for/base_serialized.h b/final/runtime/test/ompt/worksharing/for/base_serialized.h new file mode 100644 index 0000000..3376b37 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/base_serialized.h @@ -0,0 +1,28 @@ +#include "callback.h" +#include <omp.h> + +int main() +{ + unsigned int i; + + #pragma omp parallel for num_threads(1) schedule(SCHEDULE) + for (i = 0; i < 1; i++) { + } + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[PARALLEL_ID,0]}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} diff --git a/final/runtime/test/ompt/worksharing/for/base_split.h b/final/runtime/test/ompt/worksharing/for/base_split.h new file mode 100644 index 0000000..0f1fed3 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/base_split.h @@ -0,0 +1,66 @@ +#include "callback.h" +#include <omp.h> + +/* With the combined parallel-for construct (base.h), the return-addresses are hard to compare. + With the separate parallel and for-nowait construct, the addresses become more predictable, + but the begin of the for-loop still generates additional code, so the offset of loop-begin + to the label is >4 Byte. 
+*/ + +int main() +{ + unsigned int i; + + #pragma omp parallel num_threads(4) + { + print_current_address(0); + #pragma omp for schedule(SCHEDULE) nowait + for (i = 0; i < 4; i++) { + print_fuzzy_address(1); + } + print_fuzzy_address(2); + } + print_fuzzy_address(3); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[PARALLEL_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, invoker={{[0-9]+}}, codeptr_ra=[[PARALLEL_RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[PARALLEL_RETURN_ADDRESS]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: 
{{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]] + + + // CHECK-LOOP: 0: NULL_POINTER=[[NULL:.*$]] + // CHECK-LOOP: 0: ompt_event_runtime_shutdown + // CHECK-LOOP: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra={{0x[0-f]+}}, invoker={{[0-9]+}} + // CHECK-LOOP: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]] + // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]] + // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]] + // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]] + + + return 0; +} diff --git a/final/runtime/test/ompt/worksharing/for/dynamic.c b/final/runtime/test/ompt/worksharing/for/dynamic.c new file mode 100644 index 0000000..ca5ae10 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/dynamic.c @@ -0,0 +1,5 @@ +// 
RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h +// REQUIRES: ompt + +#define SCHEDULE dynamic +#include "base.h" diff --git a/final/runtime/test/ompt/worksharing/for/dynamic_serialized.c b/final/runtime/test/ompt/worksharing/for/dynamic_serialized.c new file mode 100644 index 0000000..0f80929 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/dynamic_serialized.c @@ -0,0 +1,5 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h +// REQUIRES: ompt + +#define SCHEDULE dynamic +#include "base_serialized.h" diff --git a/final/runtime/test/ompt/worksharing/for/dynamic_split.c b/final/runtime/test/ompt/worksharing/for/dynamic_split.c new file mode 100644 index 0000000..cf14971 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/dynamic_split.c @@ -0,0 +1,7 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 + +#define SCHEDULE dynamic +#include "base_split.h" diff --git a/final/runtime/test/ompt/worksharing/for/guided.c b/final/runtime/test/ompt/worksharing/for/guided.c new file mode 100644 index 0000000..01bff4e --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/guided.c @@ -0,0 +1,5 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h +// REQUIRES: ompt + +#define SCHEDULE guided +#include "base.h" diff --git a/final/runtime/test/ompt/worksharing/for/guided_serialized.c b/final/runtime/test/ompt/worksharing/for/guided_serialized.c new file mode 100644 index 0000000..4b5096d --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/guided_serialized.c @@ -0,0 +1,5 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h +// REQUIRES: ompt + +#define SCHEDULE guided +#include "base_serialized.h" diff --git 
a/final/runtime/test/ompt/worksharing/for/guided_split.c b/final/runtime/test/ompt/worksharing/for/guided_split.c new file mode 100644 index 0000000..7d560c2 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/guided_split.c @@ -0,0 +1,7 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 + +#define SCHEDULE guided +#include "base_split.h" diff --git a/final/runtime/test/ompt/worksharing/for/runtime.c b/final/runtime/test/ompt/worksharing/for/runtime.c new file mode 100644 index 0000000..bcf160f --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/runtime.c @@ -0,0 +1,5 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h +// REQUIRES: ompt + +#define SCHEDULE runtime +#include "base.h" diff --git a/final/runtime/test/ompt/worksharing/for/runtime_serialized.c b/final/runtime/test/ompt/worksharing/for/runtime_serialized.c new file mode 100644 index 0000000..231d67d --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/runtime_serialized.c @@ -0,0 +1,5 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h +// REQUIRES: ompt + +#define SCHEDULE runtime +#include "base_serialized.h" diff --git a/final/runtime/test/ompt/worksharing/for/runtime_split.c b/final/runtime/test/ompt/worksharing/for/runtime_split.c new file mode 100644 index 0000000..7a677ed --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/runtime_split.c @@ -0,0 +1,7 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 + +#define SCHEDULE runtime +#include "base_split.h" diff --git a/final/runtime/test/ompt/worksharing/for/static.c 
b/final/runtime/test/ompt/worksharing/for/static.c new file mode 100644 index 0000000..4d99059 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/static.c @@ -0,0 +1,7 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base.h +// REQUIRES: ompt +// GCC doesn't call runtime for static schedule +// XFAIL: gcc + +#define SCHEDULE static +#include "base.h" diff --git a/final/runtime/test/ompt/worksharing/for/static_serialized.c b/final/runtime/test/ompt/worksharing/for/static_serialized.c new file mode 100644 index 0000000..4860d49 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/static_serialized.c @@ -0,0 +1,7 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_serialized.h +// REQUIRES: ompt +// GCC doesn't call runtime for static schedule +// XFAIL: gcc + +#define SCHEDULE static +#include "base_serialized.h" diff --git a/final/runtime/test/ompt/worksharing/for/static_split.c b/final/runtime/test/ompt/worksharing/for/static_split.c new file mode 100644 index 0000000..d8c88dd --- /dev/null +++ b/final/runtime/test/ompt/worksharing/for/static_split.c @@ -0,0 +1,8 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt +// GCC doesn't call runtime for static schedule +// XFAIL: gcc + +#define SCHEDULE static +#include "base_split.h" diff --git a/final/runtime/test/ompt/worksharing/sections.c b/final/runtime/test/ompt/worksharing/sections.c new file mode 100644 index 0000000..bafb743 --- /dev/null +++ b/final/runtime/test/ompt/worksharing/sections.c @@ -0,0 +1,36 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// Some compilers generate code that does not distinguish between sections and loops +// XFAIL: gcc, clang-3, clang-4, clang-5, icc-16, icc-17 +// UNSUPPORTED: icc-18 + +#include "callback.h" +#include <omp.h> + +int main() +{ 
+ #pragma omp parallel sections num_threads(2) + { + #pragma omp section + { + printf("%lu: section 1\n", ompt_get_thread_data()->value); + } + #pragma omp section + { + printf("%lu: section 2\n", ompt_get_thread_data()->value); + } + } + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN:0x[0-f]+]], count=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END:0x[0-f]+]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN]], count=2 + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END]] + + return 0; +} diff --git a/final/runtime/test/ompt/worksharing/single.c b/final/runtime/test/ompt/worksharing/single.c new file mode 100644 index 0000000..6b24f2d --- /dev/null +++ b/final/runtime/test/ompt/worksharing/single.c @@ -0,0 +1,36 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// GCC generates code that does not call the runtime for the single construct +// XFAIL: gcc + +#include "callback.h" +#include <omp.h> + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp single + { + printf("%" PRIu64 ": in single\n", ompt_get_thread_data()->value); + x++; + } + } + + printf("x=%d\n", x); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK-DAG: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_single_in_block_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1 + // CHECK-DAG: {{^}}[[THREAD_ID_1]]: in single + // CHECK-DAG: {{^}}[[THREAD_ID_1]]: ompt_event_single_in_block_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1 + + // CHECK-DAG: {{^}}[[THREAD_ID_2:[0-9]+]]: ompt_event_single_others_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1 + // CHECK-DAG: {{^}}[[THREAD_ID_2]]: ompt_event_single_others_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1 + + return 0; +} diff --git a/final/runtime/test/parallel/omp_nested.c b/final/runtime/test/parallel/omp_nested.c new file mode 100644 index 0000000..8b78088 --- /dev/null +++ b/final/runtime/test/parallel/omp_nested.c @@ -0,0 +1,46 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +/* + * Test if the compiler supports nested parallelism + * By Chunhua Liao, University of Houston + * Oct. 
2005 + */ +int test_omp_nested() +{ +#ifdef _OPENMP + if (omp_get_max_threads() > 4) + omp_set_num_threads(4); +#endif + + int counter = 0; +#ifdef _OPENMP + omp_set_nested(1); +#endif + + #pragma omp parallel shared(counter) + { + #pragma omp critical + counter++; + #pragma omp parallel + { + #pragma omp critical + counter--; + } + } + return (counter != 0); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_nested()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/parallel/omp_parallel_copyin.c b/final/runtime/test/parallel/omp_parallel_copyin.c new file mode 100644 index 0000000..600f9b7 --- /dev/null +++ b/final/runtime/test/parallel/omp_parallel_copyin.c @@ -0,0 +1,47 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +static int sum1 = 789; +#pragma omp threadprivate(sum1) + +int test_omp_parallel_copyin() +{ + int sum, num_threads; + int known_sum; + + sum = 0; + sum1 = 7; + num_threads = 0; + + #pragma omp parallel copyin(sum1) + { + /*printf("sum1=%d\n",sum1);*/ + int i; + #pragma omp for + for (i = 1; i < 1000; i++) { + sum1 = sum1 + i; + } /*end of for*/ + #pragma omp critical + { + sum = sum + sum1; + num_threads++; + } /*end of critical*/ + } /* end of parallel*/ + known_sum = (999 * 1000) / 2 + 7 * num_threads; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_copyin()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/parallel/omp_parallel_default.c b/final/runtime/test/parallel/omp_parallel_default.c new file mode 100644 index 0000000..0a8e09e --- /dev/null +++ b/final/runtime/test/parallel/omp_parallel_default.c @@ -0,0 +1,43 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_default() +{ + int i; + int sum; + int mysum; + int 
known_sum; + sum =0; + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2 ; + + #pragma omp parallel default(shared) private(i) private(mysum) + { + mysum = 0; + #pragma omp for + for (i = 1; i <= LOOPCOUNT; i++) { + mysum = mysum + i; + } + #pragma omp critical + { + sum = sum + mysum; + } /* end of critical */ + } /* end of parallel */ + if (known_sum != sum) { + fprintf(stderr, "KNOWN_SUM = %d; SUM = %d\n", known_sum, sum); + } + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_default()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/parallel/omp_parallel_firstprivate.c b/final/runtime/test/parallel/omp_parallel_firstprivate.c new file mode 100644 index 0000000..dbee76c --- /dev/null +++ b/final/runtime/test/parallel/omp_parallel_firstprivate.c @@ -0,0 +1,46 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +//static int sum1 = 789; + +int test_omp_parallel_firstprivate() +{ + int sum, num_threads,sum1; + int known_sum; + + sum = 0; + sum1=7; + num_threads = 0; + + #pragma omp parallel firstprivate(sum1) + { + /*printf("sum1=%d\n",sum1);*/ + int i; + #pragma omp for + for (i = 1; i < 1000; i++) { + sum1 = sum1 + i; + } /*end of for*/ + #pragma omp critical + { + sum = sum + sum1; + num_threads++; + } /*end of critical*/ + } /* end of parallel*/ + known_sum = (999 * 1000) / 2 + 7 * num_threads; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/parallel/omp_parallel_if.c b/final/runtime/test/parallel/omp_parallel_if.c new file mode 100644 index 0000000..abbf3cd --- /dev/null +++ b/final/runtime/test/parallel/omp_parallel_if.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include 
"omp_testsuite.h" + +int test_omp_parallel_if() +{ + int i; + int sum; + int known_sum; + int mysum; + int control=1; + + sum =0; + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2 ; + #pragma omp parallel private(i) if(control==0) + { + mysum = 0; + for (i = 1; i <= LOOPCOUNT; i++) { + mysum = mysum + i; + } + #pragma omp critical + { + sum = sum + mysum; + } + } + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_if()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/parallel/omp_parallel_num_threads.c b/final/runtime/test/parallel/omp_parallel_num_threads.c new file mode 100644 index 0000000..8af1f9d --- /dev/null +++ b/final/runtime/test/parallel/omp_parallel_num_threads.c @@ -0,0 +1,46 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_num_threads() +{ + int num_failed; + int threads; + int nthreads; + int max_threads = 0; + + num_failed = 0; + + /* first we check how many threads are available */ + #pragma omp parallel + { + #pragma omp master + max_threads = omp_get_num_threads (); + } + + /* we increase the number of threads from one to maximum:*/ + for(threads = 1; threads <= max_threads; threads++) { + nthreads = 0; + #pragma omp parallel reduction(+:num_failed) num_threads(threads) + { + num_failed = num_failed + !(threads == omp_get_num_threads()); + #pragma omp atomic + nthreads += 1; + } + num_failed = num_failed + !(nthreads == threads); + } + return (!num_failed); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_num_threads()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/parallel/omp_parallel_private.c b/final/runtime/test/parallel/omp_parallel_private.c new file mode 100644 index 0000000..238e806 --- /dev/null +++ b/final/runtime/test/parallel/omp_parallel_private.c @@ -0,0 
+1,46 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +//static int sum1 = 789; + +int test_omp_parallel_private() +{ + int sum, num_threads,sum1; + int known_sum; + + sum = 0; + num_threads = 0; + + #pragma omp parallel private(sum1) + { + int i; + sum1 = 7; + /*printf("sum1=%d\n",sum1);*/ + #pragma omp for + for (i = 1; i < 1000; i++) { + sum1 = sum1 + i; + } + #pragma omp critical + { + sum = sum + sum1; + num_threads++; + } + } + known_sum = (999 * 1000) / 2 + 7 * num_threads; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/parallel/omp_parallel_reduction.c b/final/runtime/test/parallel/omp_parallel_reduction.c new file mode 100644 index 0000000..bb00939 --- /dev/null +++ b/final/runtime/test/parallel/omp_parallel_reduction.c @@ -0,0 +1,254 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */ +#define MAX_FACTOR 10 +#define KNOWN_PRODUCT 3628800 /* 10! 
*/ + +int test_omp_parallel_reduction() +{ + int sum; + int known_sum; + double dsum; + double dknown_sum; + double dt=0.5; /* base of geometric row for + and - test*/ + double rounding_error= 1.E-9; + int diff; + double ddiff; + int product; + int known_product; + int logic_and; + int logic_or; + int bit_and; + int bit_or; + int exclusiv_bit_or; + int logics[LOOPCOUNT]; + int i; + double dpt; + int result; + + sum =0; + dsum=0; + product=1; + logic_and=1; + logic_or=0; + bit_and=1; + bit_or=0; + exclusiv_bit_or=0; + result=0; + dt = 1./3.; + known_sum = (LOOPCOUNT*(LOOPCOUNT+1))/2; + + /* Tests for integers */ + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:sum) + for (i=1;i<=LOOPCOUNT;i++) { + sum=sum+i; + } + + if(known_sum!=sum) { + result++; + fprintf(stderr,"Error in sum with integers: Result was %d instead of %d\n",sum,known_sum); + } + + diff = (LOOPCOUNT*(LOOPCOUNT+1))/2; + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:diff) + for (i=1;i<=LOOPCOUNT;++i) { + diff=diff-i; + } + + if(diff != 0) { + result++; + fprintf(stderr,"Error in difference with integers: Result was %d instead of 0.\n",diff); + } + + /* Tests for doubles */ + dsum=0; + dpt=1; + for (i=0;i<DOUBLE_DIGITS;++i) { + dpt*=dt; + } + dknown_sum = (1-dpt)/(1-dt); + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:dsum) + for (i=0;i<DOUBLE_DIGITS;++i) { + dsum += pow(dt,i); + } + + if( fabs(dsum-dknown_sum) > rounding_error ) { + result++; + fprintf(stderr,"Error in sum with doubles: Result was %f instead of %f (Difference: %E)\n",dsum,dknown_sum, dsum-dknown_sum); + } + + dpt=1; + + for (i=0;i<DOUBLE_DIGITS;++i) { + dpt*=dt; + } + fprintf(stderr,"\n"); + ddiff = (1-dpt)/(1-dt); + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:ddiff) + for (i=0;i<DOUBLE_DIGITS;++i) { + ddiff -= pow(dt,i); + } + if( fabs(ddiff) > rounding_error) { + result++; + fprintf(stderr,"Error in Difference with doubles: Result was %E instead 
of 0.0\n",ddiff); + } + + /* Tests for product of integers */ + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(*:product) + for(i=1;i<=MAX_FACTOR;i++) { + product *= i; + } + + known_product = KNOWN_PRODUCT; + if(known_product != product) { + result++; + fprintf(stderr,"Error in Product with integers: Result was %d instead of %d\n\n",product,known_product); + } + + /* Tests for logical and */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=1; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(&&:logic_and) + for(i=0;i<LOOPCOUNT;++i) { + logic_and = (logic_and && logics[i]); + } + if(!logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 1.\n"); + } + + logic_and = 1; + logics[LOOPCOUNT/2]=0; + + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(&&:logic_and) + for(i=0;i<LOOPCOUNT;++i) { + logic_and = logic_and && logics[i]; + } + if(logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 2.\n"); + } + + /* Tests for logical or */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(||:logic_or) + for(i=0;i<LOOPCOUNT;++i) { + logic_or = logic_or || logics[i]; + } + if(logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 1.\n"); + } + logic_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(||:logic_or) + for(i=0;i<LOOPCOUNT;++i) { + logic_or = logic_or || logics[i]; + } + if(!logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 2.\n"); + } + + /* Tests for bitwise and */ + for(i=0;i<LOOPCOUNT;++i) { + logics[i]=1; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(&:bit_and) + for(i=0;i<LOOPCOUNT;++i) { + bit_and = (bit_and & logics[i]); + } + if(!bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 1.\n"); + } + + bit_and = 1; + logics[LOOPCOUNT/2]=0; + + #pragma omp parallel for schedule(dynamic,1) private(i) 
reduction(&:bit_and) + for(i=0;i<LOOPCOUNT;++i) { + bit_and = bit_and & logics[i]; + } + if(bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 2.\n"); + } + + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + /* Tests for bitwise or */ + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(|:bit_or) + for(i=0;i<LOOPCOUNT;++i) { + bit_or = bit_or | logics[i]; + } + if(bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 1\n"); + } + bit_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(|:bit_or) + for(i=0;i<LOOPCOUNT;++i) { + bit_or = bit_or | logics[i]; + } + if(!bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 2\n"); + } + + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + /* Tests for bitwise xor */ + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(^:exclusiv_bit_or) + for(i=0;i<LOOPCOUNT;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + if(exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(^:exclusiv_bit_or) + for(i=0;i<LOOPCOUNT;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + if(!exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n"); + } + + /*printf("\nResult:%d\n",result);*/ + return (result==0); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/parallel/omp_parallel_shared.c b/final/runtime/test/parallel/omp_parallel_shared.c new file mode 100644 index 0000000..3146ca6 --- /dev/null +++ b/final/runtime/test/parallel/omp_parallel_shared.c @@ -0,0 +1,46 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_shared() +{ + int 
i; + int sum; + int known_sum; + + sum = 0; + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2 ; + + #pragma omp parallel private(i) shared(sum) + { + + int mysum = 0; + #pragma omp for + for (i = 1; i <= LOOPCOUNT; i++) { + mysum = mysum + i; + } + #pragma omp critical + { + sum = sum + mysum; + } + + + } + if (known_sum != sum) { + fprintf(stderr, "KNOWN_SUM = %d; SUM = %d\n", known_sum, sum); + } + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_shared()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/bug_36720.c b/final/runtime/test/tasking/bug_36720.c new file mode 100644 index 0000000..684d675 --- /dev/null +++ b/final/runtime/test/tasking/bug_36720.c @@ -0,0 +1,36 @@ +// RUN: %libomp-compile-and-run + +/* +Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=36720 + +Assertion failure at kmp_runtime.cpp(1715): nthreads > 0. +OMP: Error #13: Assertion failure at kmp_runtime.cpp(1715). + +The assertion fails even with OMP_NUM_THREADS=1. If the second task is removed, +everything runs to completion. If the "omp parallel for" directives are removed +from inside the tasks, once again everything runs fine. +*/ + +#define N 1024 + +int main() { + #pragma omp task + { + int i; + #pragma omp parallel for + for (i = 0; i < N; i++) + (void)0; + } + + #pragma omp task + { + int i; + #pragma omp parallel for + for (i = 0; i < N; ++i) + (void)0; + } + + #pragma omp taskwait + + return 0; +} diff --git a/final/runtime/test/tasking/bug_nested_proxy_task.c b/final/runtime/test/tasking/bug_nested_proxy_task.c new file mode 100644 index 0000000..6c00822 --- /dev/null +++ b/final/runtime/test/tasking/bug_nested_proxy_task.c @@ -0,0 +1,131 @@ +// RUN: %libomp-compile -lpthread && %libomp-run +// The runtime currently does not get dependency information from GCC. 
+// UNSUPPORTED: gcc + +#include <stdio.h> +#include <omp.h> +#include <pthread.h> +#include "omp_my_sleep.h" + +/* + With task dependencies one can generate proxy tasks from an explicit task + being executed by a serial task team. The OpenMP runtime library didn't + expect that and tries to free the explicit task that is the parent of the + proxy task still working in background. It therefore has incomplete children + which triggers a debugging assertion. +*/ + +// Compiler-generated code (emulation) +typedef long kmp_intptr_t; +typedef int kmp_int32; + +typedef char bool; + +typedef struct ident { + kmp_int32 reserved_1; /**< might be used in Fortran; see above */ + kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */ + kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ +#if USE_ITT_BUILD + /* but currently used for storing region-specific ITT */ + /* contextual information. */ +#endif /* USE_ITT_BUILD */ + kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ + char const *psource; /**< String describing the source location. + The string is composed of semi-colon separated fields which describe the source file, + the function and a pair of line numbers that delimit the construct. + */ +} ident_t; + +typedef struct kmp_depend_info { + kmp_intptr_t base_addr; + size_t len; + struct { + bool in:1; + bool out:1; + } flags; +} kmp_depend_info_t; + +struct kmp_task; +typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, struct kmp_task * ); + +typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? 
*/ + void * shareds; /**< pointer to block of pointers to shared vars */ + kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ + kmp_int32 part_id; /**< part id for the task */ +} kmp_task_t; + +#ifdef __cplusplus +extern "C" { +#endif +kmp_int32 __kmpc_global_thread_num ( ident_t * ); +kmp_task_t* +__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, + size_t sizeof_kmp_task_t, size_t sizeof_shareds, + kmp_routine_entry_t task_entry ); +void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ); +kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, + kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ); +kmp_int32 +__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); +#ifdef __cplusplus +} +#endif + +void *target(void *task) +{ + my_sleep( 0.1 ); + __kmpc_proxy_task_completed_ooo((kmp_task_t*) task); + return NULL; +} + +pthread_t target_thread; + +// User's code +int task_entry(kmp_int32 gtid, kmp_task_t *task) +{ + pthread_create(&target_thread, NULL, &target, task); + return 0; +} + +int main() +{ + int dep; + +#pragma omp taskgroup +{ +/* + * Corresponds to: + #pragma omp target nowait depend(out: dep) + { + my_sleep( 0.1 ); + } +*/ + kmp_depend_info_t dep_info; + dep_info.base_addr = (long) &dep; + dep_info.len = sizeof(int); + // out = inout per spec and runtime expects this + dep_info.flags.in = 1; + dep_info.flags.out = 1; + + kmp_int32 gtid = __kmpc_global_thread_num(NULL); + kmp_task_t *proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry); + __kmpc_omp_task_with_deps(NULL,gtid,proxy_task,1,&dep_info,0,NULL); + + #pragma omp task depend(in: dep) + { +/* + * Corresponds to: + #pragma omp target nowait + { + my_sleep( 0.1 ); + } +*/ + kmp_task_t *nested_proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry); + 
__kmpc_omp_task(NULL,gtid,nested_proxy_task); + } +} + + // only check that it didn't crash + return 0; +} diff --git a/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c new file mode 100644 index 0000000..e6dd895 --- /dev/null +++ b/final/runtime/test/tasking/bug_proxy_task_dep_waiting.c @@ -0,0 +1,134 @@ +// RUN: %libomp-compile -lpthread && %libomp-run +// The runtime currently does not get dependency information from GCC. +// UNSUPPORTED: gcc + +#include <stdio.h> +#include <omp.h> +#include <pthread.h> +#include "omp_my_sleep.h" + +/* + An explicit task can have a dependency on a target task. If it is not + directly satisfied, the runtime should not wait but resume execution. +*/ + +// Compiler-generated code (emulation) +typedef long kmp_intptr_t; +typedef int kmp_int32; + +typedef char bool; + +typedef struct ident { + kmp_int32 reserved_1; /**< might be used in Fortran; see above */ + kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */ + kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ +#if USE_ITT_BUILD + /* but currently used for storing region-specific ITT */ + /* contextual information. */ +#endif /* USE_ITT_BUILD */ + kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ + char const *psource; /**< String describing the source location. + The string is composed of semi-colon separated fields which describe the source file, + the function and a pair of line numbers that delimit the construct. + */ +} ident_t; + +typedef struct kmp_depend_info { + kmp_intptr_t base_addr; + size_t len; + struct { + bool in:1; + bool out:1; + } flags; +} kmp_depend_info_t; + +struct kmp_task; +typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, struct kmp_task * ); + +typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? 
*/ + void * shareds; /**< pointer to block of pointers to shared vars */ + kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ + kmp_int32 part_id; /**< part id for the task */ +} kmp_task_t; + +#ifdef __cplusplus +extern "C" { +#endif +kmp_int32 __kmpc_global_thread_num ( ident_t * ); +kmp_task_t* +__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, + size_t sizeof_kmp_task_t, size_t sizeof_shareds, + kmp_routine_entry_t task_entry ); +void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ); +kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, + kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ); +kmp_int32 +__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); +#ifdef __cplusplus +} +#endif + +void *target(void *task) +{ + my_sleep( 0.1 ); + __kmpc_proxy_task_completed_ooo((kmp_task_t*) task); + return NULL; +} + +pthread_t target_thread; + +// User's code +int task_entry(kmp_int32 gtid, kmp_task_t *task) +{ + pthread_create(&target_thread, NULL, &target, task); + return 0; +} + +int main() +{ + int dep; + +/* + * Corresponds to: + #pragma omp target nowait depend(out: dep) + { + my_sleep( 0.1 ); + } +*/ + kmp_depend_info_t dep_info; + dep_info.base_addr = (long) &dep; + dep_info.len = sizeof(int); + // out = inout per spec and runtime expects this + dep_info.flags.in = 1; + dep_info.flags.out = 1; + + kmp_int32 gtid = __kmpc_global_thread_num(NULL); + kmp_task_t *proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry); + __kmpc_omp_task_with_deps(NULL,gtid,proxy_task,1,&dep_info,0,NULL); + + int first_task_finished = 0; + #pragma omp task shared(first_task_finished) depend(inout: dep) + { + first_task_finished = 1; + } + + int second_task_finished = 0; + #pragma omp task shared(second_task_finished) depend(in: dep) + { + second_task_finished = 1; + } + + // 
check that execution has been resumed and the runtime has not waited + // for the dependencies to be satisfied. + int error = (first_task_finished == 1); + error += (second_task_finished == 1); + + #pragma omp taskwait + + // by now all tasks should have finished + error += (first_task_finished != 1); + error += (second_task_finished != 1); + + return error; +} diff --git a/final/runtime/test/tasking/bug_serial_taskgroup.c b/final/runtime/test/tasking/bug_serial_taskgroup.c new file mode 100644 index 0000000..850bc90 --- /dev/null +++ b/final/runtime/test/tasking/bug_serial_taskgroup.c @@ -0,0 +1,16 @@ +// RUN: %libomp-compile-and-run + +/* + GCC failed this test because __kmp_get_gtid() instead of __kmp_entry_gtid() + was called in xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void). + __kmp_entry_gtid() will initialize the runtime if not yet done which does not + happen with __kmp_get_gtid(). + */ + +int main() +{ + #pragma omp taskgroup + { } + + return 0; +} diff --git a/final/runtime/test/tasking/kmp_task_reduction_nest.cpp b/final/runtime/test/tasking/kmp_task_reduction_nest.cpp new file mode 100644 index 0000000..63dffe4 --- /dev/null +++ b/final/runtime/test/tasking/kmp_task_reduction_nest.cpp @@ -0,0 +1,376 @@ +// RUN: %libomp-cxx-compile-and-run +// RUN: %libomp-cxx-compile -DFLG=1 && %libomp-run +// GCC-5 is needed for OpenMP 4.0 support (taskgroup) +// XFAIL: gcc-4 +#include <cstdio> +#include <cmath> +#include <cassert> +#include <omp.h> + +// Total number of loop iterations, should be multiple of T for this test +#define N 10000 + +// Flag to request lazy (1) or eager (0) allocation of reduction objects +#ifndef FLG +#define FLG 0 +#endif + +/* + // initial user's code that corresponds to pseudo code of the test + #pragma omp taskgroup task_reduction(+:i,j) task_reduction(*:x) + { + for( int l = 0; l < N; ++l ) { + #pragma omp task firstprivate(l) in_reduction(+:i) in_reduction(*:x) + { + i += l; + if( l%2 ) + x *= 1.0 / (l + 1); + else + x *= (l + 1); + 
} + } + + #pragma omp taskgroup task_reduction(-:i,k) task_reduction(+:y) + { + for( int l = 0; l < N; ++l ) { + #pragma omp task firstprivate(l) in_reduction(+:j,y) \ + in_reduction(*:x) in_reduction(-:k) + { + j += l; + k -= l; + y += (double)l; + if( l%2 ) + x *= 1.0 / (l + 1); + else + x *= (l + 1); + } + #pragma omp task firstprivate(l) in_reduction(+:y) in_reduction(-:i,k) + { + i -= l; + k -= l; + y += (double)l; + } + #pragma omp task firstprivate(l) in_reduction(+:j) in_reduction(*:x) + { + j += l; + if( l%2 ) + x *= 1.0 / (l + 1); + else + x *= (l + 1); + } + } + } // inner reduction + + for( int l = 0; l < N; ++l ) { + #pragma omp task firstprivate(l) in_reduction(+:j) + j += l; + } + } // outer reduction +*/ + +//------------------------------------------------ +// OpenMP runtime library routines +#ifdef __cplusplus +extern "C" { +#endif +extern void* __kmpc_task_reduction_get_th_data(int gtid, void* tg, void* item); +extern void* __kmpc_task_reduction_init(int gtid, int num, void* data); +extern int __kmpc_global_thread_num(void*); +#ifdef __cplusplus +} +#endif + +//------------------------------------------------ +// Compiler-generated code + +typedef struct _task_red_item { + void *shar; // shared reduction item + size_t size; // size of data item + void *f_init; // data initialization routine + void *f_fini; // data finalization routine + void *f_comb; // data combiner routine + unsigned flags; +} _task_red_item_t; + +// int:+ no need in init/fini callbacks, valid for subtraction +void __red_int_add_comb(void *lhs, void *rhs) // combiner +{ *(int*)lhs += *(int*)rhs; } + +// long long:+ no need in init/fini callbacks, valid for subtraction +void __red_llong_add_comb(void *lhs, void *rhs) // combiner +{ *(long long*)lhs += *(long long*)rhs; } + +// double:* no need in fini callback +void __red_dbl_mul_init(void *data) // initializer +{ *(double*)data = 1.0; } +void __red_dbl_mul_comb(void *lhs, void *rhs) // combiner +{ *(double*)lhs *= 
*(double*)rhs; } + +// double:+ no need in init/fini callbacks +void __red_dbl_add_comb(void *lhs, void *rhs) // combiner +{ *(double*)lhs += *(double*)rhs; } + +// ============================== + +void calc_serial(int *pi, long long *pj, double *px, long long *pk, double *py) +{ + for( int l = 0; l < N; ++l ) { + *pi += l; + if( l%2 ) + *px *= 1.0 / (l + 1); + else + *px *= (l + 1); + } + for( int l = 0; l < N; ++l ) { + *pj += l; + *pk -= l; + *py += (double)l; + if( l%2 ) + *px *= 1.0 / (l + 1); + else + *px *= (l + 1); + + *pi -= l; + *pk -= l; + *py += (double)l; + + *pj += l; + if( l%2 ) + *px *= 1.0 / (l + 1); + else + *px *= (l + 1); + } + for( int l = 0; l < N; ++l ) { + *pj += l; + } +} + +//------------------------------------------------ +// Test case +int main() +{ + int nthreads = omp_get_max_threads(); + int err = 0; + void** ptrs = (void**)malloc(nthreads*sizeof(void*)); + + // user's code ====================================== + // variables for serial calculations: + int is = 3; + long long js = -9999999; + double xs = 99999.0; + long long ks = 99999999; + double ys = -99999999.0; + // variables for parallel calculations: + int ip = 3; + long long jp = -9999999; + double xp = 99999.0; + long long kp = 99999999; + double yp = -99999999.0; + + calc_serial(&is, &js, &xs, &ks, &ys); + // ================================================== + for (int i = 0; i < nthreads; ++i) + ptrs[i] = NULL; + #pragma omp parallel + { + #pragma omp single nowait + { + // outer taskgroup reduces (i,j,x) + #pragma omp taskgroup // task_reduction(+:i,j) task_reduction(*:x) + { + _task_red_item_t red_data[3]; + red_data[0].shar = &ip; + red_data[0].size = sizeof(ip); + red_data[0].f_init = NULL; // RTL will zero thread-specific objects + red_data[0].f_fini = NULL; // no destructors needed + red_data[0].f_comb = (void*)&__red_int_add_comb; + red_data[0].flags = FLG; + red_data[1].shar = &jp; + red_data[1].size = sizeof(jp); + red_data[1].f_init = NULL; // RTL will zero 
thread-specific objects + red_data[1].f_fini = NULL; // no destructors needed + red_data[1].f_comb = (void*)&__red_llong_add_comb; + red_data[1].flags = FLG; + red_data[2].shar = &xp; + red_data[2].size = sizeof(xp); + red_data[2].f_init = (void*)&__red_dbl_mul_init; + red_data[2].f_fini = NULL; // no destructors needed + red_data[2].f_comb = (void*)&__red_dbl_mul_comb; + red_data[2].flags = FLG; + int gtid = __kmpc_global_thread_num(NULL); + void* tg1 = __kmpc_task_reduction_init(gtid, 3, red_data); + + for( int l = 0; l < N; l += 2 ) { + // 2 iterations per task to get correct x value; actually any even + // number of iters per task will work, otherwise x looses precision + #pragma omp task firstprivate(l) //in_reduction(+:i) in_reduction(*:x) + { + int gtid = __kmpc_global_thread_num(NULL); + int *p_ip = (int*)__kmpc_task_reduction_get_th_data(gtid, tg1, &ip); + double *p_xp = (double*)__kmpc_task_reduction_get_th_data( + gtid, tg1, &xp); + if (!ptrs[gtid]) ptrs[gtid] = p_xp; + + // user's pseudo-code ============================== + *p_ip += l; + *p_xp *= (l + 1); + + *p_ip += l + 1; + *p_xp *= 1.0 / (l + 2); + // ================================================== + } + } + // inner taskgroup reduces (i,k,y), i is same object as in outer one + #pragma omp taskgroup // task_reduction(-:i,k) task_reduction(+:y) + { + _task_red_item_t red_data[3]; + red_data[0].shar = &ip; + red_data[0].size = sizeof(ip); + red_data[0].f_init = NULL; // RTL will zero thread-specific objects + red_data[0].f_fini = NULL; // no destructors needed + red_data[0].f_comb = (void*)&__red_int_add_comb; + red_data[0].flags = FLG; + red_data[1].shar = &kp; + red_data[1].size = sizeof(kp); + red_data[1].f_init = NULL; // RTL will zero thread-specific objects + red_data[1].f_fini = NULL; // no destructors needed + red_data[1].f_comb = (void*)&__red_llong_add_comb; // same for + and - + red_data[1].flags = FLG; + red_data[2].shar = &yp; + red_data[2].size = sizeof(yp); + red_data[2].f_init = 
NULL; // RTL will zero thread-specific objects + red_data[2].f_fini = NULL; // no destructors needed + red_data[2].f_comb = (void*)&__red_dbl_add_comb; + red_data[2].flags = FLG; + int gtid = __kmpc_global_thread_num(NULL); + void* tg2 = __kmpc_task_reduction_init(gtid, 3, red_data); + + for( int l = 0; l < N; l += 2 ) { + #pragma omp task firstprivate(l) + // in_reduction(+:j,y) in_reduction(*:x) in_reduction(-:k) + { + int gtid = __kmpc_global_thread_num(NULL); + long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data( + gtid, tg1, &jp); + long long *p_kp = (long long*)__kmpc_task_reduction_get_th_data( + gtid, tg2, &kp); + double *p_xp = (double*)__kmpc_task_reduction_get_th_data( + gtid, tg1, &xp); + double *p_yp = (double*)__kmpc_task_reduction_get_th_data( + gtid, tg2, &yp); + // user's pseudo-code ============================== + *p_jp += l; + *p_kp -= l; + *p_yp += (double)l; + *p_xp *= (l + 1); + + *p_jp += l + 1; + *p_kp -= l + 1; + *p_yp += (double)(l + 1); + *p_xp *= 1.0 / (l + 2); + // ================================================= +{ + // the following code is here just to check __kmpc_task_reduction_get_th_data: + int tid = omp_get_thread_num(); + void *addr1; + void *addr2; + addr1 = __kmpc_task_reduction_get_th_data(gtid, tg1, &xp); // from shared + addr2 = __kmpc_task_reduction_get_th_data(gtid, tg1, addr1); // from private + if (addr1 != addr2) { + #pragma omp atomic + ++err; + printf("Wrong thread-specific addresses %d s:%p p:%p\n", tid, addr1, addr2); + } + // from neighbour w/o taskgroup (should start lookup from current tg2) + if (tid > 0) { + if (ptrs[tid-1]) { + addr2 = __kmpc_task_reduction_get_th_data(gtid, NULL, ptrs[tid-1]); + if (addr1 != addr2) { + #pragma omp atomic + ++err; + printf("Wrong thread-specific addresses %d s:%p n:%p\n", + tid, addr1, addr2); + } + } + } else { + if (ptrs[nthreads-1]) { + addr2 = __kmpc_task_reduction_get_th_data(gtid, NULL, ptrs[nthreads-1]); + if (addr1 != addr2) { + #pragma omp atomic + 
++err; + printf("Wrong thread-specific addresses %d s:%p n:%p\n", + tid, addr1, addr2); + } + } + } + // ---------------------------------------------- +} + } + #pragma omp task firstprivate(l) + // in_reduction(+:y) in_reduction(-:i,k) + { + int gtid = __kmpc_global_thread_num(NULL); + int *p_ip = (int*)__kmpc_task_reduction_get_th_data( + gtid, tg2, &ip); + long long *p_kp = (long long*)__kmpc_task_reduction_get_th_data( + gtid, tg2, &kp); + double *p_yp = (double*)__kmpc_task_reduction_get_th_data( + gtid, tg2, &yp); + + // user's pseudo-code ============================== + *p_ip -= l; + *p_kp -= l; + *p_yp += (double)l; + + *p_ip -= l + 1; + *p_kp -= l + 1; + *p_yp += (double)(l + 1); + // ================================================= + } + #pragma omp task firstprivate(l) + // in_reduction(+:j) in_reduction(*:x) + { + int gtid = __kmpc_global_thread_num(NULL); + long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data( + gtid, tg1, &jp); + double *p_xp = (double*)__kmpc_task_reduction_get_th_data( + gtid, tg1, &xp); + // user's pseudo-code ============================== + *p_jp += l; + *p_xp *= (l + 1); + + *p_jp += l + 1; + *p_xp *= 1.0 / (l + 2); + // ================================================= + } + } + } // inner reduction + + for( int l = 0; l < N; l += 2 ) { + #pragma omp task firstprivate(l) // in_reduction(+:j) + { + int gtid = __kmpc_global_thread_num(NULL); + long long *p_jp = (long long*)__kmpc_task_reduction_get_th_data( + gtid, tg1, &jp); + // user's pseudo-code ============================== + *p_jp += l; + *p_jp += l + 1; + // ================================================= + } + } + } // outer reduction + } // end single + } // end parallel + // check results +#if _DEBUG + printf("reduction flags = %u\n", FLG); +#endif + if (ip == is && jp == js && ks == kp && + fabs(xp - xs) < 0.01 && fabs(yp - ys) < 0.01) + printf("passed\n"); + else + printf("failed,\n ser:(%d %lld %f %lld %f)\n par:(%d %lld %f %lld %f)\n", + is, js, xs, 
ks, ys, + ip, jp, xp, kp, yp); + return 0; +} diff --git a/final/runtime/test/tasking/kmp_taskloop.c b/final/runtime/test/tasking/kmp_taskloop.c new file mode 100644 index 0000000..4b13793 --- /dev/null +++ b/final/runtime/test/tasking/kmp_taskloop.c @@ -0,0 +1,159 @@ +// RUN: %libomp-compile-and-run +// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run +#include <stdio.h> +#include <omp.h> +#include "omp_my_sleep.h" + +#define N 4 +#define GRAIN 10 +#define STRIDE 3 + +// globals +int th_counter[N]; +int counter; + + +// Compiler-generated code (emulation) +typedef struct ident { + void* dummy; +} ident_t; + +typedef struct shar { + int(*pth_counter)[N]; + int *pcounter; + int *pj; +} *pshareds; + +typedef struct task { + pshareds shareds; + int(* routine)(int,struct task*); + int part_id; +// privates: + unsigned long long lb; // library always uses ULONG + unsigned long long ub; + int st; + int last; + int i; + int j; + int th; +} *ptask, kmp_task_t; + +typedef int(* task_entry_t)( int, ptask ); + +void +__task_dup_entry(ptask task_dst, ptask task_src, int lastpriv) +{ +// setup lastprivate flag + task_dst->last = lastpriv; +// could be constructor calls here... 
+} + + +// OpenMP RTL interfaces +typedef unsigned long long kmp_uint64; +typedef long long kmp_int64; + +#ifdef __cplusplus +extern "C" { +#endif +void +__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, + kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, + int nogroup, int sched, kmp_int64 grainsize, void *task_dup ); +ptask +__kmpc_omp_task_alloc( ident_t *loc, int gtid, int flags, + size_t sizeof_kmp_task_t, size_t sizeof_shareds, + task_entry_t task_entry ); +void __kmpc_atomic_fixed4_add(void *id_ref, int gtid, int * lhs, int rhs); +int __kmpc_global_thread_num(void *id_ref); +#ifdef __cplusplus +} +#endif + + +// User's code +int task_entry(int gtid, ptask task) +{ + pshareds pshar = task->shareds; + for( task->i = task->lb; task->i <= (int)task->ub; task->i += task->st ) { + task->th = omp_get_thread_num(); + __kmpc_atomic_fixed4_add(NULL,gtid,pshar->pcounter,1); + __kmpc_atomic_fixed4_add(NULL,gtid,&((*pshar->pth_counter)[task->th]),1); + task->j = task->i; + } + my_sleep( 0.1 ); // sleep 100 ms in order to allow other threads to steal tasks + if( task->last ) { + *(pshar->pj) = task->j; // lastprivate + } + return 0; +} + +int main() +{ + int i, j, gtid = __kmpc_global_thread_num(NULL); + ptask task; + pshareds psh; + omp_set_dynamic(0); + counter = 0; + for( i=0; i<N; ++i ) + th_counter[i] = 0; + #pragma omp parallel num_threads(N) + { + #pragma omp master + { + int gtid = __kmpc_global_thread_num(NULL); +/* + * This is what the OpenMP runtime calls correspond to: + #pragma omp taskloop num_tasks(N) lastprivate(j) + for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) + { + int th = omp_get_thread_num(); + #pragma omp atomic + counter++; + #pragma omp atomic + th_counter[th]++; + j = i; + } +*/ + task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct task),sizeof(struct shar),&task_entry); + psh = task->shareds; + psh->pth_counter = &th_counter; + psh->pcounter = &counter; + psh->pj = &j; + task->lb = 0; + task->ub = N*GRAIN*STRIDE-2; + task->st = 
STRIDE; + + __kmpc_taskloop( + NULL, // location + gtid, // gtid + task, // task structure + 1, // if clause value + &task->lb, // lower bound + &task->ub, // upper bound + STRIDE, // loop increment + 0, // 1 if nogroup specified + 2, // schedule type: 0-none, 1-grainsize, 2-num_tasks + N, // schedule value (ignored for type 0) + (void*)&__task_dup_entry // tasks duplication routine + ); + } // end master + } // end parallel +// check results + if( j != N*GRAIN*STRIDE-STRIDE ) { + printf("Error in lastprivate, %d != %d\n",j,N*GRAIN*STRIDE-STRIDE); + return 1; + } + if( counter != N*GRAIN ) { + printf("Error, counter %d != %d\n",counter,N*GRAIN); + return 1; + } + for( i=0; i<N; ++i ) { + if( th_counter[i] % GRAIN ) { + printf("Error, th_counter[%d] = %d\n",i,th_counter[i]); + return 1; + } + } + printf("passed\n"); + return 0; +} diff --git a/final/runtime/test/tasking/nested_parallel_tasking.c b/final/runtime/test/tasking/nested_parallel_tasking.c new file mode 100644 index 0000000..4374d6e --- /dev/null +++ b/final/runtime/test/tasking/nested_parallel_tasking.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <omp.h> + +/* + * This test would hang when level instead of active level + * used to push task state. 
+ */ + +int main() +{ + // If num_threads is changed to a value greater than 1, then the test passes + #pragma omp parallel num_threads(1) + { + #pragma omp parallel + printf("Hello World from thread %d\n", omp_get_thread_num()); + } + + printf("omp_num_threads: %d\n", omp_get_max_threads()); + + #pragma omp parallel + { + #pragma omp master + #pragma omp task default(none) + { + printf("%d is executing this task\n", omp_get_thread_num()); + } + } + + printf("pass\n"); + return 0; +} diff --git a/final/runtime/test/tasking/nested_task_creation.c b/final/runtime/test/tasking/nested_task_creation.c new file mode 100644 index 0000000..c7c25fc --- /dev/null +++ b/final/runtime/test/tasking/nested_task_creation.c @@ -0,0 +1,35 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <omp.h> +#include "omp_my_sleep.h" + +/* + * This test creates tasks that themselves create a new task. + * The runtime has to take care that they are correctly freed. + */ + +int main() +{ + #pragma omp task + { + #pragma omp task + { + my_sleep( 0.1 ); + } + } + + #pragma omp parallel num_threads(2) + { + #pragma omp single + #pragma omp task + { + #pragma omp task + { + my_sleep( 0.1 ); + } + } + } + + printf("pass\n"); + return 0; +} diff --git a/final/runtime/test/tasking/omp_task.c b/final/runtime/test/tasking/omp_task.c new file mode 100644 index 0000000..c534abe --- /dev/null +++ b/final/runtime/test/tasking/omp_task.c @@ -0,0 +1,52 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +int test_omp_task() +{ + int tids[NUM_TASKS]; + int i; + + #pragma omp parallel + { + #pragma omp single + { + for (i = 0; i < NUM_TASKS; i++) { + /* First we have to store the value of the loop index in a new variable + * which will be private for each task because otherwise it will be overwritten + * if the execution of the task takes longer than the time which is needed to + * enter the next step of the loop! 
+ */ + int myi; + myi = i; + #pragma omp task + { + my_sleep (SLEEPTIME); + tids[myi] = omp_get_thread_num(); + } /* end of omp task */ + } /* end of for */ + } /* end of single */ + } /*end of parallel */ + + /* Now we ckeck if more than one thread executed the tasks. */ + for (i = 1; i < NUM_TASKS; i++) { + if (tids[0] != tids[i]) + return 1; + } + return 0; +} /* end of check_parallel_for_private */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_task()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_task_final.c b/final/runtime/test/tasking/omp_task_final.c new file mode 100644 index 0000000..b531af6 --- /dev/null +++ b/final/runtime/test/tasking/omp_task_final.c @@ -0,0 +1,65 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +int test_omp_task_final() +{ + int tids[NUM_TASKS]; + int includedtids[NUM_TASKS]; + int i; + int error = 0; + #pragma omp parallel + { + #pragma omp single + { + for (i = 0; i < NUM_TASKS; i++) { + /* First we have to store the value of the loop index in a new variable + * which will be private for each task because otherwise it will be overwritten + * if the execution of the task takes longer than the time which is needed to + * enter the next step of the loop! + */ + int myi; + myi = i; + + #pragma omp task final(i>=10) + { + tids[myi] = omp_get_thread_num(); + /* we generate included tasks for final tasks */ + if(myi >= 10) { + int included = myi; + #pragma omp task + { + my_sleep (SLEEPTIME); + includedtids[included] = omp_get_thread_num(); + } /* end of omp included task of the final task */ + my_sleep (SLEEPTIME); + } /* end of if it is a final task*/ + } /* end of omp task */ + } /* end of for */ + } /* end of single */ + } /*end of parallel */ + + /* Now we ckeck if more than one thread executed the final task and its included task. 
*/ + for (i = 10; i < NUM_TASKS; i++) { + if (tids[i] != includedtids[i]) { + error++; + } + } + return (error==0); +} /* end of check_paralel_for_private */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_task_final()) { + num_failed++; + } + } + return num_failed; +} + diff --git a/final/runtime/test/tasking/omp_task_firstprivate.c b/final/runtime/test/tasking/omp_task_firstprivate.c new file mode 100644 index 0000000..d1f7c35 --- /dev/null +++ b/final/runtime/test/tasking/omp_task_firstprivate.c @@ -0,0 +1,51 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int test_omp_task_firstprivate() +{ + int i; + int sum = 1234; + int known_sum; + int result = 0; /* counts the wrong sums from tasks */ + + known_sum = 1234 + (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + + #pragma omp parallel + { + #pragma omp single + { + for (i = 0; i < NUM_TASKS; i++) { + #pragma omp task firstprivate(sum) + { + int j; + for (j = 0; j <= LOOPCOUNT; j++) { + #pragma omp flush + sum += j; + } + + /* check if calculated sum was right */ + if (sum != known_sum) { + #pragma omp critical + { result++; } + } + } /* omp task */ + } /* for loop */ + } /* omp single */ + } /* omp parallel */ + return (result == 0); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_task_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_task_if.c b/final/runtime/test/tasking/omp_task_if.c new file mode 100644 index 0000000..8b4728e --- /dev/null +++ b/final/runtime/test/tasking/omp_task_if.c @@ -0,0 +1,43 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +int test_omp_task_if() +{ + int condition_false; + int count; + int result; + + count=0; + condition_false = (count == 1); + #pragma omp parallel + { + #pragma omp single + 
{ + #pragma omp task if (condition_false) shared(count, result) + { + my_sleep (SLEEPTIME); + #pragma omp critical + result = (0 == count); + } /* end of omp task */ + #pragma omp critical + count = 1; + } /* end of single */ + } /*end of parallel */ + return result; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_task_if()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_task_imp_firstprivate.c b/final/runtime/test/tasking/omp_task_imp_firstprivate.c new file mode 100644 index 0000000..905ab9a --- /dev/null +++ b/final/runtime/test/tasking/omp_task_imp_firstprivate.c @@ -0,0 +1,47 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/* Utility function do spend some time in a loop */ +int test_omp_task_imp_firstprivate() +{ + int i=5; + int k = 0; + int result = 0; + int task_result = 1; + #pragma omp parallel firstprivate(i) + { + #pragma omp single + { + for (k = 0; k < NUM_TASKS; k++) { + #pragma omp task shared(result , task_result) + { + int j; + //check if i is private + if(i != 5) + task_result = 0; + for(j = 0; j < NUM_TASKS; j++) + i++; + //this should be firstprivate implicitly + } + } + #pragma omp taskwait + result = (task_result && i==5); + } + } + return result; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_task_imp_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_task_priority.c b/final/runtime/test/tasking/omp_task_priority.c new file mode 100644 index 0000000..7b62360 --- /dev/null +++ b/final/runtime/test/tasking/omp_task_priority.c @@ -0,0 +1,22 @@ +// RUN: %libomp-compile && env OMP_MAX_TASK_PRIORITY=42 %libomp-run +// Test OMP 4.5 task priorities +// Currently only API function and envirable parsing implemented. 
+// Test environment sets envirable: OMP_MAX_TASK_PRIORITY=42 as tested below. +#include <stdio.h> +#include <omp.h> + +int main (void) { + int passed; + + passed = (omp_get_max_task_priority() == 42); + printf("Got %d\n", omp_get_max_task_priority()); + + if (passed) { + printf("passed\n"); + return 0; + } + + printf("failed\n"); + return 1; +} + diff --git a/final/runtime/test/tasking/omp_task_private.c b/final/runtime/test/tasking/omp_task_private.c new file mode 100644 index 0000000..7a93716 --- /dev/null +++ b/final/runtime/test/tasking/omp_task_private.c @@ -0,0 +1,53 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/* Utility function do spend some time in a loop */ +int test_omp_task_private() +{ + int i; + int known_sum; + int sum = 0; + int result = 0; /* counts the wrong sums from tasks */ + + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + + #pragma omp parallel + { + #pragma omp single + { + for (i = 0; i < NUM_TASKS; i++) { + #pragma omp task private(sum) shared(result, known_sum) + { + int j; + //if sum is private, initialize to 0 + sum = 0; + for (j = 0; j <= LOOPCOUNT; j++) { + #pragma omp flush + sum += j; + } + /* check if calculated sum was right */ + if (sum != known_sum) { + #pragma omp critical + result++; + } + } /* end of omp task */ + } /* end of for */ + } /* end of single */ + } /* end of parallel*/ + return (result == 0); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_task_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_task_shared.c b/final/runtime/test/tasking/omp_task_shared.c new file mode 100644 index 0000000..0304026 --- /dev/null +++ b/final/runtime/test/tasking/omp_task_shared.c @@ -0,0 +1,41 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/* Utility function do spend some time in a loop */ +int 
test_omp_task_imp_shared() +{ + int i; + int k = 0; + int result = 0; + i=0; + + #pragma omp parallel + { + #pragma omp single + for (k = 0; k < NUM_TASKS; k++) { + #pragma omp task shared(i) + { + #pragma omp atomic + i++; + //this should be shared implicitly + } + } + } + result = i; + return ((result == NUM_TASKS)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_task_imp_shared()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_taskloop_grainsize.c b/final/runtime/test/tasking/omp_taskloop_grainsize.c new file mode 100644 index 0000000..0833073 --- /dev/null +++ b/final/runtime/test/tasking/omp_taskloop_grainsize.c @@ -0,0 +1,113 @@ +// RUN: %libomp-compile-and-run +// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run + +// These compilers don't support the taskloop construct +// UNSUPPORTED: gcc-4, gcc-5, icc-16 +// GCC 6 has support for taskloops, but at least 6.3.0 is crashing on this test +// UNSUPPORTED: gcc-6 + +/* + * Test for taskloop + * Method: caculate how many times the iteration space is dispatched + * and judge if each dispatch has the requested grainsize + * It is possible for two adjacent chunks are executed by the same thread + */ +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +#define CFDMAX_SIZE 1120 + +int test_omp_taskloop_grainsize() +{ + int result = 0; + int i, grainsize, count, tmp_count, num_off; + int *tmp, *tids, *tidsArray; + + tidsArray = (int *)malloc(sizeof(int) * CFDMAX_SIZE); + tids = tidsArray; + + for (grainsize = 1; grainsize < 48; ++grainsize) { + fprintf(stderr, "Grainsize %d\n", grainsize); + count = tmp_count = num_off = 0; + + for (i = 0; i < CFDMAX_SIZE; ++i) { + tids[i] = -1; + } + + #pragma omp parallel shared(tids) + { + #pragma omp master + #pragma omp taskloop grainsize(grainsize) + for (i = 0; i < CFDMAX_SIZE; i++) { + tids[i] = omp_get_thread_num(); + } + } 
+ + for (i = 0; i < CFDMAX_SIZE; ++i) { + if (tids[i] == -1) { + fprintf(stderr, " Iteration %d not touched!\n", i); + result++; + } + } + + for (i = 0; i < CFDMAX_SIZE - 1; ++i) { + if (tids[i] != tids[i + 1]) { + count++; + } + } + + tmp = (int *)malloc(sizeof(int) * (count + 1)); + tmp[0] = 1; + + for (i = 0; i < CFDMAX_SIZE - 1; ++i) { + if (tmp_count > count) { + printf("--------------------\nTestinternal Error: List too " + "small!!!\n--------------------\n"); + break; + } + if (tids[i] != tids[i + 1]) { + tmp_count++; + tmp[tmp_count] = 1; + } else { + tmp[tmp_count]++; + } + } + + // is grainsize statement working? + int num_tasks = CFDMAX_SIZE / grainsize; + int multiple1 = CFDMAX_SIZE / num_tasks; + int multiple2 = CFDMAX_SIZE / num_tasks + 1; + for (i = 0; i < count; i++) { + // it is possible for 2 adjacent chunks assigned to a same thread + if (tmp[i] % multiple1 != 0 && tmp[i] % multiple2 != 0) { + num_off++; + } + } + + if (num_off > 1) { + fprintf(stderr, " The number of bad chunks is %d\n", num_off); + result++; + } else { + fprintf(stderr, " Everything ok\n"); + } + + free(tmp); + } + free(tidsArray); + return (result==0); +} + +int main() +{ + int i; + int num_failed=0; + + for (i = 0; i < REPETITIONS; i++) { + if (!test_omp_taskloop_grainsize()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_taskloop_num_tasks.c b/final/runtime/test/tasking/omp_taskloop_num_tasks.c new file mode 100644 index 0000000..7c3c704 --- /dev/null +++ b/final/runtime/test/tasking/omp_taskloop_num_tasks.c @@ -0,0 +1,71 @@ +// RUN: %libomp-compile-and-run +// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run + +// These compilers don't support the taskloop construct +// UNSUPPORTED: gcc-4, gcc-5, icc-16 + +/* + * Test for taskloop + * Method: caculate how many times the iteration space is dispatched + * and judge if each dispatch has the requested grainsize + * It is possible for two adjacent chunks are executed 
by the same thread + */ +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +#define CFDMAX_SIZE 1120 + +int test_omp_taskloop_num_tasks() +{ + int i; + int *tids; + int *tidsArray; + int count; + int result = 0; + int num_tasks; + + for (num_tasks = 1; num_tasks < 120; ++num_tasks) { + count = 0; + tidsArray = (int *)malloc(sizeof(int) * CFDMAX_SIZE); + tids = tidsArray; + + #pragma omp parallel shared(tids) + { + int i; + #pragma omp master + #pragma omp taskloop num_tasks(num_tasks) + for (i = 0; i < CFDMAX_SIZE; i++) { + tids[i] = omp_get_thread_num(); + } + } + + for (i = 0; i < CFDMAX_SIZE - 1; ++i) { + if (tids[i] != tids[i + 1]) { + count++; + } + } + + if (count > num_tasks) { + fprintf(stderr, "counted too many tasks: (wanted %d, got %d)\n", + num_tasks, count); + result++; + } + } + + return (result==0); +} + +int main() +{ + int i; + int num_failed=0; + + for (i = 0; i < REPETITIONS; i++) { + if (!test_omp_taskloop_num_tasks()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_taskwait.c b/final/runtime/test/tasking/omp_taskwait.c new file mode 100644 index 0000000..c3a0ea7 --- /dev/null +++ b/final/runtime/test/tasking/omp_taskwait.c @@ -0,0 +1,74 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +int test_omp_taskwait() +{ + int result1 = 0; /* Stores number of not finished tasks after the taskwait */ + int result2 = 0; /* Stores number of wrong array elements at the end */ + int array[NUM_TASKS]; + int i; + + /* fill array */ + for (i = 0; i < NUM_TASKS; i++) + array[i] = 0; + + #pragma omp parallel + { + #pragma omp single + { + for (i = 0; i < NUM_TASKS; i++) { + /* First we have to store the value of the loop index in a new variable + * which will be private for each task because otherwise it will be overwritten + * if the execution of the task takes longer than the time which is needed 
to + * enter the next step of the loop! + */ + int myi; + myi = i; + #pragma omp task + { + my_sleep (SLEEPTIME); + array[myi] = 1; + } /* end of omp task */ + } /* end of for */ + #pragma omp taskwait + /* check if all tasks were finished */ + for (i = 0; i < NUM_TASKS; i++) + if (array[i] != 1) + result1++; + + /* generate some more tasks which now shall overwrite + * the values in the tids array */ + for (i = 0; i < NUM_TASKS; i++) { + int myi; + myi = i; + #pragma omp task + { + array[myi] = 2; + } /* end of omp task */ + } /* end of for */ + } /* end of single */ + } /*end of parallel */ + + /* final check, if all array elements contain the right values: */ + for (i = 0; i < NUM_TASKS; i++) { + if (array[i] != 2) + result2++; + } + return ((result1 == 0) && (result2 == 0)); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_taskwait()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/tasking/omp_taskyield.c b/final/runtime/test/tasking/omp_taskyield.c new file mode 100644 index 0000000..5bb6984 --- /dev/null +++ b/final/runtime/test/tasking/omp_taskyield.c @@ -0,0 +1,58 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +int test_omp_taskyield() +{ + int i; + int count = 0; + int start_tid[NUM_TASKS]; + int current_tid[NUM_TASKS]; + + for (i=0; i< NUM_TASKS; i++) { + start_tid[i]=0; + current_tid[i]=0; + } + + #pragma omp parallel + { + #pragma omp single + { + for (i = 0; i < NUM_TASKS; i++) { + int myi = i; + #pragma omp task untied + { + my_sleep(SLEEPTIME); + start_tid[myi] = omp_get_thread_num(); + #pragma omp taskyield + if((start_tid[myi] %2) ==0){ + my_sleep(SLEEPTIME); + current_tid[myi] = omp_get_thread_num(); + } /*end of if*/ + } /* end of omp task */ + } /* end of for */ + } /* end of single */ + } /* end of parallel */ + for (i=0;i<NUM_TASKS; i++) { + //printf("start_tid[%d]=%d, 
current_tid[%d]=%d\n", + //i, start_tid[i], i , current_tid[i]); + if (current_tid[i] == start_tid[i]) + count++; + } + return (count<NUM_TASKS); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_taskyield()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/threadprivate/omp_threadprivate.c b/final/runtime/test/threadprivate/omp_threadprivate.c new file mode 100644 index 0000000..a3dd80d --- /dev/null +++ b/final/runtime/test/threadprivate/omp_threadprivate.c @@ -0,0 +1,102 @@ +// RUN: %libomp-compile-and-run +/* + * Threadprivate is tested in 2 ways: + * 1. The global variable declared as threadprivate should have + * local copy for each thread. Otherwise race condition and + * wrong result. + * 2. If the value of local copy is retained for the two adjacent + * parallel regions + */ +#include "omp_testsuite.h" +#include <stdlib.h> +#include <stdio.h> + +static int sum0=0; +static int myvalue = 0; + +#pragma omp threadprivate(sum0) +#pragma omp threadprivate(myvalue) + +int test_omp_threadprivate() +{ + int sum = 0; + int known_sum; + int i; + int iter; + int *data; + int size; + int num_failed = 0; + int my_random; + omp_set_dynamic(0); + + #pragma omp parallel private(i) + { + sum0 = 0; + #pragma omp for + for (i = 1; i <= LOOPCOUNT; i++) { + sum0 = sum0 + i; + } /*end of for*/ + #pragma omp critical + { + sum = sum + sum0; + } /*end of critical */ + } /* end of parallel */ + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + if (known_sum != sum ) { + fprintf (stderr, " known_sum = %d, sum = %d\n", known_sum, sum); + } + + /* the next parallel region is just used to get the number of threads*/ + omp_set_dynamic(0); + #pragma omp parallel + { + #pragma omp master + { + size=omp_get_num_threads(); + data=(int*) malloc(size*sizeof(int)); + } + }/* end parallel*/ + + srand(45); + for (iter = 0; iter < 100; iter++) { + my_random = rand(); /* random number generator is + called inside 
serial region*/ + + /* the first parallel region is used to initialiye myvalue + and the array with my_random+rank */ + #pragma omp parallel + { + int rank; + rank = omp_get_thread_num (); + myvalue = data[rank] = my_random + rank; + } + + /* the second parallel region verifies that the + value of "myvalue" is retained */ + #pragma omp parallel reduction(+:num_failed) + { + int rank; + rank = omp_get_thread_num (); + num_failed = num_failed + (myvalue != data[rank]); + if(myvalue != data[rank]) { + fprintf (stderr, " myvalue = %d, data[rank]= %d\n", + myvalue, data[rank]); + } + } + } + free (data); + return (known_sum == sum) && !num_failed; +} /* end of check_threadprivate*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_threadprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/threadprivate/omp_threadprivate_for.c b/final/runtime/test/threadprivate/omp_threadprivate_for.c new file mode 100644 index 0000000..3342e63 --- /dev/null +++ b/final/runtime/test/threadprivate/omp_threadprivate_for.c @@ -0,0 +1,48 @@ +// RUN: %libomp-compile-and-run +#include "omp_testsuite.h" +#include <stdlib.h> +#include <stdio.h> + +static int i; +#pragma omp threadprivate(i) + +int test_omp_threadprivate_for() +{ + int known_sum; + int sum; + + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + sum = 0; + + #pragma omp parallel + { + int sum0 = 0, i0; + #pragma omp for + for (i0 = 1; i0 <= LOOPCOUNT; i0++) { + i = i0; + sum0 = sum0 + i; + } + #pragma omp critical + { + sum = sum + sum0; + } + } /* end of parallel */ + + if (known_sum != sum ) { + fprintf(stderr, " known_sum = %d, sum = %d\n", known_sum, sum); + } + return (known_sum == sum); +} /* end of check_threadprivate*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_threadprivate_for()) { + num_failed++; + } + } + return num_failed; +} diff --git 
a/final/runtime/test/worksharing/for/bug_set_schedule_0.c b/final/runtime/test/worksharing/for/bug_set_schedule_0.c new file mode 100644 index 0000000..889e239 --- /dev/null +++ b/final/runtime/test/worksharing/for/bug_set_schedule_0.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <omp.h> +#include "omp_testsuite.h" + +/* Test that the chunk size is set to default (1) when + chunk size <= 0 is specified */ +int a = 0; + +int test_set_schedule_0() +{ + int i; + a = 0; + omp_set_schedule(omp_sched_dynamic,0); + + #pragma omp parallel + { + #pragma omp for schedule(runtime) + for(i = 0; i < 10; i++) { + #pragma omp atomic + a++; + if(a > 10) + exit(1); + } + } + return a==10; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_set_schedule_0()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/kmp_doacross_check.c b/final/runtime/test/worksharing/for/kmp_doacross_check.c new file mode 100644 index 0000000..59b61e3 --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_doacross_check.c @@ -0,0 +1,62 @@ +// RUN: %libomp-compile-and-run +// UNSUPPORTED: gcc +// This test is incompatible with gcc because of the explicit call to +// __kmpc_doacross_fini(). gcc relies on an implicit call to this function +// when the last iteration is executed inside the GOMP_loop_*_next() functions. +// Hence, in gcc, having the explicit call leads to __kmpc_doacross_fini() +// being called twice. 
+#include <stdio.h> + +#define N 1000 + +struct dim { + long long lo; // lower + long long up; // upper + long long st; // stride +}; +extern void __kmpc_doacross_init(void*, int, int, struct dim *); +extern void __kmpc_doacross_wait(void*, int, long long*); +extern void __kmpc_doacross_post(void*, int, long long*); +extern void __kmpc_doacross_fini(void*, int); +extern int __kmpc_global_thread_num(void*); + +int main() +{ + int i; + int iter[N]; + struct dim dims; + for( i = 0; i < N; ++i ) + iter[i] = 1; + dims.lo = 1; + dims.up = N-1; + dims.st = 1; + #pragma omp parallel num_threads(4) + { + int i, gtid; + long long vec; + gtid = __kmpc_global_thread_num(NULL); + __kmpc_doacross_init(NULL,gtid,1,&dims); // thread starts the loop + #pragma omp for nowait schedule(dynamic) + for( i = 1; i < N; ++i ) + { + // runtime call corresponding to #pragma omp ordered depend(sink:i-1) + vec=i-1; + __kmpc_doacross_wait(NULL,gtid,&vec); + // user's code + iter[i] = iter[i-1] + 1; + // runtime call corresponding to #pragma omp ordered depend(source) + vec=i; + __kmpc_doacross_post(NULL,gtid,&vec); + } + // thread finishes the loop (should be before the loop barrier) + __kmpc_doacross_fini(NULL,gtid); + } + if( iter[N-1] == N ) { + printf("passed\n"); + } else { + printf("failed %d != %d\n", iter[N-1], N); + return 1; + } + return 0; +} + diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c new file mode 100644 index 0000000..5c6f94b --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_sch_simd_guided.c @@ -0,0 +1,410 @@ +// RUN: %libomp-compile-and-run +/* + Test for the 'schedule(simd:guided)' clause. + Compiler needs to generate a dynamic dispatching and pass the schedule + value 46 to the OpenMP RTL. Test uses numerous loop parameter combinations. 
+*/ +#include <stdio.h> +#include <omp.h> + +#if defined(WIN32) || defined(_WIN32) +#include <windows.h> +#define delay() Sleep(1); +#else +#include <unistd.h> +#define delay() usleep(10); +#endif + +// uncomment for debug diagnostics: +//#define DEBUG + +#define SIMD_LEN 4 + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL +enum sched { + kmp_sch_static_balanced_chunked = 45, + kmp_sch_guided_simd = 46, + kmp_sch_runtime_simd = 47, +}; +typedef unsigned u32; +typedef long long i64; +typedef unsigned long long u64; +typedef struct { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +extern int __kmpc_global_thread_num(id*); +extern void __kmpc_barrier(id*, int gtid); +extern void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); +extern void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); +extern int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); +extern int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; + +// --------------------------------------------------------------------------- +int run_loop_64(i64 loop_lb, i64 loop_ub, i64 loop_st, int loop_chunk) { + int err = 0; + static int volatile loop_sync = 0; + i64 lb; // Chunk lower bound + i64 ub; // Chunk upper bound + i64 st; // Chunk stride + int rc; + int tid = omp_get_thread_num(); + int gtid = tid; + int last; +#if DEBUG + printf("run_loop_<%d>(lb=%d, ub=%d, st=%d, ch=%d)\n", + (int)sizeof(i64), gtid, tid, + (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen + if (loop_st == 0) + return 0; + if (loop_st > 0 ? 
loop_lb > loop_ub : loop_lb < loop_ub) + return 0; + + __kmpc_dispatch_init_8(&loc, gtid, kmp_sch_guided_simd, + loop_lb, loop_ub, loop_st, loop_chunk); + if (tid == 0) { + // Let the master thread handle the chunks alone + int chunk; // No of current chunk + i64 next_lb; // Lower bound of the next chunk + i64 last_ub; // Upper bound of the last processed chunk + u64 cur; // Number of interations in current chunk + u64 max; // Max allowed iterations for current chunk + int undersized = 0; + + chunk = 0; + next_lb = loop_lb; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations + while (__kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if DEBUG + printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub); +#endif + // Check if previous chunk (it is not the final chunk) is undersized + if (undersized) { + printf("Error with chunk %d\n", chunk); + err++; + } + // Check lower and upper bounds + if (lb != next_lb) { + printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk); + err++; + } + if (loop_st > 0) { + if (!(ub <= loop_ub)) { + printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk); + err++; + } + if (!(lb <= ub)) { + printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); + err++; + } + } else { + if (!(ub >= loop_ub)) { + printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk); + err++; + } + if (!(lb >= ub)) { + printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); + err++; + } + }; // if + // Stride should not change + if (!(st == loop_st)) { + printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk); + err++; + } + cur = (ub - lb) / loop_st + 1; + // Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum + if (!(cur <= max + 1)) { + printf("Error with iter %d, %d\n", cur, max); + err++; + } + // Update maximum for the next chunk + if (cur < max) + max = cur; + next_lb = 
ub + loop_st; + last_ub = ub; + undersized = (cur < loop_chunk); + }; // while + // Must have at least one chunk + if (!(chunk > 0)) { + printf("Error with chunk %d\n", chunk); + err++; + } + // Must have the right last iteration index + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) { + printf("Error with last1 %d, %d, ch %d\n", + (int)last_ub, (int)loop_ub, chunk); + err++; + } + if (!(last_ub + loop_st > loop_ub)) { + printf("Error with last2 %d, %d, %d, ch %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk); + err++; + } + } else { + if (!(last_ub >= loop_ub)) { + printf("Error with last1 %d, %d, ch %d\n", + (int)last_ub, (int)loop_ub, chunk); + err++; + } + if (!(last_ub + loop_st < loop_ub)) { + printf("Error with last2 %d, %d, %d, ch %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk); + err++; + } + }; // if + // Let non-master threads go + loop_sync = 1; + } else { + int i; + // Workers wait for master thread to finish, then call __kmpc_dispatch_next + for (i = 0; i < 1000000; ++ i) { + if (loop_sync != 0) { + break; + }; // if + }; // for i + while (loop_sync == 0) { + delay(); + }; // while + // At this moment we do not have any more chunks -- all the chunks already + // processed by master thread + rc = __kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st); + if (rc) { + printf("Error return value\n"); + err++; + } + }; // if + + __kmpc_barrier(&loc, gtid); + if (tid == 0) { + loop_sync = 0; // Restore original state +#if DEBUG + printf("run_loop_64(): at the end\n"); +#endif + }; // if + __kmpc_barrier(&loc, gtid); + return err; +} // run_loop + +// --------------------------------------------------------------------------- +int run_loop_32(int loop_lb, int loop_ub, int loop_st, int loop_chunk) { + int err = 0; + static int volatile loop_sync = 0; + int lb; // Chunk lower bound + int ub; // Chunk upper bound + int st; // Chunk stride + int rc; + int tid = omp_get_thread_num(); + int gtid = tid; + int last; +#if DEBUG + 
printf("run_loop_<%d>(lb=%d, ub=%d, st=%d, ch=%d)\n", + (int)sizeof(int), gtid, tid, + (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen + if (loop_st == 0) + return 0; + if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) + return 0; + + __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_guided_simd, + loop_lb, loop_ub, loop_st, loop_chunk); + if (tid == 0) { + // Let the master thread handle the chunks alone + int chunk; // No of current chunk + int next_lb; // Lower bound of the next chunk + int last_ub; // Upper bound of the last processed chunk + u64 cur; // Number of interations in current chunk + u64 max; // Max allowed iterations for current chunk + int undersized = 0; + + chunk = 0; + next_lb = loop_lb; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations + while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if DEBUG + printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub); +#endif + // Check if previous chunk (it is not the final chunk) is undersized + if (undersized) { + printf("Error with chunk %d\n", chunk); + err++; + } + // Check lower and upper bounds + if (lb != next_lb) { + printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk); + err++; + } + if (loop_st > 0) { + if (!(ub <= loop_ub)) { + printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk); + err++; + } + if (!(lb <= ub)) { + printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); + err++; + } + } else { + if (!(ub >= loop_ub)) { + printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk); + err++; + } + if (!(lb >= ub)) { + printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); + err++; + } + }; // if + // Stride should not change + if (!(st == loop_st)) { + printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk); + err++; + } + cur = (ub - lb) / loop_st + 1; + 
// Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum + if (!(cur <= max + 1)) { + printf("Error with iter %d, %d\n", cur, max); + err++; + } + // Update maximum for the next chunk + if (cur < max) + max = cur; + next_lb = ub + loop_st; + last_ub = ub; + undersized = (cur < loop_chunk); + }; // while + // Must have at least one chunk + if (!(chunk > 0)) { + printf("Error with chunk %d\n", chunk); + err++; + } + // Must have the right last iteration index + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) { + printf("Error with last1 %d, %d, ch %d\n", + (int)last_ub, (int)loop_ub, chunk); + err++; + } + if (!(last_ub + loop_st > loop_ub)) { + printf("Error with last2 %d, %d, %d, ch %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk); + err++; + } + } else { + if (!(last_ub >= loop_ub)) { + printf("Error with last1 %d, %d, ch %d\n", + (int)last_ub, (int)loop_ub, chunk); + err++; + } + if (!(last_ub + loop_st < loop_ub)) { + printf("Error with last2 %d, %d, %d, ch %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk); + err++; + } + }; // if + // Let non-master threads go + loop_sync = 1; + } else { + int i; + // Workers wait for master thread to finish, then call __kmpc_dispatch_next + for (i = 0; i < 1000000; ++ i) { + if (loop_sync != 0) { + break; + }; // if + }; // for i + while (loop_sync == 0) { + delay(); + }; // while + // At this moment we do not have any more chunks -- all the chunks already + // processed by the master thread + rc = __kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st); + if (rc) { + printf("Error return value\n"); + err++; + } + }; // if + + __kmpc_barrier(&loc, gtid); + if (tid == 0) { + loop_sync = 0; // Restore original state +#if DEBUG + printf("run_loop<>(): at the end\n"); +#endif + }; // if + __kmpc_barrier(&loc, gtid); + return err; +} // run_loop + +// --------------------------------------------------------------------------- +int run_64(int num_th) +{ + int 
err = 0; +#pragma omp parallel num_threads(num_th) + { + int chunk; + i64 st, lb, ub; + for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) { + for (st = 1; st <= 3; ++ st) { + for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) { + for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) { + err += run_loop_64(lb, ub, st, chunk); + err += run_loop_64(ub, lb, -st, chunk); + }; // for ub + }; // for lb + }; // for st + }; // for chunk + } + return err; +} // run_all + +int run_32(int num_th) +{ + int err = 0; +#pragma omp parallel num_threads(num_th) + { + int chunk, st, lb, ub; + for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) { + for (st = 1; st <= 3; ++ st) { + for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) { + for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) { + err += run_loop_32(lb, ub, st, chunk); + err += run_loop_32(ub, lb, -st, chunk); + }; // for ub + }; // for lb + }; // for st + }; // for chunk + } + return err; +} // run_all + +// --------------------------------------------------------------------------- +int main() +{ + int n, err = 0; + for (n = 1; n <= 4; ++ n) { + err += run_32(n); + err += run_64(n); + }; // for n + if (err) + printf("failed with %d errors\n", err); + else + printf("passed\n"); + return err; +} diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c new file mode 100644 index 0000000..bb538d1 --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c @@ -0,0 +1,221 @@ +// RUN: %libomp-compile-and-run + +// The test checks schedule(simd:runtime) +// in combination with omp_set_schedule() +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +#if defined(WIN32) || defined(_WIN32) +#include <windows.h> +#define delay() Sleep(1); +#define seten(a,b,c) _putenv_s((a),(b)) +#else +#include <unistd.h> +#define delay() usleep(10); +#define seten(a,b,c) setenv((a),(b),(c)) +#endif 
+ +#define SIMD_LEN 4 +int err = 0; + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL. +enum sched { + kmp_sch_static_balanced_chunked = 45, + kmp_sch_guided_simd = 46, + kmp_sch_runtime_simd = 47, +}; +typedef unsigned u32; +typedef long long i64; +typedef unsigned long long u64; +typedef struct { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +#ifdef __cplusplus +extern "C" { +#endif + int __kmpc_global_thread_num(id*); + void __kmpc_barrier(id*, int gtid); + void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); + void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); + int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); + int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); +#ifdef __cplusplus +} // extern "C" +#endif +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; + +// --------------------------------------------------------------------------- +void +run_loop( + int loop_lb, // Loop lower bound. + int loop_ub, // Loop upper bound. + int loop_st, // Loop stride. + int lchunk +) { + static int volatile loop_sync = 0; + int lb; // Chunk lower bound. + int ub; // Chunk upper bound. + int st; // Chunk stride. + int rc; + int tid = omp_get_thread_num(); + int gtid = __kmpc_global_thread_num(&loc); + int last; + int tc = (loop_ub - loop_lb) / loop_st + 1; + int ch; + int no_chunk = 0; + if (lchunk == 0) { + no_chunk = 1; + lchunk = 1; + } + ch = lchunk * SIMD_LEN; +#if _DEBUG > 1 + printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n", + gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen. + if (loop_st == 0) + return; + if (loop_st > 0 ? 
loop_lb > loop_ub : loop_lb < loop_ub) + return; + __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, + loop_lb, loop_ub, loop_st, SIMD_LEN); + { + // Let the master thread handle the chunks alone. + int chunk; // No of current chunk. + int last_ub; // Upper bound of the last processed chunk. + u64 cur; // Number of interations in current chunk. + u64 max; // Max allowed iterations for current chunk. + int undersized = 0; + last_ub = loop_ub; + chunk = 0; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations. + while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if _DEBUG + printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n", + tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); +#endif + // Check if previous chunk (it is not the final chunk) is undersized. + if (undersized) + printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err); + if (loop_st > 0) { + if (!(ub <= loop_ub)) + printf("Error with ub %d, %d, ch %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb <= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + } else { + if (!(ub >= loop_ub)) + printf("Error with ub %d, %d, %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb >= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + }; // if + // Stride should not change. + if (!(st == loop_st)) + printf("Error with st %d, %d, ch %d, err %d\n", + (int)st, (int)loop_st, chunk, ++err); + cur = ( ub - lb ) / loop_st + 1; + // Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum. + if (!( cur <= max + 1)) + printf("Error with iter %d, %d, err %d\n", cur, max, ++err); + // Update maximum for the next chunk. 
+ if (last) { + if (!no_chunk && cur > ch) + printf("Error: too big last chunk %d (%d), tid %d, err %d\n", + (int)cur, ch, tid, ++err); + } else { + if (cur % ch) + printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", + chunk, (int)cur, ch, tid, ++err); + } + if (cur < max) + max = cur; + last_ub = ub; + undersized = (cur < ch); +#if _DEBUG > 1 + if (last) + printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n", + undersized,cur,ch,tid,ub,lb,loop_st); +#endif + } // while + // Must have the right last iteration index. + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st > loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } else { + if (!(last_ub >= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st < loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } // if + } + __kmpc_barrier(&loc, gtid); +} // run_loop + +int main(int argc, char *argv[]) +{ + int chunk = 0; +// static (no chunk) + omp_set_schedule(omp_sched_static,0); +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + +// auto (chunk should be ignorted) + omp_set_schedule(omp_sched_auto,0); +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + +// static,1 + chunk = 1; + omp_set_schedule(omp_sched_static,1); +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + +// dynamic,1 + omp_set_schedule(omp_sched_dynamic,1); +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + +// guided,1 + omp_set_schedule(omp_sched_guided,1); +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + +// dynamic,0 - use default chunk size 1 + 
omp_set_schedule(omp_sched_dynamic,0); +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + +// guided,0 - use default chunk size 1 + omp_set_schedule(omp_sched_guided,0); +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + + if (err) { + printf("failed, err = %d\n", err); + return 1; + } else { + printf("passed\n"); + return 0; + } +} diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c new file mode 100644 index 0000000..d137831 --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c @@ -0,0 +1,196 @@ +// RUN: %libomp-compile +// RUN: env OMP_SCHEDULE=guided %libomp-run +// RUN: env OMP_SCHEDULE=guided,1 %libomp-run 1 +// RUN: env OMP_SCHEDULE=guided,2 %libomp-run 2 +// RUN: env OMP_SCHEDULE=dynamic %libomp-run +// RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1 +// RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2 +// RUN: env OMP_SCHEDULE=auto %libomp-run + +// The test checks schedule(simd:runtime) +// in combination with OMP_SCHEDULE=guided[,chunk] +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +#if defined(WIN32) || defined(_WIN32) +#include <windows.h> +#define delay() Sleep(1); +#define seten(a,b,c) _putenv_s((a),(b)) +#else +#include <unistd.h> +#define delay() usleep(10); +#define seten(a,b,c) setenv((a),(b),(c)) +#endif + +#define UBOUND 100 +#define SIMD_LEN 4 +int err = 0; + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL. 
+enum sched { + kmp_sch_static_balanced_chunked = 45, + kmp_sch_guided_simd = 46, + kmp_sch_runtime_simd = 47, +}; +typedef unsigned u32; +typedef long long i64; +typedef unsigned long long u64; +typedef struct { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +#ifdef __cplusplus +extern "C" { +#endif + int __kmpc_global_thread_num(id*); + void __kmpc_barrier(id*, int gtid); + void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); + void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); + int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); + int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); +#ifdef __cplusplus +} // extern "C" +#endif +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; + +// --------------------------------------------------------------------------- +void +run_loop( + int loop_lb, // Loop lower bound. + int loop_ub, // Loop upper bound. + int loop_st, // Loop stride. + int lchunk +) { + static int volatile loop_sync = 0; + int lb; // Chunk lower bound. + int ub; // Chunk upper bound. + int st; // Chunk stride. + int rc; + int tid = omp_get_thread_num(); + int gtid = __kmpc_global_thread_num(&loc); + int last; + int tc = (loop_ub - loop_lb) / loop_st + 1; + int ch; + int no_chunk = 0; + if (lchunk == 0) { + no_chunk = 1; + lchunk = 1; + } + ch = lchunk * SIMD_LEN; +#if _DEBUG > 1 + printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n", + gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen. + if (loop_st == 0) + return; + if (loop_st > 0 ? 
loop_lb > loop_ub : loop_lb < loop_ub) + return; + __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, + loop_lb, loop_ub, loop_st, SIMD_LEN); + { + // Let the master thread handle the chunks alone. + int chunk; // No of current chunk. + int last_ub; // Upper bound of the last processed chunk. + u64 cur; // Number of interations in current chunk. + u64 max; // Max allowed iterations for current chunk. + int undersized = 0; + last_ub = loop_ub; + chunk = 0; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations. + while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if _DEBUG + printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n", + tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); +#endif + // Check if previous chunk (it is not the final chunk) is undersized. + if (undersized) + printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err); + if (loop_st > 0) { + if (!(ub <= loop_ub)) + printf("Error with ub %d, %d, ch %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb <= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + } else { + if (!(ub >= loop_ub)) + printf("Error with ub %d, %d, %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb >= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + }; // if + // Stride should not change. + if (!(st == loop_st)) + printf("Error with st %d, %d, ch %d, err %d\n", + (int)st, (int)loop_st, chunk, ++err); + cur = ( ub - lb ) / loop_st + 1; + // Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum. + if (!( cur <= max + 1)) + printf("Error with iter %d, %d, err %d\n", cur, max, ++err); + // Update maximum for the next chunk. 
+ if (!last && cur % ch) + printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", + chunk, (int)cur, ch, tid, ++err); + if (last && !no_chunk && cur > ch) + printf("Error: too big last chunk %d (%d), tid %d, err %d\n", + (int)cur, ch, tid, ++err); + if (cur < max) + max = cur; + last_ub = ub; + undersized = (cur < ch); +#if _DEBUG > 1 + if (last) + printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n", + undersized,cur,ch,tid,ub,lb,loop_st); +#endif + } // while + // Must have the right last iteration index. + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st > loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } else { + if (!(last_ub >= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st < loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } // if + } + __kmpc_barrier(&loc, gtid); +} // run_loop + +int main(int argc, char *argv[]) +{ + int chunk = 0; + if (argc > 1) { + // expect chunk size as a parameter + chunk = atoi(argv[1]); + } +#pragma omp parallel //num_threads(num_th) + run_loop(0, UBOUND, 1, chunk); + if (err) { + printf("failed, err = %d\n", err); + return 1; + } else { + printf("passed\n"); + return 0; + } +} diff --git a/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c new file mode 100644 index 0000000..4cb15d6 --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c @@ -0,0 +1,201 @@ +// RUN: %libomp-compile && %libomp-run +// RUN: %libomp-run 1 && %libomp-run 2 + +// The test checks schedule(simd:runtime) +// in combination with 
OMP_SCHEDULE=static[,chunk] +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +#if defined(WIN32) || defined(_WIN32) +#include <windows.h> +#define delay() Sleep(1); +#define seten(a,b,c) _putenv_s((a),(b)) +#else +#include <unistd.h> +#define delay() usleep(10); +#define seten(a,b,c) setenv((a),(b),(c)) +#endif + +#define SIMD_LEN 4 +int err = 0; + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL. +enum sched { + kmp_sch_static_balanced_chunked = 45, + kmp_sch_guided_simd = 46, + kmp_sch_runtime_simd = 47, +}; +typedef unsigned u32; +typedef long long i64; +typedef unsigned long long u64; +typedef struct { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +#ifdef __cplusplus +extern "C" { +#endif + int __kmpc_global_thread_num(id*); + void __kmpc_barrier(id*, int gtid); + void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); + void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); + int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); + int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); +#ifdef __cplusplus +} // extern "C" +#endif +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; + +// --------------------------------------------------------------------------- +void +run_loop( + int loop_lb, // Loop lower bound. + int loop_ub, // Loop upper bound. + int loop_st, // Loop stride. + int lchunk +) { + static int volatile loop_sync = 0; + int lb; // Chunk lower bound. + int ub; // Chunk upper bound. + int st; // Chunk stride. 
+ int rc; + int tid = omp_get_thread_num(); + int gtid = __kmpc_global_thread_num(&loc); + int last; + int tc = (loop_ub - loop_lb) / loop_st + 1; + int ch; + int no_chunk = 0; + if (lchunk == 0) { + no_chunk = 1; + lchunk = 1; + } + ch = lchunk * SIMD_LEN; +#if _DEBUG > 1 + printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n", + gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); +#endif + // Don't test degenerate cases that should have been discovered by codegen. + if (loop_st == 0) + return; + if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) + return; + __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, + loop_lb, loop_ub, loop_st, SIMD_LEN); + { + // Let the master thread handle the chunks alone. + int chunk; // No of current chunk. + int last_ub; // Upper bound of the last processed chunk. + u64 cur; // Number of interations in current chunk. + u64 max; // Max allowed iterations for current chunk. + int undersized = 0; + last_ub = loop_ub; + chunk = 0; + max = (loop_ub - loop_lb) / loop_st + 1; + // The first chunk can consume all iterations. + while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { + ++ chunk; +#if _DEBUG + printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n", + tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); +#endif + // Check if previous chunk (it is not the final chunk) is undersized. + if (undersized) + printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err); + if (loop_st > 0) { + if (!(ub <= loop_ub)) + printf("Error with ub %d, %d, ch %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb <= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + } else { + if (!(ub >= loop_ub)) + printf("Error with ub %d, %d, %d, err %d\n", + (int)ub, (int)loop_ub, chunk, ++err); + if (!(lb >= ub)) + printf("Error with bounds %d, %d, %d, err %d\n", + (int)lb, (int)ub, chunk, ++err); + }; // if + // Stride should not change. 
+ if (!(st == loop_st)) + printf("Error with st %d, %d, ch %d, err %d\n", + (int)st, (int)loop_st, chunk, ++err); + cur = ( ub - lb ) / loop_st + 1; + // Guided scheduling uses FP computations, so current chunk may + // be a bit bigger (+1) than allowed maximum. + if (!( cur <= max + 1)) + printf("Error with iter %d, %d, err %d\n", cur, max, ++err); + // Update maximum for the next chunk. + if (last) { + if (!no_chunk && cur > ch) + printf("Error: too big last chunk %d (%d), tid %d, err %d\n", + (int)cur, ch, tid, ++err); + } else { + if (cur % ch) + printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", + chunk, (int)cur, ch, tid, ++err); + } + if (cur < max) + max = cur; + last_ub = ub; + undersized = (cur < ch); +#if _DEBUG > 1 + if (last) + printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n", + undersized,cur,ch,tid,ub,lb,loop_st); +#endif + } // while + // Must have the right last iteration index. + if (loop_st > 0) { + if (!(last_ub <= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st > loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } else { + if (!(last_ub >= loop_ub)) + printf("Error with last1 %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_ub, chunk, ++err); + if (last && !(last_ub + loop_st < loop_ub)) + printf("Error with last2 %d, %d, %d, ch %d, err %d\n", + (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); + } // if + } + __kmpc_barrier(&loc, gtid); +} // run_loop + +int main(int argc, char *argv[]) +{ + int chunk = 0; + if (argc > 1) { + char *buf = malloc(8 + strlen(argv[1])); + // expect chunk size as a parameter + chunk = atoi(argv[1]); + strcpy(buf,"static,"); + strcat(buf,argv[1]); + seten("OMP_SCHEDULE",buf,1); + printf("Testing schedule(simd:%s)\n", buf); + free(buf); + } else { + seten("OMP_SCHEDULE","static",1); + printf("Testing 
schedule(simd:static)\n"); + } +#pragma omp parallel// num_threads(num_th) + run_loop(0, 26, 1, chunk); + if (err) { + printf("failed, err = %d\n", err); + return 1; + } else { + printf("passed\n"); + return 0; + } +} diff --git a/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c b/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c new file mode 100644 index 0000000..a6378fe --- /dev/null +++ b/final/runtime/test/worksharing/for/kmp_set_dispatch_buf.c @@ -0,0 +1,91 @@ +// RUN: %libomp-compile && %libomp-run 7 +// RUN: %libomp-run 0 && %libomp-run -1 +// RUN: %libomp-run 1 && %libomp-run 2 && %libomp-run 5 +// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run 7 +// RUN: %libomp-run 1 && %libomp-run 2 && %libomp-run 5 +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include <limits.h> +#include "omp_testsuite.h" + +#define INCR 7 +#define MY_MAX 200 +#define MY_MIN -200 +#ifndef MY_SCHEDULE +# define MY_SCHEDULE dynamic +#endif + +int num_disp_buffers, num_loops; +int a, b, a_known_value, b_known_value; + +int test_kmp_set_disp_num_buffers() +{ + int success = 1; + a = 0; + b = 0; + // run many small dynamic loops to stress the dispatch buffer system + #pragma omp parallel + { + int i,j; + for (j = 0; j < num_loops; j++) { + #pragma omp for schedule(MY_SCHEDULE) nowait + for (i = MY_MIN; i < MY_MAX; i+=INCR) { + #pragma omp atomic + a++; + } + #pragma omp for schedule(MY_SCHEDULE) nowait + for (i = MY_MAX; i >= MY_MIN; i-=INCR) { + #pragma omp atomic + b++; + } + } + } + // detect failure + if (a != a_known_value || b != b_known_value) { + success = 0; + printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, + b, b_known_value); + } + return success; +} + +int main(int argc, char** argv) +{ + int i,j; + int num_failed=0; + + if (argc != 2) { + fprintf(stderr, "usage: %s num_disp_buffers\n", argv[0]); + exit(1); + } + + // set the number of dispatch buffers + num_disp_buffers = atoi(argv[1]); + 
kmp_set_disp_num_buffers(num_disp_buffers); + + // figure out the known values to compare with calculated result + a_known_value = 0; + b_known_value = 0; + + // if specified to use bad num_disp_buffers set num_loops + // to something reasonable + if (num_disp_buffers <= 0) + num_loops = 10; + else + num_loops = num_disp_buffers*10; + + for (j = 0; j < num_loops; j++) { + for (i = MY_MIN; i < MY_MAX; i+=INCR) + a_known_value++; + for (i = MY_MAX; i >= MY_MIN; i-=INCR) + b_known_value++; + } + + for(i = 0; i < REPETITIONS; i++) { + if(!test_kmp_set_disp_num_buffers()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_doacross.c b/final/runtime/test/worksharing/for/omp_doacross.c new file mode 100644 index 0000000..4187112 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_doacross.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile-and-run +// XFAIL: gcc-4, gcc-5, clang-3.7, clang-3.8, icc-15, icc-16 +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +#ifndef N +#define N 750 +#endif + +int test_doacross() { + int i, j; + // Allocate and zero out the matrix + int *m = (int *)malloc(sizeof(int) * N * N); + for (i = 0; i < N; ++i) { + for (j = 0; j < N; ++j) { + m[i * N + j] = 0; + } + } + // Have first row and column be 0, 1, 2, 3, etc. 
+ for (i = 0; i < N; ++i) + m[i * N] = i; + for (j = 0; j < N; ++j) + m[j] = j; + // Perform wavefront which results in matrix: + // 0 1 2 3 4 + // 1 2 3 4 5 + // 2 3 4 5 6 + // 3 4 5 6 7 + // 4 5 6 7 8 + #pragma omp parallel shared(m) + { + int row, col; + #pragma omp for ordered(2) + for (row = 1; row < N; ++row) { + for (col = 1; col < N; ++col) { + #pragma omp ordered depend(sink : row - 1, col) depend(sink : row, col - 1) + m[row * N + col] = m[(row - 1) * N + col] + m[row * N + (col - 1)] - + m[(row - 1) * N + (col - 1)]; + #pragma omp ordered depend(source) + } + } + } + + // Check the bottom right element to see if iteration dependencies were held + int retval = (m[(N - 1) * N + N - 1] == 2 * (N - 1)); + free(m); + return retval; +} + +int main(int argc, char **argv) { + int i; + int num_failed = 0; + for (i = 0; i < REPETITIONS; i++) { + if (!test_doacross()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_bigbounds.c b/final/runtime/test/worksharing/for/omp_for_bigbounds.c new file mode 100644 index 0000000..901d760 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_bigbounds.c @@ -0,0 +1,70 @@ +// RUN: %libomp-compile -DMY_SCHEDULE=static && %libomp-run +// RUN: %libomp-compile -DMY_SCHEDULE=dynamic && %libomp-run +// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run + +// Only works with Intel Compiler since at least version 15.0 +// XFAIL: gcc, clang + +/* + * Test that large bounds are handled properly and calculations of + * loop iterations don't accidently overflow + */ +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include <limits.h> +#include "omp_testsuite.h" + +#define INCR 50000000 +#define MY_MAX 2000000000 +#define MY_MIN -2000000000 +#ifndef MY_SCHEDULE +# define MY_SCHEDULE static +#endif + +int a, b, a_known_value, b_known_value; + +int test_omp_for_bigbounds() +{ + a = 0; + b = 0; + #pragma omp parallel + { + int i; + #pragma omp for 
schedule(MY_SCHEDULE) + for (i = INT_MIN; i < MY_MAX; i+=INCR) { + #pragma omp atomic + a++; + } + #pragma omp for schedule(MY_SCHEDULE) + for (i = INT_MAX; i >= MY_MIN; i-=INCR) { + #pragma omp atomic + b++; + } + } + printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, b, b_known_value); + return (a == a_known_value && b == b_known_value); +} + +int main() +{ + int i; + int num_failed=0; + + a_known_value = 0; + for (i = INT_MIN; i < MY_MAX; i+=INCR) { + a_known_value++; + } + + b_known_value = 0; + for (i = INT_MAX; i >= MY_MIN; i-=INCR) { + b_known_value++; + } + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_bigbounds()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_collapse.c b/final/runtime/test/worksharing/for/omp_for_collapse.c new file mode 100644 index 0000000..a08086d --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_collapse.c @@ -0,0 +1,51 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/* Utility function to check that i is increasing monotonically + with each call */ +static int check_i_islarger (int i) +{ + static int last_i; + int islarger; + if (i==1) + last_i=0; + islarger = ((i >= last_i)&&(i - last_i<=1)); + last_i = i; + return (islarger); +} + +int test_omp_for_collapse() +{ + int is_larger = 1; + + #pragma omp parallel + { + int i,j; + int my_islarger = 1; + #pragma omp for private(i,j) schedule(static,1) collapse(2) ordered + for (i = 1; i < 100; i++) { + for (j =1; j <100; j++) { + #pragma omp ordered + my_islarger = check_i_islarger(i)&&my_islarger; + } + } + #pragma omp critical + is_larger = is_larger && my_islarger; + } + return (is_larger); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_collapse()) { + num_failed++; + } + } + return num_failed; +} diff --git 
a/final/runtime/test/worksharing/for/omp_for_firstprivate.c b/final/runtime/test/worksharing/for/omp_for_firstprivate.c new file mode 100644 index 0000000..6c4121c --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_firstprivate.c @@ -0,0 +1,55 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int sum1; +#pragma omp threadprivate(sum1) + +int test_omp_for_firstprivate() +{ + int sum; + int sum0; + int known_sum; + int threadsnum; + + sum = 0; + sum0 = 12345; + sum1 = 0; + + #pragma omp parallel + { + #pragma omp single + { + threadsnum=omp_get_num_threads(); + } + /* sum0 = 0; */ + + int i; + #pragma omp for firstprivate(sum0) + for (i = 1; i <= LOOPCOUNT; i++) { + sum0 = sum0 + i; + sum1 = sum0; + } /* end of for */ + + #pragma omp critical + { + sum = sum + sum1; + } /* end of critical */ + } /* end of parallel */ + known_sum = 12345* threadsnum+ (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_lastprivate.c b/final/runtime/test/worksharing/for/omp_for_lastprivate.c new file mode 100644 index 0000000..88694b8 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_lastprivate.c @@ -0,0 +1,52 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int sum0; +#pragma omp threadprivate(sum0) + +int test_omp_for_lastprivate() +{ + int sum = 0; + int known_sum; + int i0; + + i0 = -1; + + #pragma omp parallel + { + sum0 = 0; + { /* Begin of orphaned block */ + int i; + #pragma omp for schedule(static,7) lastprivate(i0) + for (i = 1; i <= LOOPCOUNT; i++) { + sum0 = sum0 + i; + i0 = i; + } /* end of for */ + } /* end of orphaned block */ + + #pragma omp critical + { + sum = sum + sum0; + } /* end of critical 
*/ + } /* end of parallel */ + + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + fprintf(stderr, "known_sum = %d , sum = %d\n",known_sum,sum); + fprintf(stderr, "LOOPCOUNT = %d , i0 = %d\n",LOOPCOUNT,i0); + return ((known_sum == sum) && (i0 == LOOPCOUNT)); +} + +int main() +{ + int i; + int num_failed=0; + + for (i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_lastprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_nowait.c b/final/runtime/test/worksharing/for/omp_for_nowait.c new file mode 100644 index 0000000..95a4775 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_nowait.c @@ -0,0 +1,77 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +/* + * This test will hang if the nowait is not working properly. + * + * It relies on a thread skipping to the second for construct to + * release the threads in the first for construct. + * + * Also, we use static scheduling to guarantee that one + * thread will make it to the second for construct. 
+ */ +volatile int release; +volatile int count; + +void wait_for_release_then_increment(int rank) +{ + fprintf(stderr, "Thread nr %d enters first for construct" + " and waits.\n", rank); + while (release == 0); + #pragma omp atomic + count++; +} + +void release_and_increment(int rank) +{ + fprintf(stderr, "Thread nr %d sets release to 1\n", rank); + release = 1; + #pragma omp atomic + count++; +} + +int test_omp_for_nowait() +{ + release = 0; + count = 0; + + #pragma omp parallel num_threads(4) + { + int rank; + int i; + + rank = omp_get_thread_num(); + + #pragma omp for schedule(static) nowait + for (i = 0; i < 4; i++) { + if (i < 3) + wait_for_release_then_increment(rank); + else { + fprintf(stderr, "Thread nr %d enters first for and goes " + "immediately to the next for construct to release.\n", rank); + #pragma omp atomic + count++; + } + } + + #pragma omp for schedule(static) + for (i = 0; i < 4; i++) { + release_and_increment(rank); + } + } + return (count==8); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_nowait()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_ordered.c b/final/runtime/test/worksharing/for/omp_for_ordered.c new file mode 100644 index 0000000..18ac7eb --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_ordered.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +static int last_i = 0; + +/* Utility function to check that i is increasing monotonically + with each call */ +static int check_i_islarger (int i) +{ + int islarger; + islarger = (i > last_i); + last_i = i; + return (islarger); +} + +int test_omp_for_ordered() +{ + int sum; + int is_larger = 1; + int known_sum; + + last_i = 0; + sum = 0; + + #pragma omp parallel + { + int i; + int my_islarger = 1; + #pragma omp for schedule(static,1) ordered + for (i = 1; i < 100; i++) { + #pragma omp 
ordered + { + my_islarger = check_i_islarger(i) && my_islarger; + sum = sum + i; + } + } + #pragma omp critical + { + is_larger = is_larger && my_islarger; + } + } + + known_sum=(99 * 100) / 2; + return ((known_sum == sum) && is_larger); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_ordered()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_private.c b/final/runtime/test/worksharing/for/omp_for_private.c new file mode 100644 index 0000000..1f537b9 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_private.c @@ -0,0 +1,63 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/* Utility function do spend some time in a loop */ +static void do_some_work() +{ + int i; + double sum = 0; + for(i = 0; i < 1000; i++){ + sum += sqrt ((double) i); + } +} + +int sum1; +#pragma omp threadprivate(sum1) + +int test_omp_for_private() +{ + int sum = 0; + int sum0; + int known_sum; + + sum0 = 0; /* setting (global) sum0 = 0 */ + + #pragma omp parallel + { + sum1 = 0; /* setting sum1 in each thread to 0 */ + { /* begin of orphaned block */ + int i; + #pragma omp for private(sum0) schedule(static,1) + for (i = 1; i <= LOOPCOUNT; i++) { + sum0 = sum1; + #pragma omp flush + sum0 = sum0 + i; + do_some_work (); + #pragma omp flush + sum1 = sum0; + } + } /* end of orphaned block */ + + #pragma omp critical + { + sum = sum + sum1; + } /*end of critical*/ + } /* end of parallel*/ + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_reduction.c b/final/runtime/test/worksharing/for/omp_for_reduction.c new file mode 100644 index 0000000..28f0907 --- /dev/null +++ 
b/final/runtime/test/worksharing/for/omp_for_reduction.c @@ -0,0 +1,339 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include "omp_testsuite.h" + +#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */ +#define MAX_FACTOR 10 +#define KNOWN_PRODUCT 3628800 /* 10! */ + +int test_omp_for_reduction () +{ + double dt; + int sum; + int diff; + int product = 1; + double dsum; + double dknown_sum; + double ddiff; + int logic_and; + int logic_or; + int bit_and; + int bit_or; + int exclusiv_bit_or; + int *logics; + int i; + int known_sum; + int known_product; + double rounding_error = 1.E-9; /* over all rounding error to be + ignored in the double tests */ + double dpt; + int result = 0; + int logicsArray[LOOPCOUNT]; + + /* Variables for integer tests */ + sum = 0; + product = 1; + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + /* variabels for double tests */ + dt = 1. / 3.; /* base of geometric row for + and - test*/ + dsum = 0.; + /* Variabeles for logic tests */ + logics = logicsArray; + logic_and = 1; + logic_or = 0; + /* Variabeles for bit operators tests */ + bit_and = 1; + bit_or = 0; + /* Variables for exclusiv bit or */ + exclusiv_bit_or = 0; + + /************************************************************************/ + /** Tests for integers **/ + /************************************************************************/ + + /**** Testing integer addition ****/ + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(+:sum) + for (j = 1; j <= LOOPCOUNT; j++) { + sum = sum + j; + } + } + if (known_sum != sum) { + result++; + fprintf (stderr, "Error in sum with integers: Result was %d" + " instead of %d.\n", sum, known_sum); + } + + /**** Testing integer subtracton ****/ + diff = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(-:diff) + for (j = 1; j <= LOOPCOUNT; j++) { + diff = diff - j; + } + } + if (diff != 0) { + 
result++; + fprintf (stderr, "Error in difference with integers: Result was %d" + " instead of 0.\n", diff); + } + + /**** Testing integer multiplication ****/ + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(*:product) + for (j = 1; j <= MAX_FACTOR; j++) { + product *= j; + } + } + known_product = KNOWN_PRODUCT; + if(known_product != product) { + result++; + fprintf (stderr,"Error in Product with integers: Result was %d" + " instead of %d\n",product,known_product); + } + + /************************************************************************/ + /** Tests for doubles **/ + /************************************************************************/ + + /**** Testing double addition ****/ + dsum = 0.; + dpt = 1.; + for (i = 0; i < DOUBLE_DIGITS; ++i) { + dpt *= dt; + } + dknown_sum = (1 - dpt) / (1 - dt); + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(+:dsum) + for (j = 0; j < DOUBLE_DIGITS; j++) { + dsum += pow (dt, j); + } + } + if (fabs (dsum - dknown_sum) > rounding_error) { + result++; + fprintf (stderr, "\nError in sum with doubles: Result was %f" + " instead of: %f (Difference: %E)\n", + dsum, dknown_sum, dsum-dknown_sum); + } + + /**** Testing double subtraction ****/ + ddiff = (1 - dpt) / (1 - dt); + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(-:ddiff) + for (j = 0; j < DOUBLE_DIGITS; ++j) { + ddiff -= pow (dt, j); + } + } + if (fabs (ddiff) > rounding_error) { + result++; + fprintf (stderr, "Error in Difference with doubles: Result was %E" + " instead of 0.0\n", ddiff); + } + + + /************************************************************************/ + /** Tests for logical values **/ + /************************************************************************/ + + /**** Testing logic and ****/ + for (i = 0; i < LOOPCOUNT; i++) { + logics[i] = 1; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) 
reduction(&&:logic_and) + for (j = 0; j < LOOPCOUNT; ++j) { + logic_and = (logic_and && logics[j]); + } + } + if(!logic_and) { + result++; + fprintf (stderr, "Error in logic AND part 1\n"); + } + + logic_and = 1; + logics[LOOPCOUNT / 2] = 0; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(&&:logic_and) + for (j = 0; j < LOOPCOUNT; ++j) { + logic_and = logic_and && logics[j]; + } + } + if(logic_and) { + result++; + fprintf (stderr, "Error in logic AND part 2\n"); + } + + /**** Testing logic or ****/ + for (i = 0; i < LOOPCOUNT; i++) { + logics[i] = 0; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(||:logic_or) + for (j = 0; j < LOOPCOUNT; ++j) { + logic_or = logic_or || logics[j]; + } + } + if (logic_or) { + result++; + fprintf (stderr, "Error in logic OR part 1\n"); + } + + logic_or = 0; + logics[LOOPCOUNT / 2] = 1; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(||:logic_or) + for (j = 0; j < LOOPCOUNT; ++j) { + logic_or = logic_or || logics[j]; + } + } + if(!logic_or) { + result++; + fprintf (stderr, "Error in logic OR part 2\n"); + } + + /************************************************************************/ + /** Tests for bit values **/ + /************************************************************************/ + + /**** Testing bit and ****/ + for (i = 0; i < LOOPCOUNT; ++i) { + logics[i] = 1; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(&:bit_and) + for (j = 0; j < LOOPCOUNT; ++j) { + bit_and = (bit_and & logics[j]); + } + } + if (!bit_and) { + result++; + fprintf (stderr, "Error in BIT AND part 1\n"); + } + + bit_and = 1; + logics[LOOPCOUNT / 2] = 0; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(&:bit_and) + for (j = 0; j < LOOPCOUNT; ++j) { + bit_and = bit_and & logics[j]; + } + } + if (bit_and) { + result++; + fprintf (stderr, "Error in BIT AND part 
2\n"); + } + + /**** Testing bit or ****/ + for (i = 0; i < LOOPCOUNT; i++) { + logics[i] = 0; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(|:bit_or) + for (j = 0; j < LOOPCOUNT; ++j) { + bit_or = bit_or | logics[j]; + } + } + if (bit_or) { + result++; + fprintf (stderr, "Error in BIT OR part 1\n"); + } + + bit_or = 0; + logics[LOOPCOUNT / 2] = 1; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(|:bit_or) + for (j = 0; j < LOOPCOUNT; ++j) { + bit_or = bit_or | logics[j]; + } + } + if (!bit_or) { + result++; + fprintf (stderr, "Error in BIT OR part 2\n"); + } + + /**** Testing exclusive bit or ****/ + for (i = 0; i < LOOPCOUNT; i++) { + logics[i] = 0; + } + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(^:exclusiv_bit_or) + for (j = 0; j < LOOPCOUNT; ++j) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[j]; + } + } + if (exclusiv_bit_or) { + result++; + fprintf (stderr, "Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[LOOPCOUNT / 2] = 1; + + #pragma omp parallel + { + int j; + #pragma omp for schedule(dynamic,1) reduction(^:exclusiv_bit_or) + for (j = 0; j < LOOPCOUNT; ++j) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[j]; + } + } + if (!exclusiv_bit_or) { + result++; + fprintf (stderr, "Error in EXCLUSIV BIT OR part 2\n"); + } + + return (result == 0); + free (logics); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_auto.c b/final/runtime/test/worksharing/for/omp_for_schedule_auto.c new file mode 100644 index 0000000..075617c --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_auto.c @@ -0,0 +1,69 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include "omp_testsuite.h" 
+ +int sum1; +#pragma omp threadprivate(sum1) + +int test_omp_for_auto() +{ + int j; + int sum; + int sum0; + int known_sum; + int threadsnum; + + sum = 0; + sum0 = 12345; + + // array which keeps track of which threads participated in the for loop + // e.g., given 4 threads, [ 0 | 1 | 1 | 0 ] implies + // threads 0 and 3 did not, threads 1 and 2 did + int max_threads = omp_get_max_threads(); + int* active_threads = (int*)malloc(sizeof(int)*max_threads); + for(j = 0; j < max_threads; j++) + active_threads[j] = 0; + + #pragma omp parallel + { + int i; + sum1 = 0; + #pragma omp for firstprivate(sum0) schedule(auto) + for (i = 1; i <= LOOPCOUNT; i++) { + active_threads[omp_get_thread_num()] = 1; + sum0 = sum0 + i; + sum1 = sum0; + } + + #pragma omp critical + { + sum = sum + sum1; + } + } + + // count the threads that participated (sum is stored in threadsnum) + threadsnum=0; + for(j = 0; j < max_threads; j++) { + if(active_threads[j]) + threadsnum++; + } + free(active_threads); + + known_sum = 12345 * threadsnum + (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return (known_sum == sum); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_auto()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c b/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c new file mode 100644 index 0000000..6d4f59b --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_dynamic.c @@ -0,0 +1,89 @@ +// RUN: %libomp-compile-and-run +/* + * Test for dynamic scheduling with chunk size + * Method: caculate how many times the iteration space is dispatched + * and judge if each dispatch has the requested chunk size + * unless it is the last one. 
+ * It is possible for two adjacent chunks are assigned to the same thread + * Modified by Chunhua Liao + */ +#include <stdio.h> +#include <omp.h> +#include <stdlib.h> +#include "omp_testsuite.h" + +#define CFDMAX_SIZE 100 +const int chunk_size = 7; + +int test_omp_for_schedule_dynamic() +{ + int tid; + int *tids; + int i; + int tidsArray[CFDMAX_SIZE]; + int count = 0; + int tmp_count = 0; /*dispatch times*/ + int *tmp; /*store chunk size for each dispatch*/ + int result = 0; + + tids = tidsArray; + + #pragma omp parallel private(tid) shared(tids) + { /* begin of parallel */ + int tid; + tid = omp_get_thread_num (); + #pragma omp for schedule(dynamic,chunk_size) + for (i = 0; i < CFDMAX_SIZE; i++) { + tids[i] = tid; + } + } + + for (i = 0; i < CFDMAX_SIZE - 1; ++i) { + if (tids[i] != tids[i + 1]) { + count++; + } + } + + tmp = (int *) malloc (sizeof (int) * (count + 1)); + tmp[0] = 1; + + for (i = 0; i < CFDMAX_SIZE - 1; ++i) { + if (tmp_count > count) { + printf ("--------------------\nTestinternal Error: List too small!!!\n--------------------\n"); /* Error handling */ + break; + } + if (tids[i] != tids[i + 1]) { + tmp_count++; + tmp[tmp_count] = 1; + } else { + tmp[tmp_count]++; + } + } + /* is dynamic statement working? 
*/ + for (i = 0; i < count; i++) { + if ((tmp[i]%chunk_size)!=0) { + /* it is possible for 2 adjacent chunks assigned to a same thread */ + result++; + fprintf(stderr,"The intermediate dispatch has wrong chunksize.\n"); + /* result += ((tmp[i] / chunk_size) - 1); */ + } + } + if ((tmp[count]%chunk_size)!=(CFDMAX_SIZE%chunk_size)) { + result++; + fprintf(stderr,"the last dispatch has wrong chunksize.\n"); + } + /* for (int i=0;i<count+1;++i) printf("%d\t:=\t%d\n",i+1,tmp[i]); */ + return (result==0); +} +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_schedule_dynamic()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_guided.c b/final/runtime/test/worksharing/for/omp_for_schedule_guided.c new file mode 100644 index 0000000..1ee7449 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_guided.c @@ -0,0 +1,217 @@ +// RUN: %libomp-compile-and-run + +/* Test for guided scheduling + * Ensure threads get chunks interleavely first + * Then judge the chunk sizes are decreasing to a stable value + * Modified by Chunhua Liao + * For example, 100 iteration on 2 threads, chunksize 7 + * one line for each dispatch, 0/1 means thread id + * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24 + * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 18 + * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 + * 1 1 1 1 1 1 1 1 1 1 10 + * 0 0 0 0 0 0 0 0 8 + * 1 1 1 1 1 1 1 7 + * 0 0 0 0 0 0 0 7 + * 1 1 1 1 1 1 1 7 + * 0 0 0 0 0 5 +*/ +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +#define CFSMAX_SIZE 1000 +#define MAX_TIME 0.005 + +#ifdef SLEEPTIME +#undef SLEEPTIME +#define SLEEPTIME 0.0001 +#endif + +int test_omp_for_schedule_guided() +{ + int * tids; + int * chunksizes; + int notout; + int maxiter; + int threads; + int i; + int result; + + tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1)); + maxiter = 0; + result = 1; + 
notout = 1; + + /* Testing if enough threads are available for this check. */ + #pragma omp parallel + { + #pragma omp single + { + threads = omp_get_num_threads(); + } + } + + /* ensure there are at least two threads */ + if (threads < 2) { + omp_set_num_threads(2); + threads = 2; + } + + /* Now the real parallel work: + * Each thread will start immediately with the first chunk. + */ + #pragma omp parallel shared(tids,maxiter) + { /* begin of parallel */ + double count; + int tid; + int j; + + tid = omp_get_thread_num (); + + #pragma omp for nowait schedule(guided) + for(j = 0; j < CFSMAX_SIZE; ++j) { + count = 0.; + #pragma omp flush(maxiter) + if (j > maxiter) { + #pragma omp critical + { + maxiter = j; + } + } + /*printf ("thread %d sleeping\n", tid);*/ + #pragma omp flush(maxiter,notout) + while (notout && (count < MAX_TIME) && (maxiter == j)) { + #pragma omp flush(maxiter,notout) + my_sleep (SLEEPTIME); + count += SLEEPTIME; +#ifdef VERBOSE + printf("."); +#endif + } +#ifdef VERBOSE + if (count > 0.) 
printf(" waited %lf s\n", count); +#endif + /*printf ("thread %d awake\n", tid);*/ + tids[j] = tid; +#ifdef VERBOSE + printf("%d finished by %d\n",j,tid); +#endif + } /* end of for */ + notout = 0; + #pragma omp flush(maxiter,notout) + } /* end of parallel */ + + /******************************************************* + * evaluation of the values * + *******************************************************/ + { + int determined_chunksize = 1; + int last_threadnr = tids[0]; + int global_chunknr = 0; + int openwork = CFSMAX_SIZE; + int expected_chunk_size; + int* local_chunknr = (int*)malloc(threads * sizeof(int)); + double c = 1; + + for (i = 0; i < threads; i++) + local_chunknr[i] = 0; + + tids[CFSMAX_SIZE] = -1; + + /* + * determine the number of global chunks + */ + // fprintf(stderr,"# global_chunknr thread local_chunknr chunksize\n"); + for(i = 1; i <= CFSMAX_SIZE; ++i) { + if (last_threadnr==tids[i]) { + determined_chunksize++; + } else { + /* fprintf(stderr, "%d\t%d\t%d\t%d\n", global_chunknr, + last_threadnr, local_chunknr[last_threadnr], m); */ + global_chunknr++; + local_chunknr[last_threadnr]++; + last_threadnr = tids[i]; + determined_chunksize = 1; + } + } + /* now allocate the memory for saving the sizes of the global chunks */ + chunksizes = (int*)malloc(global_chunknr * sizeof(int)); + + /* + * Evaluate the sizes of the global chunks + */ + global_chunknr = 0; + determined_chunksize = 1; + last_threadnr = tids[0]; + for (i = 1; i <= CFSMAX_SIZE; ++i) { + /* If the threadnumber was the same as before increase the + * detected chunksize for this chunk otherwise set the detected + * chunksize again to one and save the number of the next + * thread in last_threadnr. 
+ */ + if (last_threadnr == tids[i]) { + determined_chunksize++; + } else { + chunksizes[global_chunknr] = determined_chunksize; + global_chunknr++; + local_chunknr[last_threadnr]++; + last_threadnr = tids[i]; + determined_chunksize = 1; + } + } + +#ifdef VERBOSE + fprintf(stderr, "found\texpected\tconstant\n"); +#endif + + /* identify the constant c for the exponential + decrease of the chunksize */ + expected_chunk_size = openwork / threads; + c = (double) chunksizes[0] / expected_chunk_size; + + for (i = 0; i < global_chunknr; i++) { + /* calculate the new expected chunksize */ + if (expected_chunk_size > 1) + expected_chunk_size = c * openwork / threads; +#ifdef VERBOSE + fprintf(stderr, "%8d\t%8d\t%lf\n", chunksizes[i], + expected_chunk_size, c * chunksizes[i]/expected_chunk_size); +#endif + /* check if chunksize is inside the rounding errors */ + if (abs (chunksizes[i] - expected_chunk_size) >= 2) { + result = 0; +#ifndef VERBOSE + fprintf(stderr, "Chunksize differed from expected " + "value: %d instead of %d\n", chunksizes[i], + expected_chunk_size); + return 0; +#endif + } /* end if */ + +#ifndef VERBOSE + if (expected_chunk_size - chunksizes[i] < 0) + fprintf(stderr, "Chunksize did not decrease: %d" + " instead of %d\n", chunksizes[i],expected_chunk_size); +#endif + + /* calculating the remaining amount of work */ + openwork -= chunksizes[i]; + } + } + return result; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_schedule_guided()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c b/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c new file mode 100644 index 0000000..b957fc3 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_runtime.c @@ -0,0 +1,82 @@ +// RUN: %libomp-compile +// RUN: env OMP_SCHEDULE=static %libomp-run 1 0 +// RUN: env OMP_SCHEDULE=static,10 %libomp-run 1 10 +// RUN: env 
OMP_SCHEDULE=dynamic %libomp-run 2 1 +// RUN: env OMP_SCHEDULE=dynamic,11 %libomp-run 2 11 +// RUN: env OMP_SCHEDULE=guided %libomp-run 3 1 +// RUN: env OMP_SCHEDULE=guided,12 %libomp-run 3 12 +// RUN: env OMP_SCHEDULE=auto %libomp-run 4 1 +// RUN: env OMP_SCHEDULE=trapezoidal %libomp-run 101 1 +// RUN: env OMP_SCHEDULE=trapezoidal,13 %libomp-run 101 13 +// RUN: env OMP_SCHEDULE=static_steal %libomp-run 102 1 +// RUN: env OMP_SCHEDULE=static_steal,14 %libomp-run 102 14 +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include "omp_testsuite.h" + +int sum; +char* correct_kind_string; +omp_sched_t correct_kind; +int correct_chunk_size; + +int test_omp_for_runtime() +{ + int sum; + int known_sum; + int chunk_size; + int error; + omp_sched_t kind; + + sum = 0; + error = 0; + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + omp_get_schedule(&kind, &chunk_size); + + printf("omp_get_schedule() returns: Schedule = %d, Chunk Size = %d\n", + kind, chunk_size); + if (kind != correct_kind) { + printf("kind(%d) != correct_kind(%d)\n", kind, correct_kind); + error = 1; + } + if (chunk_size != correct_chunk_size) { + printf("chunk_size(%d) != correct_chunk_size(%d)\n", chunk_size, + correct_chunk_size); + error = 1; + } + + #pragma omp parallel + { + int i; + #pragma omp for schedule(runtime) + for (i = 1; i <= LOOPCOUNT; i++) { + #pragma omp critical + sum+=i; + } + } + if (known_sum != sum) { + printf("Known Sum = %d, Calculated Sum = %d\n", known_sum, sum); + error = 1; + } + return !error; +} + +int main(int argc, char** argv) +{ + int i; + int num_failed=0; + if (argc != 3) { + fprintf(stderr, "usage: %s schedule_kind chunk_size\n", argv[0]); + fprintf(stderr, " Run with envirable OMP_SCHEDULE=kind[,chunk_size]\n"); + return 1; + } + correct_kind = atoi(argv[1]); + correct_chunk_size = atoi(argv[2]); + + for (i = 0; i < REPETITIONS; i++) { + if (!test_omp_for_runtime()) { + num_failed++; + } + } + return num_failed; +} diff --git 
a/final/runtime/test/worksharing/for/omp_for_schedule_static.c b/final/runtime/test/worksharing/for/omp_for_schedule_static.c new file mode 100644 index 0000000..f46a544 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_static.c @@ -0,0 +1,154 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +#define CFSMAX_SIZE 1000 +#define MAX_TIME 0.01 + +#ifdef SLEEPTIME +#undef SLEEPTIME +#define SLEEPTIME 0.0005 +#endif + +int test_omp_for_schedule_static() +{ + int threads; + int i,lasttid; + int * tids; + int notout; + int maxiter; + int chunk_size; + int counter = 0; + int tmp_count=1; + int lastthreadsstarttid = -1; + int result = 1; + + chunk_size = 7; + tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1)); + notout = 1; + maxiter = 0; + + #pragma omp parallel shared(tids,counter) + { /* begin of parallel*/ + #pragma omp single + { + threads = omp_get_num_threads (); + } /* end of single */ + } /* end of parallel */ + + if (threads < 2) { + omp_set_num_threads(2); + threads = 2; + } + fprintf (stderr,"Using an internal count of %d\nUsing a specified" + " chunksize of %d\n", CFSMAX_SIZE, chunk_size); + tids[CFSMAX_SIZE] = -1; /* setting endflag */ + #pragma omp parallel shared(tids) + { /* begin of parallel */ + double count; + int tid; + int j; + + tid = omp_get_thread_num (); + + #pragma omp for nowait schedule(static,chunk_size) + for(j = 0; j < CFSMAX_SIZE; ++j) { + count = 0.; + #pragma omp flush(maxiter) + if (j > maxiter) { + #pragma omp critical + { + maxiter = j; + } + } + /*printf ("thread %d sleeping\n", tid);*/ + while (notout && (count < MAX_TIME) && (maxiter == j)) { + #pragma omp flush(maxiter,notout) + my_sleep (SLEEPTIME); + count += SLEEPTIME; + printf("."); + } +#ifdef VERBOSE + if (count > 0.) 
printf(" waited %lf s\n", count); +#endif + /*printf ("thread %d awake\n", tid);*/ + tids[j] = tid; +#ifdef VERBOSE + printf("%d finished by %d\n",j,tid); +#endif + } /* end of for */ + notout = 0; + #pragma omp flush(maxiter,notout) + } /* end of parallel */ + + /**** analysing the data in array tids ****/ + + lasttid = tids[0]; + tmp_count = 0; + + for (i = 0; i < CFSMAX_SIZE + 1; ++i) { + /* If the work was done by the same thread increase tmp_count by one. */ + if (tids[i] == lasttid) { + tmp_count++; +#ifdef VERBOSE + fprintf (stderr, "%d: %d \n", i, tids[i]); +#endif + continue; + } + + /* Check if the next thread had has the right thread number. When finding + * threadnumber -1 the end should be reached. + */ + if (tids[i] == (lasttid + 1) % threads || tids[i] == -1) { + /* checking for the right chunk size */ + if (tmp_count == chunk_size) { + tmp_count = 1; + lasttid = tids[i]; +#ifdef VERBOSE + fprintf (stderr, "OK\n"); +#endif + } else { + /* If the chunk size was wrong, check if the end was reached */ + if (tids[i] == -1) { + if (i == CFSMAX_SIZE) { + fprintf (stderr, "Last thread had chunk size %d\n", + tmp_count); + break; + } else { + fprintf (stderr, "ERROR: Last thread (thread with" + " number -1) was found before the end.\n"); + result = 0; + } + } else { + fprintf (stderr, "ERROR: chunk size was %d. 
(assigned" + " was %d)\n", tmp_count, chunk_size); + result = 0; + } + } + } else { + fprintf(stderr, "ERROR: Found thread with number %d (should be" + " inbetween 0 and %d).", tids[i], threads - 1); + result = 0; + } +#ifdef VERBOSE + fprintf (stderr, "%d: %d \n", i, tids[i]); +#endif + } + + return result; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_schedule_static()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c b/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c new file mode 100644 index 0000000..922f27a --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_for_schedule_static_3.c @@ -0,0 +1,202 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <stdlib.h> +#include "omp_testsuite.h" +#include "omp_my_sleep.h" + +#define CFSMAX_SIZE 1000 +#define MAX_TIME 0.01 + +#ifdef SLEEPTIME +#undef SLEEPTIME +#define SLEEPTIME 0.0005 +#endif + +#define VERBOSE 0 + +int test_omp_for_schedule_static_3() +{ + int threads; + int i,lasttid; + + int * tids; + int * tids2; + int notout; + int maxiter; + int chunk_size; + + int counter = 0; + int tmp_count=1; + int lastthreadsstarttid = -1; + int result = 1; + chunk_size = 7; + + tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1)); + notout = 1; + maxiter = 0; + + #pragma omp parallel shared(tids,counter) + { /* begin of parallel*/ + #pragma omp single + { + threads = omp_get_num_threads (); + } /* end of single */ + } /* end of parallel */ + + /* Ensure that at least two threads are created */ + if (threads < 2) { + omp_set_num_threads(2); + threads = 2; + } + fprintf (stderr,"Using an internal count of %d\nUsing a" + " specified chunksize of %d\n", CFSMAX_SIZE, chunk_size); + tids[CFSMAX_SIZE] = -1; /* setting endflag */ + + #pragma omp parallel shared(tids) + { /* begin of parallel */ + double count; + int tid; + int j; + + tid = omp_get_thread_num 
(); + + #pragma omp for nowait schedule(static,chunk_size) + for(j = 0; j < CFSMAX_SIZE; ++j) { + count = 0.; + #pragma omp flush(maxiter) + if (j > maxiter) { + #pragma omp critical + { + maxiter = j; + } + } + /*printf ("thread %d sleeping\n", tid);*/ + while (notout && (count < MAX_TIME) && (maxiter == j)) { + #pragma omp flush(maxiter,notout) + my_sleep (SLEEPTIME); + count += SLEEPTIME; + printf("."); + } +#ifdef VERBOSE + if (count > 0.) printf(" waited %lf s\n", count); +#endif + /*printf ("thread %d awake\n", tid);*/ + tids[j] = tid; +#ifdef VERBOSE + printf("%d finished by %d\n",j,tid); +#endif + } /* end of omp parallel for */ + + notout = 0; + #pragma omp flush(maxiter,notout) + } /* end of parallel */ + + /**** analysing the data in array tids ****/ + + lasttid = tids[0]; + tmp_count = 0; + + for (i = 0; i < CFSMAX_SIZE + 1; ++i) { + /* If the work was done by the same thread + increase tmp_count by one. */ + if (tids[i] == lasttid) { + tmp_count++; +#ifdef VERBOSE + fprintf (stderr, "%d: %d \n", i, tids[i]); +#endif + continue; + } + + /* Check if the next thread had has the right thread number. + * When finding threadnumber -1 the end should be reached. + */ + if (tids[i] == (lasttid + 1) % threads || tids[i] == -1) { + /* checking for the right chunk size */ + if (tmp_count == chunk_size) { + tmp_count = 1; + lasttid = tids[i]; +#ifdef VERBOSE + fprintf (stderr, "OK\n"); +#endif + } else { + /* If the chunk size was wrong, check if the end was reached */ + if (tids[i] == -1) { + if (i == CFSMAX_SIZE) { + fprintf (stderr, "Last thread had chunk size %d\n", + tmp_count); + break; + } else { + fprintf (stderr, "ERROR: Last thread (thread with" + " number -1) was found before the end.\n"); + result = 0; + } + } else { + fprintf (stderr, "ERROR: chunk size was %d. 
(assigned" + " was %d)\n", tmp_count, chunk_size); + result = 0; + } + } + } else { + fprintf(stderr, "ERROR: Found thread with number %d (should be" + " inbetween 0 and %d).", tids[i], threads - 1); + result = 0; + } +#ifdef VERBOSE + fprintf (stderr, "%d: %d \n", i, tids[i]); +#endif + } + + /* Now we check if several loop regions in one parallel region have the + * same logical assignement of chunks to threads. We use the nowait + * clause to increase the probability to get an error. */ + + /* First we allocate some more memmory */ + free (tids); + tids = (int *) malloc (sizeof (int) * LOOPCOUNT); + tids2 = (int *) malloc (sizeof (int) * LOOPCOUNT); + + #pragma omp parallel + { + { + int n; + #pragma omp for schedule(static) nowait + for (n = 0; n < LOOPCOUNT; n++) { + if (LOOPCOUNT == n + 1 ) + my_sleep(SLEEPTIME); + + tids[n] = omp_get_thread_num(); + } + } + { + int m; + #pragma omp for schedule(static) nowait + for (m = 1; m <= LOOPCOUNT; m++) { + tids2[m-1] = omp_get_thread_num(); + } + } + } + + for (i = 0; i < LOOPCOUNT; i++) + if (tids[i] != tids2[i]) { + fprintf (stderr, "Chunk no. 
%d was assigned once to thread %d and" + " later to thread %d.\n", i, tids[i],tids2[i]); + result = 0; + } + + free (tids); + free (tids2); + return result; +} + +int main() +{ + int i; + int num_failed=0; + + for (i = 0; i < REPETITIONS; i++) { + if(!test_omp_for_schedule_static_3()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c b/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c new file mode 100644 index 0000000..3b3bf7d --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c @@ -0,0 +1,35 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_for_firstprivate() +{ + int sum ; + int i2; + int i; + int known_sum; + + sum=0; + i2=3; + + #pragma omp parallel for reduction(+:sum) private(i) firstprivate(i2) + for (i = 1; i <= LOOPCOUNT; i++) { + sum = sum + (i + i2); + } + + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2 + i2 * LOOPCOUNT; + return (known_sum == sum); +} /* end of check_parallel_for_fistprivate */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_if.c b/final/runtime/test/worksharing/for/omp_parallel_for_if.c new file mode 100644 index 0000000..57fe498 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_if.c @@ -0,0 +1,42 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int test_omp_parallel_for_if() +{ + int known_sum; + int num_threads; + int sum, sum2; + int i; + int control; + + control = 0; + num_threads=0; + sum = 0; + sum2 = 0; + + #pragma omp parallel for private(i) if (control==1) + for (i=0; i <= LOOPCOUNT; i++) { + num_threads = omp_get_num_threads(); + sum = sum + i; + } + + known_sum = 
(LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + fprintf(stderr, "Number of threads determined by" + " omp_get_num_threads: %d\n", num_threads); + return (known_sum == sum && num_threads == 1); +} /* end of check_parallel_for_private */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_if()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c b/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c new file mode 100644 index 0000000..a53cfb2 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c @@ -0,0 +1,37 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_for_lastprivate() +{ + int sum; + int i; + int i0; + int known_sum; + + sum =0; + i0 = -1; + + #pragma omp parallel for reduction(+:sum) \ + schedule(static,7) private(i) lastprivate(i0) + for (i = 1; i <= LOOPCOUNT; i++) { + sum = sum + i; + i0 = i; + } /* end of parallel for */ + + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return ((known_sum == sum) && (i0 == LOOPCOUNT)); +} /* end of check_parallel_for_lastprivate */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_lastprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c b/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c new file mode 100644 index 0000000..5fef460 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_ordered.c @@ -0,0 +1,64 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +static int last_i = 0; + +int i; +#pragma omp threadprivate(i) + +/* Variable ii is used to avoid problems with a threadprivate variable used as a loop + * index. See test omp_threadprivate_for. 
+ */ +static int ii; +#pragma omp threadprivate(ii) + +/*! + Utility function: returns true if the passed argument is larger than + the argument of the last call of this function. + */ +static int check_i_islarger2(int i) +{ + int islarger; + islarger = (i > last_i); + last_i = i; + return (islarger); +} + +int test_omp_parallel_for_ordered() +{ + int sum; + int is_larger; + int known_sum; + int i; + + sum = 0; + is_larger = 1; + last_i = 0; + #pragma omp parallel for schedule(static,1) private(i) ordered + for (i = 1; i < 100; i++) { + ii = i; + #pragma omp ordered + { + is_larger = check_i_islarger2 (ii) && is_larger; + sum = sum + ii; + } + } + known_sum = (99 * 100) / 2; + fprintf (stderr," known_sum = %d , sum = %d \n", known_sum, sum); + fprintf (stderr," is_larger = %d\n", is_larger); + return (known_sum == sum) && is_larger; +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_ordered()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_private.c b/final/runtime/test/worksharing/for/omp_parallel_for_private.c new file mode 100644 index 0000000..1231d36 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_private.c @@ -0,0 +1,50 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +/*! 
Utility function to spend some time in a loop */ +static void do_some_work (void) +{ + int i; + double sum = 0; + for(i = 0; i < 1000; i++){ + sum += sqrt (i); + } +} + +int test_omp_parallel_for_private() +{ + int sum; + int i; + int i2; + int known_sum; + + sum =0; + i2=0; + + #pragma omp parallel for reduction(+:sum) schedule(static,1) private(i) private(i2) + for (i=1;i<=LOOPCOUNT;i++) + { + i2 = i; + #pragma omp flush + do_some_work (); + #pragma omp flush + sum = sum + i2; + } /*end of for*/ + known_sum = (LOOPCOUNT * (LOOPCOUNT + 1)) / 2; + return (known_sum == sum); +} /* end of check_parallel_for_private */ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c b/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c new file mode 100644 index 0000000..118d730 --- /dev/null +++ b/final/runtime/test/worksharing/for/omp_parallel_for_reduction.c @@ -0,0 +1,266 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +#define DOUBLE_DIGITS 20 /* dt^DOUBLE_DIGITS */ +#define MAX_FACTOR 10 +#define KNOWN_PRODUCT 3628800 /* 10! 
*/ + +int test_omp_parallel_for_reduction() +{ + int sum; + int known_sum; + double dsum; + double dknown_sum; + double dt=0.5; /* base of geometric row for + and - test*/ + double rounding_error= 1.E-9; + int diff; + double ddiff; + int product; + int known_product; + int logic_and; + int logic_or; + int bit_and; + int bit_or; + int exclusiv_bit_or; + int logics[LOOPCOUNT]; + int i; + double dpt; + int result; + + sum =0; + dsum=0; + dt = 1./3.; + result = 0; + product = 1; + logic_and=1; + logic_or=0; + bit_and=1; + bit_or=0; + exclusiv_bit_or=0; + + /* Tests for integers */ + known_sum = (LOOPCOUNT*(LOOPCOUNT+1))/2; + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:sum) + for (i=1;i<=LOOPCOUNT;i++) { + sum=sum+i; + } + if(known_sum!=sum) { + result++; + fprintf(stderr,"Error in sum with integers: Result was %d" + " instead of %d\n",sum,known_sum); + } + + diff = (LOOPCOUNT*(LOOPCOUNT+1))/2; + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:diff) + for (i=1;i<=LOOPCOUNT;++i) { + diff=diff-i; + } + if(diff != 0) { + result++; + fprintf(stderr,"Error in difference with integers: Result was %d" + " instead of 0.\n",diff); + } + + /* Tests for doubles */ + dsum=0; + dpt=1; + for (i=0;i<DOUBLE_DIGITS;++i) { + dpt*=dt; + } + dknown_sum = (1-dpt)/(1-dt); + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(+:dsum) + for (i=0;i<DOUBLE_DIGITS;++i) { + dsum += pow(dt,i); + } + if( fabs(dsum-dknown_sum) > rounding_error ) { + result++; + fprintf(stderr,"Error in sum with doubles: Result was %f" + " instead of %f (Difference: %E)\n", + dsum, dknown_sum, dsum-dknown_sum); + } + + dpt=1; + + for (i=0;i<DOUBLE_DIGITS;++i) { + dpt*=dt; + } + fprintf(stderr,"\n"); + ddiff = (1-dpt)/(1-dt); + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(-:ddiff) + for (i=0;i<DOUBLE_DIGITS;++i) { + ddiff -= pow(dt,i); + } + if( fabs(ddiff) > rounding_error) { + result++; + fprintf(stderr,"Error in Difference with doubles: 
Result was %E" + " instead of 0.0\n",ddiff); + } + + /* Tests for integers */ + #pragma omp parallel for schedule(dynamic,1) private(i) reduction(*:product) + for(i=1;i<=MAX_FACTOR;i++) { + product *= i; + } + known_product = KNOWN_PRODUCT; + if(known_product != product) { + result++; + fprintf(stderr,"Error in Product with integers: Result was %d" + " instead of %d\n\n",product,known_product); + } + + /* Tests for logic AND */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=1; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(&&:logic_and) + for(i=0;i<LOOPCOUNT;++i) { + logic_and = (logic_and && logics[i]); + } + if(!logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 1.\n"); + } + + logic_and = 1; + logics[LOOPCOUNT/2]=0; + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(&&:logic_and) + for(i=0;i<LOOPCOUNT;++i) { + logic_and = logic_and && logics[i]; + } + if(logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 2.\n"); + } + + /* Tests for logic OR */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(||:logic_or) + for(i=0;i<LOOPCOUNT;++i) { + logic_or = logic_or || logics[i]; + } + if(logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 1.\n"); + } + logic_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(||:logic_or) + for(i=0;i<LOOPCOUNT;++i) { + logic_or = logic_or || logics[i]; + } + if(!logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 2.\n"); + } + + /* Tests for bitwise AND */ + for(i=0;i<LOOPCOUNT;++i) { + logics[i]=1; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(&:bit_and) + for(i=0;i<LOOPCOUNT;++i) { + bit_and = (bit_and & logics[i]); + } + if(!bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 1.\n"); + } + + bit_and = 1; + logics[LOOPCOUNT/2]=0; + + #pragma omp parallel for 
schedule(dynamic,1) private(i) \ + reduction(&:bit_and) + for(i=0;i<LOOPCOUNT;++i) { + bit_and = bit_and & logics[i]; + } + if(bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 2.\n"); + } + + /* Tests for bitwise OR */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(|:bit_or) + for(i=0;i<LOOPCOUNT;++i) { + bit_or = bit_or | logics[i]; + } + if(bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 1\n"); + } + bit_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(|:bit_or) + for(i=0;i<LOOPCOUNT;++i) { + bit_or = bit_or | logics[i]; + } + if(!bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 2\n"); + } + + /* Tests for bitwise XOR */ + for(i=0;i<LOOPCOUNT;i++) { + logics[i]=0; + } + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(^:exclusiv_bit_or) + for(i=0;i<LOOPCOUNT;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + if(exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[LOOPCOUNT/2]=1; + + #pragma omp parallel for schedule(dynamic,1) private(i) \ + reduction(^:exclusiv_bit_or) + for(i=0;i<LOOPCOUNT;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + if(!exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n"); + } + + /*printf("\nResult:%d\n",result);*/ + return (result==0); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_for_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c new file mode 100644 index 0000000..1780fab --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c @@ 
-0,0 +1,54 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_sections_firstprivate() +{ + int sum; + int sum0; + int known_sum; + + sum =7; + sum0=11; + + #pragma omp parallel sections firstprivate(sum0) + { + #pragma omp section + { + #pragma omp critical + { + sum= sum+sum0; + } + } + #pragma omp section + { + #pragma omp critical + { + sum= sum+sum0; + } + } + #pragma omp section + { + #pragma omp critical + { + sum= sum+sum0; + } + } + } + + known_sum=11*3+7; + return (known_sum==sum); +} /* end of check_section_firstprivate*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_sections_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_lastprivate.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_lastprivate.c new file mode 100644 index 0000000..9b775ec --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_lastprivate.c @@ -0,0 +1,71 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_sections_lastprivate() +{ + int sum; + int sum0; + int i; + int i0; + int known_sum; + sum =0; + sum0 = 0; + i0 = -1; + + #pragma omp parallel sections private(i,sum0) lastprivate(i0) + { + #pragma omp section + { + sum0=0; + for (i=1;i<400;i++) { + sum0=sum0+i; + i0=i; + } + #pragma omp critical + { + sum= sum+sum0; + } + } + #pragma omp section + { + sum0=0; + for(i=400;i<700;i++) { + sum0=sum0+i; + i0=i; + } + #pragma omp critical + { + sum= sum+sum0; + } + } + #pragma omp section + { + sum0=0; + for(i=700;i<1000;i++) { + sum0=sum0+i; + i0=i; + } + #pragma omp critical + { + sum= sum+sum0; + } + } + } + + known_sum=(999*1000)/2; + return ((known_sum==sum) && (i0==999) ); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + 
if(!test_omp_parallel_sections_lastprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_private.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_private.c new file mode 100644 index 0000000..7dab295 --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_private.c @@ -0,0 +1,64 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_parallel_sections_private() +{ + int sum; + int sum0; + int i; + int known_sum; + + sum = 7; + sum0=0; + + #pragma omp parallel sections private(sum0, i) + { + #pragma omp section + { + sum0=0; + for (i=1;i<400;i++) + sum0=sum0+i; + #pragma omp critical + { + sum= sum+sum0; + } + } + #pragma omp section + { + sum0=0; + for(i=400;i<700;i++) + sum0=sum0+i; + #pragma omp critical + { + sum= sum+sum0; + } + } + #pragma omp section + { + sum0=0; + for(i=700;i<1000;i++) + sum0=sum0+i; + #pragma omp critical + { + sum= sum+sum0; + } + } + } + + known_sum=(999*1000)/2+7; + return (known_sum==sum); +} /* end of check_section_private*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_sections_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_parallel_sections_reduction.c b/final/runtime/test/worksharing/sections/omp_parallel_sections_reduction.c new file mode 100644 index 0000000..0d49865 --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_parallel_sections_reduction.c @@ -0,0 +1,508 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int test_omp_parallel_sections_reduction() +{ + int sum; + int known_sum; + double dpt; + double dsum; + double dknown_sum; + double dt=0.5; /* base of geometric row for + and - test*/ + double rounding_error= 1.E-5; + int diff; + double ddiff; + int product; 
+ int known_product; + int logic_and; + int bit_and; + int logic_or; + int bit_or; + int exclusiv_bit_or; + int logics[1000]; + int i; + int result; + + sum = 7; + dsum=0; + product =1; + dpt = 1; + logic_and=1; + bit_and=1; + logic_or=0; + bit_or=0; + exclusiv_bit_or=0; + result =0; + /* int my_islarger;*/ + /*int is_larger=1;*/ + + // Test summation of integers + known_sum = (999*1000)/2+7; + #pragma omp parallel sections private(i) reduction(+:sum) + { + #pragma omp section + { + for (i=1;i<300;i++) { + sum=sum+i; + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + sum=sum+i; + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + sum=sum+i; + } + } + } + if(known_sum!=sum) { + result++; + fprintf(stderr,"Error in sum with integers: Result was %d" + " instead of %d.\n",sum, known_sum); + } + + // Test differences of integers + diff = (999*1000)/2; + #pragma omp parallel sections private(i) reduction(-:diff) + { + #pragma omp section + { + for (i=1;i<300;i++) { + diff=diff-i; + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + diff=diff-i; + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + diff=diff-i; + } + } + } + if(diff != 0) { + result++; + fprintf(stderr,"Error in Difference with integers: Result was %d" + " instead of 0.\n",diff); + } + + // Test summation of doubles + for (i=0;i<20;++i) { + dpt*=dt; + } + dknown_sum = (1-dpt)/(1-dt); + #pragma omp parallel sections private(i) reduction(+:dsum) + { + #pragma omp section + { + for (i=0;i<6;++i) { + dsum += pow(dt,i); + } + } + #pragma omp section + { + for (i=6;i<12;++i) { + dsum += pow(dt,i); + } + } + #pragma omp section + { + for (i=12;i<20;++i) { + dsum += pow(dt,i); + } + } + } + if( fabs(dsum-dknown_sum) > rounding_error ) { + result++; + fprintf(stderr,"Error in sum with doubles: Result was %f" + " instead of %f (Difference: %E)\n", + dsum, dknown_sum, dsum-dknown_sum); + } + + // Test differences of doubles + dpt=1; + for (i=0;i<20;++i) { + dpt*=dt; + } + 
fprintf(stderr,"\n"); + ddiff = (1-dpt)/(1-dt); + #pragma omp parallel sections private(i) reduction(-:ddiff) + { + #pragma omp section + { + for (i=0;i<6;++i) { + ddiff -= pow(dt,i); + } + } + #pragma omp section + { + for (i=6;i<12;++i) { + ddiff -= pow(dt,i); + } + } + #pragma omp section + { + for (i=12;i<20;++i) { + ddiff -= pow(dt,i); + } + } + } + if( fabs(ddiff) > rounding_error) { + result++; + fprintf(stderr,"Error in Difference with doubles: Result was %E" + " instead of 0.0\n",ddiff); + } + + // Test product of integers + known_product = 3628800; + #pragma omp parallel sections private(i) reduction(*:product) + { + #pragma omp section + { + for(i=1;i<3;i++) { + product *= i; + } + } + #pragma omp section + { + for(i=3;i<7;i++) { + product *= i; + } + } + #pragma omp section + { + for(i=7;i<11;i++) { + product *= i; + } + } + } + if(known_product != product) { + result++; + fprintf(stderr,"Error in Product with integers: Result was %d" + " instead of %d\n",product,known_product); + } + + // Test logical AND + for(i=0;i<1000;i++) { + logics[i]=1; + } + + #pragma omp parallel sections private(i) reduction(&&:logic_and) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_and = (logic_and && logics[i]); + } + } + } + if(!logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 1\n"); + } + logic_and = 1; + logics[501] = 0; + + #pragma omp parallel sections private(i) reduction(&&:logic_and) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_and = (logic_and && logics[i]); + } + } + } + if(logic_and) { + result++; + 
fprintf(stderr,"Error in logic AND part 2"); + } + + // Test logical OR + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel sections private(i) reduction(||:logic_or) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_or = (logic_or || logics[i]); + } + } + } + if(logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 1\n"); + } + + logic_or = 0; + logics[501]=1; + + #pragma omp parallel sections private(i) reduction(||:logic_or) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_or = (logic_or || logics[i]); + } + } + } + if(!logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 2\n"); + } + + // Test bitwise AND + for(i=0;i<1000;++i) { + logics[i]=1; + } + + #pragma omp parallel sections private(i) reduction(&:bit_and) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_and = (bit_and & logics[i]); + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_and = (bit_and & logics[i]); + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_and = (bit_and & logics[i]); + } + } + } + if(!bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 1\n"); + } + + bit_and = 1; + logics[501]=0; + + #pragma omp parallel sections private(i) reduction(&:bit_and) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_and = bit_and & logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_and = bit_and & logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_and = bit_and & logics[i]; + } + } + } + if(bit_and) { + result++; + fprintf(stderr,"Error in BIT AND 
part 2"); + } + + // Test bitwise OR + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel sections private(i) reduction(|:bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_or = bit_or | logics[i]; + } + } + } + if(bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 1\n"); + } + bit_or = 0; + logics[501]=1; + + #pragma omp parallel sections private(i) reduction(|:bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_or = bit_or | logics[i]; + } + } + } + if(!bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 2\n"); + } + + // Test bitwise XOR + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel sections private(i) reduction(^:exclusiv_bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + } + if(exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[501]=1; + + #pragma omp parallel sections private(i) reduction(^:exclusiv_bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + } + if(!exclusiv_bit_or) { + 
result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n"); + } + + /*printf("\nResult:%d\n",result);*/ + return (result==0); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_sections_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_section_firstprivate.c b/final/runtime/test/worksharing/sections/omp_section_firstprivate.c new file mode 100644 index 0000000..5526475 --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_section_firstprivate.c @@ -0,0 +1,55 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_section_firstprivate() +{ + int sum; + int sum0; + int known_sum; + + sum0 = 11; + sum = 7; + #pragma omp parallel + { + #pragma omp sections firstprivate(sum0) + { + #pragma omp section + { + #pragma omp critical + { + sum = sum + sum0; + } + } + #pragma omp section + { + #pragma omp critical + { + sum = sum + sum0; + } + } + #pragma omp section + { + #pragma omp critical + { + sum = sum + sum0; + } + } + } + } + known_sum = 11 * 3 + 7; + return (known_sum == sum); +} /* end of check_section_firstprivate*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_section_firstprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_section_lastprivate.c b/final/runtime/test/worksharing/sections/omp_section_lastprivate.c new file mode 100644 index 0000000..0dbbea9 --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_section_lastprivate.c @@ -0,0 +1,76 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_section_lastprivate() +{ + int i0 = -1; + int sum = 0; + int i; + int sum0 = 0; + int known_sum; + + i0 = -1; + sum = 0; + + #pragma omp parallel + { + #pragma omp sections lastprivate(i0) private(i,sum0) + { + 
#pragma omp section + { + sum0 = 0; + for (i = 1; i < 400; i++) + { + sum0 = sum0 + i; + i0 = i; + } + #pragma omp critical + { + sum = sum + sum0; + } /*end of critical*/ + } /* end of section */ + #pragma omp section + { + sum0 = 0; + for(i = 400; i < 700; i++) + { + sum0 = sum0 + i; + i0 = i; + } + #pragma omp critical + { + sum = sum + sum0; + } /*end of critical*/ + } + #pragma omp section + { + sum0 = 0; + for(i = 700; i < 1000; i++) + { + sum0 = sum0 + i; + i0 = i; + } + #pragma omp critical + { + sum = sum + sum0; + } /*end of critical*/ + } /* end of section */ + } /* end of sections*/ + } /* end of parallel*/ + known_sum = (999 * 1000) / 2; + return ((known_sum == sum) && (i0 == 999) ); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_section_lastprivate()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_section_private.c b/final/runtime/test/worksharing/sections/omp_section_private.c new file mode 100644 index 0000000..bf2a30d --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_section_private.c @@ -0,0 +1,66 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_section_private() +{ + int sum; + int sum0; + int i; + int known_sum; + + sum = 7; + sum0 = 0; + + #pragma omp parallel + { + #pragma omp sections private(sum0,i) + { + #pragma omp section + { + sum0 = 0; + for (i = 1; i < 400; i++) + sum0 = sum0 + i; + #pragma omp critical + { + sum = sum + sum0; + } + } + #pragma omp section + { + sum0 = 0; + for (i = 400; i < 700; i++) + sum0 = sum0 + i; + #pragma omp critical + { + sum = sum + sum0; + } + } + #pragma omp section + { + sum0 = 0; + for (i = 700; i < 1000; i++) + sum0 = sum0 + i; + #pragma omp critical + { + sum = sum + sum0; + } + } + } /*end of sections*/ + } /* end of parallel */ + known_sum = (999 * 1000) / 2 + 7; + return (known_sum == sum); +} /* end of 
check_section_private*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_section_private()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_sections_nowait.c b/final/runtime/test/worksharing/sections/omp_sections_nowait.c new file mode 100644 index 0000000..caff254 --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_sections_nowait.c @@ -0,0 +1,104 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +/* + * This test will hang if the nowait is not working properly + * + * It relies on a thread skipping to the second sections construct to + * release the threads in the first sections construct + * + * Also, since scheduling of sections is implementation defined, it is + * necessary to have all four sections in the second sections construct + * release the threads since we can't guarantee which section a single thread + * will execute. 
+ */ +volatile int release; +volatile int count; + +void wait_for_release_then_increment(int rank) +{ + fprintf(stderr, "Thread nr %d enters first section" + " and waits.\n", rank); + while (release == 0); + #pragma omp atomic + count++; +} + +void release_and_increment(int rank) +{ + fprintf(stderr, "Thread nr %d sets release to 1\n", rank); + release = 1; + #pragma omp flush(release) + #pragma omp atomic + count++; +} + +int test_omp_sections_nowait() +{ + release = 0; + count = 0; + + #pragma omp parallel num_threads(4) + { + int rank; + rank = omp_get_thread_num (); + #pragma omp sections nowait + { + #pragma omp section + { + wait_for_release_then_increment(rank); + } + #pragma omp section + { + wait_for_release_then_increment(rank); + } + #pragma omp section + { + wait_for_release_then_increment(rank); + } + #pragma omp section + { + fprintf(stderr, "Thread nr %d enters first sections and goes " + "immediately to next sections construct to release.\n", rank); + #pragma omp atomic + count++; + } + } + /* Begin of second sections environment */ + #pragma omp sections + { + #pragma omp section + { + release_and_increment(rank); + } + #pragma omp section + { + release_and_increment(rank); + } + #pragma omp section + { + release_and_increment(rank); + } + #pragma omp section + { + release_and_increment(rank); + } + } + } + // Check to make sure all eight sections were executed + return (count==8); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_sections_nowait()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/sections/omp_sections_reduction.c b/final/runtime/test/worksharing/sections/omp_sections_reduction.c new file mode 100644 index 0000000..1fdb5ec --- /dev/null +++ b/final/runtime/test/worksharing/sections/omp_sections_reduction.c @@ -0,0 +1,543 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include <math.h> +#include "omp_testsuite.h" + +int 
test_omp_sections_reduction() +{ + int sum; + int known_sum; + double dpt,dsum; + double dknown_sum; + double dt=0.5; /* base of geometric row for + and - test*/ + double rounding_error= 1.E-9; + int diff; + double ddiff; + int product; + int known_product; + int logic_and; + int bit_and; + int logic_or; + int bit_or; + int exclusiv_bit_or; + int logics[1000]; + int i; + int result; + /* int my_islarger; */ + /*int is_larger=1;*/ + sum =7; + dpt =1; + dsum=0; + product =1; + logic_and=1; + bit_and=1; + logic_or=0; + bit_or=0; + exclusiv_bit_or=0; + result = 0; + dt = 1./3.; + + known_sum = (999*1000)/2+7; + #pragma omp parallel + { + #pragma omp sections private(i) reduction(+:sum) + { + #pragma omp section + { + for (i=1;i<300;i++) { + sum=sum+i; + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + sum=sum+i; + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + sum=sum+i; + } + } + } + } + if(known_sum!=sum) { + ++result; + fprintf(stderr,"Error in sum with integers: Result was %d" + " instead of %d\n", sum,known_sum); + } + + diff = (999*1000)/2; + #pragma omp parallel + { + #pragma omp sections private(i) reduction(-:diff) + { + #pragma omp section + { + for (i=1;i<300;i++) { + diff=diff-i; + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + diff=diff-i; + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + diff=diff-i; + } + } + } + } + if(diff != 0) { + result++; + fprintf(stderr,"Error in Difference with integers: Result was %d" + " instead of 0.\n",diff); + } + + for (i=0;i<20;++i) { + dpt*=dt; + } + dknown_sum = (1-dpt)/(1-dt); + #pragma omp parallel + { + #pragma omp sections private(i) reduction(+:dsum) + { + #pragma omp section + { + for (i=0;i<6;++i) { + dsum += pow(dt,i); + } + } + #pragma omp section + { + for (i=6;i<12;++i) { + dsum += pow(dt,i); + } + } + #pragma omp section + { + for (i=12;i<20;++i) { + dsum += pow(dt,i); + } + } + } + } + if( fabs(dsum-dknown_sum) > rounding_error ) { + result++; + 
fprintf(stderr,"Error in sum with doubles: Result was %f" + " instead of %f (Difference: %E)\n", + dsum, dknown_sum, dsum-dknown_sum); + } + + dpt=1; + for (i=0;i<20;++i) { + dpt*=dt; + } + fprintf(stderr,"\n"); + ddiff = (1-dpt)/(1-dt); + #pragma omp parallel + { + #pragma omp sections private(i) reduction(-:ddiff) + { + #pragma omp section + { + for (i=0;i<6;++i) { + ddiff -= pow(dt,i); + } + } + #pragma omp section + { + for (i=6;i<12;++i) { + ddiff -= pow(dt,i); + } + } + #pragma omp section + { + for (i=12;i<20;++i) { + ddiff -= pow(dt,i); + } + } + } + } + + if(fabs(ddiff) > rounding_error) { + result++; + fprintf(stderr,"Error in Difference with doubles: Result was %E" + " instead of 0.0\n",ddiff); + } + + known_product = 3628800; + #pragma omp parallel + { + #pragma omp sections private(i) reduction(*:product) + { + #pragma omp section + { + for(i=1;i<3;i++) { + product *= i; + } + } + #pragma omp section + { + for(i=3;i<7;i++) { + product *= i; + } + } + #pragma omp section + { + for(i=7;i<11;i++) { + product *= i; + } + } + } + } + if(known_product != product) { + result++; + fprintf(stderr,"Error in Product with integers: Result was %d" + " instead of %d\n",product,known_product); + } + + for(i=0;i<1000;i++) { + logics[i]=1; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(&&:logic_and) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_and = (logic_and && logics[i]); + } + } + } + } + if(!logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 1\n"); + } + + logic_and = 1; + logics[501] = 0; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(&&:logic_and) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + 
for (i=300;i<700;i++) { + logic_and = (logic_and && logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_and = (logic_and && logics[i]); + } + } + } + } + if(logic_and) { + result++; + fprintf(stderr,"Error in logic AND part 2\n"); + } + + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(||:logic_or) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_or = (logic_or || logics[i]); + } + } + } + } + if(logic_or) { + result++; + fprintf(stderr,"\nError in logic OR part 1\n"); + } + + logic_or = 0; + logics[501]=1; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(||:logic_or) + { + #pragma omp section + { + for (i=1;i<300;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=300;i<700;i++) { + logic_or = (logic_or || logics[i]); + } + } + #pragma omp section + { + for (i=700;i<1000;i++) { + logic_or = (logic_or || logics[i]); + } + } + } + } + if(!logic_or) { + result++; + fprintf(stderr,"Error in logic OR part 2\n"); + } + + for(i=0;i<1000;++i) { + logics[i]=1; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(&:bit_and) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_and = (bit_and & logics[i]); + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_and = (bit_and & logics[i]); + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_and = (bit_and & logics[i]); + } + } + } + } + if(!bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 1\n"); + } + + bit_and = 1; + logics[501]=0; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(&:bit_and) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_and = bit_and & logics[i]; + 
} + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_and = bit_and & logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_and = bit_and & logics[i]; + } + } + } + } + if(bit_and) { + result++; + fprintf(stderr,"Error in BIT AND part 2\n"); + } + + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(|:bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_or = bit_or | logics[i]; + } + } + } + } + if(bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 1\n"); + } + bit_or = 0; + logics[501]=1; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(|:bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + bit_or = bit_or | logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + bit_or = bit_or | logics[i]; + } + } + } + } + if(!bit_or) { + result++; + fprintf(stderr,"Error in BIT OR part 2\n"); + } + + for(i=0;i<1000;i++) { + logics[i]=0; + } + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(^:exclusiv_bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + } + } + if(exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 1\n"); + } + + exclusiv_bit_or = 0; + logics[501]=1; + + #pragma omp parallel + { + #pragma omp sections private(i) reduction(^:exclusiv_bit_or) + { + #pragma omp section + { + for(i=0;i<300;++i) { + 
exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=300;i<700;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + #pragma omp section + { + for(i=700;i<1000;++i) { + exclusiv_bit_or = exclusiv_bit_or ^ logics[i]; + } + } + } + } + if(!exclusiv_bit_or) { + result++; + fprintf(stderr,"Error in EXCLUSIV BIT OR part 2\n"); + } + + /*printf("\nResult:%d\n",result);*/ + return (result==0); +} +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_sections_reduction()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/single/omp_single.c b/final/runtime/test/worksharing/single/omp_single.c new file mode 100644 index 0000000..4963579 --- /dev/null +++ b/final/runtime/test/worksharing/single/omp_single.c @@ -0,0 +1,44 @@ +// RUN: %libomp-compile-and-run +#include <stdio.h> +#include "omp_testsuite.h" + +int test_omp_single() +{ + int nr_threads_in_single; + int result; + int nr_iterations; + int i; + + nr_threads_in_single = 0; + result = 0; + nr_iterations = 0; + + #pragma omp parallel private(i) + { + for (i = 0; i < LOOPCOUNT; i++) { + #pragma omp single + { + #pragma omp flush + nr_threads_in_single++; + #pragma omp flush + nr_iterations++; + nr_threads_in_single--; + result = result + nr_threads_in_single; + } + } + } + return ((result == 0) && (nr_iterations == LOOPCOUNT)); +} /* end of check_single*/ + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_single()) { + num_failed++; + } + } + return num_failed; +} diff --git a/final/runtime/test/worksharing/single/omp_single_copyprivate.c b/final/runtime/test/worksharing/single/omp_single_copyprivate.c new file mode 100644 index 0000000..2fece5c --- /dev/null +++ b/final/runtime/test/worksharing/single/omp_single_copyprivate.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile-and-run +#include "omp_testsuite.h" + +#define DEBUG_TEST 0 + +int j; 
/*
 * Reconstructed from a line-collapsed diff dump.
 * Original sources: final/runtime/test/worksharing/single/
 * omp_single_copyprivate.c, omp_single_nowait.c and omp_single_private.c.
 * Diff metadata and the per-file `main` harnesses from the dump are not
 * reproduced here (they would be duplicate `main` definitions in one unit).
 */
#include <stdio.h>

#ifndef LOOPCOUNT
#define LOOPCOUNT 1000 /* fallback when omp_testsuite.h is unavailable */
#endif
#ifndef DEBUG_TEST
#define DEBUG_TEST 0 /* set to 1 for per-thread tracing */
#endif
#ifdef _OPENMP
#include <omp.h> /* omp_get_thread_num */
#else
/* Serial fallback so this unit also builds without OpenMP support. */
static int omp_get_thread_num(void) { return 0; }
#endif

int j;
#pragma omp threadprivate(j)

/*
 * (omp_single_copyprivate.c) Checks the `copyprivate(j)` clause: the one
 * thread that executes the single region stores the loop index into the
 * threadprivate `j`, and copyprivate must broadcast it to every thread's
 * copy, so j - i is 0 everywhere.  Returns 1 on success, 0 on failure.
 */
int test_omp_single_copyprivate()
{
  int result;
  int nr_iterations;

  result = 0;
  nr_iterations = 0;
  #pragma omp parallel num_threads(4)
  {
    int i;
    for (i = 0; i < LOOPCOUNT; i++)
    {
#if DEBUG_TEST
      int thread;
      thread = omp_get_thread_num ();
#endif
      #pragma omp single copyprivate(j)
      {
        nr_iterations++;
        j = i;
#if DEBUG_TEST
        printf ("thread %d assigns, j = %d, i = %d\n", thread, j, i);
#endif
      }
#if DEBUG_TEST
      #pragma omp barrier
#endif
      #pragma omp critical
      {
#if DEBUG_TEST
        printf ("thread = %d, j = %d, i = %d\n", thread, j, i);
#endif
        result = result + j - i;
      }
      #pragma omp barrier
    } /* end of for */
  } /* end of parallel */
  return ((result == 0) && (nr_iterations == LOOPCOUNT));
}

/*
 * (omp_single_nowait.c) This test will hang if the nowait is not working
 * properly.  It relies on one thread skipping to the last single construct
 * to release the threads waiting in the first three single constructs.
 */
volatile int release;
volatile int count;

/* Spin until `release` becomes non-zero, then bump the shared counter. */
void wait_for_release_then_increment(int rank)
{
  fprintf(stderr, "Thread nr %d enters first section"
    " and waits.\n", rank);
  while (release == 0);
  #pragma omp atomic
  count++;
}

/* Set the release flag and bump the shared counter. */
void release_and_increment(int rank)
{
  fprintf(stderr, "Thread nr %d sets release to 1\n", rank);
  release = 1;
  #pragma omp atomic
  count++;
}

/*
 * Requires multiple threads: a thread that skips the first three
 * `single nowait` regions must reach the final (blocking) single and set
 * `release`, freeing the waiters.  Run serially this spins forever in
 * wait_for_release_then_increment.  Returns 1 iff all four singles ran.
 */
int test_omp_single_nowait()
{
  release = 0;
  count = 0;

  #pragma omp parallel num_threads(4)
  {
    int rank;
    rank = omp_get_thread_num ();
    #pragma omp single nowait
    {
      wait_for_release_then_increment(rank);
    }
    #pragma omp single nowait
    {
      wait_for_release_then_increment(rank);
    }
    #pragma omp single nowait
    {
      wait_for_release_then_increment(rank);
    }

    #pragma omp single
    {
      release_and_increment(rank);
    }
  }
  // Check to make sure all four singles were executed
  return (count==4);
} /* end of check_single_nowait*/

/* (omp_single_private.c) threadprivate counters: per-thread iteration and
 * result accumulators, reduced under the critical section below. */
int myit = 0;
#pragma omp threadprivate(myit)
int myresult = 0;
#pragma omp threadprivate(myresult)

/*
 * Checks `single private(nr_threads_in_single) nowait`: each execution of
 * the single region works on a private copy, so the shared
 * nr_threads_in_single must remain 0 and the per-thread iteration counts
 * must add up to LOOPCOUNT.  Returns 1 on success, 0 on failure.
 */
int test_omp_single_private()
{
  int nr_threads_in_single;
  int result;
  int nr_iterations;
  int i;

  myit = 0;
  nr_threads_in_single = 0;
  nr_iterations = 0;
  result = 0;

  #pragma omp parallel private(i)
  {
    myresult = 0;
    myit = 0;
    for (i = 0; i < LOOPCOUNT; i++) {
      #pragma omp single private(nr_threads_in_single) nowait
      {
        nr_threads_in_single = 0;
        #pragma omp flush
        nr_threads_in_single++;
        #pragma omp flush
        myit++;
        myresult = myresult + nr_threads_in_single;
      }
    }
    #pragma omp critical
    {
      result += nr_threads_in_single;
      nr_iterations += myit;
    }
  }
  return ((result == 0) && (nr_iterations == LOOPCOUNT));
} /* end of check_single private */