// RUN: %libomp-compile-and-run /* Test for guided scheduling * Ensure threads get chunks interleavely first * Then judge the chunk sizes are decreasing to a stable value * Modified by Chunhua Liao * For example, 100 iteration on 2 threads, chunksize 7 * one line for each dispatch, 0/1 means thread id * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 18 * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 * 1 1 1 1 1 1 1 1 1 1 10 * 0 0 0 0 0 0 0 0 8 * 1 1 1 1 1 1 1 7 * 0 0 0 0 0 0 0 7 * 1 1 1 1 1 1 1 7 * 0 0 0 0 0 5 */ #include #include #include "omp_testsuite.h" #include "omp_my_sleep.h" #define CFSMAX_SIZE 1000 #define MAX_TIME 0.005 #ifdef SLEEPTIME #undef SLEEPTIME #define SLEEPTIME 0.0001 #endif int test_omp_for_schedule_guided() { int * tids; int * chunksizes; int notout; int maxiter; int threads; int i; int result; tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1)); maxiter = 0; result = 1; notout = 1; /* Testing if enough threads are available for this check. */ #pragma omp parallel { #pragma omp single { threads = omp_get_num_threads(); } } /* ensure there are at least two threads */ if (threads < 2) { omp_set_num_threads(2); threads = 2; } /* Now the real parallel work: * Each thread will start immediately with the first chunk. */ #pragma omp parallel shared(tids,maxiter) { /* begin of parallel */ double count; int tid; int j; tid = omp_get_thread_num (); #pragma omp for nowait schedule(guided) for(j = 0; j < CFSMAX_SIZE; ++j) { count = 0.; #pragma omp flush(maxiter) if (j > maxiter) { #pragma omp critical { maxiter = j; } } /*printf ("thread %d sleeping\n", tid);*/ #pragma omp flush(maxiter,notout) while (notout && (count < MAX_TIME) && (maxiter == j)) { #pragma omp flush(maxiter,notout) my_sleep (SLEEPTIME); count += SLEEPTIME; #ifdef VERBOSE printf("."); #endif } #ifdef VERBOSE if (count > 0.) printf(" waited %lf s\n", count); #endif /*printf ("thread %d awake\n", tid);*/ tids[j] = tid; #ifdef VERBOSE printf("%d finished by %d\n",j,tid); #endif } /* end of for */ notout = 0; #pragma omp flush(maxiter,notout) } /* end of parallel */ /******************************************************* * evaluation of the values * *******************************************************/ { int determined_chunksize = 1; int last_threadnr = tids[0]; int global_chunknr = 0; int openwork = CFSMAX_SIZE; int expected_chunk_size; int* local_chunknr = (int*)malloc(threads * sizeof(int)); double c = 1; for (i = 0; i < threads; i++) local_chunknr[i] = 0; tids[CFSMAX_SIZE] = -1; /* * determine the number of global chunks */ // fprintf(stderr,"# global_chunknr thread local_chunknr chunksize\n"); for(i = 1; i <= CFSMAX_SIZE; ++i) { if (last_threadnr==tids[i]) { determined_chunksize++; } else { /* fprintf(stderr, "%d\t%d\t%d\t%d\n", global_chunknr, last_threadnr, local_chunknr[last_threadnr], m); */ global_chunknr++; local_chunknr[last_threadnr]++; last_threadnr = tids[i]; determined_chunksize = 1; } } /* now allocate the memory for saving the sizes of the global chunks */ chunksizes = (int*)malloc(global_chunknr * sizeof(int)); /* * Evaluate the sizes of the global chunks */ global_chunknr = 0; determined_chunksize = 1; last_threadnr = tids[0]; for (i = 1; i <= CFSMAX_SIZE; ++i) { /* If the threadnumber was the same as before increase the * detected chunksize for this chunk otherwise set the detected * chunksize again to one and save the number of the next * thread in last_threadnr. */ if (last_threadnr == tids[i]) { determined_chunksize++; } else { chunksizes[global_chunknr] = determined_chunksize; global_chunknr++; local_chunknr[last_threadnr]++; last_threadnr = tids[i]; determined_chunksize = 1; } } #ifdef VERBOSE fprintf(stderr, "found\texpected\tconstant\n"); #endif /* identify the constant c for the exponential decrease of the chunksize */ expected_chunk_size = openwork / threads; c = (double) chunksizes[0] / expected_chunk_size; for (i = 0; i < global_chunknr; i++) { /* calculate the new expected chunksize */ if (expected_chunk_size > 1) expected_chunk_size = c * openwork / threads; #ifdef VERBOSE fprintf(stderr, "%8d\t%8d\t%lf\n", chunksizes[i], expected_chunk_size, c * chunksizes[i]/expected_chunk_size); #endif /* check if chunksize is inside the rounding errors */ if (abs (chunksizes[i] - expected_chunk_size) >= 2) { result = 0; #ifndef VERBOSE fprintf(stderr, "Chunksize differed from expected " "value: %d instead of %d\n", chunksizes[i], expected_chunk_size); return 0; #endif } /* end if */ #ifndef VERBOSE if (expected_chunk_size - chunksizes[i] < 0) fprintf(stderr, "Chunksize did not decrease: %d" " instead of %d\n", chunksizes[i],expected_chunk_size); #endif /* calculating the remaining amount of work */ openwork -= chunksizes[i]; } } return result; } int main() { int i; int num_failed=0; for(i = 0; i < REPETITIONS; i++) { if(!test_omp_for_schedule_guided()) { num_failed++; } } return num_failed; }