aboutsummaryrefslogtreecommitdiff
path: root/final/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'final/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp')
-rw-r--r--final/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp114
1 files changed, 114 insertions, 0 deletions
diff --git a/final/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp b/final/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp
new file mode 100644
index 0000000..e66cda9
--- /dev/null
+++ b/final/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp
@@ -0,0 +1,114 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <stdio.h>
+#include <omp.h>
+
+#define NT 4
+#define INIT 10
+
+/*
+The test emulates the code generation needed for a reduction with the task
+modifier on a worksharing (omp for) construct.
+
+Note: tasks could just use the in_reduction clause, but the compiler rejects it
+because of a bug: it mistakenly requires the reduction item to be shared, which
+is only true for reductions on worksharing and wrong for task reductions.
+*/
+
+//------------------------------------------------
+// OpenMP runtime library routines (hand-declared; main() calls them directly)
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);
+// unused here: void *__kmpc_task_reduction_modifier_init(void *loc, int gtid,
+//              int flags, int num, void *data);
+extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws,
+ int num, void *data); // main() keeps the result in tg
+extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);
+extern int __kmpc_global_thread_num(void *); // main() uses the result as gtid
+#ifdef __cplusplus
+}
+#endif
+
+//------------------------------------------------
+// Compiler-generated code
+
+typedef struct red_input { // describes one reduction item to the runtime
+ void *reduce_shar; /**< item shared between tasks to reduce into */
+ void *reduce_orig; /**< original reduction item used for initialization */
+ size_t reduce_size; /**< size of data item in bytes */
+ // three compiler-generated routines (init and fini are optional):
+ void *reduce_init; /**< data initialization routine (single parameter) */
+ void *reduce_fini; /**< data finalization routine */
+ void *reduce_comb; /**< data combiner routine */
+ unsigned flags; /**< flags for additional info from compiler */
+} red_input_t; // passed by address to __kmpc_taskred_modifier_init
+// Combiner for int reduction items: adds the int at rhs into the int at lhs.
+void i_comb(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; }
+
+int main() { // returns 0 if the reduced var equals the expected total, else 1
+ int var = INIT;
+ int *p_var_orig = &var;
+ int i;
+ omp_set_dynamic(0);
+ omp_set_num_threads(NT);
+#pragma omp parallel private(i) shared(p_var_orig)
+// construct being emulated: #pragma omp for reduction(task,+:var)
+#pragma omp for reduction(+ : var)
+ for (i = 0; i < NT; ++i) // single iteration per thread
+ {
+ // generated code, which actually should be placed before
+ // loop iterations distribution, but placed here just to show the idea,
+ // and to keep correctness the loop count is equal to number of threads
+ int gtid = __kmpc_global_thread_num(NULL);
+ void *tg; // pointer to taskgroup (optional)
+ red_input_t r_var = {}; // zero everything; flags is never assigned below
+ r_var.reduce_shar = &var;
+ r_var.reduce_orig =
+ p_var_orig; // not used in this test but illustrates codegen
+ r_var.reduce_size = sizeof(var);
+ r_var.reduce_init = NULL;
+ r_var.reduce_fini = NULL;
+ r_var.reduce_comb = (void *)&i_comb;
+ tg = __kmpc_taskred_modifier_init(
+ NULL, // ident_t loc;
+ gtid,
+ 1, // 1 - worksharing construct, 0 - parallel
+ 1, // number of reduction objects
+ &r_var // related data
+ );
+ // end of generated code
+ var++; // 1st contribution: one per loop iteration, NT in total
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+ {
+ // emulate task reduction here because of compiler bug:
+ // it mistakenly declines to accept in_reduction because var is private
+ // outside.
+ int gtid = __kmpc_global_thread_num(NULL);
+ int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+ *p_var += 1; // 2nd contribution: one task per iteration, NT in total
+ }
+ if (omp_get_thread_num() > 0) { // every thread except thread 0
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+ {
+ int gtid = __kmpc_global_thread_num(NULL);
+ int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+ *p_var += 1; // 3rd contribution: NT - 1 with one iteration per thread
+ }
+ }
+ // generated code, which actually should be placed after loop completion
+ // but before barrier and before loop reduction. It is placed here just to
+ // show the idea,
+ // and to keep correctness the loop count is equal to number of threads
+ __kmpc_task_reduction_modifier_fini(NULL, gtid, 1);
+ // end of generated code
+ }
+ if (var == INIT + NT * 3 - 1) { // INIT + NT (var++) + NT + (NT - 1) tasks
+ printf("passed\n");
+ return 0;
+ } else {
+ printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1);
+ return 1;
+ }
+}