aboutsummaryrefslogtreecommitdiff
path: root/final/runtime/src/kmp_itt.inl
diff options
context:
space:
mode:
Diffstat (limited to 'final/runtime/src/kmp_itt.inl')
-rw-r--r--final/runtime/src/kmp_itt.inl1043
1 files changed, 1043 insertions, 0 deletions
diff --git a/final/runtime/src/kmp_itt.inl b/final/runtime/src/kmp_itt.inl
new file mode 100644
index 0000000..a0789fc
--- /dev/null
+++ b/final/runtime/src/kmp_itt.inl
@@ -0,0 +1,1043 @@
+#if USE_ITT_BUILD
+/*
+ * kmp_itt.inl -- Inline functions of ITT Notify.
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Inline function definitions. This file should be included into kmp_itt.h file
+// for production build (to let compiler inline functions) or into kmp_itt.c
+// file for debug build (to reduce the number of files to recompile and save
+// build time).
+
+#include "kmp.h"
+#include "kmp_str.h"
+
+// Debug tracing helpers. NOTE: KMP_ITT_DEBUG_LOCK() acquires the debug lock
+// and KMP_ITT_DEBUG_PRINT() releases it, so they must always be used as a
+// pair: LOCK first, then exactly one PRINT.
+#if KMP_ITT_DEBUG
+extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
+#define KMP_ITT_DEBUG_LOCK() \
+  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
+#define KMP_ITT_DEBUG_PRINT(...) \
+  { \
+    fprintf(stderr, "#%02d: ", __kmp_get_gtid()); \
+    fprintf(stderr, __VA_ARGS__); \
+    fflush(stderr); \
+    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock); \
+  }
+#else
+// Release builds: the tracing pair compiles away to nothing.
+#define KMP_ITT_DEBUG_LOCK()
+#define KMP_ITT_DEBUG_PRINT(...)
+#endif // KMP_ITT_DEBUG
+
+// Ensure that the functions are static if they're supposed to be being inlined.
+// Otherwise they cannot be used in more than one file, since there will be
+// multiple definitions.
+#if KMP_DEBUG
+#define LINKAGE
+#else
+#define LINKAGE static inline
+#endif
+
+// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
+// this API to support user-defined synchronization primitives, but does not use
+// ZCA; it would be safe to turn this off until wider support becomes available.
+#if USE_ITT_ZCA
+#ifdef __INTEL_COMPILER
+#if __INTEL_COMPILER >= 1200
+// Remap the ITT sync entry points onto compiler ZCA intrinsics.
+// NOTE(review): acquired uses __notify_zc_intrinsic while releasing uses
+// __notify_intrinsic -- presumably intentional; confirm against the ZCA docs.
+#undef __itt_sync_acquired
+#undef __itt_sync_releasing
+#define __itt_sync_acquired(addr) \
+  __notify_zc_intrinsic((char *)"sync_acquired", addr)
+#define __itt_sync_releasing(addr) \
+  __notify_intrinsic((char *)"sync_releasing", addr)
+#endif
+#endif
+#endif
+
+// Protects the one-time creation of the shared ITT metadata domain and string
+// handles (see the __kmp_itt_metadata_* functions below).
+static kmp_bootstrap_lock_t metadata_lock =
+    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
+
+/* Parallel region reporting.
+ * __kmp_itt_region_forking should be called by master thread of a team.
+ Exact moment of call does not matter, but it should be completed before any
+ thread of this team calls __kmp_itt_region_starting.
+ * __kmp_itt_region_starting should be called by each thread of a team just
+ before entering parallel region body.
+ * __kmp_itt_region_finished should be called by each thread of a team right
+ after returning from parallel region body.
+ * __kmp_itt_region_joined should be called by master thread of a team, after
+ all threads called __kmp_itt_region_finished.
+
+ Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
+ execute some more user code -- such a thread can execute tasks.
+
+ Note: The overhead of logging region_starting and region_finished in each
+ thread is too large, so these calls are not used. */
+
+// Report the fork of a parallel region to ITT: lazily create (or reuse) the
+// __itt_domain for this source location and emit a frame-begin event. Called
+// by the master thread of a team; must complete before any team thread calls
+// __kmp_itt_region_starting. `barriers` != 0 additionally pre-creates the
+// barrier frame domain for this location.
+LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
+#if USE_ITT_NOTIFY
+  kmp_team_t *team = __kmp_team_from_gtid(gtid);
+  if (team->t.t_active_level > 1) {
+    // The frame notifications are only supported for the outermost teams.
+    return;
+  }
+  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
+  if (loc) {
+    // Use the reserved_2 field to store the index to the region domain.
+    // Assume that reserved_2 contains zero initially. Since zero is special
+    // value here, store the index into domain array increased by 1.
+    if (loc->reserved_2 == 0) {
+      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
+        int frm =
+            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
+        if (frm >= KMP_MAX_FRAME_DOMAINS) {
+          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
+          return; // loc->reserved_2 is still 0
+        }
+        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
+        //    frm = loc->reserved_2 - 1;   // get value saved by other thread
+        //    for same loc
+        //} // AC: this block is to replace next unsynchronized line
+
+        // We need to save indexes for both region and barrier frames. We'll use
+        // loc->reserved_2 field but put region index to the low two bytes and
+        // barrier indexes to the high two bytes. It is OK because
+        // KMP_MAX_FRAME_DOMAINS = 512.
+        loc->reserved_2 |= (frm + 1); // save "new" value
+
+        // Transform compiler-generated region location into the format
+        // that the tools more or less standardized on:
+        // "<func>$omp$parallel@[file:]<line>[:<col>]"
+        char *buff = NULL;
+        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
+        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
+                                team_size, str_loc.file, str_loc.line,
+                                str_loc.col);
+
+        __itt_suppress_push(__itt_suppress_memory_errors);
+        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
+        __itt_suppress_pop();
+
+        __kmp_str_free(&buff);
+        if (barriers) {
+          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
+            int frm = KMP_TEST_THEN_INC32(
+                &__kmp_barrier_domain_count); // get "old" value
+            if (frm >= KMP_MAX_FRAME_DOMAINS) {
+              KMP_TEST_THEN_DEC32(
+                  &__kmp_barrier_domain_count); // revert the count
+              return; // loc->reserved_2 is still 0
+            }
+            char *buff = NULL;
+            // NOTE(review): the barrier name embeds str_loc.col rather than
+            // str_loc.line -- consistent with the barrier path in
+            // __kmp_itt_frame_submit; confirm the column is intended here.
+            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
+                                    str_loc.file, str_loc.col);
+            __itt_suppress_push(__itt_suppress_memory_errors);
+            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
+            __itt_suppress_pop();
+            __kmp_str_free(&buff);
+            // Save the barrier frame index to the high two bytes.
+            loc->reserved_2 |= (frm + 1) << 16;
+          }
+        }
+        __kmp_str_loc_free(&str_loc);
+        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
+      }
+    } else { // Region domain exists for this location
+      // Check if team size was changed. Then create new region domain for this
+      // location
+      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
+      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
+          (__kmp_itt_region_team_size[frm] != team_size)) {
+        char *buff = NULL;
+        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
+        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
+                                team_size, str_loc.file, str_loc.line,
+                                str_loc.col);
+
+        __itt_suppress_push(__itt_suppress_memory_errors);
+        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
+        __itt_suppress_pop();
+
+        __kmp_str_free(&buff);
+        __kmp_str_loc_free(&str_loc);
+        __kmp_itt_region_team_size[frm] = team_size;
+        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
+      } else { // Team size was not changed. Use existing domain.
+        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
+      }
+    }
+    KMP_ITT_DEBUG_LOCK();
+    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
+                        loc->reserved_2, loc);
+  }
+#endif
+} // __kmp_itt_region_forking
+
+// -----------------------------------------------------------------------------
+// Submit a completed ITT frame [begin, end]. With region != 0 this reports a
+// parallel-region frame (region == 2 means the region was serialized); with
+// region == 0 it reports a barrier frame (`imbalance` selects the
+// barrier-imbalance domain). Domains are created lazily per source location
+// and their indices are cached, biased by 1, in loc->reserved_2: region index
+// in the low two bytes, barrier index in the high two bytes.
+LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
+                                    __itt_timestamp end, int imbalance,
+                                    ident_t *loc, int team_size, int region) {
+#if USE_ITT_NOTIFY
+  if (region) {
+    kmp_team_t *team = __kmp_team_from_gtid(gtid);
+    int serialized = (region == 2 ? 1 : 0);
+    if (team->t.t_active_level + serialized > 1) {
+      // The frame notifications are only supported for the outermost teams.
+      return;
+    }
+    // NOTE(review): this path dereferences loc without a NULL check (the
+    // barrier path below does check) -- presumably callers guarantee a valid
+    // loc when region != 0; confirm.
+    // Check region domain has not been created before. Its index is saved in
+    // the low two bytes.
+    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
+      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
+        int frm =
+            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
+        if (frm >= KMP_MAX_FRAME_DOMAINS) {
+          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
+          return; // loc->reserved_2 is still 0
+        }
+
+        // We need to save indexes for both region and barrier frames. We'll use
+        // loc->reserved_2 field but put region index to the low two bytes and
+        // barrier indexes to the high two bytes. It is OK because
+        // KMP_MAX_FRAME_DOMAINS = 512.
+        loc->reserved_2 |= (frm + 1); // save "new" value
+
+        // Transform compiler-generated region location into the format
+        // that the tools more or less standardized on:
+        // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
+        char *buff = NULL;
+        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
+        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
+                                team_size, str_loc.file, str_loc.line,
+                                str_loc.col);
+
+        __itt_suppress_push(__itt_suppress_memory_errors);
+        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
+        __itt_suppress_pop();
+
+        __kmp_str_free(&buff);
+        __kmp_str_loc_free(&str_loc);
+        __kmp_itt_region_team_size[frm] = team_size;
+        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
+      }
+    } else { // Region domain exists for this location
+      // Check if team size was changed. Then create new region domain for this
+      // location
+      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
+      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
+          (__kmp_itt_region_team_size[frm] != team_size)) {
+        char *buff = NULL;
+        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
+        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
+                                team_size, str_loc.file, str_loc.line,
+                                str_loc.col);
+
+        __itt_suppress_push(__itt_suppress_memory_errors);
+        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
+        __itt_suppress_pop();
+
+        __kmp_str_free(&buff);
+        __kmp_str_loc_free(&str_loc);
+        __kmp_itt_region_team_size[frm] = team_size;
+        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
+      } else { // Team size was not changed. Use existing domain.
+        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
+      }
+    }
+    KMP_ITT_DEBUG_LOCK();
+    KMP_ITT_DEBUG_PRINT(
+        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
+        gtid, loc->reserved_2, region, loc, begin, end);
+    return;
+  } else { // called for barrier reporting
+    if (loc) {
+      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
+        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
+          int frm = KMP_TEST_THEN_INC32(
+              &__kmp_barrier_domain_count); // get "old" value
+          if (frm >= KMP_MAX_FRAME_DOMAINS) {
+            KMP_TEST_THEN_DEC32(
+                &__kmp_barrier_domain_count); // revert the count
+            return; // loc->reserved_2 is still 0
+          }
+          // Save the barrier frame index to the high two bytes.
+          loc->reserved_2 |= (frm + 1) << 16; // save "new" value
+
+          // Transform compiler-generated region location into the format
+          // that the tools more or less standardized on:
+          // "<func>$omp$frame@[file:]<line>[:<col>]"
+          kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
+          if (imbalance) {
+            char *buff_imb = NULL;
+            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
+                                        str_loc.func, team_size, str_loc.file,
+                                        str_loc.col);
+            __itt_suppress_push(__itt_suppress_memory_errors);
+            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
+            __itt_suppress_pop();
+            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
+                                  end);
+            __kmp_str_free(&buff_imb);
+          } else {
+            char *buff = NULL;
+            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
+                                    str_loc.file, str_loc.col);
+            __itt_suppress_push(__itt_suppress_memory_errors);
+            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
+            __itt_suppress_pop();
+            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
+                                  end);
+            __kmp_str_free(&buff);
+          }
+          __kmp_str_loc_free(&str_loc);
+        }
+      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
+        if (imbalance) {
+          __itt_frame_submit_v3(
+              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
+              begin, end);
+        } else {
+          __itt_frame_submit_v3(
+              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
+              begin, end);
+        }
+      }
+      KMP_ITT_DEBUG_LOCK();
+      KMP_ITT_DEBUG_PRINT(
+          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
+          loc->reserved_2, loc, begin, end);
+    }
+  }
+#endif
+} // __kmp_itt_frame_submit
+
+// -----------------------------------------------------------------------------
+// Attach imbalance statistics (begin/end timestamps, imbalance time and
+// reduction time) to the shared "OMP Metadata" domain as u64 metadata.
+LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
+                                          kmp_uint64 end, kmp_uint64 imbalance,
+                                          kmp_uint64 reduction) {
+#if USE_ITT_NOTIFY
+  // Lazily create the shared metadata domain and string handles exactly once
+  // (double-checked under metadata_lock).
+  if (metadata_domain == NULL) {
+    __kmp_acquire_bootstrap_lock(&metadata_lock);
+    if (metadata_domain == NULL) {
+      __itt_suppress_push(__itt_suppress_memory_errors);
+      metadata_domain = __itt_domain_create("OMP Metadata");
+      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
+      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
+      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
+      __itt_suppress_pop();
+    }
+    __kmp_release_bootstrap_lock(&metadata_lock);
+  }
+
+  // Pack the four values and hand them to ITT in one call.
+  kmp_uint64 payload[4] = {begin, end, imbalance, reduction};
+  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
+                     __itt_metadata_u64, 4, payload);
+#endif
+} // __kmp_itt_metadata_imbalance
+
+// -----------------------------------------------------------------------------
+// Attach loop metadata (source line/column, schedule kind, trip count and
+// chunk size) to the shared "OMP Metadata" domain as u64 metadata.
+LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
+                                     kmp_uint64 iterations, kmp_uint64 chunk) {
+#if USE_ITT_NOTIFY
+  // Lazily create the shared metadata domain and string handles exactly once
+  // (double-checked under metadata_lock).
+  if (metadata_domain == NULL) {
+    __kmp_acquire_bootstrap_lock(&metadata_lock);
+    if (metadata_domain == NULL) {
+      __itt_suppress_push(__itt_suppress_memory_errors);
+      metadata_domain = __itt_domain_create("OMP Metadata");
+      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
+      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
+      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
+      __itt_suppress_pop();
+    }
+    __kmp_release_bootstrap_lock(&metadata_lock);
+  }
+
+  // psource has the form ";file;func;line;col;;". Walk to the 3-rd and 4-th
+  // semicolons: the line number follows the 3-rd, the column the 4-th.
+  KMP_DEBUG_ASSERT(loc->psource);
+  char *cursor;
+#ifdef __cplusplus
+  cursor = strchr(CCAST(char *, loc->psource), ';'); // 1-st semicolon
+#else
+  cursor = strchr(loc->psource, ';'); // 1-st semicolon
+#endif
+  KMP_DEBUG_ASSERT(cursor);
+  int step;
+  for (step = 0; step < 2; ++step) { // advance to the 3-rd semicolon
+    cursor = strchr(cursor + 1, ';');
+    KMP_DEBUG_ASSERT(cursor);
+  }
+  char *s_line = cursor;
+  char *s_col = strchr(s_line + 1, ';'); // 4-th semicolon
+  KMP_DEBUG_ASSERT(s_col);
+
+  kmp_uint64 loop_data[5] = {(kmp_uint64)atoi(s_line + 1), // line
+                             (kmp_uint64)atoi(s_col + 1), // column
+                             sched_type, iterations, chunk};
+
+  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
+                     __itt_metadata_u64, 5, loop_data);
+#endif
+} // __kmp_itt_metadata_loop
+
+// -----------------------------------------------------------------------------
+// Attach the source line/column of a single construct to the shared
+// "OMP Metadata" domain as u64 metadata.
+LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
+#if USE_ITT_NOTIFY
+  // Lazily create the shared metadata domain and string handles exactly once
+  // (double-checked under metadata_lock).
+  if (metadata_domain == NULL) {
+    __kmp_acquire_bootstrap_lock(&metadata_lock);
+    if (metadata_domain == NULL) {
+      __itt_suppress_push(__itt_suppress_memory_errors);
+      metadata_domain = __itt_domain_create("OMP Metadata");
+      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
+      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
+      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
+      __itt_suppress_pop();
+    }
+    __kmp_release_bootstrap_lock(&metadata_lock);
+  }
+
+  // Copy the coordinates out before releasing the parsed location.
+  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
+  kmp_uint64 single_data[2] = {(kmp_uint64)str_loc.line,
+                               (kmp_uint64)str_loc.col};
+  __kmp_str_loc_free(&str_loc);
+
+  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
+                     __itt_metadata_u64, 2, single_data);
+#endif
+} // __kmp_itt_metadata_single
+
+// -----------------------------------------------------------------------------
+// Intentionally empty: per-thread region-start events are deliberately not
+// emitted (overhead is too high; see the note above __kmp_itt_region_forking).
+LINKAGE void __kmp_itt_region_starting(int gtid) {
+#if USE_ITT_NOTIFY
+#endif
+} // __kmp_itt_region_starting
+
+// -----------------------------------------------------------------------------
+// Intentionally empty: per-thread region-finish events are deliberately not
+// emitted (overhead is too high; see the note above __kmp_itt_region_forking).
+LINKAGE void __kmp_itt_region_finished(int gtid) {
+#if USE_ITT_NOTIFY
+#endif
+} // __kmp_itt_region_finished
+
+// ----------------------------------------------------------------------------
+// Emit the ITT frame-end event for a joined parallel region. Called by the
+// master thread after all team threads reported region_finished.
+LINKAGE void __kmp_itt_region_joined(int gtid) {
+#if USE_ITT_NOTIFY
+  // Frame notifications are only supported for the outermost teams.
+  kmp_team_t *team = __kmp_team_from_gtid(gtid);
+  if (team->t.t_active_level > 1)
+    return;
+  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
+  if (loc == NULL || loc->reserved_2 == 0)
+    return;
+  // The region-domain index is cached, biased by 1, in the low two bytes.
+  unsigned int idx = (loc->reserved_2 & 0x0000FFFF) - 1;
+  if (idx < KMP_MAX_FRAME_DOMAINS) {
+    KMP_ITT_DEBUG_LOCK();
+    __itt_frame_end_v3(__kmp_itt_region_domains[idx], NULL);
+    KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
+                        loc->reserved_2, loc);
+  }
+#endif
+} // __kmp_itt_region_joined
+
+/* Barriers reporting.
+
+ A barrier consists of two phases:
+ 1. Gather -- master waits for arriving of all the worker threads; each
+ worker thread registers arrival and goes further.
+ 2. Release -- each worker threads waits until master lets it go; master lets
+ worker threads go.
+
+ Function should be called by each thread:
+ * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
+ * __kmp_itt_barrier_middle() -- between gather and release phases.
+ * __kmp_itt_barrier_finished() -- after release phase.
+
+ Note: Call __kmp_itt_barrier_object() before call to
+ __kmp_itt_barrier_starting() and save result in local variable.
+ __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
+ would return itt sync object for the next barrier!
+
+ ITT need an address (void *) to be specified as a sync object. OpenMP RTL
+ does not have barrier object or barrier data structure. Barrier is just a
+ counter in team and thread structures. We could use an address of team
+   structure as a barrier sync object, but ITT wants different objects for
+   different barriers (even within the same team). So let us use team address
+ as barrier sync object for the first barrier, then increase it by one for the
+ next barrier, and so on (but wrap it not to use addresses outside of team
+ structure). */
+
+// Build (and optionally name) the ITT sync object for the current barrier of
+// type `bt`. The address is derived from the team address plus a rotating
+// offset, so consecutive barriers of the same team and different barrier
+// types all get distinct objects. With set_name != 0 the object is registered
+// with ITT under a type-specific name and source location.
+void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
+                               int delta // 0 (current barrier) is default
+                               // value; specify -1 to get previous
+                               // barrier.
+                               ) {
+  void *object = NULL;
+#if USE_ITT_NOTIFY
+  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
+  kmp_team_t *team = thr->th.th_team;
+
+  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
+  // be NULL. This "if" helps to avoid crash. However, this is not complete
+  // solution, and reporting fork/join barriers to ITT should be revisited.
+
+  if (team != NULL) {
+    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
+    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
+    kmp_uint64 counter =
+        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
+    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
+    // barriers of different types do not have the same ids.
+    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
+    // This condition is a must (we would have zero divide otherwise).
+    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
+    // Stronger condition: make sure we have room at least for two
+    // different ids (for each barrier type).
+    object = reinterpret_cast<void *>(
+        kmp_uintptr_t(team) +
+        counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
+        bt);
+    KMP_ITT_DEBUG_LOCK();
+    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
+                        counter, object);
+
+    if (set_name) {
+      ident_t const *loc = NULL;
+      char const *src = NULL;
+      char const *type = "OMP Barrier";
+      switch (bt) {
+      case bs_plain_barrier: {
+        // For plain barrier compiler calls __kmpc_barrier() function, which
+        // saves location in thr->th.th_ident.
+        loc = thr->th.th_ident;
+        // Get the barrier type from flags provided by compiler.
+        kmp_int32 expl = 0;
+        kmp_uint32 impl = 0;
+        if (loc != NULL) {
+          src = loc->psource;
+          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
+          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
+        }
+        // impl != 0 implies loc != NULL, so the dereference below is safe.
+        if (impl) {
+          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
+          case KMP_IDENT_BARRIER_IMPL_FOR: {
+            type = "OMP For Barrier";
+          } break;
+          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
+            type = "OMP Sections Barrier";
+          } break;
+          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
+            type = "OMP Single Barrier";
+          } break;
+          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
+            type = "OMP Workshare Barrier";
+          } break;
+          default: {
+            type = "OMP Implicit Barrier";
+            KMP_DEBUG_ASSERT(0);
+          }
+          }
+        } else if (expl) {
+          type = "OMP Explicit Barrier";
+        }
+      } break;
+      case bs_forkjoin_barrier: {
+        // In case of fork/join barrier we can read thr->th.th_ident, because it
+        // contains location of last passed construct (while join barrier is not
+        // such one). Use th_ident of master thread instead -- __kmp_join_call()
+        // called by the master thread saves location.
+        //
+        // AC: cannot read from master because __kmp_join_call may be not called
+        // yet, so we read the location from team. This is the same location.
+        // And team is valid at the enter to join barrier where this happens.
+        loc = team->t.t_ident;
+        if (loc != NULL) {
+          src = loc->psource;
+        }
+        type = "OMP Join Barrier";
+      } break;
+      }
+      KMP_ITT_DEBUG_LOCK();
+      __itt_sync_create(object, type, src, __itt_attr_barrier);
+      KMP_ITT_DEBUG_PRINT(
+          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
+          type, src);
+    }
+  }
+#endif
+  return object;
+} // __kmp_itt_barrier_object
+
+// -----------------------------------------------------------------------------
+// Called by each thread before arriving at the gather phase of a barrier.
+// Workers first issue a releasing event (presumably signalling completion of
+// the previous phase -- confirm against the ITT barrier protocol), then every
+// thread, master included, reports that it is preparing to wait.
+void __kmp_itt_barrier_starting(int gtid, void *object) {
+#if USE_ITT_NOTIFY
+  if (!KMP_MASTER_GTID(gtid)) {
+    KMP_ITT_DEBUG_LOCK();
+    __itt_sync_releasing(object);
+    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
+  }
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_prepare(object);
+  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
+#endif
+} // __kmp_itt_barrier_starting
+
+// -----------------------------------------------------------------------------
+// Called between the gather and release phases of a barrier. Only the master
+// reports here: it has gathered all workers (acquired) and is about to let
+// them go (releasing); workers do nothing at this point.
+void __kmp_itt_barrier_middle(int gtid, void *object) {
+#if USE_ITT_NOTIFY
+  if (!KMP_MASTER_GTID(gtid))
+    return;
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_acquired(object);
+  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_releasing(object);
+  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
+#endif
+} // __kmp_itt_barrier_middle
+
+// -----------------------------------------------------------------------------
+// Called after the release phase of a barrier. Workers report acquisition
+// once the master releases them; the master already reported acquired in
+// __kmp_itt_barrier_middle, so it does nothing here.
+void __kmp_itt_barrier_finished(int gtid, void *object) {
+#if USE_ITT_NOTIFY
+  if (!KMP_MASTER_GTID(gtid)) {
+    KMP_ITT_DEBUG_LOCK();
+    __itt_sync_acquired(object);
+    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
+  }
+#endif
+} // __kmp_itt_barrier_finished
+
+/* Taskwait reporting.
+ ITT need an address (void *) to be specified as a sync object. OpenMP RTL
+ does not have taskwait structure, so we need to construct something. */
+
+// Construct a distinct ITT sync address for the current taskwait: the
+// taskdata address offset by the taskwait counter, wrapped so the address
+// stays inside the taskdata structure.
+void *__kmp_itt_taskwait_object(int gtid) {
+  void *object = NULL;
+#if USE_ITT_NOTIFY
+  if (__itt_sync_create_ptr) {
+    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
+    kmp_taskdata_t *taskdata = thread->th.th_current_task;
+    kmp_uintptr_t offset =
+        taskdata->td_taskwait_counter % sizeof(kmp_taskdata_t);
+    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) + offset);
+  }
+#endif
+  return object;
+} // __kmp_itt_taskwait_object
+
+// Register the per-taskwait sync object under the taskwait's source location
+// and report that this thread is about to wait on it.
+void __kmp_itt_taskwait_starting(int gtid, void *object) {
+#if USE_ITT_NOTIFY
+  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
+  kmp_taskdata_t *taskdata = thread->th.th_current_task;
+  ident_t const *loc = taskdata->td_taskwait_ident;
+  char const *src = (loc == NULL ? NULL : loc->psource);
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_create(object, "OMP Taskwait", src, 0);
+  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
+                      object, src);
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_prepare(object);
+  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
+#endif
+} // __kmp_itt_taskwait_starting
+
+// Report that the taskwait completed, then retire the per-taskwait sync
+// object (a fresh one is derived for the next taskwait by
+// __kmp_itt_taskwait_object).
+void __kmp_itt_taskwait_finished(int gtid, void *object) {
+#if USE_ITT_NOTIFY
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_acquired(object);
+  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_destroy(object);
+  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
+#endif
+} // __kmp_itt_taskwait_finished
+
+/* Task reporting.
+ Only those tasks are reported which are executed by a thread spinning at
+   barrier (or taskwait). The sync object passed to the function must be the
+   barrier or taskwait the threads are waiting at. */
+
+// A thread spinning at a barrier/taskwait is interrupted to execute a task:
+// cancel the pending wait on the sync object (if any).
+void __kmp_itt_task_starting(
+    void *object // ITT sync object: barrier or taskwait.
+    ) {
+#if USE_ITT_NOTIFY
+  if (object == NULL)
+    return;
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_cancel(object);
+  KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
+#endif
+} // __kmp_itt_task_starting
+
+// -----------------------------------------------------------------------------
+// The task has finished and the interrupted wait resumes: report that the
+// thread is preparing to wait on the sync object again.
+// NOTE(review): unlike __kmp_itt_task_starting, object is not NULL-checked
+// here -- presumably callers only pass a valid object; confirm.
+void __kmp_itt_task_finished(
+    void *object // ITT sync object: barrier or taskwait.
+    ) {
+#if USE_ITT_NOTIFY
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_prepare(object);
+  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
+#endif
+} // __kmp_itt_task_finished
+
+/* Lock reporting.
+ * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
+ operation (set/unset). It is not a real event shown to the user but just
+ setting a name for synchronization object. `lock' is an address of sync
+ object, the same address should be used in all subsequent calls.
+ * __kmp_itt_lock_acquiring() should be called before setting the lock.
+ * __kmp_itt_lock_acquired() should be called after setting the lock.
+ * __kmp_itt_lock_releasing() should be called before unsetting the lock.
+ * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
+ for the lock.
+ * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
+ operation. After __kmp_itt_lock_destroyed() all the references to the same
+ address will be considered as another sync object, not related with the
+ original one. */
+
+#if KMP_USE_DYNAMIC_LOCK
+// Takes location information directly.
+// Registers `lock` with ITT under `type`, using loc->psource (if any) as the
+// source string. No-op unless an ITT collector is attached.
+__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
+                                       const ident_t *loc) {
+#if USE_ITT_NOTIFY
+  if (__itt_sync_create_ptr) {
+    char const *src = (loc == NULL ? NULL : loc->psource);
+    KMP_ITT_DEBUG_LOCK();
+    __itt_sync_create(lock, type, src, 0);
+    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
+                        src);
+  }
+#endif
+}
+#else // KMP_USE_DYNAMIC_LOCK
+// Internal guts -- common code for locks and critical sections, do not call
+// directly. The source location is looked up through the user-lock table.
+__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
+#if USE_ITT_NOTIFY
+  if (__itt_sync_create_ptr) {
+    ident_t const *loc = NULL;
+    if (__kmp_get_user_lock_location_ != NULL)
+      loc = __kmp_get_user_lock_location_((lock));
+    char const *src = (loc == NULL ? NULL : loc->psource);
+    KMP_ITT_DEBUG_LOCK();
+    __itt_sync_create(lock, type, src, 0);
+    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
+                        src);
+  }
+#endif
+} // ___kmp_itt_lock_init
+#endif // KMP_USE_DYNAMIC_LOCK
+
+// Internal guts -- common code for locks and critical sections, do not call
+// directly.
+// NOTE(review): `type` is accepted for symmetry with ___kmp_itt_lock_init but
+// is currently unused here.
+__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
+#if USE_ITT_NOTIFY
+  KMP_ITT_DEBUG_LOCK();
+  __itt_sync_destroy(lock);
+  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
+#endif
+} // ___kmp_itt_lock_fini
+
+// -----------------------------------------------------------------------------
+#if KMP_USE_DYNAMIC_LOCK
+// Public entry: name a user lock for ITT, taking the source location directly.
+void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
+  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
+}
+#else
+// Public entry: name a user lock for ITT; location comes from the lock table.
+void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
+  ___kmp_itt_lock_init(lock, "OMP Lock");
+} // __kmp_itt_lock_creating
+#endif
+
+// Report that a thread is about to set the lock.
+void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+  // Postpone any lock-object access until we know ITT is listening.
+  if (__itt_sync_prepare_ptr == NULL)
+    return;
+  // For indirect locks (tag == 0) the ITT sync object is the inner lock.
+  if (KMP_EXTRACT_D_TAG(lock) == 0)
+    __itt_sync_prepare(KMP_LOOKUP_I_LOCK(lock)->lock);
+  else
+    __itt_sync_prepare(lock);
+#else
+  __itt_sync_prepare(lock);
+#endif
+} // __kmp_itt_lock_acquiring
+
+// Report that a thread has set the lock.
+void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+  // Postpone any lock-object access until we know ITT is listening.
+  if (__itt_sync_acquired_ptr == NULL)
+    return;
+  // For indirect locks (tag == 0) the ITT sync object is the inner lock.
+  if (KMP_EXTRACT_D_TAG(lock) == 0)
+    __itt_sync_acquired(KMP_LOOKUP_I_LOCK(lock)->lock);
+  else
+    __itt_sync_acquired(lock);
+#else
+  __itt_sync_acquired(lock);
+#endif
+} // __kmp_itt_lock_acquired
+
+// Report that a thread is about to unset the lock.
+void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+  // Postpone any lock-object access until we know ITT is listening.
+  if (__itt_sync_releasing_ptr == NULL)
+    return;
+  // For indirect locks (tag == 0) the ITT sync object is the inner lock.
+  if (KMP_EXTRACT_D_TAG(lock) == 0)
+    __itt_sync_releasing(KMP_LOOKUP_I_LOCK(lock)->lock);
+  else
+    __itt_sync_releasing(lock);
+#else
+  __itt_sync_releasing(lock);
+#endif
+} // __kmp_itt_lock_releasing
+
+// Report that a thread cancelled its wait for the lock.
+void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+  // Postpone any lock-object access until we know ITT is listening.
+  if (__itt_sync_cancel_ptr == NULL)
+    return;
+  // For indirect locks (tag == 0) the ITT sync object is the inner lock.
+  if (KMP_EXTRACT_D_TAG(lock) == 0)
+    __itt_sync_cancel(KMP_LOOKUP_I_LOCK(lock)->lock);
+  else
+    __itt_sync_cancel(lock);
+#else
+  __itt_sync_cancel(lock);
+#endif
+} // __kmp_itt_lock_cancelled
+
+// Retire the ITT sync object of a destroyed user lock; subsequent reuse of
+// the same address is treated as a new, unrelated object.
+void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
+  ___kmp_itt_lock_fini(lock, "OMP Lock");
+} // __kmp_itt_lock_destroyed
+
+/* Critical reporting.
+ Critical sections are treated exactly as locks (but have different object
+ type). */
+#if KMP_USE_DYNAMIC_LOCK
+// Public entry: name a critical-section lock, taking the location directly.
+void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
+  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
+}
+#else
+// Public entry: name a critical-section lock; location via the lock table.
+void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
+  ___kmp_itt_lock_init(lock, "OMP Critical");
+} // __kmp_itt_critical_creating
+#endif
+
+// Critical-section events map 1:1 onto the ITT sync-object API.
+void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
+  __itt_sync_prepare(lock);
+} // __kmp_itt_critical_acquiring
+
+void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
+  __itt_sync_acquired(lock);
+} // __kmp_itt_critical_acquired
+
+void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
+  __itt_sync_releasing(lock);
+} // __kmp_itt_critical_releasing
+
+void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
+  ___kmp_itt_lock_fini(lock, "OMP Critical");
+} // __kmp_itt_critical_destroyed
+
+/* Single reporting. */
+
+// Raise an ITT mark "OMP Single-<psource>" when a thread wins a single
+// region; the mark id is stashed in th_itt_mark_single for __kmp_itt_single_end.
+// NOTE(review): __itt_mark_create is invoked on every entry -- presumably ITT
+// deduplicates identical mark names; confirm against the ITT mark API.
+void __kmp_itt_single_start(int gtid) {
+#if USE_ITT_NOTIFY
+  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
+    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
+    ident_t *loc = thr->th.th_ident;
+    char const *src = (loc == NULL ? NULL : loc->psource);
+    kmp_str_buf_t name;
+    __kmp_str_buf_init(&name);
+    __kmp_str_buf_print(&name, "OMP Single-%s", src);
+    KMP_ITT_DEBUG_LOCK();
+    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
+    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
+                        thr->th.th_itt_mark_single);
+    __kmp_str_buf_free(&name);
+    KMP_ITT_DEBUG_LOCK();
+    __itt_mark(thr->th.th_itt_mark_single, NULL);
+    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
+                        thr->th.th_itt_mark_single);
+  }
+#endif
+} // __kmp_itt_single_start
+
// Report exit from a single construct: switch off the per-thread ITT mark
// that the matching __kmp_itt_single_start created.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(thr->th.th_itt_mark_single);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", thr->th.th_itt_mark_single);
#endif
} // __kmp_itt_single_end
+
+/* Ordered reporting.
+ * __kmp_itt_ordered_init is called by each thread *before* first using sync
+ object. ITT team would like it to be called once, but it requires extra
+ synchronization.
+ * __kmp_itt_ordered_prep is called when thread is going to enter ordered
+ section (before synchronization).
+ * __kmp_itt_ordered_start is called just before entering user code (after
+ synchronization).
+ * __kmp_itt_ordered_end is called after returning from user code.
+
+ Sync object is th->th.th_dispatch->th_dispatch_sh_current.
+ Events are not generated in case of serialized team. */
+
// Register the dispatch buffer as an "OMP Ordered" sync object for this
// thread, tagging it with the construct's source location when available.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *th = __kmp_thread_from_gtid(gtid);
    ident_t const *loc = th->th.th_ident;
    __itt_sync_create(th->th.th_dispatch->th_dispatch_sh_current,
                      "OMP Ordered", (loc == NULL ? NULL : loc->psource), 0);
  }
#endif
} // __kmp_itt_ordered_init
+
// A thread is about to wait for its turn in an ordered section. No event is
// generated for a serialized team (see comment block above).
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  // Short-circuit keeps the team lookup behind the ITT availability check.
  if (__itt_sync_create_ptr && !__kmp_team_from_gtid(gtid)->t.t_serialized) {
    kmp_info_t *self = __kmp_thread_from_gtid(gtid);
    __itt_sync_prepare(self->th.th_dispatch->th_dispatch_sh_current);
  }
#endif
} // __kmp_itt_ordered_prep
+
// A thread obtained its turn in an ordered section and is entering user
// code. No event is generated for a serialized team.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  // Short-circuit keeps the team lookup behind the ITT availability check.
  if (__itt_sync_create_ptr && !__kmp_team_from_gtid(gtid)->t.t_serialized) {
    kmp_info_t *self = __kmp_thread_from_gtid(gtid);
    __itt_sync_acquired(self->th.th_dispatch->th_dispatch_sh_current);
  }
#endif
} // __kmp_itt_ordered_start
+
// A thread returned from the user code of an ordered section and is about
// to pass the turn on. No event is generated for a serialized team.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  // Short-circuit keeps the team lookup behind the ITT availability check.
  if (__itt_sync_create_ptr && !__kmp_team_from_gtid(gtid)->t.t_serialized) {
    kmp_info_t *self = __kmp_thread_from_gtid(gtid);
    __itt_sync_releasing(self->th.th_dispatch->th_dispatch_sh_current);
  }
#endif
} // __kmp_itt_ordered_end
+
+/* Threads reporting. */
+
// Tell ITT to exclude the calling thread from analysis.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore
+
// Give the calling thread a human-readable ITT name, distinguishing the
// master thread of the team from workers by gtid.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_thr_name_set_ptr) {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    if (KMP_MASTER_GTID(gtid)) {
      __kmp_str_buf_print(&buf, "OMP Master Thread #%d", gtid);
    } else {
      __kmp_str_buf_print(&buf, "OMP Worker Thread #%d", gtid);
    }
    KMP_ITT_DEBUG_LOCK();
    __itt_thr_name_set(buf.str, buf.used);
    KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", buf.str);
    __kmp_str_buf_free(&buf);
  }
#endif
} // __kmp_itt_thread_name
+
+/* System object reporting.
+ ITT catches operations with system sync objects (like Windows* OS on IA-32
+ architecture API critical sections and events). We only need to specify
+ name ("OMP Scheduler") for the object to let ITT know it is an object used
+ by OpenMP RTL for internal purposes. */
+
// Name an internal system sync object "OMP Scheduler" so tools know it
// belongs to the OpenMP RTL; `name` further identifies the specific object.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
+
+/* Stack stitching api.
+ Master calls "create" and put the stitching id into team structure.
+ Workers read the stitching id and call "enter" / "leave" api.
+ Master calls "destroy" at the end of the parallel region. */
+
+__itt_caller __kmp_itt_stack_caller_create() {
+#if USE_ITT_NOTIFY
+ if (!__itt_stack_caller_create_ptr)
+ return NULL;
+ KMP_ITT_DEBUG_LOCK();
+ __itt_caller id = __itt_stack_caller_create();
+ KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
+ return id;
+#endif
+ return NULL;
+}
+
+void __kmp_itt_stack_caller_destroy(__itt_caller id) {
+#if USE_ITT_NOTIFY
+ if (__itt_stack_caller_destroy_ptr) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_caller_destroy(id);
+ KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
+ }
+#endif
+}
+
+void __kmp_itt_stack_callee_enter(__itt_caller id) {
+#if USE_ITT_NOTIFY
+ if (__itt_stack_callee_enter_ptr) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_callee_enter(id);
+ KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
+ }
+#endif
+}
+
+void __kmp_itt_stack_callee_leave(__itt_caller id) {
+#if USE_ITT_NOTIFY
+ if (__itt_stack_callee_leave_ptr) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_callee_leave(id);
+ KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
+ }
+#endif
+}
+
+#endif /* USE_ITT_BUILD */