Diffstat (limited to 'final/runtime/src/kmp_itt.h')
-rw-r--r--  final/runtime/src/kmp_itt.h  333
1 file changed, 333 insertions(+), 0 deletions(-)
diff --git a/final/runtime/src/kmp_itt.h b/final/runtime/src/kmp_itt.h
new file mode 100644
index 0000000..2062e01
--- /dev/null
+++ b/final/runtime/src/kmp_itt.h
@@ -0,0 +1,333 @@
+#if USE_ITT_BUILD
+/*
+ * kmp_itt.h -- ITT Notify interface.
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KMP_ITT_H
+#define KMP_ITT_H
+
+#include "kmp_lock.h"
+
+#define INTEL_ITTNOTIFY_API_PRIVATE
+#include "ittnotify.h"
+#include "legacy/ittnotify.h"
+
+#if KMP_DEBUG
+#define __kmp_inline // Turn off inlining in debug mode.
+#else
+#define __kmp_inline static inline
+#endif
+
+#if USE_ITT_NOTIFY
+extern kmp_int32 __kmp_itt_prepare_delay;
+#ifdef __cplusplus
+extern "C" void __kmp_itt_fini_ittlib(void);
+#else
+extern void __kmp_itt_fini_ittlib(void);
+#endif
+#endif
+
+// Simplify the handling of an argument that is only required when USE_ITT_BUILD
+// is enabled.
+#define USE_ITT_BUILD_ARG(x) , x
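+// For example, a wait routine may take an ITT sync object only in ITT builds
+// (hypothetical signature, shown for illustration only):
+//   void __kmp_wait(kmp_flag_t *flag USE_ITT_BUILD_ARG(void *itt_sync_obj));
+// which expands to
+//   void __kmp_wait(kmp_flag_t *flag, void *itt_sync_obj); // USE_ITT_BUILD
+//   void __kmp_wait(kmp_flag_t *flag);                     // otherwise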
+
+void __kmp_itt_initialize();
+void __kmp_itt_destroy();
+void __kmp_itt_reset();
+
+// -----------------------------------------------------------------------------
+// New stuff for reporting high-level constructs.
+
+// Note the naming convention:
+// __kmp_itt_xxxing() function should be called before action, while
+// __kmp_itt_xxxed() function should be called after action.
+
+// --- Parallel region reporting ---
+__kmp_inline void
+__kmp_itt_region_forking(int gtid, int team_size,
+ int barriers); // Master only, before forking threads.
+__kmp_inline void
+__kmp_itt_region_joined(int gtid); // Master only, after joining threads.
+// (*) Note: A thread may execute tasks after this point, though.
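+// A sketch of the intended master-thread sequence (illustrative only; gtid,
+// team_size, and barriers are assumed locals):
+//   __kmp_itt_region_forking(gtid, team_size, barriers); // before fork
+//   ... fork workers, invoke the microtask, join ...
+//   __kmp_itt_region_joined(gtid);                       // after join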
+
+// --- Frame reporting ---
+// region=0: no regions, region=1: parallel, region=2: serialized parallel
+__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
+ __itt_timestamp end, int imbalance,
+ ident_t *loc, int team_size,
+ int region = 0);
+
+// --- Metadata reporting ---
+// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
+// wait time value, reduction - whether this is a reduction barrier
+__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
+ kmp_uint64 end,
+ kmp_uint64 imbalance,
+ kmp_uint64 reduction);
+// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
+// iterations - loop trip count, chunk - chunk size
+__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
+ kmp_uint64 iterations,
+ kmp_uint64 chunk);
+__kmp_inline void __kmp_itt_metadata_single(ident_t *loc);
+
+// --- Barrier reporting ---
+__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
+ int delta = 0);
+__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
+__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
+__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);
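+// Sketch of the expected per-thread call sequence (illustrative only; bt is
+// the barrier type):
+//   void *obj = __kmp_itt_barrier_object(gtid, bt);
+//   __kmp_itt_barrier_starting(gtid, obj); // before waiting starts
+//   __kmp_itt_barrier_middle(gtid, obj);   // between gather and release
+//   __kmp_itt_barrier_finished(gtid, obj); // after the barrier completes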
+
+// --- Taskwait reporting ---
+__kmp_inline void *__kmp_itt_taskwait_object(int gtid);
+__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
+__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);
+
+// --- Task reporting ---
+__kmp_inline void __kmp_itt_task_starting(void *object);
+__kmp_inline void __kmp_itt_task_finished(void *object);
+
+// --- Lock reporting ---
+#if KMP_USE_DYNAMIC_LOCK
+__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
+ const ident_t *);
+#else
+__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
+#endif
+__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
+__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
+__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
+__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
+__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);
+
+// --- Critical reporting ---
+#if KMP_USE_DYNAMIC_LOCK
+__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
+ const ident_t *);
+#else
+__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
+#endif
+__kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
+__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
+__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
+__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);
+
+// --- Single reporting ---
+__kmp_inline void __kmp_itt_single_start(int gtid);
+__kmp_inline void __kmp_itt_single_end(int gtid);
+
+// --- Ordered reporting ---
+__kmp_inline void __kmp_itt_ordered_init(int gtid);
+__kmp_inline void __kmp_itt_ordered_prep(int gtid);
+__kmp_inline void __kmp_itt_ordered_start(int gtid);
+__kmp_inline void __kmp_itt_ordered_end(int gtid);
+
+// --- Threads reporting ---
+__kmp_inline void __kmp_itt_thread_ignore();
+__kmp_inline void __kmp_itt_thread_name(int gtid);
+
+// --- System objects ---
+__kmp_inline void __kmp_itt_system_object_created(void *object,
+ char const *name);
+
+// --- Stack stitching ---
+__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
+__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
+__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
+__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
+
+// -----------------------------------------------------------------------------
+// Old stuff for reporting low-level internal synchronization.
+
+#if USE_ITT_NOTIFY
+
+/* Support for SSC marks, which are used by SDE
+ http://software.intel.com/en-us/articles/intel-software-development-emulator
+ to mark points in instruction traces that represent spin-loops and are
+ therefore uninteresting when collecting traces for architecture simulation.
+ */
+#ifndef INCLUDE_SSC_MARKS
+#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
+#endif
+
+/* Linux 64 only for now */
+#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
+// Portable (at least for gcc and icc) code to insert the necessary instructions
+// to set %ebx and execute the unlikely no-op.
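+// (The tag is loaded into %ebx, then the .byte sequence 0x64, 0x67, 0x90
+// executes a prefixed NOP that SDE recognizes as an SSC mark; the prefixes
+// leave it a no-op on real hardware.)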
+#if defined(__INTEL_COMPILER)
+#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
+#else
+#define INSERT_SSC_MARK(tag) \
+ __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag) \
+ : "%ebx")
+#endif
+#else
+#define INSERT_SSC_MARK(tag) ((void)0)
+#endif
+
+/* Markers for the start and end of regions that represent polling and are
+   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
+   arbitrary numbers that should be unique in the space of SSC tags, but there
+   is no central issuing authority; rather, randomness is expected to work. */
+#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
+#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
+
+// Markers for architecture simulation.
+// FORKING : Before the master thread forks.
+// JOINING : At the start of the join.
+// INVOKING : Before the threads invoke microtasks.
+// DISPATCH_INIT: At the start of a dynamically scheduled loop.
+// DISPATCH_NEXT: After claiming the next iteration of a dynamically
+//                scheduled loop.
+#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
+#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
+#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
+#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
+#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
+
+// An object is an address used to associate a specific set of prepare,
+// acquire, release, and cancel operations.
+
+/* Sync prepare indicates a thread is going to start waiting for another thread
+   to send a release event. This operation should be done just before the
+   thread begins checking for the existence of the release event. */
+
+/* Sync cancel indicates a thread is cancelling a wait on another thread and
+   continuing execution without waiting for the other thread to release it. */
+
+/* Sync acquired indicates a thread has received a release event from another
+ thread and has stopped waiting. This operation must occur only after the
+ release event is received. */
+
+/* Sync release indicates a thread is going to send a release event to another
+   thread so it will stop waiting and continue execution. This operation must
+   happen just before the release event. */
+
+#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
+#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
+#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
+#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))
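+
+/* Sketch of a blocking acquire/release pairing (illustrative only; lck is an
+   assumed lock object):
+     KMP_FSYNC_PREPARE(lck);   // waiter: about to start waiting on lck
+     ... block until released ...
+     KMP_FSYNC_ACQUIRED(lck);  // waiter: release event received
+   and on the releasing side:
+     KMP_FSYNC_RELEASING(lck); // releaser: just before the release event
+     ... store that wakes the waiter ...
+*/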
+
+/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
+ with a delay (and not called at all if waiting time is small). So, in spin
+ loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
+   spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
+   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for an example. */
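+
+/* Sketch of that pattern (simplified; spin and the done() test are assumed):
+     KMP_FSYNC_SPIN_INIT(obj, spin);  // declares sync_iters; obj defaults to
+                                      // spin if NULL
+     while (!done(spin)) {
+       KMP_FSYNC_SPIN_PREPARE(obj);   // fires PREPARE once, after a delay
+       KMP_YIELD(TRUE);               // yield while spinning
+     }
+     KMP_FSYNC_SPIN_ACQUIRED(obj);    // reports ACQUIRED if PREPARE fired
+*/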
+
+#undef KMP_FSYNC_SPIN_INIT
+#define KMP_FSYNC_SPIN_INIT(obj, spin) \
+ int sync_iters = 0; \
+ if (__itt_fsync_prepare_ptr) { \
+ if (obj == NULL) { \
+ obj = spin; \
+ } /* if */ \
+ } /* if */ \
+ SSC_MARK_SPIN_START()
+
+#undef KMP_FSYNC_SPIN_PREPARE
+#define KMP_FSYNC_SPIN_PREPARE(obj) \
+ do { \
+ if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) { \
+ ++sync_iters; \
+ if (sync_iters >= __kmp_itt_prepare_delay) { \
+ KMP_FSYNC_PREPARE((void *)obj); \
+ } /* if */ \
+ } /* if */ \
+ } while (0)
+#undef KMP_FSYNC_SPIN_ACQUIRED
+#define KMP_FSYNC_SPIN_ACQUIRED(obj) \
+ do { \
+ SSC_MARK_SPIN_END(); \
+ if (sync_iters >= __kmp_itt_prepare_delay) { \
+ KMP_FSYNC_ACQUIRED((void *)obj); \
+ } /* if */ \
+ } while (0)
+
+/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
+ KMP_ITT_IGNORE(
+ ptr = malloc( size );
+ );
+*/
+#define KMP_ITT_IGNORE(statement) \
+ do { \
+ __itt_state_t __itt_state_; \
+ if (__itt_state_get_ptr) { \
+ __itt_state_ = __itt_state_get(); \
+ __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set); \
+ } /* if */ \
+ { statement } \
+ if (__itt_state_get_ptr) { \
+ __itt_state_set(__itt_state_); \
+ } /* if */ \
+ } while (0)
+
+const int KMP_MAX_FRAME_DOMAINS =
+    512; // Maximum number of frame domains to use (maps to different OpenMP
+         // regions in the user source code).
+extern kmp_int32 __kmp_barrier_domain_count;
+extern kmp_int32 __kmp_region_domain_count;
+extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
+extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
+extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
+extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
+extern __itt_domain *metadata_domain;
+extern __itt_string_handle *string_handle_imbl;
+extern __itt_string_handle *string_handle_loop;
+extern __itt_string_handle *string_handle_sngl;
+
+#else
+
+// Null definitions of the synchronization tracing functions.
+#define KMP_FSYNC_PREPARE(obj) ((void)0)
+#define KMP_FSYNC_CANCEL(obj) ((void)0)
+#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
+#define KMP_FSYNC_RELEASING(obj) ((void)0)
+
+#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
+#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
+#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)
+
+#define KMP_ITT_IGNORE(stmt) \
+ do { \
+ stmt \
+ } while (0)
+
+#endif // USE_ITT_NOTIFY
+
+#if !KMP_DEBUG
+// In release mode include definitions of inline functions.
+#include "kmp_itt.inl"
+#endif
+
+#endif // KMP_ITT_H
+
+#else /* USE_ITT_BUILD */
+
+// Null definitions of the synchronization tracing functions.
+// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
+// By defining these we avoid unpleasant ifdef tests in many places.
+#define KMP_FSYNC_PREPARE(obj) ((void)0)
+#define KMP_FSYNC_CANCEL(obj) ((void)0)
+#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
+#define KMP_FSYNC_RELEASING(obj) ((void)0)
+
+#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
+#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
+#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)
+
+#define KMP_ITT_IGNORE(stmt) \
+ do { \
+ stmt \
+ } while (0)
+
+#define USE_ITT_BUILD_ARG(x)
+
+#endif /* USE_ITT_BUILD */