about summary refs log tree commit diff
diff options
context:
space:
mode:
authorJonathan Peyton <jonathan.l.peyton@intel.com>2018-07-19 19:17:00 +0000
committerJonathan Peyton <jonathan.l.peyton@intel.com>2018-07-19 19:17:00 +0000
commitd7d72b740038e4285e1d51d6feb48330c0fb6f5a (patch)
tree3a942f79bbedae478b970910ba04346d52752f6a
parent7af70ccedd0517698ddf5f5e614b753b7b4ddce2 (diff)
Block library shutdown until unreaped threads finish spin-waiting
This change fixes possibly invalid access to the internal data structure during library shutdown. In a heavily oversubscribed situation, the library shutdown sequence can reach the point where resources are deallocated while there still exist threads in their final spinning loop. The added loop in __kmp_internal_end() checks if there are such busy-waiting threads and blocks the shutdown sequence if that is the case. Two versions of __kmp_wait_template() are now used to minimize performance impact. Patch by Hansang Bae. Differential Revision: https://reviews.llvm.org/D49452 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@337486 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--runtime/src/kmp.h3
-rw-r--r--runtime/src/kmp_runtime.cpp15
-rw-r--r--runtime/src/kmp_wait_release.cpp8
-rw-r--r--runtime/src/kmp_wait_release.h46
4 files changed, 61 insertions, 11 deletions
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index 02236c5..497907b 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -2558,6 +2558,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
#if KMP_STATS_ENABLED
kmp_stats_list *th_stats;
#endif
+#if KMP_OS_UNIX
+ std::atomic<bool> th_blocking;
+#endif
} kmp_base_info_t;
typedef union KMP_ALIGN_CACHE kmp_info {
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index 69eba21..a16d2fd 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -4334,6 +4334,9 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
new_thr->th.th_spin_here = FALSE;
new_thr->th.th_next_waiting = 0;
+#if KMP_OS_UNIX
+ new_thr->th.th_blocking = false;
+#endif
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
@@ -5961,6 +5964,18 @@ static void __kmp_internal_end(void) {
__kmp_reap_task_teams();
+#if KMP_OS_UNIX
+ // Threads that are not reaped should not access any resources since they
+ // are going to be deallocated soon, so the shutdown sequence should wait
+ // until all threads either exit the final spin-waiting loop or begin
+ // sleeping after the given blocktime.
+ for (i = 0; i < __kmp_threads_capacity; i++) {
+ kmp_info_t *thr = __kmp_threads[i];
+ while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
+ KMP_CPU_PAUSE();
+ }
+#endif
+
for (i = 0; i < __kmp_threads_capacity; ++i) {
// TBD: Add some checking...
// Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
diff --git a/runtime/src/kmp_wait_release.cpp b/runtime/src/kmp_wait_release.cpp
index 98d3a1f..3ebec6b 100644
--- a/runtime/src/kmp_wait_release.cpp
+++ b/runtime/src/kmp_wait_release.cpp
@@ -15,8 +15,12 @@
void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
- __kmp_wait_template(this_thr, flag,
- final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+ if (final_spin)
+ __kmp_wait_template<kmp_flag_64, TRUE>(
+ this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
+ else
+ __kmp_wait_template<kmp_flag_64, FALSE>(
+ this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
}
void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); }
diff --git a/runtime/src/kmp_wait_release.h b/runtime/src/kmp_wait_release.h
index 1301a60..eeac198 100644
--- a/runtime/src/kmp_wait_release.h
+++ b/runtime/src/kmp_wait_release.h
@@ -154,10 +154,10 @@ static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
/* Spin wait loop that first does pause, then yield, then sleep. A thread that
calls __kmp_wait_* must make certain that another thread calls __kmp_release
to wake it back up to prevent deadlocks! */
-template <class C>
+template <class C, int final_spin>
static inline void
-__kmp_wait_template(kmp_info_t *this_thr, C *flag,
- int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+__kmp_wait_template(kmp_info_t *this_thr,
+ C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
// NOTE: We may not belong to a team at this point.
volatile void *spin = flag->get();
kmp_uint32 spins;
@@ -176,6 +176,10 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
return;
}
th_gtid = this_thr->th.th_info.ds.ds_gtid;
+#if KMP_OS_UNIX
+ if (final_spin)
+ KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
+#endif
KA_TRACE(20,
("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
@@ -409,7 +413,15 @@ final_spin=FALSE)
#endif
KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
+#if KMP_OS_UNIX
+ if (final_spin)
+ KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
+#endif
flag->suspend(th_gtid);
+#if KMP_OS_UNIX
+ if (final_spin)
+ KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
+#endif
if (TCR_4(__kmp_global.g.g_done)) {
if (__kmp_global.g.g_abort)
@@ -450,6 +462,10 @@ final_spin=FALSE)
}
#endif
+#if KMP_OS_UNIX
+ if (final_spin)
+ KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
+#endif
KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
}
@@ -732,8 +748,12 @@ public:
}
void wait(kmp_info_t *this_thr,
int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
- __kmp_wait_template(this_thr, this,
- final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+ if (final_spin)
+ __kmp_wait_template<kmp_flag_32, TRUE>(
+ this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+ else
+ __kmp_wait_template<kmp_flag_32, FALSE>(
+ this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}
void release() { __kmp_release_template(this); }
flag_type get_ptr_type() { return flag32; }
@@ -757,8 +777,12 @@ public:
}
void wait(kmp_info_t *this_thr,
int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
- __kmp_wait_template(this_thr, this,
- final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+ if (final_spin)
+ __kmp_wait_template<kmp_flag_64, TRUE>(
+ this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+ else
+ __kmp_wait_template<kmp_flag_64, FALSE>(
+ this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}
void release() { __kmp_release_template(this); }
flag_type get_ptr_type() { return flag64; }
@@ -845,8 +869,12 @@ public:
bool is_sleeping() { return is_sleeping_val(*get()); }
bool is_any_sleeping() { return is_sleeping_val(*get()); }
void wait(kmp_info_t *this_thr, int final_spin) {
- __kmp_wait_template<kmp_flag_oncore>(
- this_thr, this, final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+ if (final_spin)
+ __kmp_wait_template<kmp_flag_oncore, TRUE>(
+ this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+ else
+ __kmp_wait_template<kmp_flag_oncore, FALSE>(
+ this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}
void release() { __kmp_release_template(this); }
void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }