2019-05-13 Chung-Lin Tang <cltang@codesourcery.com>
Reviewed-by: Thomas Schwinge <thomas@codesourcery.com>
libgomp/
* libgomp-plugin.h (struct goacc_asyncqueue): Declare.
(struct goacc_asyncqueue_list): Likewise.
(goacc_aq): Likewise.
(goacc_aq_list): Likewise.
(GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
(GOMP_OFFLOAD_openacc_async_test): Remove.
(GOMP_OFFLOAD_openacc_async_test_all): Remove.
(GOMP_OFFLOAD_openacc_async_wait): Remove.
(GOMP_OFFLOAD_openacc_async_wait_async): Remove.
(GOMP_OFFLOAD_openacc_async_wait_all): Remove.
(GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
(GOMP_OFFLOAD_openacc_async_set_async): Remove.
(GOMP_OFFLOAD_openacc_exec): Adjust declaration.
(GOMP_OFFLOAD_openacc_cuda_get_stream): Likewise.
(GOMP_OFFLOAD_openacc_cuda_set_stream): Likewise.
(GOMP_OFFLOAD_openacc_async_exec): Declare.
(GOMP_OFFLOAD_openacc_async_construct): Declare.
(GOMP_OFFLOAD_openacc_async_destruct): Declare.
(GOMP_OFFLOAD_openacc_async_test): Declare.
(GOMP_OFFLOAD_openacc_async_synchronize): Declare.
(GOMP_OFFLOAD_openacc_async_serialize): Declare.
(GOMP_OFFLOAD_openacc_async_queue_callback): Declare.
(GOMP_OFFLOAD_openacc_async_host2dev): Declare.
(GOMP_OFFLOAD_openacc_async_dev2host): Declare.
* libgomp.h (struct acc_dispatch_t): Define 'async' sub-struct.
(gomp_acc_insert_pointer): Adjust declaration.
(gomp_copy_host2dev): New declaration.
(gomp_copy_dev2host): Likewise.
(gomp_map_vars_async): Likewise.
(gomp_unmap_tgt): Likewise.
(gomp_unmap_vars_async): Likewise.
(gomp_fini_device): Likewise.
* oacc-async.c (get_goacc_thread): New function.
(get_goacc_thread_device): New function.
(lookup_goacc_asyncqueue): New function.
(get_goacc_asyncqueue): New function.
(acc_async_test): Adjust code to use new async design.
(acc_async_test_all): Likewise.
(acc_wait): Likewise.
(acc_wait_async): Likewise.
(acc_wait_all): Likewise.
(acc_wait_all_async): Likewise.
(goacc_async_free): New function.
(goacc_init_asyncqueues): Likewise.
(goacc_fini_asyncqueues): Likewise.
* oacc-cuda.c (acc_get_cuda_stream): Adjust code to use new async
design.
(acc_set_cuda_stream): Likewise.
* oacc-host.c (host_openacc_exec): Adjust parameters, remove 'async'.
(host_openacc_register_async_cleanup): Remove.
(host_openacc_async_exec): New function.
(host_openacc_async_test): Adjust parameters.
(host_openacc_async_test_all): Remove.
(host_openacc_async_wait): Remove.
(host_openacc_async_wait_async): Remove.
(host_openacc_async_wait_all): Remove.
(host_openacc_async_wait_all_async): Remove.
(host_openacc_async_set_async): Remove.
(host_openacc_async_synchronize): New function.
(host_openacc_async_serialize): New function.
(host_openacc_async_host2dev): New function.
(host_openacc_async_dev2host): New function.
(host_openacc_async_queue_callback): New function.
(host_openacc_async_construct): New function.
(host_openacc_async_destruct): New function.
(struct gomp_device_descr host_dispatch): Remove initialization of old
interface, add intialization of new async sub-struct.
* oacc-init.c (acc_shutdown_1): Adjust to use gomp_fini_device.
(goacc_attach_host_thread_to_device): Remove old async code usage.
* oacc-int.h (goacc_init_asyncqueues): New declaration.
(goacc_fini_asyncqueues): Likewise.
(goacc_async_copyout_unmap_vars): Likewise.
(goacc_async_free): Likewise.
(get_goacc_asyncqueue): Likewise.
(lookup_goacc_asyncqueue): Likewise.
* oacc-mem.c (memcpy_tofrom_device): Adjust code to use new async
design.
(present_create_copy): Adjust code to use new async design.
(delete_copyout): Likewise.
(update_dev_host): Likewise.
(gomp_acc_insert_pointer): Add async parameter, adjust code to use new
async design.
(gomp_acc_remove_pointer): Adjust code to use new async design.
* oacc-parallel.c (GOACC_parallel_keyed): Adjust code to use new async
design.
(GOACC_enter_exit_data): Likewise.
(goacc_wait): Likewise.
(GOACC_update): Likewise.
* oacc-plugin.c (GOMP_PLUGIN_async_unmap_vars): Change to assert fail
when called, warn as obsolete in comment.
* target.c (goacc_device_copy_async): New function.
(gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter,
add goacc_device_copy_async case.
(gomp_copy_dev2host): Likewise.
(gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code.
(gomp_map_pointer): Likewise.
(gomp_map_fields_existing): Likewise.
(gomp_map_vars_internal): New always_inline function, renamed from
gomp_map_vars.
(gomp_map_vars): Implement by calling gomp_map_vars_internal.
(gomp_map_vars_async): Implement by calling gomp_map_vars_internal,
passing goacc_asyncqueue argument.
(gomp_unmap_tgt): Remove static, add attribute_hidden.
(gomp_unref_tgt): New function.
(gomp_unmap_vars_internal): New always_inline function, renamed from
gomp_unmap_vars.
(gomp_unmap_vars): Implement by calling gomp_unmap_vars_internal.
(gomp_unmap_vars_async): Implement by calling
gomp_unmap_vars_internal, passing goacc_asyncqueue argument.
(gomp_fini_device): New function.
(gomp_exit_data): Adjust gomp_copy_dev2host call.
(gomp_load_plugin_for_device): Remove old interface, adjust to load
new async interface.
(gomp_target_fini): Adjust code to call gomp_fini_device.
* plugin/plugin-nvptx.c (struct cuda_map): Remove.
(struct ptx_stream): Remove.
(struct nvptx_thread): Remove current_stream field.
(cuda_map_create): Remove.
(cuda_map_destroy): Remove.
(map_init): Remove.
(map_fini): Remove.
(map_pop): Remove.
(map_push): Remove.
(struct goacc_asyncqueue): Define.
(struct nvptx_callback): Define.
(struct ptx_free_block): Define.
(struct ptx_device): Remove null_stream, active_streams, async_streams,
stream_lock, and next fields.
(enum ptx_event_type): Remove.
(struct ptx_event): Remove.
(ptx_event_lock): Remove.
(ptx_events): Remove.
(init_streams_for_device): Remove.
(fini_streams_for_device): Remove.
(select_stream_for_async): Remove.
(nvptx_init): Remove ptx_events and ptx_event_lock references.
(nvptx_attach_host_thread_to_device): Remove CUDA_ERROR_NOT_PERMITTED
case.
(nvptx_open_device): Add free_blocks initialization, remove
init_streams_for_device call.
(nvptx_close_device): Remove fini_streams_for_device call, add
free_blocks destruct code.
(event_gc): Remove.
(event_add): Remove.
(nvptx_exec): Adjust parameters and code.
(nvptx_free): Likewise.
(nvptx_host2dev): Remove.
(nvptx_dev2host): Remove.
(nvptx_set_async): Remove.
(nvptx_async_test): Remove.
(nvptx_async_test_all): Remove.
(nvptx_wait): Remove.
(nvptx_wait_async): Remove.
(nvptx_wait_all): Remove.
(nvptx_wait_all_async): Remove.
(nvptx_get_cuda_stream): Remove.
(nvptx_set_cuda_stream): Remove.
(GOMP_OFFLOAD_alloc): Adjust code.
(GOMP_OFFLOAD_free): Likewise.
(GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
(GOMP_OFFLOAD_openacc_exec): Adjust parameters and code.
(GOMP_OFFLOAD_openacc_async_test_all): Remove.
(GOMP_OFFLOAD_openacc_async_wait): Remove.
(GOMP_OFFLOAD_openacc_async_wait_async): Remove.
(GOMP_OFFLOAD_openacc_async_wait_all): Remove.
(GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
(GOMP_OFFLOAD_openacc_async_set_async): Remove.
(cuda_free_argmem): New function.
(GOMP_OFFLOAD_openacc_async_exec): New plugin hook function.
(GOMP_OFFLOAD_openacc_create_thread_data): Adjust code.
(GOMP_OFFLOAD_openacc_cuda_get_stream): Adjust code.
(GOMP_OFFLOAD_openacc_cuda_set_stream): Adjust code.
(GOMP_OFFLOAD_openacc_async_construct): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_destruct): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_test): Remove and re-implement.
(GOMP_OFFLOAD_openacc_async_synchronize): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_serialize): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_queue_callback): New plugin hook function.
(cuda_callback_wrapper): New function.
(cuda_memcpy_sanity_check): New function.
(GOMP_OFFLOAD_host2dev): Remove and re-implement.
(GOMP_OFFLOAD_dev2host): Remove and re-implement.
(GOMP_OFFLOAD_openacc_async_host2dev): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_dev2host): New plugin hook function.
From-SVN: r271128
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index f5fb63c..fa99a2a 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -217,8 +217,6 @@
}
va_end (ap);
- acc_dev->openacc.async_set_async_func (async);
-
if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
{
k.host_start = (uintptr_t) fn;
@@ -235,44 +233,29 @@
else
tgt_fn = (void (*)) fn;
- tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
- GOMP_MAP_VARS_OPENACC);
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
+ true, GOMP_MAP_VARS_OPENACC);
+
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
for (i = 0; i < mapnum; i++)
devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+ tgt->list[i].key->tgt_offset
+ tgt->list[i].offset);
-
- acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
- async, dims, tgt);
-
- /* If running synchronously, unmap immediately. */
- bool copyfrom = true;
- if (async_synchronous_p (async))
- gomp_unmap_vars (tgt, true);
+ if (aq == NULL)
+ {
+ acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+ dims, tgt);
+ /* If running synchronously, unmap immediately. */
+ gomp_unmap_vars (tgt, true);
+ }
else
{
- bool async_unmap = false;
- for (size_t i = 0; i < tgt->list_count; i++)
- {
- splay_tree_key k = tgt->list[i].key;
- if (k && k->refcount == 1)
- {
- async_unmap = true;
- break;
- }
- }
- if (async_unmap)
- tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
- else
- {
- copyfrom = false;
- gomp_unmap_vars (tgt, copyfrom);
- }
+ acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+ dims, tgt, aq);
+ gomp_unmap_vars_async (tgt, true, aq);
}
-
- acc_dev->openacc.async_set_async_func (acc_async_sync);
}
/* Legacy entry point (GCC 5). Only provide host fallback execution. */
@@ -383,8 +366,6 @@
finalize = true;
}
- acc_dev->openacc.async_set_async_func (async);
-
/* Determine if this is an "acc enter data". */
for (i = 0; i < mapnum; ++i)
{
@@ -437,11 +418,11 @@
{
case GOMP_MAP_ALLOC:
case GOMP_MAP_FORCE_ALLOC:
- acc_create (hostaddrs[i], sizes[i]);
+ acc_create_async (hostaddrs[i], sizes[i], async);
break;
case GOMP_MAP_TO:
case GOMP_MAP_FORCE_TO:
- acc_copyin (hostaddrs[i], sizes[i]);
+ acc_copyin_async (hostaddrs[i], sizes[i], async);
break;
default:
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -452,7 +433,7 @@
else
{
gomp_acc_insert_pointer (pointer, &hostaddrs[i],
- &sizes[i], &kinds[i]);
+ &sizes[i], &kinds[i], async);
/* Increment 'i' by two because OpenACC requires fortran
arrays to be contiguous, so each PSET is associated with
one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
@@ -477,17 +458,17 @@
if (acc_is_present (hostaddrs[i], sizes[i]))
{
if (finalize)
- acc_delete_finalize (hostaddrs[i], sizes[i]);
+ acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
else
- acc_delete (hostaddrs[i], sizes[i]);
+ acc_delete_async (hostaddrs[i], sizes[i], async);
}
break;
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_FROM:
if (finalize)
- acc_copyout_finalize (hostaddrs[i], sizes[i]);
+ acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
else
- acc_copyout (hostaddrs[i], sizes[i]);
+ acc_copyout_async (hostaddrs[i], sizes[i], async);
break;
default:
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -505,8 +486,6 @@
i += pointer - 1;
}
}
-
- acc_dev->openacc.async_set_async_func (acc_async_sync);
}
static void
@@ -532,9 +511,10 @@
if (async == acc_async_sync)
acc_wait (qid);
else if (qid == async)
- ;/* If we're waiting on the same asynchronous queue as we're
- launching on, the queue itself will order work as
- required, so there's no need to wait explicitly. */
+ /* If we're waiting on the same asynchronous queue as we're
+ launching on, the queue itself will order work as
+ required, so there's no need to wait explicitly. */
+ ;
else
acc_wait_async (qid, async);
}
@@ -567,8 +547,6 @@
va_end (ap);
}
- acc_dev->openacc.async_set_async_func (async);
-
bool update_device = false;
for (i = 0; i < mapnum; ++i)
{
@@ -591,6 +569,8 @@
the value of the allocated device memory in the
previous pointer. */
*(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
+ /* TODO: verify that we really cannot use acc_update_device_async
+ here. */
acc_update_device (hostaddrs[i], sizeof (uintptr_t));
/* Restore the host pointer. */
@@ -608,7 +588,7 @@
/* Fallthru */
case GOMP_MAP_FORCE_TO:
update_device = true;
- acc_update_device (hostaddrs[i], sizes[i]);
+ acc_update_device_async (hostaddrs[i], sizes[i], async);
break;
case GOMP_MAP_FROM:
@@ -620,7 +600,7 @@
/* Fallthru */
case GOMP_MAP_FORCE_FROM:
update_device = false;
- acc_update_self (hostaddrs[i], sizes[i]);
+ acc_update_self_async (hostaddrs[i], sizes[i], async);
break;
default:
@@ -628,8 +608,6 @@
break;
}
}
-
- acc_dev->openacc.async_set_async_func (acc_async_sync);
}
void