blob: b5e80603004a50c8ed6263d57066159d54c85e31 [file] [log] [blame]
Thomas Schwinge41dbbb32015-01-15 21:11:12 +01001/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2
3 Contributed by Mentor Embedded.
4
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
7
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27/* This file handles OpenACC constructs. */
28
29#include "openacc.h"
30#include "libgomp.h"
31#include "libgomp_g.h"
32#include "gomp-constants.h"
33#include "oacc-int.h"
34#include <string.h>
35#include <stdarg.h>
36#include <assert.h>
Thomas Schwinge41dbbb32015-01-15 21:11:12 +010037
38static int
39find_pset (int pos, size_t mapnum, unsigned short *kinds)
40{
41 if (pos + 1 >= mapnum)
42 return 0;
43
44 unsigned char kind = kinds[pos+1] & 0xff;
45
46 return kind == GOMP_MAP_TO_PSET;
47}
48
49
50/* Ensure that the target device for DEVICE_TYPE is initialised (and that
51 plugins have been loaded if appropriate). The ACC_dev variable for the
52 current thread will be set appropriately for the given device type on
53 return. */
54
55attribute_hidden void
56select_acc_device (int device_type)
57{
58 goacc_lazy_initialize ();
59
60 if (device_type == GOMP_DEVICE_HOST_FALLBACK)
61 return;
62
63 if (device_type == acc_device_none)
64 device_type = acc_device_host;
65
66 if (device_type >= 0)
67 {
68 /* NOTE: this will go badly if the surrounding data environment is set up
69 to use a different device type. We'll just have to trust that users
70 know what they're doing... */
71 acc_set_device_type (device_type);
72 }
73}
74
75static void goacc_wait (int async, int num_waits, va_list ap);
76
77void
78GOACC_parallel (int device, void (*fn) (void *), const void *offload_table,
79 size_t mapnum, void **hostaddrs, size_t *sizes,
80 unsigned short *kinds,
81 int num_gangs, int num_workers, int vector_length,
82 int async, int num_waits, ...)
83{
84 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
85 va_list ap;
86 struct goacc_thread *thr;
87 struct gomp_device_descr *acc_dev;
88 struct target_mem_desc *tgt;
89 void **devaddrs;
90 unsigned int i;
91 struct splay_tree_key_s k;
92 splay_tree_key tgt_fn_key;
93 void (*tgt_fn);
94
95 if (num_gangs != 1)
96 gomp_fatal ("num_gangs (%d) different from one is not yet supported",
97 num_gangs);
98 if (num_workers != 1)
99 gomp_fatal ("num_workers (%d) different from one is not yet supported",
100 num_workers);
101
102 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
103 __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async);
104
105 select_acc_device (device);
106
107 thr = goacc_thread ();
108 acc_dev = thr->dev;
109
110 /* Host fallback if "if" clause is false or if the current device is set to
111 the host. */
112 if (host_fallback)
113 {
114 goacc_save_and_set_bind (acc_device_host);
115 fn (hostaddrs);
116 goacc_restore_bind ();
117 return;
118 }
119 else if (acc_device_type (acc_dev->type) == acc_device_host)
120 {
121 fn (hostaddrs);
122 return;
123 }
124
125 va_start (ap, num_waits);
126
127 if (num_waits > 0)
128 goacc_wait (async, num_waits, ap);
129
130 va_end (ap);
131
132 acc_dev->openacc.async_set_async_func (async);
133
134 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
135 {
136 k.host_start = (uintptr_t) fn;
137 k.host_end = k.host_start + 1;
138 gomp_mutex_lock (&acc_dev->mem_map.lock);
139 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
140 gomp_mutex_unlock (&acc_dev->mem_map.lock);
141
142 if (tgt_fn_key == NULL)
143 gomp_fatal ("target function wasn't mapped");
144
145 tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
146 }
147 else
148 tgt_fn = (void (*)) fn;
149
150 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
151 false);
152
Thomas Schwinge6e361142015-01-16 21:05:21 +0100153 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
Thomas Schwinge41dbbb32015-01-15 21:11:12 +0100154 for (i = 0; i < mapnum; i++)
155 devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
156 + tgt->list[i]->tgt_offset);
157
158 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
159 num_gangs, num_workers, vector_length, async,
160 tgt);
161
162 /* If running synchronously, unmap immediately. */
163 if (async < acc_async_noval)
164 gomp_unmap_vars (tgt, true);
165 else
166 {
167 gomp_copy_from_async (tgt);
168 acc_dev->openacc.register_async_cleanup_func (tgt);
169 }
170
171 acc_dev->openacc.async_set_async_func (acc_async_sync);
172}
173
174void
175GOACC_data_start (int device, const void *offload_table, size_t mapnum,
176 void **hostaddrs, size_t *sizes, unsigned short *kinds)
177{
178 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
179 struct target_mem_desc *tgt;
180
181 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
182 __FUNCTION__, mapnum, hostaddrs, sizes, kinds);
183
184 select_acc_device (device);
185
186 struct goacc_thread *thr = goacc_thread ();
187 struct gomp_device_descr *acc_dev = thr->dev;
188
189 /* Host fallback or 'do nothing'. */
190 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
191 || host_fallback)
192 {
193 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
194 tgt->prev = thr->mapped_data;
195 thr->mapped_data = tgt;
196
197 return;
198 }
199
200 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
201 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
202 false);
203 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
204 tgt->prev = thr->mapped_data;
205 thr->mapped_data = tgt;
206}
207
208void
209GOACC_data_end (void)
210{
211 struct goacc_thread *thr = goacc_thread ();
212 struct target_mem_desc *tgt = thr->mapped_data;
213
214 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
215 thr->mapped_data = tgt->prev;
216 gomp_unmap_vars (tgt, true);
217 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
218}
219
220void
221GOACC_enter_exit_data (int device, const void *offload_table, size_t mapnum,
222 void **hostaddrs, size_t *sizes, unsigned short *kinds,
223 int async, int num_waits, ...)
224{
225 struct goacc_thread *thr;
226 struct gomp_device_descr *acc_dev;
227 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
228 bool data_enter = false;
229 size_t i;
230
231 select_acc_device (device);
232
233 thr = goacc_thread ();
234 acc_dev = thr->dev;
235
236 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
237 || host_fallback)
238 return;
239
240 if (num_waits > 0)
241 {
242 va_list ap;
243
244 va_start (ap, num_waits);
245
246 goacc_wait (async, num_waits, ap);
247
248 va_end (ap);
249 }
250
251 acc_dev->openacc.async_set_async_func (async);
252
253 /* Determine if this is an "acc enter data". */
254 for (i = 0; i < mapnum; ++i)
255 {
256 unsigned char kind = kinds[i] & 0xff;
257
258 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
259 continue;
260
261 if (kind == GOMP_MAP_FORCE_ALLOC
262 || kind == GOMP_MAP_FORCE_PRESENT
263 || kind == GOMP_MAP_FORCE_TO)
264 {
265 data_enter = true;
266 break;
267 }
268
269 if (kind == GOMP_MAP_FORCE_DEALLOC
270 || kind == GOMP_MAP_FORCE_FROM)
271 break;
272
273 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
274 kind);
275 }
276
277 if (data_enter)
278 {
279 for (i = 0; i < mapnum; i++)
280 {
281 unsigned char kind = kinds[i] & 0xff;
282
283 /* Scan for PSETs. */
284 int psets = find_pset (i, mapnum, kinds);
285
286 if (!psets)
287 {
288 switch (kind)
289 {
290 case GOMP_MAP_POINTER:
291 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
292 &kinds[i]);
293 break;
294 case GOMP_MAP_FORCE_ALLOC:
295 acc_create (hostaddrs[i], sizes[i]);
296 break;
297 case GOMP_MAP_FORCE_PRESENT:
298 acc_present_or_copyin (hostaddrs[i], sizes[i]);
299 break;
300 case GOMP_MAP_FORCE_TO:
301 acc_present_or_copyin (hostaddrs[i], sizes[i]);
302 break;
303 default:
304 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
305 kind);
306 break;
307 }
308 }
309 else
310 {
311 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
312 /* Increment 'i' by two because OpenACC requires fortran
313 arrays to be contiguous, so each PSET is associated with
314 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
315 one MAP_POINTER. */
316 i += 2;
317 }
318 }
319 }
320 else
321 for (i = 0; i < mapnum; ++i)
322 {
323 unsigned char kind = kinds[i] & 0xff;
324
325 int psets = find_pset (i, mapnum, kinds);
326
327 if (!psets)
328 {
329 switch (kind)
330 {
331 case GOMP_MAP_POINTER:
332 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
333 == GOMP_MAP_FORCE_FROM,
334 async, 1);
335 break;
336 case GOMP_MAP_FORCE_DEALLOC:
337 acc_delete (hostaddrs[i], sizes[i]);
338 break;
339 case GOMP_MAP_FORCE_FROM:
340 acc_copyout (hostaddrs[i], sizes[i]);
341 break;
342 default:
343 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
344 kind);
345 break;
346 }
347 }
348 else
349 {
350 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
351 == GOMP_MAP_FORCE_FROM, async, 3);
352 /* See the above comment. */
353 i += 2;
354 }
355 }
356
357 acc_dev->openacc.async_set_async_func (acc_async_sync);
358}
359
360static void
361goacc_wait (int async, int num_waits, va_list ap)
362{
363 struct goacc_thread *thr = goacc_thread ();
364 struct gomp_device_descr *acc_dev = thr->dev;
365 int i;
366
367 assert (num_waits >= 0);
368
369 if (async == acc_async_sync && num_waits == 0)
370 {
371 acc_wait_all ();
372 return;
373 }
374
375 if (async == acc_async_sync && num_waits)
376 {
377 for (i = 0; i < num_waits; i++)
378 {
379 int qid = va_arg (ap, int);
380
381 if (acc_async_test (qid))
382 continue;
383
384 acc_wait (qid);
385 }
386 return;
387 }
388
389 if (async == acc_async_noval && num_waits == 0)
390 {
391 acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
392 return;
393 }
394
395 for (i = 0; i < num_waits; i++)
396 {
397 int qid = va_arg (ap, int);
398
399 if (acc_async_test (qid))
400 continue;
401
402 /* If we're waiting on the same asynchronous queue as we're launching on,
403 the queue itself will order work as required, so there's no need to
404 wait explicitly. */
405 if (qid != async)
406 acc_dev->openacc.async_wait_async_func (qid, async);
407 }
408}
409
410void
411GOACC_update (int device, const void *offload_table, size_t mapnum,
412 void **hostaddrs, size_t *sizes, unsigned short *kinds,
413 int async, int num_waits, ...)
414{
415 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
416 size_t i;
417
418 select_acc_device (device);
419
420 struct goacc_thread *thr = goacc_thread ();
421 struct gomp_device_descr *acc_dev = thr->dev;
422
423 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
424 || host_fallback)
425 return;
426
427 if (num_waits > 0)
428 {
429 va_list ap;
430
431 va_start (ap, num_waits);
432
433 goacc_wait (async, num_waits, ap);
434
435 va_end (ap);
436 }
437
438 acc_dev->openacc.async_set_async_func (async);
439
440 for (i = 0; i < mapnum; ++i)
441 {
442 unsigned char kind = kinds[i] & 0xff;
443
444 switch (kind)
445 {
446 case GOMP_MAP_POINTER:
447 case GOMP_MAP_TO_PSET:
448 break;
449
450 case GOMP_MAP_FORCE_TO:
451 acc_update_device (hostaddrs[i], sizes[i]);
452 break;
453
454 case GOMP_MAP_FORCE_FROM:
455 acc_update_self (hostaddrs[i], sizes[i]);
456 break;
457
458 default:
459 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
460 break;
461 }
462 }
463
464 acc_dev->openacc.async_set_async_func (acc_async_sync);
465}
466
/* Handle an OpenACC "wait" directive.  ASYNC is the async argument, and
   NUM_WAITS is the number of int queue ids that follow in the variable
   argument list.  All of the work is done by goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queue_ids;

  va_start (queue_ids, num_waits);
  goacc_wait (async, num_waits, queue_ids);
  va_end (queue_ids);
}
478
/* Return the thread count.  This implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
484
/* Return the calling thread's number.  This implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
489}