Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 1 | /* Copyright (C) 2015-2021 Free Software Foundation, Inc. |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 2 | Contributed by Jakub Jelinek <jakub@redhat.com>. |
| 3 | |
| 4 | This file is part of the GNU Offloading and Multi Processing Library |
| 5 | (libgomp). |
| 6 | |
| 7 | Libgomp is free software; you can redistribute it and/or modify it |
| 8 | under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation; either version 3, or (at your option) |
| 10 | any later version. |
| 11 | |
| 12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
| 13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| 15 | more details. |
| 16 | |
| 17 | Under Section 7 of GPL version 3, you are granted additional |
| 18 | permissions described in the GCC Runtime Library Exception, version |
| 19 | 3.1, as published by the Free Software Foundation. |
| 20 | |
| 21 | You should have received a copy of the GNU General Public License and |
| 22 | a copy of the GCC Runtime Library Exception along with this program; |
| 23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| 24 | <http://www.gnu.org/licenses/>. */ |
| 25 | |
| 26 | /* This file handles the taskloop construct. It is included twice, once |
| 27 | for the long and once for unsigned long long variant. */ |
| 28 | |
| 29 | /* Called when encountering an explicit task directive. If IF_CLAUSE is |
| 30 | false, then we must not delay in executing the task. If UNTIED is true, |
| 31 | then the task may be executed by any member of the team. */ |
| 32 | |
| 33 | void |
| 34 | GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), |
| 35 | long arg_size, long arg_align, unsigned flags, |
| 36 | unsigned long num_tasks, int priority, |
| 37 | TYPE start, TYPE end, TYPE step) |
| 38 | { |
| 39 | struct gomp_thread *thr = gomp_thread (); |
| 40 | struct gomp_team *team = thr->ts.team; |
| 41 | |
| 42 | #ifdef HAVE_BROKEN_POSIX_SEMAPHORES |
| 43 | /* If pthread_mutex_* is used for omp_*lock*, then each task must be |
| 44 | tied to one thread all the time. This means UNTIED tasks must be |
| 45 | tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN |
| 46 | might be running on different thread than FN. */ |
| 47 | if (cpyfn) |
| 48 | flags &= ~GOMP_TASK_FLAG_IF; |
| 49 | flags &= ~GOMP_TASK_FLAG_UNTIED; |
| 50 | #endif |
| 51 | |
| 52 | /* If parallel or taskgroup has been cancelled, don't start new tasks. */ |
| 53 | if (team && gomp_team_barrier_cancelled (&team->barrier)) |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 54 | { |
| 55 | early_return: |
| 56 | if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION)) |
| 57 | == GOMP_TASK_FLAG_REDUCTION) |
| 58 | { |
| 59 | struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; |
| 60 | uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; |
| 61 | /* Tell callers GOMP_taskgroup_reduction_register has not been |
| 62 | called. */ |
| 63 | ptr[2] = 0; |
| 64 | } |
| 65 | return; |
| 66 | } |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 67 | |
| 68 | #ifdef TYPE_is_long |
| 69 | TYPE s = step; |
| 70 | if (step > 0) |
| 71 | { |
| 72 | if (start >= end) |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 73 | goto early_return; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 74 | s--; |
| 75 | } |
| 76 | else |
| 77 | { |
| 78 | if (start <= end) |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 79 | goto early_return; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 80 | s++; |
| 81 | } |
| 82 | UTYPE n = (end - start + s) / step; |
| 83 | #else |
| 84 | UTYPE n; |
| 85 | if (flags & GOMP_TASK_FLAG_UP) |
| 86 | { |
| 87 | if (start >= end) |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 88 | goto early_return; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 89 | n = (end - start + step - 1) / step; |
| 90 | } |
| 91 | else |
| 92 | { |
| 93 | if (start <= end) |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 94 | goto early_return; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 95 | n = (start - end - step - 1) / -step; |
| 96 | } |
| 97 | #endif |
| 98 | |
| 99 | TYPE task_step = step; |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 100 | TYPE nfirst_task_step = step; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 101 | unsigned long nfirst = n; |
| 102 | if (flags & GOMP_TASK_FLAG_GRAINSIZE) |
| 103 | { |
| 104 | unsigned long grainsize = num_tasks; |
| 105 | #ifdef TYPE_is_long |
| 106 | num_tasks = n / grainsize; |
| 107 | #else |
| 108 | UTYPE ndiv = n / grainsize; |
| 109 | num_tasks = ndiv; |
| 110 | if (num_tasks != ndiv) |
| 111 | num_tasks = ~0UL; |
| 112 | #endif |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 113 | if ((flags & GOMP_TASK_FLAG_STRICT) |
| 114 | && num_tasks != ~0ULL) |
| 115 | { |
| 116 | UTYPE mod = n % grainsize; |
| 117 | task_step = (TYPE) grainsize * step; |
| 118 | if (mod) |
| 119 | { |
| 120 | num_tasks++; |
| 121 | nfirst_task_step = (TYPE) mod * step; |
| 122 | if (num_tasks == 1) |
| 123 | task_step = nfirst_task_step; |
| 124 | else |
| 125 | nfirst = num_tasks - 2; |
| 126 | } |
| 127 | } |
| 128 | else if (num_tasks <= 1) |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 129 | { |
| 130 | num_tasks = 1; |
| 131 | task_step = end - start; |
| 132 | } |
| 133 | else if (num_tasks >= grainsize |
| 134 | #ifndef TYPE_is_long |
| 135 | && num_tasks != ~0UL |
| 136 | #endif |
| 137 | ) |
| 138 | { |
| 139 | UTYPE mul = num_tasks * grainsize; |
| 140 | task_step = (TYPE) grainsize * step; |
| 141 | if (mul != n) |
| 142 | { |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 143 | nfirst_task_step = task_step; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 144 | task_step += step; |
| 145 | nfirst = n - mul - 1; |
| 146 | } |
| 147 | } |
| 148 | else |
| 149 | { |
| 150 | UTYPE div = n / num_tasks; |
| 151 | UTYPE mod = n % num_tasks; |
| 152 | task_step = (TYPE) div * step; |
| 153 | if (mod) |
| 154 | { |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 155 | nfirst_task_step = task_step; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 156 | task_step += step; |
| 157 | nfirst = mod - 1; |
| 158 | } |
| 159 | } |
| 160 | } |
| 161 | else |
| 162 | { |
| 163 | if (num_tasks == 0) |
| 164 | num_tasks = team ? team->nthreads : 1; |
| 165 | if (num_tasks >= n) |
| 166 | num_tasks = n; |
| 167 | else |
| 168 | { |
| 169 | UTYPE div = n / num_tasks; |
| 170 | UTYPE mod = n % num_tasks; |
| 171 | task_step = (TYPE) div * step; |
| 172 | if (mod) |
| 173 | { |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 174 | nfirst_task_step = task_step; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 175 | task_step += step; |
| 176 | nfirst = mod - 1; |
| 177 | } |
| 178 | } |
| 179 | } |
| 180 | |
| 181 | if (flags & GOMP_TASK_FLAG_NOGROUP) |
| 182 | { |
Jakub Jelinek | 28567c4 | 2018-11-08 18:13:04 +0100 | [diff] [blame] | 183 | if (__builtin_expect (gomp_cancel_var, 0) |
| 184 | && thr->task |
| 185 | && thr->task->taskgroup) |
| 186 | { |
| 187 | if (thr->task->taskgroup->cancelled) |
| 188 | return; |
| 189 | if (thr->task->taskgroup->workshare |
| 190 | && thr->task->taskgroup->prev |
| 191 | && thr->task->taskgroup->prev->cancelled) |
| 192 | return; |
| 193 | } |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 194 | } |
| 195 | else |
Jakub Jelinek | 28567c4 | 2018-11-08 18:13:04 +0100 | [diff] [blame] | 196 | { |
| 197 | ialias_call (GOMP_taskgroup_start) (); |
| 198 | if (flags & GOMP_TASK_FLAG_REDUCTION) |
| 199 | { |
| 200 | struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; |
| 201 | uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; |
| 202 | ialias_call (GOMP_taskgroup_reduction_register) (ptr); |
| 203 | } |
| 204 | } |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 205 | |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 206 | if (priority > gomp_max_task_priority_var) |
| 207 | priority = gomp_max_task_priority_var; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 208 | |
| 209 | if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL |
| 210 | || (thr->task && thr->task->final_task) |
| 211 | || team->task_count + num_tasks > 64 * team->nthreads) |
| 212 | { |
| 213 | unsigned long i; |
| 214 | if (__builtin_expect (cpyfn != NULL, 0)) |
| 215 | { |
| 216 | struct gomp_task task[num_tasks]; |
| 217 | struct gomp_task *parent = thr->task; |
| 218 | arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); |
| 219 | char buf[num_tasks * arg_size + arg_align - 1]; |
| 220 | char *arg = (char *) (((uintptr_t) buf + arg_align - 1) |
| 221 | & ~(uintptr_t) (arg_align - 1)); |
| 222 | char *orig_arg = arg; |
| 223 | for (i = 0; i < num_tasks; i++) |
| 224 | { |
| 225 | gomp_init_task (&task[i], parent, gomp_icv (false)); |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 226 | task[i].priority = priority; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 227 | task[i].kind = GOMP_TASK_UNDEFERRED; |
| 228 | task[i].final_task = (thr->task && thr->task->final_task) |
| 229 | || (flags & GOMP_TASK_FLAG_FINAL); |
| 230 | if (thr->task) |
| 231 | { |
| 232 | task[i].in_tied_task = thr->task->in_tied_task; |
| 233 | task[i].taskgroup = thr->task->taskgroup; |
| 234 | } |
| 235 | thr->task = &task[i]; |
| 236 | cpyfn (arg, data); |
| 237 | arg += arg_size; |
| 238 | } |
| 239 | arg = orig_arg; |
| 240 | for (i = 0; i < num_tasks; i++) |
| 241 | { |
| 242 | thr->task = &task[i]; |
| 243 | ((TYPE *)arg)[0] = start; |
| 244 | start += task_step; |
| 245 | ((TYPE *)arg)[1] = start; |
| 246 | if (i == nfirst) |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 247 | task_step = nfirst_task_step; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 248 | fn (arg); |
| 249 | arg += arg_size; |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 250 | if (!priority_queue_empty_p (&task[i].children_queue, |
| 251 | MEMMODEL_RELAXED)) |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 252 | { |
| 253 | gomp_mutex_lock (&team->task_lock); |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 254 | gomp_clear_parent (&task[i].children_queue); |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 255 | gomp_mutex_unlock (&team->task_lock); |
| 256 | } |
| 257 | gomp_end_task (); |
| 258 | } |
| 259 | } |
| 260 | else |
| 261 | for (i = 0; i < num_tasks; i++) |
| 262 | { |
| 263 | struct gomp_task task; |
| 264 | |
| 265 | gomp_init_task (&task, thr->task, gomp_icv (false)); |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 266 | task.priority = priority; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 267 | task.kind = GOMP_TASK_UNDEFERRED; |
| 268 | task.final_task = (thr->task && thr->task->final_task) |
| 269 | || (flags & GOMP_TASK_FLAG_FINAL); |
| 270 | if (thr->task) |
| 271 | { |
| 272 | task.in_tied_task = thr->task->in_tied_task; |
| 273 | task.taskgroup = thr->task->taskgroup; |
| 274 | } |
| 275 | thr->task = &task; |
| 276 | ((TYPE *)data)[0] = start; |
| 277 | start += task_step; |
| 278 | ((TYPE *)data)[1] = start; |
| 279 | if (i == nfirst) |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 280 | task_step = nfirst_task_step; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 281 | fn (data); |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 282 | if (!priority_queue_empty_p (&task.children_queue, |
| 283 | MEMMODEL_RELAXED)) |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 284 | { |
| 285 | gomp_mutex_lock (&team->task_lock); |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 286 | gomp_clear_parent (&task.children_queue); |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 287 | gomp_mutex_unlock (&team->task_lock); |
| 288 | } |
| 289 | gomp_end_task (); |
| 290 | } |
| 291 | } |
| 292 | else |
| 293 | { |
| 294 | struct gomp_task *tasks[num_tasks]; |
| 295 | struct gomp_task *parent = thr->task; |
| 296 | struct gomp_taskgroup *taskgroup = parent->taskgroup; |
| 297 | char *arg; |
| 298 | int do_wake; |
| 299 | unsigned long i; |
| 300 | |
| 301 | for (i = 0; i < num_tasks; i++) |
| 302 | { |
| 303 | struct gomp_task *task |
| 304 | = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); |
| 305 | tasks[i] = task; |
| 306 | arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) |
| 307 | & ~(uintptr_t) (arg_align - 1)); |
| 308 | gomp_init_task (task, parent, gomp_icv (false)); |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 309 | task->priority = priority; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 310 | task->kind = GOMP_TASK_UNDEFERRED; |
| 311 | task->in_tied_task = parent->in_tied_task; |
| 312 | task->taskgroup = taskgroup; |
| 313 | thr->task = task; |
| 314 | if (cpyfn) |
| 315 | { |
| 316 | cpyfn (arg, data); |
| 317 | task->copy_ctors_done = true; |
| 318 | } |
| 319 | else |
| 320 | memcpy (arg, data, arg_size); |
| 321 | ((TYPE *)arg)[0] = start; |
| 322 | start += task_step; |
| 323 | ((TYPE *)arg)[1] = start; |
| 324 | if (i == nfirst) |
Thomas Koenig | b18a97e | 2021-09-13 19:49:49 +0200 | [diff] [blame] | 325 | task_step = nfirst_task_step; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 326 | thr->task = parent; |
| 327 | task->kind = GOMP_TASK_WAITING; |
| 328 | task->fn = fn; |
| 329 | task->fn_data = arg; |
| 330 | task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; |
| 331 | } |
| 332 | gomp_mutex_lock (&team->task_lock); |
| 333 | /* If parallel or taskgroup has been cancelled, don't start new |
| 334 | tasks. */ |
Jakub Jelinek | 28567c4 | 2018-11-08 18:13:04 +0100 | [diff] [blame] | 335 | if (__builtin_expect (gomp_cancel_var, 0) |
| 336 | && cpyfn == NULL) |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 337 | { |
Jakub Jelinek | 28567c4 | 2018-11-08 18:13:04 +0100 | [diff] [blame] | 338 | if (gomp_team_barrier_cancelled (&team->barrier)) |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 339 | { |
Jakub Jelinek | 28567c4 | 2018-11-08 18:13:04 +0100 | [diff] [blame] | 340 | do_cancel: |
| 341 | gomp_mutex_unlock (&team->task_lock); |
| 342 | for (i = 0; i < num_tasks; i++) |
| 343 | { |
| 344 | gomp_finish_task (tasks[i]); |
| 345 | free (tasks[i]); |
| 346 | } |
| 347 | if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) |
| 348 | ialias_call (GOMP_taskgroup_end) (); |
| 349 | return; |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 350 | } |
Jakub Jelinek | 28567c4 | 2018-11-08 18:13:04 +0100 | [diff] [blame] | 351 | if (taskgroup) |
| 352 | { |
| 353 | if (taskgroup->cancelled) |
| 354 | goto do_cancel; |
| 355 | if (taskgroup->workshare |
| 356 | && taskgroup->prev |
| 357 | && taskgroup->prev->cancelled) |
| 358 | goto do_cancel; |
| 359 | } |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 360 | } |
| 361 | if (taskgroup) |
| 362 | taskgroup->num_children += num_tasks; |
| 363 | for (i = 0; i < num_tasks; i++) |
| 364 | { |
| 365 | struct gomp_task *task = tasks[i]; |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 366 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, |
| 367 | task, priority, |
| 368 | PRIORITY_INSERT_BEGIN, |
| 369 | /*last_parent_depends_on=*/false, |
| 370 | task->parent_depends_on); |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 371 | if (taskgroup) |
Jakub Jelinek | e460634 | 2015-11-14 19:42:13 +0100 | [diff] [blame] | 372 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
| 373 | task, priority, PRIORITY_INSERT_BEGIN, |
| 374 | /*last_parent_depends_on=*/false, |
| 375 | task->parent_depends_on); |
| 376 | priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority, |
| 377 | PRIORITY_INSERT_END, |
| 378 | /*last_parent_depends_on=*/false, |
| 379 | task->parent_depends_on); |
Jakub Jelinek | d9a6bd3 | 2015-10-13 21:06:23 +0200 | [diff] [blame] | 380 | ++team->task_count; |
| 381 | ++team->task_queued_count; |
| 382 | } |
| 383 | gomp_team_barrier_set_task_pending (&team->barrier); |
| 384 | if (team->task_running_count + !parent->in_tied_task |
| 385 | < team->nthreads) |
| 386 | { |
| 387 | do_wake = team->nthreads - team->task_running_count |
| 388 | - !parent->in_tied_task; |
| 389 | if ((unsigned long) do_wake > num_tasks) |
| 390 | do_wake = num_tasks; |
| 391 | } |
| 392 | else |
| 393 | do_wake = 0; |
| 394 | gomp_mutex_unlock (&team->task_lock); |
| 395 | if (do_wake) |
| 396 | gomp_team_barrier_wake (&team->barrier, do_wake); |
| 397 | } |
| 398 | if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) |
| 399 | ialias_call (GOMP_taskgroup_end) (); |
| 400 | } |