#include "kmp.h"
#include "kmp_wait_release.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
#if OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}
#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//  from top to bottom.
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}
//---------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//  after a thread_data structure is created.

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}
//---------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_info_t *thread = __kmp_threads[ gtid ];   // owner of the blocks being freed
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}
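
// Layout note: the suspended-tied-task stack above is a chain of fixed-size
// blocks, each holding TASK_STACK_BLOCK_SIZE kmp_taskdata_t* entries, linked
// through sb_next/sb_prev.  ts_top always points at the next free slot, and
// whenever (ts_entries & TASK_STACK_INDEX_MASK) == 0 a block boundary has just
// been crossed, so push links in (or reuses) the next block while pop walks
// back through sb_prev.  This assumes TASK_STACK_BLOCK_SIZE is a power of two
// and TASK_STACK_INDEX_MASK == TASK_STACK_BLOCK_SIZE - 1.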
//---------------------------------------------------
//  __kmp_push_task_stack: push the tied task onto the task stack during execution.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );

    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block -> sb_prev   = stack_block;
            new_block -> sb_next   = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}
//---------------------------------------------------
//  __kmp_pop_task_stack: pop the tied task from the task stack.  Don't return
//  the task; just verify that it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  ending_task: the task popped off the stack

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}
#endif /* BUILD_TIED_TASK_STACK */
//---------------------------------------------------
//  __kmp_push_task: Add a task to the thread's deque.

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

    // Must have room since no thread can add tasks but calling thread
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
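
// Deque discipline (sketch): td_deque is a fixed-size ring buffer.  The owner
// pushes and pops at td_deque_tail (LIFO, for locality), while thieves take
// from td_deque_head (FIFO; see __kmp_steal_task).  Indices wrap with
// "& TASK_DEQUE_MASK", which assumes TASK_DEQUE_SIZE is a power of two and
// TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1.  The unlocked size check above is
// only a fast path; the decisive check is repeated under td_deque_lock.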
//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
//
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}
//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
//
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just-created implicit tasks of new team
    KF_TRACE( 10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                   "parent_task=%p\n",
                   tid, this_thr, this_thr->th.th_current_task,
                   team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                   "parent_task=%p\n",
                   tid, this_thr, this_thr->th.th_current_task,
                   team->t.t_implicit_task_taskdata[tid].td_parent ) );
}
//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping before possibly executing a task
//
// gtid: global thread number.
// task: task thunk for the task about to start.
// current_task: task suspended while this one runs.

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
}
//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
//
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, taskdata ) );
}
#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
}
#endif // TASK_UNUSED
//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
    __kmp_fast_free( thread, taskdata );
#else /* ! USE_FAST_MEMORY */
    __kmp_thread_free( thread, taskdata );
#endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}
//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or if team/tasking serialized,
        // to avoid premature deallocation of ancestors.
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}
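
// Reference-counting note: td_allocated_child_tasks starts at 1 for the task
// itself (see __kmp_task_alloc) and is incremented in the parent for every
// allocated explicit child.  The "- 1" after KMP_TEST_THEN_DEC32 converts the
// fetch-and-decrement result into the post-decrement value, so children == 0
// means this task and all of its children have been deallocated and the walk
// can continue up the ancestor chain.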
//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
//
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of counts if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been
    // called.  Otherwise, if a task is executed immediately from the release_deps
    // code, the flag would be reset to 1 again by this same function.
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.  The code is placed
       here, since at this point other tasks might have been released, so the
       destructor invocations can overlap with other work in the released tasks. */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            // In a serialized task, the resumed task is the parent.
            resumed_task = taskdata->td_parent;
        }
        else {
            // verify that the resumed task passed in points to the parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // resumed task must be passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task

    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );
}
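
// Resumption sketch: for a deferred task the scheduler that invoked us knows
// what it will run next and passes it in as resumed_task.  For a serialized
// task (task_serial == 1) the caller passes NULL and the parent is resumed,
// since a serialized task executes inline within its parent.  Either way,
// exactly one task on this thread has executing == 1 afterwards.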
//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
//
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
}
#endif // TASK_UNUSED
#if OMPT_SUPPORT
//----------------------------------------------------------------------------
// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task.
// Called after ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
{
    task->ompt_task_info.task_id = __ompt_task_id_new(tid);
    task->ompt_task_info.function = NULL;
    task->ompt_task_info.frame.exit_runtime_frame = NULL;
    task->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
#endif
//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate task data structure
// in preparation for an implicit task.
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVs.  This is assumed to have
// already been done elsewhere.

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started = 1;
    task->td_flags.executing = 1;
    task->td_flags.complete = 0;
    task->td_flags.freed = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0; // Not used: don't need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;           // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}
//-----------------------------------------------------------------------------------
// Round up a size to a power of two specified by val: used to insert padding
// between structures co-allocated using a single malloc() call.

static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }
    }
    return size;
} // __kmp_round_up_to_val
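
// Worked example: __kmp_round_up_to_val( 13, 8 ) proceeds as
//   13 & 7 != 0   -> not already aligned
//   13 &= ~7      -> 8   (round down to the alignment boundary)
//   8 + 8         -> 16  (round up; the overflow guard leaves size rounded
//                         down only in the degenerate case size > SIZE_MAX - val)
// An already-aligned size, e.g. 16 with val == 8, is returned unchanged.
// val must be a power of two for the mask arithmetic to hold.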
//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task
//        encountered.  Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private
//        vars accessed in task.
// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;    // children of a final task are themselves final
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec...
           we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized:
               set up a task team and propagate it to the thread */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
#else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
#endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;

    taskdata->td_task_id = KMP_GEN_TASK_ID();
    taskdata->td_team = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent = parent_task;
    taskdata->td_level = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_ident = loc_ref;
    taskdata->td_taskwait_ident = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness = flags->tiedness;
    taskdata->td_flags.final = flags->final;
    taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy = flags->proxy;
#endif
    taskdata->td_flags.tasktype = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure
    // implicit parallel region tasks are not left until program termination to
    // execute.  Also, it helps locality to execute immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started = 0;
    taskdata->td_flags.executing = 0;
    taskdata->td_flags.complete = 0;
    taskdata->td_flags.freed = 0;

    taskdata->td_flags.native = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks = 1;  // start at one because it counts the current task
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking
    // not serialized, or if it is a proxy task
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since
        // implicit tasks are never deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    if (ompt_status & ompt_status_track) {
        taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
        taskdata->ompt_task_info.function = (void*) task_entry;
        taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
        taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
#endif

    return task;
}
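
// Allocation layout (single block returned by the allocator):
//
//   [ kmp_taskdata_t ][ kmp_task_t + private vars ][ pointer-aligned pad ][ shareds ]
//   ^ taskdata        ^ task = KMP_TASKDATA_TO_TASK(taskdata)             ^ task->shareds
//
// shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t, rounded up to
// sizeof(void*), which is why task->shareds is asserted to be pointer-aligned.
// Co-allocating the three pieces avoids a second malloc per task.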
//---------------------------------------------------------------------------------
// __kmpc_omp_task_alloc: Wrapper around __kmp_task_alloc for the compiler.

kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}
//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
// gtid: global thread ID of caller
// task: the task to invoke
// current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_41_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n",
                      gtid, taskdata, current_task) );

        return;
    }

    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
    __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_status & ompt_status_track) {
        // Store the thread's state and restore it after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // if cancellation has been enabled for this run, check for cancellation
    // requests before executing the task body
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            // this task belongs to a cancelled taskgroup or parallel region,
            // so discard its body
            discard = 1 /* true */;
        }
    }

    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    if (!discard) {
#endif // OMP_40_ENABLED
#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }
#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT
    if (ompt_status & ompt_status_track) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
    __kmp_task_finish( gtid, task, current_task );

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
}
//-----------------------------------------------------------------------
// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
//
// loc_ref: location of original task pragma (ignored)
// gtid: Global Thread ID of encountering thread
// new_task: task thunk allocated by __kmp_task_alloc() for the ''new task''
// Returns:
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.

kmp_int32
__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    /* Should we execute the new task or queue it?  For now, let's just always try
       to queue it.  If the queue fills up, then we'll execute it.  */

    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
    {                                                           // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        new_taskdata->td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

    KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
                  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
                  new_taskdata ) );

    return TASK_CURRENT_NOT_QUEUED;
}
//---------------------------------------------------------------------
// __kmp_omp_task: Schedule a non-thread-switchable task for execution
//
// gtid: Global Thread ID of encountering thread
// new_task: non-thread-switchable task thunk allocated by __kmp_task_alloc()
// serialize_immediate: if TRUE then if the task is executed immediately its
//     execution will be serialized
// Returns:
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.

kmp_int32
__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPT_SUPPORT
    if (ompt_status & ompt_status_track) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
            __builtin_frame_address(0);
    }
#endif

    /* Should we execute the new task or queue it?  For now, let's just always try
       to queue it.  If the queue fills up, then we'll execute it.  */
#if OMP_41_ENABLED
    if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
#else
    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
#endif
    {                                                           // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        if ( serialize_immediate )
            new_taskdata -> td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

#if OMPT_SUPPORT
    if (ompt_status & ompt_status_track) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
    }
#endif

    return TASK_CURRENT_NOT_QUEUED;
}
//---------------------------------------------------------------------
// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
// non-thread-switchable task from the parent thread only!
//
// loc_ref: location of original task pragma (ignored)
// gtid: Global Thread ID of encountering thread
// new_task: non-thread-switchable task thunk allocated by __kmp_task_alloc()

kmp_int32
__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_int32 res;
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    res = __kmp_omp_task(gtid,new_task,true);

    KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );
    return res;
}
//-------------------------------------------------------------------------------------
// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete

kmp_int32
__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
{
    kmp_taskdata_t * taskdata;
    kmp_info_t * thread;
    int thread_finished = FALSE;

    KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
                  gtid, loc_ref) );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        thread = __kmp_threads[ gtid ];
        taskdata = thread -> th.th_current_task;

        // Debugger: The taskwait is active. Store location and thread that encountered it.
        taskdata->td_taskwait_counter += 1;
        taskdata->td_taskwait_ident = loc_ref;
        taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

#if OMP_41_ENABLED
        if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
#else
        if ( ! taskdata->td_flags.team_serial )
#endif
        {
            // GEH: if team serialized, avoid reading the volatile variable below.
            kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
            while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
            }
        }
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

        // Debugger: The taskwait is completed. Location remains, but thread is negated.
        taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
    }

    KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );

    return TASK_CURRENT_NOT_QUEUED;
}
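
// Wait-loop sketch: taskwait blocks on td_incomplete_child_tasks reaching 0.
// kmp_flag_32 packages the counter address together with the target value
// (0U), and flag.execute_tasks() runs or steals other tasks while it spins,
// so a thread in taskwait keeps contributing work instead of idling.  The
// OMP_41 variant of the guard also enters the loop when proxy tasks exist,
// because those can complete asynchronously even in a serialized team.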
//-------------------------------------------------
// __kmpc_omp_taskyield: switch to a different task

kmp_int32
__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
{
    kmp_taskdata_t * taskdata;
    kmp_info_t * thread;
    int thread_finished = FALSE;

    KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                  gtid, loc_ref, end_part) );

    if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {

        thread = __kmp_threads[ gtid ];
        taskdata = thread -> th.th_current_task;
        // Should we model this as a task wait or not?
        // Debugger: The taskwait is active. Store location and thread that encountered it.
        taskdata->td_taskwait_counter += 1;
        taskdata->td_taskwait_ident = loc_ref;
        taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */
        if ( ! taskdata->td_flags.team_serial ) {
            kmp_task_team_t * task_team = thread->th.th_task_team;
            if (task_team != NULL) {
                if (KMP_TASKING_ENABLED(task_team)) {
                    __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
                                            USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
                }
            }
        }
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

        // Debugger: The taskwait is completed. Location remains, but thread is negated.
        taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
    }

    KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );

    return TASK_CURRENT_NOT_QUEUED;
}
#if OMP_40_ENABLED
//-------------------------------------------
// __kmpc_taskgroup: Start a new taskgroup

void
__kmpc_taskgroup( ident_t* loc, int gtid )
{
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * taskdata = thread->th.th_current_task;
    kmp_taskgroup_t * tg_new =
        (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
    KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
    tg_new->count = 0;
    tg_new->cancel_request = cancel_noreq;
    tg_new->parent = taskdata->td_taskgroup;
    taskdata->td_taskgroup = tg_new;
}
//---------------------------------------------------------------------------
// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
//                       and its descendants are complete

void
__kmpc_end_taskgroup( ident_t* loc, int gtid )
{
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * taskdata = thread->th.th_current_task;
    kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
    int thread_finished = FALSE;

    KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
    KMP_DEBUG_ASSERT( taskgroup != NULL );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
#if USE_ITT_BUILD
        // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

#if OMP_41_ENABLED
        if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
#else
        if ( ! taskdata->td_flags.team_serial )
#endif
        {
            kmp_flag_32 flag(&(taskgroup->count), 0U);
            while ( TCR_4(taskgroup->count) != 0 ) {
                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
            }
        }

#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */
    }
    KMP_DEBUG_ASSERT( taskgroup->count == 0 );

    // Restore parent taskgroup for the current task
    taskdata->td_taskgroup = taskgroup->parent;
    __kmp_thread_free( thread, taskgroup );

    KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
}
#endif // OMP_40_ENABLED
//------------------------------------------------------
// __kmp_remove_my_task: remove a task from my own deque

static kmp_task_t *
__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
                      kmp_int32 is_constrained )
{
    kmp_task_t * task;
    kmp_taskdata_t * taskdata;
    kmp_thread_data_t *thread_data;
    kmp_uint32 tail;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL );  // Caller should check this condition

    thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];

    KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                  thread_data->td.td_deque_tail) );

    if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
        KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                      gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                      thread_data->td.td_deque_tail) );
        return NULL;
    }

    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

    if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                      gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                      thread_data->td.td_deque_tail) );
        return NULL;
    }

    tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;  // Wrap index.
    taskdata = thread_data -> td.td_deque[ tail ];

    if (is_constrained) {
        // we need to check if the candidate obeys the task scheduling constraint:
        // only a child of the current task can be scheduled
        kmp_taskdata_t * current = thread->th.th_current_task;
        kmp_int32 level = current->td_level;
        kmp_taskdata_t * parent = taskdata->td_parent;
        while ( parent != current && parent->td_level > level ) {
            parent = parent->td_parent;  // check generation up to the level of the current task
            KMP_DEBUG_ASSERT(parent != NULL);
        }
        if ( parent != current ) {
            // If the tail task is not a child, then no other child can appear in the deque.
            __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
            KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                          gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                          thread_data->td.td_deque_tail) );
            return NULL;
        }
    }

    thread_data -> td.td_deque_tail = tail;
    TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);

    __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );

    KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                  thread_data->td.td_deque_tail) );

    task = KMP_TASKDATA_TO_TASK( taskdata );
    return task;
}
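
// Task-scheduling-constraint sketch: with is_constrained, a candidate may only
// run if it is a descendant of the thread's current task.  The check walks
// taskdata->td_parent upward while the ancestor's td_level is deeper than the
// current task's level; reaching current proves descent, stopping short
// disproves it.  Since the owner pushes children at the tail, a tail task that
// fails the test implies no deeper entry can pass it, so we give up rather
// than scan the whole deque.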
//-----------------------------------------------------------
// __kmp_steal_task: remove a task from another thread's deque
// Assume that the calling thread has already checked existence of
// task_team thread_data before calling this routine.

static kmp_task_t *
__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
                  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
                  kmp_int32 is_constrained )
{
    kmp_task_t * task;
    kmp_taskdata_t * taskdata;
    kmp_thread_data_t *victim_td, *threads_data;
    kmp_int32 victim_tid;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    threads_data = task_team -> tt.tt_threads_data;
    KMP_DEBUG_ASSERT( threads_data != NULL );  // Caller should check this condition

    victim_tid = victim->th.th_info.ds.ds_tid;
    victim_td = & threads_data[ victim_tid ];

    KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
                  "head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );

    if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
         (TCR_PTR(victim->th.th_task_team) != task_team))  // GEH: why would this happen?
    {
        KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
                      "ntasks=%d head=%u tail=%u\n",
                      gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                      victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
        return NULL;
    }

    __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );

    // Check again after we acquire the lock
    if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
         (TCR_PTR(victim->th.th_task_team) != task_team))  // GEH: why would this happen?
    {
        __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
        KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
                      "ntasks=%d head=%u tail=%u\n",
                      gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                      victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
        return NULL;
    }

    KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );

    if ( !is_constrained ) {
        taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
        // Bump head pointer and Wrap.
        victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
    } else {
        // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
        kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;  // Wrap index.
        taskdata = victim_td -> td.td_deque[ tail ];
        // we need to check if the candidate obeys the task scheduling constraint:
        // only a descendant of the current task can be scheduled
        kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
        kmp_int32 level = current->td_level;
        kmp_taskdata_t * parent = taskdata->td_parent;
        while ( parent != current && parent->td_level > level ) {
            parent = parent->td_parent;  // check generation up to the level of the current task
            KMP_DEBUG_ASSERT(parent != NULL);
        }
        if ( parent != current ) {
            // If the tail task is not a descendant, no other task in the deque can be stolen.
            __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
            KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
                          "ntasks=%d head=%u tail=%u\n",
                          gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
                          task_team, victim_td->td.td_deque_ntasks,
                          victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
            return NULL;
        }
        victim_td -> td.td_deque_tail = tail;
    }
    if (*thread_finished) {
        // We need to un-mark this victim as a finished victim.  This must be done
        // before releasing the lock, or else other threads (starting with the
        // master victim) might be prematurely released from the barrier!!!
        kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );

        KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
                      gtid, count + 1, task_team) );

        *thread_finished = FALSE;
    }
    TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);

    __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );

    KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
                  "ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                  victim_td->td.td_deque_tail) );

    task = KMP_TASKDATA_TO_TASK( taskdata );
    return task;
}
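
// Victim-side note: unconstrained thieves take from td_deque_head (the oldest,
// typically largest-granularity tasks), while the constrained path inspects
// td_deque_tail, where the victim's most recently pushed descendants live,
// since only those can satisfy the scheduling constraint.  The
// unfinished_threads increment above un-marks a previously "finished" thief
// before the lock is released; otherwise the master could leave the barrier
// while the stolen task is still running.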
//------------------------------------------------------------------------------
// __kmp_execute_tasks_template: execute tasks until either the condition tracked
// by *flag is satisfied (return TRUE) or no more tasks can be found (FALSE).
//
// final_spin is TRUE if this is the spin at the release barrier.
// thread_finished indicates whether the thread is finished executing all the
// tasks it has on its deque, and is at the release barrier.
// flag == NULL means execute whatever tasks are available and return.

template <class C>
static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
                                               int *thread_finished
                                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    kmp_task_team_t *     task_team;
    kmp_thread_data_t *   threads_data;
    kmp_task_t *          task;
    kmp_taskdata_t *      current_task = thread -> th.th_current_task;
    volatile kmp_uint32 * unfinished_threads;
    kmp_int32             nthreads, last_stolen, k, tid;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );

    task_team = thread -> th.th_task_team;
    KMP_DEBUG_ASSERT( task_team != NULL );

    KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
                  gtid, final_spin, *thread_finished) );

    threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
    KMP_DEBUG_ASSERT( threads_data != NULL );

    nthreads = task_team -> tt.tt_nproc;
    unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
#if OMP_41_ENABLED
    KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);  // proxy tasks allow nthreads == 1
#else
    KMP_DEBUG_ASSERT( nthreads > 1 );
#endif
    KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );

    // Execute tasks from own deque until it is empty.
  start:
    while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
            if ( itt_sync_obj == NULL ) {
                // we are at fork barrier where we could not get the object reliably
                itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
            }
            __kmp_itt_task_starting( itt_sync_obj );
        }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        __kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */

        // If this thread is only partway through the barrier and the condition is
        // met, then return now, so that the barrier gather/release pattern can
        // proceed.  If this thread is in the last spin loop in the barrier,
        // waiting to be released, we know that the termination condition will not
        // be satisfied, so don't waste any cycles checking it.
        if (flag == NULL || (!final_spin && flag->done_check())) {
            KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
            return TRUE;
        }
        KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
    }

    // This thread's work queue is empty.  If we are in the final spin loop of the
    // barrier, check and see if the termination condition is satisfied.
    if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
    {
        // First, decrement the #unfinished threads, if that has not already been
        // done.  This decrement might be to the spin location, and result in the
        // termination condition being satisfied.
        if (! *thread_finished) {
            kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
            KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
                          gtid, count, task_team) );
            *thread_finished = TRUE;
        }

        // It is now unsafe to reference thread->th.th_team !!!
        // Decrementing tt_unfinished_threads can allow the master thread to pass
        // through the barrier, where it might reset each thread's th.th_team field
        // for the next parallel region.  If we can steal more work, we know that
        // this has not happened yet.
        if (flag != NULL && flag->done_check()) {
            KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
            return TRUE;
        }
    }

    if ( nthreads == 1 )
        return FALSE;     // nobody to steal from

    // Try to steal from the last place I stole from successfully.
    tid = thread -> th.th_info.ds.ds_tid;
    last_stolen = threads_data[ tid ].td.td_deque_last_stolen;

    if (last_stolen != -1) {
        kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;

        while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
                                         thread_finished, is_constrained )) != NULL)
        {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
                if ( itt_sync_obj == NULL ) {
                    // we are at fork barrier where we could not get the object reliably
                    itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
                }
                __kmp_itt_task_starting( itt_sync_obj );
            }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            __kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
            if ( itt_sync_obj != NULL )
                __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */

            // Check to see if this thread can proceed.
            if (flag == NULL || (!final_spin && flag->done_check())) {
                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
                              gtid) );
                return TRUE;
            }
            KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task

            // If the execution of the stolen task resulted in more tasks being
            // placed on our run queue, then restart the whole process.
            if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
                              gtid) );
                goto start;
            }
        }

        // Don't give priority to stealing from this thread anymore.
        threads_data[ tid ].td.td_deque_last_stolen = -1;

        // The victim's work queue is empty.  If we are in the final spin loop of
        // the barrier, check and see if the termination condition is satisfied.
        if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
        {
            if (! *thread_finished) {
                kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
                KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
                              "task_team=%p\n", gtid, count, task_team) );
                *thread_finished = TRUE;
            }

            // If __kmp_tasking_mode != tskm_immediate_exec,
            // then it is now unsafe to reference thread->th.th_team !!!
            if (flag != NULL && flag->done_check()) {
                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
                              gtid) );
                return TRUE;
            }
        }
    }

    // Find a different thread to steal work from.  Pick a random thread.
    // My initial plan was to cycle through all the threads, and only return if we
    // tried to steal from every thread, and failed.  Arch says that's not such a
    // great idea.
  new_victim:
    k = __kmp_get_random( thread ) % (nthreads - 1);
    if ( k >= thread -> th.th_info.ds.ds_tid ) {
        ++k;    // Adjusts random distribution to exclude self
    }
    {
        kmp_info_t *other_thread = threads_data[k].td.td_thr;

        // There is a slight chance that __kmp_enable_tasking() did not wake up all
        // threads waiting at the barrier.  If this thread is sleeping, then wake it up.
        if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
             (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
             (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
        {
            __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
            // A sleeping thread should not have any tasks on its queue.  There is a
            // slight possibility that it resumes, steals a task from another thread,
            // which spawns more tasks, all in the time that it takes this thread to
            // check => don't assert that the victim's queue is empty.  Try stealing
            // from a different thread.
            goto new_victim;
        }

        // Now try to steal work from the selected thread.
        while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
                                         thread_finished, is_constrained )) != NULL)
        {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
                if ( itt_sync_obj == NULL ) {
                    // we are at fork barrier where we could not get the object reliably
                    itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
                }
                __kmp_itt_task_starting( itt_sync_obj );
            }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            __kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
            if ( itt_sync_obj != NULL )
                __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */

            // Try stealing from this victim again, in the future.
            threads_data[ tid ].td.td_deque_last_stolen = k;

            // Check to see if this thread can proceed.
            if (flag == NULL || (!final_spin && flag->done_check())) {
                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
                              gtid) );
                return TRUE;
            }
            KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task

            // If the execution of the stolen task resulted in more tasks being
            // placed on our run queue, then restart the whole process.
            if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
                              gtid) );
                goto start;
            }
        }

        // The victim's work queue is empty.  If we are in the final spin loop of
        // the barrier, re-check the termination condition before picking a new
        // victim, since finding one involves a lot of cache misses.
        if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
        {
            if (! *thread_finished) {
                kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
                KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
                              "task_team=%p\n",
                              gtid, count, task_team) );
                *thread_finished = TRUE;
            }

            // If __kmp_tasking_mode != tskm_immediate_exec,
            // then it is now unsafe to reference thread->th.th_team !!!
            if (flag != NULL && flag->done_check()) {
                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
                return TRUE;
            }
        }
    }

    KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
    return FALSE;
}
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
                           int *thread_finished
                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
                           int *thread_finished
                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
                               int *thread_finished
                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
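
// Note on the template: parameterizing over the flag class C means
// flag->done_check() in the scheduling loop is resolved statically for each
// flag width (kmp_flag_32, kmp_flag_64, kmp_flag_oncore) rather than through
// a virtual call; the three wrappers above are the only instantiations, so
// the template body stays within this translation unit.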
//-----------------------------------------------------------------------------
// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
// next barrier so they can assist in executing enqueued tasks.
// First thread in allocates the task team atomically.

static void
__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
{
    kmp_team_t *team = this_thr->th.th_team;
    kmp_thread_data_t *threads_data;
    int nthreads, i, is_init_thread;

    KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
                    __kmp_gtid_from_thread( this_thr ) ) );

    KMP_DEBUG_ASSERT(task_team != NULL);
    KMP_DEBUG_ASSERT(team != NULL);

    nthreads = task_team->tt.tt_nproc;
    KMP_DEBUG_ASSERT(nthreads > 0);
    KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);

    // Allocate or increase the size of threads_data if necessary
    is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );

    if (!is_init_thread) {
        // Some other thread already set up the array.
        KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
                        __kmp_gtid_from_thread( this_thr ) ) );
        return;
    }
    threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
    KMP_DEBUG_ASSERT( threads_data != NULL );

    if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
         ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
    {
        // Release any threads sleeping at the barrier, so that they can steal
        // tasks and execute them.  In extra barrier mode, tasks do not sleep
        // at the separate tasking barrier, so this isn't a problem.
        for (i = 0; i < nthreads; i++) {
            volatile void *sleep_loc;
            kmp_info_t *thread = threads_data[i].td.td_thr;

            if (i == this_thr->th.th_info.ds.ds_tid) {
                continue;
            }
            // Since we haven't locked the thread's suspend mutex lock at this
            // point, there is a small window where a thread might be putting
            // itself to sleep, but hasn't set the th_sleep_loc field yet.
            // To work around this, __kmp_execute_tasks_template() periodically
            // checks whether other threads are sleeping (using the same random
            // mechanism that is used for task stealing) and awakens them if they are.
            if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
            {
                KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                                __kmp_gtid_from_thread( this_thr ),
                                __kmp_gtid_from_thread( thread ) ) );
                __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
            }
            else {
                KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                                __kmp_gtid_from_thread( this_thr ),
                                __kmp_gtid_from_thread( thread ) ) );
            }
        }
    }

    KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
                    __kmp_gtid_from_thread( this_thr ) ) );
}
/* ------------------------------------------------------------------------ */
// Task team data structure management:
// freed task teams are kept on a global free list rather than deallocated,
// so they can be recycled cheaply.

static kmp_task_team_t *__kmp_free_task_teams = NULL;  // Free list for task_team data structures

// Lock for task team data structures
static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
//------------------------------------------------------------------------------
// __kmp_alloc_task_deque:
// Allocates a task deque for a particular thread, and initializes the necessary
// data structures relating to the deque.  This only happens once per thread per
// task team since task teams are recycled.  No lock is needed during allocation
// since each thread allocates its own deque.

static void
__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
{
    __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );

    // Initialize last stolen task field to "none"
    thread_data -> td.td_deque_last_stolen = -1;

    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );

    KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
                    __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
    // Allocate space for task deque and zero it.  Cannot use __kmp_thread_calloc()
    // because threads not around for kmp_reap_task_team().
    thread_data -> td.td_deque = (kmp_taskdata_t **)
            __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
}
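
// Sizing note: the deque is allocated once per thread per task team at its
// full capacity of TASK_DEQUE_SIZE entries (a power of two; e.g. 256 when
// TASK_DEQUE_BITS == 8 -- the exact value depends on the kmp.h configuration).
// __kmp_push_task returns TASK_NOT_PUSHED on overflow instead of growing the
// deque, forcing the encountering thread to execute the task immediately.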
//------------------------------------------------------------------------------
// __kmp_free_task_deque:
// Deallocates a task deque for a particular thread.  Happens at library
// deallocation, so don't need to reset all thread data fields.

static void
__kmp_free_task_deque( kmp_thread_data_t *thread_data )
{
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

    if ( thread_data -> td.td_deque != NULL ) {
        TCW_4(thread_data -> td.td_deque_ntasks, 0);
        __kmp_free( thread_data -> td.td_deque );
        thread_data -> td.td_deque = NULL;
    }
    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

#ifdef BUILD_TIED_TASK_STACK
    // GEH: Figure out what to do here for td_susp_tied_tasks
    if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
        __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
    }
#endif // BUILD_TIED_TASK_STACK
}
//------------------------------------------------------------------------------
// __kmp_realloc_task_threads_data:
// Allocates a threads_data array for a task team, either by allocating an
// initial array or enlarging an existing array.  Only the first thread to get
// the lock allocs or enlarges the array and re-initializes the array elements.
// That thread returns "TRUE", the rest return "FALSE".
// Assumes that the new array size is given by task_team -> tt.tt_nproc.
// The current size is given by task_team -> tt.tt_max_threads.

static int
__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
{
    kmp_thread_data_t ** threads_data_p;
    kmp_int32 nthreads, maxthreads;
    int is_init_thread = FALSE;

    if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
        // Already reallocated and initialized.
        return FALSE;
    }

    threads_data_p = & task_team -> tt.tt_threads_data;
    nthreads   = task_team -> tt.tt_nproc;
    maxthreads = task_team -> tt.tt_max_threads;

    // All threads must lock when they encounter the first task of the implicit
    // task region to make sure threads_data fields are (re)initialized before used.
    __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );

    if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
        // first thread to enable tasking
        kmp_team_t *team = thread -> th.th_team;
        int i;

        is_init_thread = TRUE;
        if ( maxthreads < nthreads ) {

            if ( *threads_data_p != NULL ) {
                kmp_thread_data_t *old_data = *threads_data_p;
                kmp_thread_data_t *new_data = NULL;

                KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
                                "threads data for task_team %p, new_size = %d, old_size = %d\n",
                                __kmp_gtid_from_thread( thread ), task_team,
                                nthreads, maxthreads ) );
                // Reallocate threads_data to have more elements than current array.
                // Cannot use __kmp_thread_realloc() because threads not around for
                // kmp_reap_task_team().  Note all new array entries are initialized
                // to zero by __kmp_allocate().
                new_data = (kmp_thread_data_t *)
                            __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
                // copy old data to new data
                KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
                              (void *) old_data,
                              maxthreads * sizeof(kmp_thread_data_t) );

#ifdef BUILD_TIED_TASK_STACK
                // GEH: Figure out if this is the right thing to do
                for (i = maxthreads; i < nthreads; i++) {
                    kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
                    __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
                }
#endif // BUILD_TIED_TASK_STACK
                // Install the new data and free the old data
                (*threads_data_p) = new_data;
                __kmp_free( old_data );
            }
            else {
                KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
                                "threads data for task_team %p, size = %d\n",
                                __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
                // Make the initial allocation for threads_data array, and zero entries.
                // Cannot use __kmp_thread_calloc() because threads not around for
                // kmp_reap_task_team().
                *threads_data_p = (kmp_thread_data_t *)
                                  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
#ifdef BUILD_TIED_TASK_STACK
                // GEH: Figure out if this is the right thing to do
                for (i = 0; i < nthreads; i++) {
                    kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
                    __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
                }
#endif // BUILD_TIED_TASK_STACK
            }
            task_team -> tt.tt_max_threads = nthreads;
        }
        else {
            // If array has (more than) enough elements, go ahead and use it
            KMP_DEBUG_ASSERT( *threads_data_p != NULL );
        }

        // initialize threads_data pointers back to thread_info structures
        for (i = 0; i < nthreads; i++) {
            kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
            thread_data -> td.td_thr = team -> t.t_threads[i];

            if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
                // The last stolen field survives across teams and barriers, and the
                // number of threads may have changed; reset it to "none".
                thread_data -> td.td_deque_last_stolen = -1;
            }
        }

        TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
    }

    __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
    return is_init_thread;
}
//------------------------------------------------------------------------------
// __kmp_free_task_threads_data:
// Deallocates a threads_data array for a task team, including any attached
// tasking deques.  Only occurs at library shutdown.

static void
__kmp_free_task_threads_data( kmp_task_team_t *task_team )
{
    __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
    if ( task_team -> tt.tt_threads_data != NULL ) {
        int i;
        for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
            __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
        }
        __kmp_free( task_team -> tt.tt_threads_data );
        task_team -> tt.tt_threads_data = NULL;
    }
    __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
}
//------------------------------------------------------------------------------
// __kmp_allocate_task_team:
// Allocates a task team associated with a specific team, taking it from the
// global task team free list if possible.  Also initializes data structures.

static kmp_task_team_t *
__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
{
    kmp_task_team_t *task_team = NULL;
    int nthreads;

    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
                    (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );

    if (TCR_PTR(__kmp_free_task_teams) != NULL) {
        // Take a task team from the task team pool
        __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
        if (__kmp_free_task_teams != NULL) {
            task_team = __kmp_free_task_teams;
            TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
            task_team -> tt.tt_next = NULL;
        }
        __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
    }

    if (task_team == NULL) {
        KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
                        "task team for team %p\n",
                        __kmp_gtid_from_thread( thread ), team ) );
        // Allocate a new task team, which will be first on the free list after it
        // is deallocated.  Cannot use __kmp_thread_malloc() because threads not
        // around for kmp_reap_task_team().  __kmp_allocate() zeroes the memory.
        task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
        __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
    }

    TCW_4(task_team -> tt.tt_found_tasks, FALSE);
#if OMP_41_ENABLED
    TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
#endif
    task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;

    TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
    TCW_4( task_team -> tt.tt_active, TRUE );
    TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);

    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
                    (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
    return task_team;
}
//------------------------------------------------------------------------------
// __kmp_free_task_team:
// Frees the task team associated with a specific thread, and adds it to the
// global task team free list.

static void
__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
{
    KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
                    thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );

    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );

    // Put task team back on free list
    __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );

    KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
    task_team -> tt.tt_next = __kmp_free_task_teams;
    TCW_4(task_team -> tt.tt_found_tasks, FALSE);
    TCW_PTR(__kmp_free_task_teams, task_team);

    __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
}
//------------------------------------------------------------------------------
// __kmp_reap_task_teams:
// Free all the task teams on the task team free list.  Should only be done
// during library shutdown.  Cannot do anything that needs a thread structure
// or gtid since they are already gone.

void
__kmp_reap_task_teams( void )
{
    kmp_task_team_t *task_team;

    if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
        // Free all task_teams on the free list
        __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
        while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
            __kmp_free_task_teams = task_team -> tt.tt_next;
            task_team -> tt.tt_next = NULL;

            // Free threads_data if necessary
            if ( task_team -> tt.tt_threads_data != NULL ) {
                __kmp_free_task_threads_data( task_team );
            }
            __kmp_free( task_team );
        }
        __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
    }
}
//------------------------------------------------------------------------------
// __kmp_unref_task_team: Remove one thread's reference to the task team,
// freeing it when the reference count reaches 0.
void
__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
{
    kmp_uint32 ref_ct;

    ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;

    KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
                    __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );

    if ( ref_ct == 0 ) {
        __kmp_free_task_team( thread, task_team );
    }

    TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
}
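// KMP_TEST_THEN_DEC32 returns the value held *before* the decrement, which is
// why the code above subtracts 1 to obtain the new reference count.  A minimal
// compiled-out sketch of the invariant, with hypothetical variable names:
#if 0
kmp_int32 rc = 3;                                   // current reference count
kmp_int32 old_rc = KMP_TEST_THEN_DEC32( &rc );      // old_rc == 3, rc is now 2
kmp_int32 new_rc = old_rc - 1;                      // new_rc == 2 == rc
if ( new_rc == 0 ) {
    // Last reference dropped; only this caller may free the object, because
    // exactly one decrementer can observe the transition to zero.
}
#endif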
//------------------------------------------------------------------------------
// __kmp_wait_to_unref_task_teams: Some threads could still be in the fork
// barrier release code, possibly trying to steal tasks; wait for each thread
// in the pool to unreference its task team.
void
__kmp_wait_to_unref_task_teams(void)
{
    kmp_info_t *thread;
    kmp_uint32 spins;
    int done;

    KMP_INIT_YIELD( spins );

    for (;;) {
        done = TRUE;

        // TODO: GEH - some sync may be necessary in case threads are added to
        // the pool during this traversal.
        for (thread = (kmp_info_t *)__kmp_thread_pool;
             thread != NULL;
             thread = thread->th.th_next_pool)
        {
#if KMP_OS_WINDOWS
            DWORD exit_val;
#endif
            if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
                KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                                __kmp_gtid_from_thread( thread ) ) );
                continue;
            }
#if KMP_OS_WINDOWS
            // TODO: GEH - add this check for Linux* OS / OS X* as well?
            if (!__kmp_is_thread_alive(thread, &exit_val)) {
                if (TCR_PTR(thread->th.th_task_team) != NULL) {
                    __kmp_unref_task_team( thread->th.th_task_team, thread );
                }
                continue;
            }
#endif
            done = FALSE;  // th_task_team pointer is still non-NULL for this thread

            KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
                            __kmp_gtid_from_thread( thread ) ) );

            if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
                volatile void *sleep_loc;
                // If the thread is sleeping, awaken it.
                if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
                    KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
                                    __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
                    __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
                }
            }
        }
        if (done) {
            break;
        }

        // If oversubscribed (or in throughput mode), yield while waiting.
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
}
//------------------------------------------------------------------------------
// __kmp_task_team_setup: Create a task_team for the current team, reusing an
// already created, unused one if it exists.
void
__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // If this task_team hasn't been created yet, allocate it. It will be used in
    // the region after the next.  If it already exists, it is the current task
    // team and shouldn't be touched yet, as it may still be in use.
    if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
        team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
                      __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
                      ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
    }

    // After threads exit the release phase, they will call sync and then point to
    // this other task_team; make sure it is allocated and properly initialized.
    if (both) {
        int other_team = 1 - this_thr->th.th_task_state;
        if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) {
            team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
            KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
                          __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
                          ((team != NULL) ? team->t.t_id : -1), other_team ));
        }
    }
}
//------------------------------------------------------------------------------
// __kmp_task_team_sync: Propagation of task team data from team to threads,
// which happens just after the release phase of a team barrier.
void
__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // In case this thread never saw that the task team was no longer active,
    // unref/deallocate it now.
    if ( this_thr->th.th_task_team != NULL ) {
        if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
            KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
            KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team (%p) is not active, unrefing\n",
                          __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team));
            __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
        }
        else {
            // We are re-using a task team that was never enabled.
            KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
        }
    }

    // Toggle the th_task_state parity bit to switch which task_team this thread
    // refers to, then propagate the pointer from the team struct to the thread.
    this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
    KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to %p from Team #%d task team (parity=%d)\n",
                  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
                  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
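// th_task_state acts as a parity bit selecting one of the two task-team slots,
// so successive barriers ping-pong between t_task_team[0] and t_task_team[1]:
// one slot is in use while the other is set up for the next region.  A
// compiled-out sketch of the toggle, with hypothetical standalone names:
#if 0
int state = 0;                           // plays the role of th_task_state
void *task_team_slots[2] = { NULL, NULL };  // plays the role of t_task_team[2]
for (int barrier = 0; barrier < 4; barrier++) {
    state = 1 - state;                   // toggle parity: 1, 0, 1, 0, ...
    void *current = task_team_slots[state];
    // Threads now reference `current`; the other slot can be (re)allocated by
    // the master for the next region without racing against spinning workers.
}
#endif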
//------------------------------------------------------------------------------
// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
// barrier gather phase.
void
__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
                      USE_ITT_BUILD_ARG(void * itt_sync_obj)
                      )
{
    kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );

    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
                      __kmp_gtid_from_thread(this_thr), task_team));
        // Worker threads may have dropped through to the release phase but could
        // still be executing tasks; wait here for them to complete.  To avoid
        // memory contention, only the master checks the termination condition.
        kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj));

        // Deactivate the old task team so worker threads stop referencing it
        // while spinning.
        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: setting active to false, setting local and team's pointer to NULL\n",
                      __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_41_ENABLED
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
        TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
#else
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
#endif
        TCW_SYNC_4( task_team->tt.tt_active, FALSE );
        KMP_MB();

        TCW_PTR(this_thr->th.th_task_team, NULL);
        team->t.t_task_team[this_thr->th.th_task_state] = NULL;
    }
}
//------------------------------------------------------------------------------
// __kmp_tasking_barrier: May only be called when __kmp_tasking_mode ==
// tskm_extra_barrier.  Executes all tasks prior to a regular or join barrier.
void
__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
{
    volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
    int flag = FALSE;
    KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );

#if USE_ITT_BUILD
    KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
#endif /* USE_ITT_BUILD */
    kmp_flag_32 spin_flag(spin, 0U);
    while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
                                     USE_ITT_BUILD_ARG(NULL), 0 ) ) {
#if USE_ITT_BUILD
        KMP_FSYNC_SPIN_PREPARE( spin );
#endif /* USE_ITT_BUILD */
        if( TCR_4(__kmp_global.g.g_done) ) {
            if( __kmp_global.g.g_abort )
                __kmp_abort_thread( );
            break;
        }
        KMP_YIELD( TRUE );  // GH: We always yield here
    }
#if USE_ITT_BUILD
    KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
#endif /* USE_ITT_BUILD */
}
// __kmp_give_task puts a task into the queue of a given thread if:
//  - the queue for that thread was created, and
//  - there is space in that queue.
// Because of this, callers must re-check for space after acquiring the lock.
static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
{
    kmp_task_team_t * task_team = thread->th.th_task_team;
    kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    bool result = false;

    KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );

    KMP_DEBUG_ASSERT( task_team != NULL );

    if (thread_data -> td.td_deque == NULL ) {
        // There's no queue in this thread; find another one.  We're guaranteed
        // that at least one thread has a queue.
        KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
        return result;
    }

    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) {
        KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        return result;
    }

    __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );

    // Re-check for space now that the lock is held.
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) {
        KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        goto release_and_exit;
    }

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);

    result = true;
    KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );

release_and_exit:
    __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );

    return result;
}
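// The deque tail above wraps with a bitmask rather than a modulo, which
// requires TASK_DEQUE_SIZE to be a power of two (so that TASK_DEQUE_MASK ==
// TASK_DEQUE_SIZE - 1).  A minimal compiled-out sketch of the same ring-buffer
// arithmetic, with hypothetical RING_* names:
#if 0
enum { RING_SIZE = 256, RING_MASK = RING_SIZE - 1 };  // power of two is required
static void *ring[RING_SIZE];
static int tail = 0;

static void ring_push( void *item ) {
    ring[tail] = item;
    tail = ( tail + 1 ) & RING_MASK;   // cheap modulo: 255 wraps to 0
}
#endif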
/* The finish of a proxy task is split into two halves: the top half, which can
   be run from any thread, and the bottom half, which must be run from a thread
   of the team.  The bottom half must be queued back to the team before the
   parent's td_incomplete_child_tasks counter is decremented, so the top half is
   itself split around that enqueue; an extra "imaginary" child keeps the bottom
   half from freeing the task before the second top half has finished with it. */
static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    taskdata -> td_flags.complete = 1;   // mark the task as completed

    if ( taskdata->td_taskgroup )
        KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );

    // Create an imaginary child so the bottom half cannot release the task
    // before the second top half has completed.
    TCR_4(taskdata->td_incomplete_child_tasks++);
}
static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    kmp_int32 children = 0;

    // KMP_TEST_THEN_DEC32 returns the old value; "- 1" yields the new count.
    children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Remove the imaginary child.
    TCR_4(taskdata->td_incomplete_child_tasks--);
}
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 );  // top half must run before bottom half

    // Spin until the imaginary child created by the first top half is removed
    // by the second top half; only then is it safe to free the task.
    while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;

    __kmp_release_deps(gtid,taskdata);
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
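// The imaginary-child counter orders the two top halves against the bottom
// half: it stays positive while the top halves may still touch the task, so
// the spin above cannot free it prematurely.  Compiled-out sketch of the
// ordering only (the hypothetical `guard` stands in for the counter; the real
// code uses atomic decrement, not a plain --):
#if 0
volatile kmp_int32 guard = 0;
// Top halves (producer side):
//   guard++;   first top half: block the bottom half's cleanup
//   ... enqueue the bottom half, finish remaining top-half work ...
//   guard--;   second top half: release the bottom half
// Bottom half (consumer side):
while ( TCR_4(guard) > 0 ) ;   // spin until both top halves are done
// now safe to release dependences and free the task
#endif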
/*!
@ingroup TASKING
@param gtid Global Thread ID of encountering thread
@param ptask Task whose execution is completed

Execute the completion of a proxy task.
*/
void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);
    __kmp_second_top_half_finish_proxy(taskdata);
    __kmp_bottom_half_finish_proxy(gtid,ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
}
/*!
@ingroup TASKING
@param ptask Task whose execution is completed

Execute the completion of a proxy task from a thread that may not belong to the team.
*/
void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);

    // Enqueue the task so that a thread inside the team runs the bottom half.
    kmp_team_t * team = taskdata->td_team;
    kmp_int32 nthreads = team->t.t_nproc;
    kmp_info_t *thread;
    kmp_int32 k = 0;

    do {
        // Linearly scan for a thread that can take the task (ideally this would
        // start at a random thread, but __kmp_get_random is unavailable here).
        k = (k+1) % nthreads;
        thread = team->t.t_threads[k];
    } while ( !__kmp_give_task( thread, k, ptask ) );

    __kmp_second_top_half_finish_proxy(taskdata);

    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
}