#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0
#define KMP_USE_POOLED_ALLOC 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
static void __kmp_partition_places( kmp_team_t *team );
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int  __kmp_expand_threads( int nWish, int nNeed );
static int  __kmp_unregister_root_other_thread( int gtid );
static void __kmp_unregister_library( void );
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
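
/*
 * Editorial note: the two routines below look up the calling thread's global
 * thread id (gtid).  Depending on __kmp_gtid_mode they use native TLS
 * (KMP_TDATA_GTID), keyed TLS, or a fallback that scans the registered
 * threads' stack extents to find the stack containing the current stack
 * address.
 */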
int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t **other_threads;
    size_t stack_data;
    char *stack_addr;
    size_t stack_size;
    char *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /* The stack grows down; search the active threads for the one whose stack
       contains the current stack address. */
    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated stack size
                   is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* The internal algorithm failed; fall back to TLS to determine our gtid. */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}
int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
    }

    KMP_DEBUG_ASSERT( gtid >= 0 );

    return gtid;
}
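
/*
 * Editorial note: __kmp_check_stack_overlap() below is a debugging aid.  When
 * KMP_STORAGE_MAP is on it prints the stack extent of the given thread, and
 * when extended checks are enabled it compares that extent against every
 * other registered thread and aborts with a StackOverlap message if two
 * stacks intersect.
 */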
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                                                      (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                                                      "th_%d stack (overlapped)",
                                                      __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}
/* Spin (yield) forever; used to park threads, e.g. during abnormal termination. */
void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while ( !done ) {
        KMP_YIELD( 1 );
    }
}

#define MAX_MESSAGE 512
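
/*
 * Editorial note: __kmp_print_storage_map_gtid() formats one "OMP storage
 * map" line for an address range and, when KMP_PRINT_DATA_PLACEMENT is
 * enabled, additionally reports which memory node each page of the range
 * resides on.  All output goes through the bootstrap stdio lock.
 */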
void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
        if( __kmp_storage_map_verbose ) {
            node = __kmp_get_host_node(p1);
            if(node < 0)  /* node lookup failed, so don't try this next time */
                __kmp_storage_map_verbose = FALSE;
            else {
                char *last;
                int lastNode;
                int localProc = __kmp_get_cpu_from_gtid(gtid);

                p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
                p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
                if(localProc >= 0)
                    __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid, localProc>>1);
                else
                    __kmp_printf_no_lock("  GTID %d\n", gtid);
# if KMP_USE_PRCTL
                do {
                    last = p1;
                    lastNode = node;
                    /* This loop collates adjacent pages with the same host node. */
                    do {
                        (char*)p1 += PAGE_SIZE;
                    } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                    __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                         (char*)p1 - 1, lastNode);
                } while(p1 <= p2);
# else
                __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                                     (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
                if(p1 < p2) {
                    __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                         (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
                }
# endif
            }
        }
    } else
        __kmp_printf_no_lock("  %s\n", KMP_I18N_STR( StorageMapWarning ) );
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}
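
/*
 * Editorial note: __kmp_warn() below is the runtime's printf-style warning
 * channel; it is silenced when warnings are turned off (kmp_warnings_off).
 * The __kmp_abort_* routines that follow implement abnormal termination:
 * they flush the debug buffer, on Windows record SIGABRT in
 * __kmp_global.g.g_abort so other threads notice, and then park the
 * calling thread.
 */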
void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}
void
__kmp_abort_process()
{
    /* Later threads may stall here, but that's ok because abort() will kill them. */
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }

    if ( KMP_OS_WINDOWS ) {
        /* Let other threads know of abnormal termination and prevent deadlock
           if abort happened during library initialization or shutdown. */
        __kmp_global.g.g_abort = SIGABRT;
    }

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );
}

void
__kmp_abort_thread( void )
{
    /* TODO: Eliminate g_abort global variable and this function.
       In case of abort just call abort(); it will kill all the threads. */
    __kmp_infinite_loop();
}
/* Print out the storage map for the major kmp_info_t thread data structures. */
static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/* Print out the storage map for the major kmp_team_t team data structures. */
static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
static void __kmp_fini_allocator_thread() {}
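
/*
 * Editorial note: the KMP_DYNAMIC_LIB section below handles loading and
 * unloading of the runtime as a Windows DLL.  On PROCESS_DETACH the bootstrap
 * locks may still be held by threads the OS has already killed, so they are
 * forcibly re-initialized before the library shuts itself down.
 */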
#ifdef KMP_DYNAMIC_LIB

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    /* TODO: Change to __kmp_break_bootstrap_lock(). */
    __kmp_init_bootstrap_lock( lck );  /* make the lock released */
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;
    DWORD exit_val;

    /* Check that no other live threads (besides the detaching one) are still
       registered with the library before touching the locks. */
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break;  /* success */
    }

    /* Now it should be safe to check and reset the locks. */
    __kmp_reset_lock( &__kmp_forkjoin_lock );
#ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
#endif
}
BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ));
            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            /* lpReserved != NULL means the process is terminating (rather than
               FreeLibrary being called), so locks held by killed threads must
               be reset before shutting the library down. */
            if( lpReserved != NULL )
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );

            __kmp_internal_end_library( __kmp_gtid_get_specific() );
            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ));
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}
/* Change the library type to "status" and return the old type. */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  /* check whether KMP_LIBRARY=throughput */
    if ( status ) {
        __kmp_yield_init |= 1;          /* throughput => turnaround */
    } else {
        __kmp_yield_init &= ~1;         /* turnaround => throughput */
    }
    return old_status;
}
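
/*
 * Editorial note: __kmp_parallel_deo() and __kmp_parallel_dxo() below
 * implement the ordered-construct handshake.  The "deo" routine waits until
 * t_ordered.dt.t_value equals the calling thread's tid; the "dxo" routine
 * passes the turn to the next tid modulo the team size and, when OMPT blame
 * tracking is enabled, reports the release event.
 */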
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}
void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB();       /* Flush all pending memory write invalidates. */

        /* use the tid of the next thread in this team */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();       /* Flush all pending memory write invalidates. */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* The BARRIER for a SINGLE process section is always explicit. */
int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the single block */
        status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                             th->th.th_local.this_construct);
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
             th->th.th_teams_microtask == NULL &&
             team->t.t_active_level == 1 )
        {   /* Only report metadata by master of active team at level 1 */
            __kmp_itt_metadata_single( id_ref );
        }
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
    return status;
}

void
__kmp_exit_single( int gtid )
{
    __kmp_itt_single_end( gtid );
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}
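
/*
 * Editorial note: __kmp_reserve_threads() below decides how many threads a
 * new team may actually get.  It starts from the requested set_nthreads,
 * optionally shrinks it according to the dynamic adjustment mode (load
 * balance, thread limit, or random), then clips it against __kmp_max_nth and
 * against the capacity of the __kmp_threads array, emitting a one-time
 * CantFormThrTeam warning when dyn-var is false.  A return value of 1 means
 * the parallel region will be serialized.
 */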
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads )
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    /* If dyn-var is set, dynamically adjust the number of desired threads,
       according to the method specified by dynamic_mode. */
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                       : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));
        } else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                                master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                                master_tid, new_nthreads ));
            }
        }
    }

    /* Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT. */
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
                          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        /* If dyn-var is false, emit a 1-time warning. */
        if ( ! get__dynamic_2( parent_team, master_tid )
             && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                        master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    /* Check if the threads array is large enough, or needs expanding. */
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > capacity ) {
        /* Expand the threads array. */
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
                            root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            /* The threads array was not expanded enough. */
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            /* If dyn-var is false, emit a 1-time warning. */
            if ( ! get__dynamic_2( parent_team, master_tid )
                 && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                } else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}
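
/*
 * Editorial note: __kmp_fork_team_threads() below populates a newly allocated
 * team: it installs the master as thread 0, forks or recycles the workers,
 * aligns each worker's barrier arrival counters with the team's, and (when
 * nested hot teams are enabled) records the team in the master's hot-teams
 * array so it can be reused by later parallel regions at the same level.
 */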
static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int i;
    int use_hot_team;

    KA_TRACE( 10, ( "__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) {
        int level = team->t.t_active_level - 1;  /* index in the array of hot teams */
        if( master_th->th.th_teams_microtask ) {  /* inside the teams construct? */
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level;  /* level was not increased in teams construct for team_of_masters */
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level;  /* level was not increased in teams construct for team_of_workers before the parallel */
            }
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                /* hot team has already been allocated for given level */
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1;  /* the team is ready to use */
            } else {
                use_hot_team = 0;  /* threads are not allocated yet */
                hot_teams[level].hot_team = team;  /* remember new hot team */
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );

            /* align team and thread arrived states */
            KA_TRACE( 20, ( "__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );

            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;

            {   /* Initialize threads' barrier data. */
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
                }
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif
    }

    KMP_MB();
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Propagate any changes to the floating point control registers out to the team. */
static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        /* Get master values of FPU control flags (both X87 and vector) */
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            team->t.t_x87_fpu_control_word = x87_fpu_control_word;
        }
        if ( team->t.t_mxcsr != mxcsr ) {
            team->t.t_mxcsr = mxcsr;
        }
        /* Other code in the runtime needs to know whether to restore these values. */
        if (!team->t.t_fp_control_saved) {
            team->t.t_fp_control_saved = TRUE;
        }
    } else {
        if (team->t.t_fp_control_saved)
            team->t.t_fp_control_saved = FALSE;
    }
}

/* Do the opposite: set the hardware registers to the updated values from the team. */
static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x)  ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc );  /* forward declaration */
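
/*
 * Editorial note: __kmp_serialized_parallel() below runs a parallel region on
 * a team of one.  It moves the calling thread onto its cached serial team
 * (allocating one on first use), pushes the current task and ICVs so nested
 * levels can restore them, bumps t_serialized/t_level for nested serialized
 * regions, and allocates a dispatch buffer per nesting level.
 */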
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ( "__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        /* No proc_bind clause was specified, so use the current value of proc-bind-var. */
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    /* Reset for next parallel region */
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        /* Nested level will be an index in the nested nthreads array */
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used; we need to allocate a new one */
            kmp_team_t *new_team;
            int tid = this_thr->th.th_info.ds.ds_tid;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );
        }
        else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        copy_icvs( & this_thr->th.th_current_task->td_icvs,
                   & this_thr->th.th_current_task->td_parent->td_icvs );

        /* Thread value exists in the nested nthreads array for the next nested level */
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

#if OMP_40_ENABLED
        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }
#endif /* OMP_40_ENABLED */

        serial_team->t.t_pkfn = (microtask_t)( ~0 );
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc = 1;
        this_thr->th.th_team_master = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl( serial_team );

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

        KMP_MB();
    } else {
        /* this serialized team is already being used; just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        /* Nested level will be an index in the nested nthreads array */
        int level = this_thr->th.th_team->t.t_level;
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
            serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_MB();
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

    /* Mark the start of the "parallel" region for VTune; only one notification scheme is used. */
    if ( serial_team->t.t_level == 1
#if OMP_40_ENABLED
         && this_thr->th.th_teams_microtask == NULL
#endif
       )
    {
        if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
        }
        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode )
        {
            this_thr->th.th_ident = loc;
            /* 0 - no barriers; 1 - serialized parallel */
            __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
        }
    }
}
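
/*
 * Editorial note: __kmp_fork_call() below is the main fork path.  It decides
 * how many threads the region gets (__kmp_reserve_threads), serializes the
 * region when only one thread is available or nesting is disabled, otherwise
 * allocates a team, copies the microtask arguments, propagates ICVs and FP
 * control, releases the workers via __kmp_internal_fork(), and finally has
 * the master invoke the microtask itself.  OMPT callbacks (parallel_begin,
 * implicit_task_begin/end, parallel_end) and ITT frame notifications are
 * emitted along the way when those are enabled.
 */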
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int
__kmp_fork_call(
    ident_t   * loc,
    int         gtid,
    enum fork_context_e  call_context,  /* Intel, GNU, ... */
    kmp_int32   argc,
#if OMPT_SUPPORT
    void       *unwrapped_task,
#endif
    microtask_t microtask,
    launch_t    invoker,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
    va_list   * ap
#else
    va_list     ap
#endif
    )
{
    void **argv;
    int i;
    int master_tid;
    int master_this_cons;
    kmp_team_t *team;
    kmp_team_t *parent_team;
    kmp_info_t *master_th;
    kmp_root_t *root;
    int nthreads;
    int master_active;
    int master_set_numthreads;
    int level;
    int active_level;
    int teams_level;
#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;
#endif

    KA_TRACE( 20, ( "__kmp_fork_call: enter T#%d\n", gtid ));
    if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
        /* Some systems prefer the stack for the root thread(s) to start with a gap
           from the parent stack to prevent false sharing. */
        void *dummy = KMP_ALLOCA(__kmp_stkpadding);
        /* These lines keep the allocation from being optimized out. */
        if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
            __kmp_stkpadding += (short)((kmp_int64)dummy);
    }

    /* initialize if needed */
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    /* setup current data */
    master_th        = __kmp_threads[ gtid ];
    parent_team      = master_th->th.th_team;
    master_tid       = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root             = master_th->th.th_root;
    master_active    = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
    ompt_parallel_id_t ompt_parallel_id;
    ompt_task_id_t ompt_task_id;
    ompt_frame_t *ompt_frame;
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

    if (ompt_status & ompt_status_track) {
        ompt_parallel_id = __ompt_parallel_id_new(gtid);
        ompt_task_id = __ompt_get_task_id_internal(0);
        ompt_frame = __ompt_get_task_frame_internal(0);
    }
#endif
    /* Nested level will be an index in the nested nthreads array */
    level        = parent_team->t.t_level;
    active_level = parent_team->t.t_active_level;
    teams_level  = master_th->th.th_teams_level;  /* needed to check nesting inside the teams construct */

#if KMP_NESTED_HOT_TEAMS
    p_hot_teams = &master_th->th.th_hot_teams;
    if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
        *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
                            sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1;  /* it is either actual or not needed (when active_level > 0) */
    }
#endif

    if ( __kmp_debugging ) {  /* Let the debugger override the number of threads. */
        int nth = __kmp_omp_num_threads( loc );
        if ( nth != 0 ) {  /* 0 means the debugger does not want to change it */
            master_set_numthreads = nth;
        }
    }

#if OMPT_SUPPORT
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
        int team_size = master_set_numthreads;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
            ompt_task_id, ompt_frame, ompt_parallel_id,
            team_size, unwrapped_task);
    }
#endif
    master_th->th.th_ident = loc;

#if OMP_40_ENABLED
    if ( master_th->th.th_teams_microtask &&
         ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
        /* AC: this is the start of a parallel nested inside the teams construct.
           The team is actual (hot); workers are already waiting at the fork barrier. */
        parent_team->t.t_ident = loc;
        parent_team->t.t_argc  = argc;
        argv = (void**)parent_team->t.t_argv;
        for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
#else
            *argv++ = va_arg( ap, void * );
#endif
        /* Increment our nested depth levels, but not increase the serialization */
        if ( parent_team == master_th->th.th_serial_team ) {
            /* AC: we are in serialized parallel */
            KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
            parent_team->t.t_serialized--;  /* AC: need this so enquiry functions work correctly */

#if OMPT_SUPPORT
            void *dummy;
            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            if (ompt_status & ompt_status_track) {
                __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                        unwrapped_task, ompt_parallel_id);
                lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                /* OMPT implicit task begin */
                my_task_id = lw_taskteam.ompt_task_info.task_id;
                my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
                if ((ompt_status == ompt_status_track_callback) &&
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                        my_parallel_id, my_task_id);
                }

                /* OMPT state */
                master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
            } else {
                exit_runtime_p = &dummy;
            }
#endif /* OMPT_SUPPORT */

            __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                                    , exit_runtime_p
#endif
                                    );

#if OMPT_SUPPORT
            if (ompt_status & ompt_status_track) {
                lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                if ((ompt_status == ompt_status_track_callback) &&
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                        ompt_parallel_id, ompt_task_id);
                }

                __ompt_lw_taskteam_unlink(master_th);
                /* reset the task id only after unlinking the task */
                lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                if ((ompt_status == ompt_status_track_callback) &&
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                        ompt_parallel_id, ompt_task_id);
                }
                master_th->th.ompt_thread_info.state = ompt_state_overhead;
            }
#endif
            return TRUE;
        }

        parent_team->t.t_pkfn  = microtask;
#if OMPT_SUPPORT
        parent_team->t.ompt_team_info.microtask = unwrapped_task;
#endif
        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
        parent_team->t.t_active_level ++;
        parent_team->t.t_level ++;

        /* Change number of threads in the team if requested */
        if ( master_set_numthreads ) {   /* the parallel has a num_threads clause */
            if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
                /* AC: can only reduce the number of threads dynamically, cannot increase */
                kmp_info_t **other_threads = parent_team->t.t_threads;
                parent_team->t.t_nproc = master_set_numthreads;
                for ( i = 0; i < master_set_numthreads; ++i ) {
                    other_threads[i]->th.th_team_nproc = master_set_numthreads;
                }
                /* Keep extra threads hot in the team for a possible next parallel */
            }
            master_th->th.th_set_nproc = 0;
        }

        KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
        __kmp_internal_fork( loc, gtid, parent_team );
        KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );

        /* Invoke microtask for MASTER thread */
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        if (! parent_team->t.t_invoke( gtid )) {
            KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
        }
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
        KMP_MB();       /* Flush all pending memory write invalidates. */

        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

        return TRUE;
    }
#endif /* OMP_40_ENABLED */
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
    }

    /* Determine the number of threads for the new team. */
    if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
        nthreads = 1;
    } else {
        int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
        nthreads = master_set_numthreads ?
            master_set_numthreads : get__nproc_2( parent_team, master_tid );
        if ( ( !get__nested(master_th) && (root->r.r_in_parallel && !enter_teams
             ) ) || ( __kmp_library == library_serial ) ) {
            KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
                            gtid, nthreads ));
            nthreads = 1;
        }
        if ( nthreads > 1 ) {
            /* determine how many new threads we can use */
            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

            nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads);
            if ( nthreads == 1 ) {
                /* free the lock for single-thread execution here; for multi-thread
                   execution it will be freed later, after the team is created */
                __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            }
        }
    }
    KMP_DEBUG_ASSERT( nthreads > 0 );

    /* If we temporarily changed the set number of threads then restore it now */
    master_th->th.th_set_nproc = 0;
1770 #if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1771 void * args[ argc ];
1773 void * * args = (
void**) KMP_ALLOCA( argc *
sizeof(
void * ) );
1776 KA_TRACE( 20, (
"__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
1780 if ( call_context == fork_context_intel ) {
1782 master_th->th.th_serial_team->t.t_ident = loc;
1786 master_th->th.th_serial_team->t.t_level--;
1791 void **exit_runtime_p;
1793 ompt_lw_taskteam_t lw_taskteam;
1795 if (ompt_status & ompt_status_track) {
1796 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1797 unwrapped_task, ompt_parallel_id);
1798 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1799 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
1801 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
1804 my_task_id = lw_taskteam.ompt_task_info.task_id;
1805 if ((ompt_status == ompt_status_track_callback) &&
1806 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1807 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1808 ompt_parallel_id, my_task_id);
1813 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1815 exit_runtime_p = &dummy;
1821 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
1829 if (ompt_status & ompt_status_track) {
1830 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
1833 if ((ompt_status == ompt_status_track_callback) &&
1834 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1835 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1836 ompt_parallel_id, ompt_task_id);
1840 __ompt_lw_taskteam_unlink(master_th);
1842 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1844 if ((ompt_status == ompt_status_track_callback) &&
1845 ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1846 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1847 ompt_parallel_id, ompt_task_id);
1849 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1852 }
            else if ( microtask == (microtask_t)__kmp_teams_master ) {
                KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
                team = master_th->th.th_team;
                team->t.t_invoke = invoker;
                __kmp_alloc_argv_entries( argc, team, TRUE );
                team->t.t_argc = argc;
                argv = (void**) team->t.t_argv;
                if ( ap ) {
                    for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                        *argv++ = va_arg( *ap, void * );
#else
                        *argv++ = va_arg( ap, void * );
#endif
                } else {
                    for( i=0; i < argc; ++i )
                        /* Get args from parent team for teams construct */
                        argv[i] = parent_team->t.t_argv[i];
                }
            }
            else {
#endif /* OMP_40_ENABLED */
                argv = args;
                for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );
#else
                    *argv++ = va_arg( ap, void * );
#endif
                KMP_MB();

#if OMPT_SUPPORT
                void *dummy;
                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                if (ompt_status & ompt_status_track) {
                    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                            unwrapped_task, ompt_parallel_id);
                    lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                    exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                    __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                    /* OMPT implicit task begin */
                    my_task_id = lw_taskteam.ompt_task_info.task_id;
                    my_parallel_id = ompt_parallel_id;
                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                            my_parallel_id, my_task_id);
                    }

                    /* OMPT state */
                    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
                } else {
                    exit_runtime_p = &dummy;
                }
#endif /* OMPT_SUPPORT */

                __kmp_invoke_microtask( microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                        , exit_runtime_p
#endif
                                        );

#if OMPT_SUPPORT
                if (ompt_status & ompt_status_track) {
                    lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                            my_parallel_id, my_task_id);
                    }

                    __ompt_lw_taskteam_unlink(master_th);
                    lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                            ompt_parallel_id, ompt_task_id);
                    }
                    master_th->th.ompt_thread_info.state = ompt_state_overhead;
                }
#endif
#if OMP_40_ENABLED
            }
#endif /* OMP_40_ENABLED */
        }
        else if ( call_context == fork_context_gnu ) {
#if OMPT_SUPPORT
            ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
                __kmp_allocate(sizeof(ompt_lw_taskteam_t));
            __ompt_lw_taskteam_init(lwt, master_th, gtid,
                                    unwrapped_task, ompt_parallel_id);

            lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
            lwt->ompt_task_info.frame.exit_runtime_frame = 0;
            __ompt_lw_taskteam_link(lwt, master_th);
#endif

            /* we were called from GNU native code */
            KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));
            return FALSE;
        }
        else {
            KMP_ASSERT2( call_context < fork_context_last,
                         "__kmp_fork_call: unknown fork_context parameter" );
        }

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));
        KMP_MB();
        return FALSE;
    }
    KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                    parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                    master_th->th.th_current_task->td_icvs.max_active_levels ) );
    master_th->th.th_current_task->td_flags.executing = 0;

#if OMP_40_ENABLED
    if ( !master_th->th.th_teams_microtask || level > teams_level )
#endif /* OMP_40_ENABLED */
    {
        /* Increment our nested depth level */
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
    }

    /* See if we need to make a copy of the ICVs. */
    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
        nthreads_icv = __kmp_nested_nth.nth[level+1];
    } else {
        nthreads_icv = 0;  /* don't update */
    }

#if OMP_40_ENABLED
    /* Figure out the proc_bind policy for the new team. */
    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;  /* proc_bind_default means don't update */
    if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    } else {
        if (proc_bind == proc_bind_default) {
            /* No proc_bind clause specified; use current proc-bind-var for this parallel region */
            proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
        }
        /* Figure the value of proc-bind-var for the child threads. */
        if ((level+1 < __kmp_nested_proc_bind.used)
            && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
        }
    }

    /* Reset for next parallel region */
    master_th->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if ((nthreads_icv > 0)
#if OMP_40_ENABLED
        || (proc_bind_icv != proc_bind_default)
#endif
       ) {
        kmp_internal_control_t new_icvs;
        copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
        new_icvs.next = NULL;
        if (nthreads_icv > 0) {
            new_icvs.nproc = nthreads_icv;
        }
#if OMP_40_ENABLED
        if (proc_bind_icv != proc_bind_default) {
            new_icvs.proc_bind = proc_bind_icv;
        }
#endif

        /* allocate a new parallel team */
        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
    } else {
        /* allocate a new parallel team */
        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &master_th->th.th_current_task->td_icvs, argc
                                   USE_NESTED_HOT_ARG(master_th) );
    }
    KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
    /* setup the new team */
    team->t.t_master_tid = master_tid;
    team->t.t_master_this_cons = master_this_cons;
    team->t.t_ident = loc;
    team->t.t_parent = parent_team;
    TCW_SYNC_PTR(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
    TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task);
#endif
    team->t.t_invoke = invoker;

#if OMP_40_ENABLED
    if ( !master_th->th.th_teams_microtask || level > teams_level ) {
#endif /* OMP_40_ENABLED */
        team->t.t_level        = parent_team->t.t_level + 1;
        team->t.t_active_level = parent_team->t.t_active_level + 1;
#if OMP_40_ENABLED
    } else {
        /* AC: do not increase the parallel level at the start of the teams construct */
        team->t.t_level        = parent_team->t.t_level;
        team->t.t_active_level = parent_team->t.t_active_level;
    }
#endif /* OMP_40_ENABLED */
    team->t.t_sched = get__sched_2(parent_team, master_tid);  /* set master's schedule as the new run-time schedule */

    /* Update the floating point rounding in the team if required. */
    propagateFPControl(team);

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        /* Set master's task team to the team's task team. Unless this is a hot team, it should be NULL. */
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
        KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                        parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );

        /* Store master's task_state on the stack */
        KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
        if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { /* increase size */
            kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz );
            for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
                new_stack[i] = master_th->th.th_task_state_memo_stack[i];
            }
            old_stack = master_th->th.th_task_state_memo_stack;
            master_th->th.th_task_state_memo_stack = new_stack;
            master_th->th.th_task_state_stack_sz *= 2;
            __kmp_free(old_stack);
        }
        master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
        master_th->th.th_task_state_top++;
        master_th->th.th_task_state = 0;

        master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state];
#if !KMP_NESTED_HOT_TEAMS
        KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
#endif
    }

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
                    gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
    KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
                      ( team->t.t_master_tid == 0 &&
                        ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
    KMP_MB();

    /* now, setup the arguments */
    argv = (void**)team->t.t_argv;
#if OMP_40_ENABLED
    if ( ap ) {
#endif /* OMP_40_ENABLED */
        for ( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
#else
            *argv++ = va_arg( ap, void * );
#endif
#if OMP_40_ENABLED
    } else {
        for ( i=0; i < argc; ++i )
            /* get args from parent team for teams construct */
            argv[i] = team->t.t_parent->t.t_argv[i];
    }
#endif /* OMP_40_ENABLED */

    /* now actually fork the threads */
    team->t.t_master_active = master_active;
    if (!root->r.r_active)  /* only do the assignment if it prevents cache ping-pong */
        root->r.r_active = TRUE;

    __kmp_fork_team_threads( root, team, master_th, gtid );
    __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );

#if OMPT_SUPPORT
    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif
    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

#if USE_ITT_BUILD
    if ( team->t.t_active_level == 1  /* only report frames at level 1 */
#if OMP_40_ENABLED
         && !master_th->th.th_teams_microtask  /* not in teams construct */
#endif
       )
    {
        /* only one notification scheme is used (either "submit" or "forking/joined", not both) */
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            kmp_uint64 tmp_time = 0;
            if ( __itt_get_timestamp_ptr )
                tmp_time = __itt_get_timestamp();
            /* Internal fork - report frame begin */
            master_th->th.th_frame_time = tmp_time;
            if ( __kmp_forkjoin_frames_mode == 3 )
                team->t.t_region_time = tmp_time;
        } else
        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
        {   /* Mark start of the "parallel" region for VTune. */
            __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
        }
    }
#endif /* USE_ITT_BUILD */

    /* now go on and do the work */
    KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
    KMP_MB();
    KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

#if USE_ITT_BUILD
    if ( __itt_stack_caller_create_ptr ) {
        team->t.t_stack_id = __kmp_itt_stack_caller_create();  /* create new stack stitching id before entering fork barrier */
    }
#endif /* USE_ITT_BUILD */

    __kmp_internal_fork( loc, gtid, team );
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

    if (call_context == fork_context_gnu) {
        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));
        return TRUE;
    }

    /* Invoke microtask for MASTER thread */
    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    if (! team->t.t_invoke( gtid )) {
        KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
    }
    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );
    KMP_MB();       /* Flush all pending memory write invalidates. */

    KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

#if OMPT_SUPPORT
    if (ompt_status & ompt_status_track) {
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    return TRUE;
}
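
/*
 * Editorial note: the join path mirrors the fork path.  __kmp_join_call()
 * waits for the team in __kmp_internal_join(), emits the matching ITT frame
 * and OMPT parallel_end events, pops the master's task state and ICVs,
 * restores the master's bookkeeping to the parent team, and frees (or
 * returns to the hot-team cache) the finished team.
 */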
#if OMPT_SUPPORT
static inline void
__kmp_join_restore_state(kmp_info_t *thread, kmp_team_t *team)
{
    /* restore state outside the region */
    thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
        ompt_state_work_serial : ompt_state_work_parallel);
}

static inline void
__kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
                ompt_parallel_id_t parallel_id)
{
    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
            parallel_id, task_info->task_id);
    }
    __kmp_join_restore_state(thread, team);
}
#endif
void
__kmp_join_call( ident_t *loc, int gtid )
{
    kmp_team_t *team;
    kmp_team_t *parent_team;
    kmp_info_t *master_th;
    kmp_root_t *root;
    int master_active;
    int i;

    KA_TRACE( 20, ( "__kmp_join_call: enter T#%d\n", gtid ));

    /* setup current data */
    master_th   = __kmp_threads[ gtid ];
    root        = master_th->th.th_root;
    team        = master_th->th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

#if OMPT_SUPPORT
    if (ompt_status & ompt_status_track) {
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                        __kmp_gtid_from_thread( master_th ), team,
                        team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
    }

    if( team->t.t_serialized ) {
#if OMP_40_ENABLED
        if ( master_th->th.th_teams_microtask ) {
            /* We are in the teams construct */
            int level  = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if ( level == tlevel ) {
                /* AC: it was not incremented earlier at the start of the teams
                   construct, so do it here, at the end of the construct */
                team->t.t_level++;
            } else if ( level == tlevel + 1 ) {
                /* AC: we are exiting a parallel inside teams; increment serialization
                   so the next __kmpc_end_serialized_parallel can restore it */
                team->t.t_serialized++;
            }
        }
#endif /* OMP_40_ENABLED */
        __kmpc_end_serialized_parallel( loc, gtid );

#if OMPT_SUPPORT
        if (ompt_status == ompt_status_track_callback) {
            __kmp_join_restore_state(master_th, parent_team);
        }
#endif

        return;
    }
    master_active = team->t.t_master_active;

    /* Join barrier: wait for the workers to finish. */
    __kmp_internal_join( loc, gtid, team );

    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        master_th->th.th_task_state = 0;
    }

#if OMPT_SUPPORT
    ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
#endif

#if USE_ITT_BUILD
    if ( __itt_stack_caller_create_ptr ) {
        __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id );  /* destroy the stack stitching id after join barrier */
    }

    /* Mark end of the "parallel" region for VTune. */
    if ( team->t.t_active_level == 1
#if OMP_40_ENABLED
         && !master_th->th.th_teams_microtask  /* not in teams construct */
#endif
       )
    {
        master_th->th.th_ident = loc;
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
            __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
                                    0, loc, master_th->th.th_team_nproc, 1 );
        else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
                  ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            __kmp_itt_region_joined( gtid );
    }
#endif /* USE_ITT_BUILD */

#if OMP_40_ENABLED
    if ( master_th->th.th_teams_microtask &&
         team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
         team->t.t_level == master_th->th.th_teams_level + 1 ) {
        /* AC: we are leaving a parallel inside the teams construct; keep the (hot)
           team intact and only adjust nesting levels and sizes. */

        /* Decrement our nested depth level */
        team->t.t_active_level --;
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

        /* Restore the number of threads in the team if needed */
        if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_teams_size.nth;
            kmp_info_t **other_threads = team->t.t_threads;
            kmp_task_team_t * task_team = master_th->th.th_task_team;
            team->t.t_nproc = new_num;
            if ( task_team ) {  /* the task team might have smaller counters */
                task_team->tt.tt_ref_ct = new_num - 1;
                task_team->tt.tt_unfinished_threads = new_num;
            }
            for ( i = 0; i < old_num; ++i ) {
                other_threads[i]->th.th_team_nproc = new_num;
            }
            /* Adjust the states of the spare threads of the team */
            for ( i = old_num; i < new_num; ++i ) {
                /* Re-initialize the thread's barrier data. */
                int b;
                kmp_balign_t * balign = other_threads[i]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
                }
                if ( __kmp_tasking_mode != tskm_immediate_exec ) {
                    /* Synchronize the thread's task state */
                    other_threads[i]->th.th_task_state = master_th->th.th_task_state;
                }
            }
        }

#if OMPT_SUPPORT
        if (ompt_status == ompt_status_track_callback) {
            __kmp_join_ompt(master_th, parent_team, parallel_id);
        }
#endif

        return;
    }
#endif /* OMP_40_ENABLED */
    /* do cleanup and restore the parent team */
    master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
    master_th->th.th_local.this_construct = team->t.t_master_this_cons;

    master_th->th.th_dispatch =
                & parent_team->t.t_dispatch[ team->t.t_master_tid ];

    /* jc: The following lock has instructions with REL and ACQ semantics,
       separating the parallel user code called in this parallel region
       from the serial user code called after this function returns. */
    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMP_40_ENABLED
    if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
#endif /* OMP_40_ENABLED */
    {
        /* Decrement our nested depth level */
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
    }
    KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );

    KF_TRACE( 10, ( "__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                    0, master_th, team ) );
    __kmp_pop_current_task_from_thread( master_th );

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    /* Restore master thread's partition. */
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;
#endif /* OMP_40_ENABLED */

    updateHWFPControl (team);

    if ( root->r.r_active != master_active )
        root->r.r_active = master_active;

    __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) );  /* this will free the worker threads */

    /* this race was fun to find; make sure the following is in the critical
       region, otherwise assertions may fail occasionally on the master */
    master_th->th.th_team        = parent_team;
    master_th->th.th_team_nproc  = parent_team->t.t_nproc;
    master_th->th.th_team_master = parent_team->t.t_threads[0];
    master_th->th.th_team_serialized = parent_team->t.t_serialized;

    /* restore serialized team, if need be */
    if( parent_team->t.t_serialized &&
        parent_team != master_th->th.th_serial_team &&
        parent_team != root->r.r_root_team ) {
        __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
        master_th->th.th_serial_team = parent_team;
    }

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        /* Restore the task state from the memo stack */
        KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
        if (master_th->th.th_task_state_top > 0) {
            --master_th->th.th_task_state_top;
            master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
        }
        /* Copy the task team from the new child / old parent team to the thread. */
        master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
        KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                        parent_team ) );
    }

    master_th->th.th_current_task->td_flags.executing = 1;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
    if (ompt_status == ompt_status_track_callback) {
        __kmp_join_ompt(master_th, parent_team, parallel_id);
    }
#endif

    KMP_MB();
    KA_TRACE( 20, ( "__kmp_join_call: exit T#%d\n", gtid ));
}
/* Check whether we should push an internal control record onto the
   serial team stack.  If so, do it. */
static void
__kmp_save_internal_controls ( kmp_info_t * thread )
{
    if ( thread->th.th_team != thread->th.th_serial_team ) {
        return;
    }
    if (thread->th.th_team->t.t_serialized > 1) {
        int push = 0;
        if (thread->th.th_team->t.t_control_stack_top == NULL) {
            push = 1;
        } else if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
                    thread->th.th_team->t.t_serialized ) {
            push = 1;
        }
        if (push) {  /* push a record on the serial team's stack */
            kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));

            copy_icvs( control, & thread->th.th_current_task->td_icvs );

            control->serial_nesting_level = thread->th.th_team->t.t_serialized;

            control->next = thread->th.th_team->t.t_control_stack_top;
            thread->th.th_team->t.t_control_stack_top = control;
        }
    }
}
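
/*
 * Editorial note: __kmp_save_internal_controls() above snapshots the current
 * ICVs onto the serial team's control stack, one record per serialized
 * nesting level.  The setter entry points that follow call it first so the
 * values can be restored when the corresponding serialized region ends.
 * __kmp_set_num_threads() additionally shrinks the root's hot team eagerly
 * (when the root is idle and nested hot teams are not in use), freeing the
 * now-surplus workers back to the thread pool.
 */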
/* Changes set_nproc */
void
__kmp_set_num_threads( int new_nth, int gtid )
{
    kmp_info_t *thread;
    kmp_root_t *root;

    KF_TRACE( 10, ( "__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if (new_nth < 1)
        new_nth = 1;
    else if (new_nth > __kmp_max_nth)
        new_nth = __kmp_max_nth;

    thread = __kmp_threads[gtid];

    __kmp_save_internal_controls( thread );

    set__nproc( thread, new_nth );

    /* If this omp_set_num_threads() call will cause the hot team size to be
       reduced (in the absence of a num_threads clause), then reduce it now,
       rather than waiting for the next parallel region. */
    root = thread->th.th_root;
    if ( __kmp_init_parallel && ( ! root->r.r_active )
         && ( root->r.r_hot_team->t.t_nproc > new_nth )
#if KMP_NESTED_HOT_TEAMS
         && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
       ) {
        kmp_team_t *hot_team = root->r.r_hot_team;
        int f;

        __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

        /* Release the extra threads we don't need any more. */
        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            int tt_idx;
            for (tt_idx=0; tt_idx<2; ++tt_idx) {
                kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx];
                if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
                    /* Signal the worker threads (esp. the extra ones) to stop looking for
                       tasks while spin waiting.  The task teams are reference counted and
                       will be deallocated by the last worker thread. */
                    KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
                    TCW_SYNC_4( task_team->tt.tt_active, FALSE );
                    KMP_MB();
                    KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
                                    &hot_team->t.t_task_team[tt_idx] ) );
                    hot_team->t.t_task_team[tt_idx] = NULL;
                } else {
                    KMP_DEBUG_ASSERT( task_team == NULL );
                }
            }
        }

        for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            __kmp_free_thread( hot_team->t.t_threads[f] );
            hot_team->t.t_threads[f] = NULL;
        }
        hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
        if( thread->th.th_hot_teams ) {
            KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
            thread->th.th_hot_teams[0].hot_team_nth = new_nth;
        }
#endif

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

        /* Update the t_nproc field in the threads that are still active. */
        for( f=0 ; f < new_nth; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
        }
        /* Special flag in case omp_set_num_threads() call */
        hot_team->t.t_size_changed = -1;
    }
}
2681 __kmp_set_max_active_levels(
int gtid,
int max_active_levels )
2685 KF_TRACE( 10, (
"__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2686 KMP_DEBUG_ASSERT( __kmp_init_serial );
2689 if( max_active_levels < 0 ) {
2690 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2694 KF_TRACE( 10, (
"__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2697 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2701 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2702 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2707 KF_TRACE( 10, (
"__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2709 thread = __kmp_threads[ gtid ];
2711 __kmp_save_internal_controls( thread );
2713 set__max_active_levels( thread, max_active_levels );
2719 __kmp_get_max_active_levels(
int gtid )
2723 KF_TRACE( 10, (
"__kmp_get_max_active_levels: thread %d\n", gtid ) );
2724 KMP_DEBUG_ASSERT( __kmp_init_serial );
2726 thread = __kmp_threads[ gtid ];
2727 KMP_DEBUG_ASSERT( thread->th.th_current_task );
2728 KF_TRACE( 10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
2729 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2730 return thread->th.th_current_task->td_icvs.max_active_levels;
2735 __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk )
2740 KF_TRACE( 10, (
"__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (
int)kind, chunk ));
2741 KMP_DEBUG_ASSERT( __kmp_init_serial );
2747 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2748 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2753 KMP_MSG( ScheduleKindOutOfRange, kind ),
2754 KMP_HNT( DefaultScheduleKindUsed,
"static, no chunk" ),
2757 kind = kmp_sched_default;
2761 thread = __kmp_threads[ gtid ];
2763 __kmp_save_internal_controls( thread );
2765 if ( kind < kmp_sched_upper_std ) {
2766 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2769 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2771 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
2775 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2776 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2778 if ( kind == kmp_sched_auto ) {
2780 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2782 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2788 __kmp_get_schedule(
int gtid, kmp_sched_t * kind,
int * chunk )
2794 KF_TRACE( 10, (
"__kmp_get_schedule: thread %d\n", gtid ));
2795 KMP_DEBUG_ASSERT( __kmp_init_serial );
2797 thread = __kmp_threads[ gtid ];
2800 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2802 switch ( th_type ) {
2804 case kmp_sch_static_greedy:
2805 case kmp_sch_static_balanced:
2806 *kind = kmp_sched_static;
2809 case kmp_sch_static_chunked:
2810 *kind = kmp_sched_static;
2812 case kmp_sch_dynamic_chunked:
2813 *kind = kmp_sched_dynamic;
2816 case kmp_sch_guided_iterative_chunked:
2817 case kmp_sch_guided_analytical_chunked:
2818 *kind = kmp_sched_guided;
2821 *kind = kmp_sched_auto;
2823 case kmp_sch_trapezoidal:
2824 *kind = kmp_sched_trapezoidal;
2832 KMP_FATAL( UnknownSchedulingType, th_type );
2836 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
2840 __kmp_get_ancestor_thread_num(
int gtid,
int level ) {
2846 KF_TRACE( 10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2847 KMP_DEBUG_ASSERT( __kmp_init_serial );
2850 if( level == 0 )
return 0;
2851 if( level < 0 )
return -1;
2852 thr = __kmp_threads[ gtid ];
2853 team = thr->th.th_team;
2854 ii = team->t.t_level;
2855 if( level > ii )
return -1;
2858 if( thr->th.th_teams_microtask ) {
2860 int tlevel = thr->th.th_teams_level;
2861 if( level <= tlevel ) {
2862 KMP_DEBUG_ASSERT( ii >= tlevel );
2864 if ( ii == tlevel ) {
2873 if( ii == level )
return __kmp_tid_from_gtid( gtid );
2875 dd = team->t.t_serialized;
2879 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2882 if( ( team->t.t_serialized ) && ( !dd ) ) {
2883 team = team->t.t_parent;
2887 team = team->t.t_parent;
2888 dd = team->t.t_serialized;
2893 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
2897 __kmp_get_team_size(
int gtid,
int level ) {
2903 KF_TRACE( 10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level ));
2904 KMP_DEBUG_ASSERT( __kmp_init_serial );
2907 if( level == 0 )
return 1;
2908 if( level < 0 )
return -1;
2909 thr = __kmp_threads[ gtid ];
2910 team = thr->th.th_team;
2911 ii = team->t.t_level;
2912 if( level > ii )
return -1;
2915 if( thr->th.th_teams_microtask ) {
2917 int tlevel = thr->th.th_teams_level;
2918 if( level <= tlevel ) {
2919 KMP_DEBUG_ASSERT( ii >= tlevel );
2921 if ( ii == tlevel ) {
2932 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2935 if( team->t.t_serialized && ( !dd ) ) {
2936 team = team->t.t_parent;
2940 team = team->t.t_parent;
2945 return team->t.t_nproc;
2949 __kmp_get_schedule_global() {
2953 kmp_r_sched_t r_sched;
2959 r_sched.r_sched_type = __kmp_static;
2961 r_sched.r_sched_type = __kmp_guided;
2963 r_sched.r_sched_type = __kmp_sched;
2966 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
2967 r_sched.chunk = KMP_DEFAULT_CHUNK;
2969 r_sched.chunk = __kmp_chunk;
2984 __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc )
2987 KMP_DEBUG_ASSERT( team );
2988 if( !realloc || argc > team->t.t_max_argc ) {
2990 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2991 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
2993 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2994 __kmp_free( (
void *) team->t.t_argv );
2996 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2998 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2999 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
3000 team->t.t_id, team->t.t_max_argc ));
3001 team->t.t_argv = &team->t.t_inline_argv[0];
3002 if ( __kmp_storage_map ) {
3003 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
3004 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3005 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
3006 "team_%d.t_inline_argv",
3011 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
3012 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
3013 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
3014 team->t.t_id, team->t.t_max_argc ));
3015 team->t.t_argv = (
void**) __kmp_page_allocate(
sizeof(
void*) * team->t.t_max_argc );
3016 if ( __kmp_storage_map ) {
3017 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
3018 sizeof(
void *) * team->t.t_max_argc,
"team_%d.t_argv",
3026 __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth)
3029 int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
3030 #if KMP_USE_POOLED_ALLOC
3032 char *ptr = __kmp_allocate(max_nth *
3033 (
sizeof(kmp_info_t*) +
sizeof(dispatch_shared_info_t)*num_disp_buf
3034 +
sizeof(kmp_disp_t) +
sizeof(
int)*6
3036 +
sizeof(kmp_r_sched_t)
3037 +
sizeof(kmp_taskdata_t) ) );
3039 team->t.t_threads = (kmp_info_t**) ptr; ptr +=
sizeof(kmp_info_t*) * max_nth;
3040 team->t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
3041 ptr +=
sizeof(dispatch_shared_info_t) * num_disp_buff;
3042 team->t.t_dispatch = (kmp_disp_t*) ptr; ptr +=
sizeof(kmp_disp_t) * max_nth;
3043 team->t.t_set_nproc = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3044 team->t.t_set_dynamic = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3045 team->t.t_set_nested = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3046 team->t.t_set_blocktime = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3047 team->t.t_set_bt_intervals = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3048 team->t.t_set_bt_set = (
int*) ptr;
3049 ptr +=
sizeof(int) * max_nth;
3051 team->t.t_set_sched = (kmp_r_sched_t*) ptr;
3052 ptr +=
sizeof(kmp_r_sched_t) * max_nth;
3053 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
3054 ptr +=
sizeof(kmp_taskdata_t) * max_nth;
3057 team->t.t_threads = (kmp_info_t**) __kmp_allocate(
sizeof(kmp_info_t*) * max_nth );
3058 team->t.t_disp_buffer = (dispatch_shared_info_t*)
3059 __kmp_allocate(
sizeof(dispatch_shared_info_t) * num_disp_buff );
3060 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate(
sizeof(kmp_disp_t) * max_nth );
3063 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate(
sizeof(kmp_taskdata_t) * max_nth );
3065 team->t.t_max_nproc = max_nth;
3068 for(i = 0 ; i < num_disp_buff; ++i)
3069 team->t.t_disp_buffer[i].buffer_index = i;
3073 __kmp_free_team_arrays(kmp_team_t *team) {
3076 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
3077 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
3078 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
3079 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3082 __kmp_free(team->t.t_threads);
3083 #if !KMP_USE_POOLED_ALLOC
3084 __kmp_free(team->t.t_disp_buffer);
3085 __kmp_free(team->t.t_dispatch);
3088 __kmp_free(team->t.t_implicit_task_taskdata);
3090 team->t.t_threads = NULL;
3091 team->t.t_disp_buffer = NULL;
3092 team->t.t_dispatch = NULL;
3095 team->t.t_implicit_task_taskdata = 0;
3099 __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
3100 kmp_info_t **oldThreads = team->t.t_threads;
3102 #if !KMP_USE_POOLED_ALLOC
3103 __kmp_free(team->t.t_disp_buffer);
3104 __kmp_free(team->t.t_dispatch);
3107 __kmp_free(team->t.t_implicit_task_taskdata);
3109 __kmp_allocate_team_arrays(team, max_nth);
3111 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
3113 __kmp_free(oldThreads);
3116 static kmp_internal_control_t
3117 __kmp_get_global_icvs(
void ) {
3119 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3122 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3125 kmp_internal_control_t g_icvs = {
3127 (kmp_int8)__kmp_dflt_nested,
3128 (kmp_int8)__kmp_global.g.g_dynamic,
3129 (kmp_int8)__kmp_env_blocktime,
3130 __kmp_dflt_blocktime,
3132 __kmp_dflt_team_nth,
3134 __kmp_dflt_max_active_levels,
3137 __kmp_nested_proc_bind.bind_types[0],
3145 static kmp_internal_control_t
3146 __kmp_get_x_global_icvs(
const kmp_team_t *team ) {
3148 kmp_internal_control_t gx_icvs;
3149 gx_icvs.serial_nesting_level = 0;
3150 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3151 gx_icvs.next = NULL;
3157 __kmp_initialize_root( kmp_root_t *root )
3160 kmp_team_t *root_team;
3161 kmp_team_t *hot_team;
3162 size_t disp_size, dispatch_size, bar_size;
3163 int hot_team_max_nth;
3164 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3165 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3166 KMP_DEBUG_ASSERT( root );
3167 KMP_ASSERT( ! root->r.r_begin );
3170 __kmp_init_lock( &root->r.r_begin_lock );
3171 root->r.r_begin = FALSE;
3172 root->r.r_active = FALSE;
3173 root->r.r_in_parallel = 0;
3174 root->r.r_blocktime = __kmp_dflt_blocktime;
3175 root->r.r_nested = __kmp_dflt_nested;
3179 KF_TRACE( 10, (
"__kmp_initialize_root: before root_team\n" ) );
3182 __kmp_allocate_team(
3190 __kmp_nested_proc_bind.bind_types[0],
3194 USE_NESTED_HOT_ARG(NULL)
3198 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3201 KF_TRACE( 10, (
"__kmp_initialize_root: after root_team = %p\n", root_team ) );
3203 root->r.r_root_team = root_team;
3204 root_team->t.t_control_stack_top = NULL;
3207 root_team->t.t_threads[0] = NULL;
3208 root_team->t.t_nproc = 1;
3209 root_team->t.t_serialized = 1;
3211 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3212 root_team->t.t_sched.chunk = r_sched.chunk;
3213 KA_TRACE( 20, (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3214 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3218 KF_TRACE( 10, (
"__kmp_initialize_root: before hot_team\n" ) );
3221 __kmp_allocate_team(
3224 __kmp_dflt_team_nth_ub * 2,
3229 __kmp_nested_proc_bind.bind_types[0],
3233 USE_NESTED_HOT_ARG(NULL)
3235 KF_TRACE( 10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3237 root->r.r_hot_team = hot_team;
3238 root_team->t.t_control_stack_top = NULL;
3241 hot_team->t.t_parent = root_team;
3244 hot_team_max_nth = hot_team->t.t_max_nproc;
3245 for ( f = 0; f < hot_team_max_nth; ++ f ) {
3246 hot_team->t.t_threads[ f ] = NULL;
3248 hot_team->t.t_nproc = 1;
3250 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3251 hot_team->t.t_sched.chunk = r_sched.chunk;
3252 hot_team->t.t_size_changed = 0;
3259 typedef struct kmp_team_list_item {
3260 kmp_team_p
const * entry;
3261 struct kmp_team_list_item * next;
3262 } kmp_team_list_item_t;
3263 typedef kmp_team_list_item_t * kmp_team_list_t;
3267 __kmp_print_structure_team_accum(
3268 kmp_team_list_t list,
3269 kmp_team_p
const * team
3279 KMP_DEBUG_ASSERT( list != NULL );
3280 if ( team == NULL ) {
3284 __kmp_print_structure_team_accum( list, team->t.t_parent );
3285 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3289 while ( l->next != NULL && l->entry != team ) {
3292 if ( l->next != NULL ) {
3298 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3304 kmp_team_list_item_t * item =
3305 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof( kmp_team_list_item_t ) );
3314 __kmp_print_structure_team(
3316 kmp_team_p
const * team
3319 __kmp_printf(
"%s", title );
3320 if ( team != NULL ) {
3321 __kmp_printf(
"%2x %p\n", team->t.t_id, team );
3323 __kmp_printf(
" - (nil)\n" );
3328 __kmp_print_structure_thread(
3330 kmp_info_p
const * thread
3333 __kmp_printf(
"%s", title );
3334 if ( thread != NULL ) {
3335 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3337 __kmp_printf(
" - (nil)\n" );
3342 __kmp_print_structure(
3346 kmp_team_list_t list;
3349 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof( kmp_team_list_item_t ) );
3353 __kmp_printf(
"\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3356 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3357 __kmp_printf(
"%2d", gtid );
3358 if ( __kmp_threads != NULL ) {
3359 __kmp_printf(
" %p", __kmp_threads[ gtid ] );
3361 if ( __kmp_root != NULL ) {
3362 __kmp_printf(
" %p", __kmp_root[ gtid ] );
3364 __kmp_printf(
"\n" );
3369 __kmp_printf(
"\n------------------------------\nThreads\n------------------------------\n" );
3370 if ( __kmp_threads != NULL ) {
3372 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3373 kmp_info_t
const * thread = __kmp_threads[ gtid ];
3374 if ( thread != NULL ) {
3375 __kmp_printf(
"GTID %2d %p:\n", gtid, thread );
3376 __kmp_printf(
" Our Root: %p\n", thread->th.th_root );
3377 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team );
3378 __kmp_print_structure_team(
" Serial Team: ", thread->th.th_serial_team );
3379 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc );
3380 __kmp_print_structure_thread(
" Master: ", thread->th.th_team_master );
3381 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized );
3382 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc );
3384 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3386 __kmp_print_structure_thread(
" Next in pool: ", thread->th.th_next_pool );
3387 __kmp_printf(
"\n" );
3388 __kmp_print_structure_team_accum( list, thread->th.th_team );
3389 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3393 __kmp_printf(
"Threads array is not allocated.\n" );
3397 __kmp_printf(
"\n------------------------------\nUbers\n------------------------------\n" );
3398 if ( __kmp_root != NULL ) {
3400 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3401 kmp_root_t
const * root = __kmp_root[ gtid ];
3402 if ( root != NULL ) {
3403 __kmp_printf(
"GTID %2d %p:\n", gtid, root );
3404 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team );
3405 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team );
3406 __kmp_print_structure_thread(
" Uber Thread: ", root->r.r_uber_thread );
3407 __kmp_printf(
" Active?: %2d\n", root->r.r_active );
3408 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested );
3409 __kmp_printf(
" In Parallel: %2d\n", root->r.r_in_parallel );
3410 __kmp_printf(
"\n" );
3411 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3412 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3416 __kmp_printf(
"Ubers array is not allocated.\n" );
3419 __kmp_printf(
"\n------------------------------\nTeams\n------------------------------\n" );
3420 while ( list->next != NULL ) {
3421 kmp_team_p
const * team = list->entry;
3423 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team );
3424 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent );
3425 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid );
3426 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc );
3427 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized );
3428 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc );
3429 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3430 __kmp_printf(
" Thread %2d: ", i );
3431 __kmp_print_structure_thread(
"", team->t.t_threads[ i ] );
3433 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool );
3434 __kmp_printf(
"\n" );
3439 __kmp_printf(
"\n------------------------------\nPools\n------------------------------\n" );
3440 __kmp_print_structure_thread(
"Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3441 __kmp_print_structure_team(
"Team pool: ", (kmp_team_t *)__kmp_team_pool );
3442 __kmp_printf(
"\n" );
3445 while ( list != NULL ) {
3446 kmp_team_list_item_t * item = list;
3448 KMP_INTERNAL_FREE( item );
3460 static const unsigned __kmp_primes[] = {
3461 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3462 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3463 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3464 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3465 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3466 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3467 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3468 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3469 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3470 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3471 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3472 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3473 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3474 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3475 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3476 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3483 __kmp_get_random( kmp_info_t * thread )
3485 unsigned x = thread->th.th_x;
3486 unsigned short r = x>>16;
3488 thread->th.th_x = x*thread->th.th_a+1;
3490 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
3491 thread->th.th_info.ds.ds_tid, r) );
3499 __kmp_init_random( kmp_info_t * thread )
3501 unsigned seed = thread->th.th_info.ds.ds_tid;
3503 thread->th.th_a = __kmp_primes[seed%(
sizeof(__kmp_primes)/
sizeof(__kmp_primes[0]))];
3504 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3505 KA_TRACE(30, (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
3512 __kmp_reclaim_dead_roots(
void) {
3515 for(i = 0; i < __kmp_threads_capacity; ++i) {
3516 if( KMP_UBER_GTID( i ) &&
3517 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3518 !__kmp_root[i]->r.r_active ) {
3519 r += __kmp_unregister_root_other_thread(i);
3548 __kmp_expand_threads(
int nWish,
int nNeed) {
3551 int __kmp_actual_max_nth;
3555 #if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
3558 added = __kmp_reclaim_dead_roots();
3576 int minimumRequiredCapacity;
3578 kmp_info_t **newThreads;
3579 kmp_root_t **newRoot;
3601 old_tp_cached = __kmp_tp_cached;
3602 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3603 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3607 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3611 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3617 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3624 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3626 newCapacity = __kmp_threads_capacity;
3629 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3630 (newCapacity << 1) :
3631 __kmp_actual_max_nth;
3632 }
while(newCapacity < minimumRequiredCapacity);
3633 newThreads = (kmp_info_t**) __kmp_allocate((
sizeof(kmp_info_t*) +
sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3634 newRoot = (kmp_root_t**) ((
char*)newThreads +
sizeof(kmp_info_t*) * newCapacity );
3635 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity *
sizeof(kmp_info_t*));
3636 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity *
sizeof(kmp_root_t*));
3637 memset(newThreads + __kmp_threads_capacity, 0,
3638 (newCapacity - __kmp_threads_capacity) *
sizeof(kmp_info_t*));
3639 memset(newRoot + __kmp_threads_capacity, 0,
3640 (newCapacity - __kmp_threads_capacity) *
sizeof(kmp_root_t*));
3642 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3648 __kmp_free(newThreads);
3651 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3652 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3654 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3655 __kmp_free(newThreads);
3661 *(kmp_info_t**
volatile*)&__kmp_threads = newThreads;
3662 *(kmp_root_t**
volatile*)&__kmp_root = newRoot;
3663 added += newCapacity - __kmp_threads_capacity;
3664 *(
volatile int*)&__kmp_threads_capacity = newCapacity;
3665 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3676 __kmp_register_root(
int initial_thread )
3678 kmp_info_t *root_thread;
3682 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3683 KA_TRACE( 20, (
"__kmp_register_root: entered\n"));
3701 capacity = __kmp_threads_capacity;
3702 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3707 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3708 if ( __kmp_tp_cached ) {
3711 KMP_MSG( CantRegisterNewThread ),
3712 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3713 KMP_HNT( PossibleSystemLimitOnThreads ),
3720 KMP_MSG( CantRegisterNewThread ),
3721 KMP_HNT( SystemLimitOnThreads ),
3730 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3732 KA_TRACE( 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3733 KMP_ASSERT( gtid < __kmp_threads_capacity );
3737 TCW_4(__kmp_nth, __kmp_nth + 1);
3744 if ( __kmp_adjust_gtid_mode ) {
3745 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3746 if ( TCR_4(__kmp_gtid_mode) != 2) {
3747 TCW_4(__kmp_gtid_mode, 2);
3751 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3752 TCW_4(__kmp_gtid_mode, 1);
3757 #ifdef KMP_ADJUST_BLOCKTIME
3760 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3761 if ( __kmp_nth > __kmp_avail_proc ) {
3762 __kmp_zero_bt = TRUE;
3768 if( ! ( root = __kmp_root[gtid] )) {
3769 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate(
sizeof(kmp_root_t) );
3770 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3773 __kmp_initialize_root( root );
3776 if( root->r.r_uber_thread ) {
3777 root_thread = root->r.r_uber_thread;
3779 root_thread = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
3780 if ( __kmp_storage_map ) {
3781 __kmp_print_thread_storage_map( root_thread, gtid );
3783 root_thread->th.th_info .ds.ds_gtid = gtid;
3784 root_thread->th.th_root = root;
3785 if( __kmp_env_consistency_check ) {
3786 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
3789 __kmp_initialize_fast_memory( root_thread );
3793 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
3794 __kmp_initialize_bget( root_thread );
3796 __kmp_init_random( root_thread );
3800 if( ! root_thread->th.th_serial_team ) {
3801 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3802 KF_TRACE( 10, (
"__kmp_register_root: before serial_team\n" ) );
3803 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
3811 0 USE_NESTED_HOT_ARG(NULL) );
3813 KMP_ASSERT( root_thread->th.th_serial_team );
3814 KF_TRACE( 10, (
"__kmp_register_root: after serial_team = %p\n",
3815 root_thread->th.th_serial_team ) );
3818 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3820 root->r.r_root_team->t.t_threads[0] = root_thread;
3821 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3822 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3823 root_thread->th.th_serial_team->t.t_serialized = 0;
3824 root->r.r_uber_thread = root_thread;
3827 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
3830 __kmp_gtid_set_specific( gtid );
3832 __kmp_itt_thread_name( gtid );
3834 #ifdef KMP_TDATA_GTID
3837 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3838 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
3839 TCW_4(__kmp_init_gtid, TRUE);
3841 KA_TRACE( 20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3842 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
3843 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3844 KMP_INIT_BARRIER_STATE ) );
3847 for ( b = 0; b < bs_last_barrier; ++ b ) {
3848 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3850 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3854 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3857 #if KMP_AFFINITY_SUPPORTED
3858 if ( TCR_4(__kmp_init_middle) ) {
3859 __kmp_affinity_set_init_mask( gtid, TRUE );
3863 __kmp_root_counter ++;
3866 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3871 #if KMP_NESTED_HOT_TEAMS
3873 __kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr,
int level,
const int max_level )
3876 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3877 if( !hot_teams || !hot_teams[level].hot_team ) {
3880 KMP_DEBUG_ASSERT( level < max_level );
3881 kmp_team_t *team = hot_teams[level].hot_team;
3882 nth = hot_teams[level].hot_team_nth;
3884 if( level < max_level - 1 ) {
3885 for( i = 0; i < nth; ++i ) {
3886 kmp_info_t *th = team->t.t_threads[i];
3887 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3888 if( i > 0 && th->th.th_hot_teams ) {
3889 __kmp_free( th->th.th_hot_teams );
3890 th->th.th_hot_teams = NULL;
3894 __kmp_free_team( root, team, NULL );
3903 __kmp_reset_root(
int gtid, kmp_root_t *root)
3905 kmp_team_t * root_team = root->r.r_root_team;
3906 kmp_team_t * hot_team = root->r.r_hot_team;
3907 int n = hot_team->t.t_nproc;
3910 KMP_DEBUG_ASSERT( ! root->r.r_active );
3912 root->r.r_root_team = NULL;
3913 root->r.r_hot_team = NULL;
3916 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3917 #if KMP_NESTED_HOT_TEAMS
3918 if( __kmp_hot_teams_max_level > 1 ) {
3919 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3920 kmp_info_t *th = hot_team->t.t_threads[i];
3921 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3922 if( th->th.th_hot_teams ) {
3923 __kmp_free( th->th.th_hot_teams );
3924 th->th.th_hot_teams = NULL;
3929 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
3935 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3936 __kmp_wait_to_unref_task_teams();
3941 KA_TRACE( 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
"\n",
3942 (LPVOID)&(root->r.r_uber_thread->th),
3943 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3944 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3948 if ((ompt_status == ompt_status_track_callback) &&
3949 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3950 int gtid = __kmp_get_gtid();
3951 __ompt_thread_end(ompt_thread_initial, gtid);
3955 TCW_4(__kmp_nth, __kmp_nth - 1);
3956 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3959 root->r.r_uber_thread = NULL;
3961 root->r.r_begin = FALSE;
3967 __kmp_unregister_root_current_thread(
int gtid )
3969 KA_TRACE( 1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
3974 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3975 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3976 KC_TRACE( 10, (
"__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3977 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3980 kmp_root_t *root = __kmp_root[gtid];
3982 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3983 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3984 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3985 KMP_ASSERT( root->r.r_active == FALSE );
3991 kmp_info_t * thread = __kmp_threads[gtid];
3992 kmp_team_t * team = thread->th.th_team;
3993 kmp_task_team_t * task_team = thread->th.th_task_team;
3996 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
3997 __kmp_task_team_wait(thread, team, NULL );
4000 __kmp_reset_root(gtid, root);
4003 __kmp_gtid_set_specific( KMP_GTID_DNE );
4004 #ifdef KMP_TDATA_GTID
4005 __kmp_gtid = KMP_GTID_DNE;
4009 KC_TRACE( 10, (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
4011 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
4019 __kmp_unregister_root_other_thread(
int gtid )
4021 kmp_root_t *root = __kmp_root[gtid];
4024 KA_TRACE( 1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
4025 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
4026 KMP_ASSERT( KMP_UBER_GTID( gtid ));
4027 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
4028 KMP_ASSERT( root->r.r_active == FALSE );
4030 r = __kmp_reset_root(gtid, root);
4031 KC_TRACE( 10, (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
4036 void __kmp_task_info() {
4038 kmp_int32 gtid = __kmp_entry_gtid();
4039 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
4040 kmp_info_t *this_thr = __kmp_threads[ gtid ];
4041 kmp_team_t *steam = this_thr->th.th_serial_team;
4042 kmp_team_t *team = this_thr->th.th_team;
4044 __kmp_printf(
"__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
4045 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
4053 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team,
int tid,
int gtid )
4057 kmp_info_t *master = team->t.t_threads[0];
4058 KMP_DEBUG_ASSERT( this_thr != NULL );
4059 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
4060 KMP_DEBUG_ASSERT( team );
4061 KMP_DEBUG_ASSERT( team->t.t_threads );
4062 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4063 KMP_DEBUG_ASSERT( master );
4064 KMP_DEBUG_ASSERT( master->th.th_root );
4068 TCW_SYNC_PTR(this_thr->th.th_team, team);
4070 this_thr->th.th_info.ds.ds_tid = tid;
4071 this_thr->th.th_set_nproc = 0;
4073 this_thr->th.th_set_proc_bind = proc_bind_default;
4074 # if KMP_AFFINITY_SUPPORTED
4075 this_thr->th.th_new_place = this_thr->th.th_current_place;
4078 this_thr->th.th_root = master->th.th_root;
4081 this_thr->th.th_team_nproc = team->t.t_nproc;
4082 this_thr->th.th_team_master = master;
4083 this_thr->th.th_team_serialized = team->t.t_serialized;
4084 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4086 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
4087 this_thr->th.th_task_state = 0;
4089 KF_TRACE( 10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4090 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4092 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4094 KF_TRACE( 10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4095 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4099 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
4101 this_thr->th.th_local.this_construct = 0;
4104 this_thr->th.th_local.tv_data = 0;
4107 if ( ! this_thr->th.th_pri_common ) {
4108 this_thr->th.th_pri_common = (
struct common_table *) __kmp_allocate(
sizeof(
struct common_table) );
4109 if ( __kmp_storage_map ) {
4110 __kmp_print_storage_map_gtid(
4111 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4112 sizeof(
struct common_table ),
"th_%d.th_pri_common\n", gtid
4115 this_thr->th.th_pri_head = NULL;
4120 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4124 size_t disp_size =
sizeof( dispatch_private_info_t ) *
4125 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
4126 KD_TRACE( 10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4127 KMP_ASSERT( dispatch );
4128 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4129 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4131 dispatch->th_disp_index = 0;
4133 if( ! dispatch->th_disp_buffer ) {
4134 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4136 if ( __kmp_storage_map ) {
4137 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4138 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
4139 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
4140 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4141 gtid, team->t.t_id, gtid );
4144 memset( & dispatch->th_disp_buffer[0],
'\0', disp_size );
4147 dispatch->th_dispatch_pr_current = 0;
4148 dispatch->th_dispatch_sh_current = 0;
4150 dispatch->th_deo_fcn = 0;
4151 dispatch->th_dxo_fcn = 0;
4154 this_thr->th.th_next_pool = NULL;
4156 if (!this_thr->th.th_task_state_memo_stack) {
4157 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*
sizeof(kmp_uint8) );
4158 this_thr->th.th_task_state_top = 0;
4159 this_thr->th.th_task_state_stack_sz = 4;
4162 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4163 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4176 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team,
int new_tid )
4178 kmp_team_t *serial_team;
4179 kmp_info_t *new_thr;
4182 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4183 KMP_DEBUG_ASSERT( root && team );
4184 #if !KMP_NESTED_HOT_TEAMS
4185 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4190 if ( __kmp_thread_pool ) {
4192 new_thr = (kmp_info_t*)__kmp_thread_pool;
4193 __kmp_thread_pool = (
volatile kmp_info_t *) new_thr->th.th_next_pool;
4194 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4195 __kmp_thread_pool_insert_pt = NULL;
4197 TCW_4(new_thr->th.th_in_pool, FALSE);
4203 __kmp_thread_pool_nth--;
4205 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4206 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4207 KMP_ASSERT( ! new_thr->th.th_team );
4208 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4209 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4212 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4213 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4215 TCW_4(__kmp_nth, __kmp_nth + 1);
4217 new_thr->th.th_task_state_top = 0;
4218 new_thr->th.th_task_state_stack_sz = 4;
4220 #ifdef KMP_ADJUST_BLOCKTIME
4223 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4224 if ( __kmp_nth > __kmp_avail_proc ) {
4225 __kmp_zero_bt = TRUE;
4233 kmp_balign_t * balign = new_thr->th.th_bar;
4234 for( b = 0; b < bs_last_barrier; ++ b )
4235 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4238 KF_TRACE( 10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4239 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4247 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4248 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4254 if ( ! TCR_4( __kmp_init_monitor ) ) {
4255 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4256 if ( ! TCR_4( __kmp_init_monitor ) ) {
4257 KF_TRACE( 10, (
"before __kmp_create_monitor\n" ) );
4258 TCW_4( __kmp_init_monitor, 1 );
4259 __kmp_create_monitor( & __kmp_monitor );
4260 KF_TRACE( 10, (
"after __kmp_create_monitor\n" ) );
4269 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4272 KF_TRACE( 10, (
"after monitor thread has started\n" ) );
4275 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4279 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4280 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4284 new_thr = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
4286 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4288 if ( __kmp_storage_map ) {
4289 __kmp_print_thread_storage_map( new_thr, new_gtid );
4294 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
4295 KF_TRACE( 10, (
"__kmp_allocate_thread: before th_serial/serial_team\n" ) );
4296 new_thr->th.th_serial_team = serial_team =
4297 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4305 0 USE_NESTED_HOT_ARG(NULL) );
4307 KMP_ASSERT ( serial_team );
4308 serial_team->t.t_serialized = 0;
4309 serial_team->t.t_threads[0] = new_thr;
4310 KF_TRACE( 10, (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4314 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4317 __kmp_initialize_fast_memory( new_thr );
4321 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
4322 __kmp_initialize_bget( new_thr );
4325 __kmp_init_random( new_thr );
4328 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4329 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4332 kmp_balign_t * balign = new_thr->th.th_bar;
4333 for(b=0; b<bs_last_barrier; ++b) {
4334 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4335 balign[b].bb.team = NULL;
4336 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4337 balign[b].bb.use_oncore_barrier = 0;
4340 new_thr->th.th_spin_here = FALSE;
4341 new_thr->th.th_next_waiting = 0;
4343 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4344 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4345 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4346 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4347 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4350 TCW_4(new_thr->th.th_in_pool, FALSE);
4351 new_thr->th.th_active_in_pool = FALSE;
4352 TCW_4(new_thr->th.th_active, TRUE);
4363 if ( __kmp_adjust_gtid_mode ) {
4364 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4365 if ( TCR_4(__kmp_gtid_mode) != 2) {
4366 TCW_4(__kmp_gtid_mode, 2);
4370 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4371 TCW_4(__kmp_gtid_mode, 1);
4376 #ifdef KMP_ADJUST_BLOCKTIME
4379 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4380 if ( __kmp_nth > __kmp_avail_proc ) {
4381 __kmp_zero_bt = TRUE;
4387 KF_TRACE( 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4388 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4389 KF_TRACE( 10, (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4392 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4407 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs,
ident_t *loc ) {
4408 KF_TRACE( 10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4409 team->t.t_threads[0], team ) );
4410 KMP_DEBUG_ASSERT( team && new_icvs);
4411 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
4412 team->t.t_ident = loc;
4414 team->t.t_id = KMP_GEN_TEAM_ID();
4417 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
4418 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4420 KF_TRACE( 10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4421 team->t.t_threads[0], team ) );
4429 __kmp_initialize_team(
4432 kmp_internal_control_t * new_icvs,
4435 KF_TRACE( 10, (
"__kmp_initialize_team: enter: team=%p\n", team ) );
4438 KMP_DEBUG_ASSERT( team );
4439 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4440 KMP_DEBUG_ASSERT( team->t.t_threads );
4443 team->t.t_master_tid = 0;
4445 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4446 team->t.t_nproc = new_nproc;
4449 team->t.t_next_pool = NULL;
4452 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4453 team->t.t_invoke = NULL;
4456 team->t.t_sched = new_icvs->sched;
4458 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4459 team->t.t_fp_control_saved = FALSE;
4460 team->t.t_x87_fpu_control_word = 0;
4461 team->t.t_mxcsr = 0;
4464 team->t.t_construct = 0;
4465 __kmp_init_lock( & team->t.t_single_lock );
4467 team->t.t_ordered .dt.t_value = 0;
4468 team->t.t_master_active = FALSE;
4470 memset( & team->t.t_taskq,
'\0',
sizeof( kmp_taskq_t ));
4473 team->t.t_copypriv_data = NULL;
4475 team->t.t_copyin_counter = 0;
4477 team->t.t_control_stack_top = NULL;
4479 __kmp_reinitialize_team( team, new_icvs, loc );
4482 KF_TRACE( 10, (
"__kmp_initialize_team: exit: team=%p\n", team ) );
4485 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4488 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4490 if ( KMP_AFFINITY_CAPABLE() ) {
4492 if ( old_mask != NULL ) {
4493 status = __kmp_get_system_affinity( old_mask, TRUE );
4495 if ( status != 0 ) {
4498 KMP_MSG( ChangeThreadAffMaskError ),
4504 __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
4509 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4518 __kmp_partition_places( kmp_team_t *team )
4523 kmp_info_t *master_th = team->t.t_threads[0];
4524 KMP_DEBUG_ASSERT( master_th != NULL );
4525 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4526 int first_place = master_th->th.th_first_place;
4527 int last_place = master_th->th.th_last_place;
4528 int masters_place = master_th->th.th_current_place;
4529 team->t.t_first_place = first_place;
4530 team->t.t_last_place = last_place;
4532 KA_TRACE( 20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4533 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4534 masters_place, first_place, last_place ) );
4536 switch ( proc_bind ) {
4538 case proc_bind_default:
4544 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4547 case proc_bind_master:
4550 int n_th = team->t.t_nproc;
4551 for ( f = 1; f < n_th; f++ ) {
4552 kmp_info_t *th = team->t.t_threads[f];
4553 KMP_DEBUG_ASSERT( th != NULL );
4554 th->th.th_first_place = first_place;
4555 th->th.th_last_place = last_place;
4556 th->th.th_new_place = masters_place;
4558 KA_TRACE( 100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4559 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4560 team->t.t_id, f, masters_place, first_place, last_place ) );
4565 case proc_bind_close:
4568 int n_th = team->t.t_nproc;
4570 if ( first_place <= last_place ) {
4571 n_places = last_place - first_place + 1;
4574 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4576 if ( n_th <= n_places ) {
4577 int place = masters_place;
4578 for ( f = 1; f < n_th; f++ ) {
4579 kmp_info_t *th = team->t.t_threads[f];
4580 KMP_DEBUG_ASSERT( th != NULL );
4582 if ( place == last_place ) {
4583 place = first_place;
4585 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4591 th->th.th_first_place = first_place;
4592 th->th.th_last_place = last_place;
4593 th->th.th_new_place = place;
4595 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4596 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4597 team->t.t_id, f, place, first_place, last_place ) );
4601 int S, rem, gap, s_count;
4602 S = n_th / n_places;
4604 rem = n_th - ( S * n_places );
4605 gap = rem > 0 ? n_places/rem : n_places;
4606 int place = masters_place;
4608 for ( f = 0; f < n_th; f++ ) {
4609 kmp_info_t *th = team->t.t_threads[f];
4610 KMP_DEBUG_ASSERT( th != NULL );
4612 th->th.th_first_place = first_place;
4613 th->th.th_last_place = last_place;
4614 th->th.th_new_place = place;
4617 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4620 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4622 if ( place == last_place ) {
4623 place = first_place;
4625 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4635 else if (s_count == S) {
4636 if ( place == last_place ) {
4637 place = first_place;
4639 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4649 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4650 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4651 team->t.t_id, f, th->th.th_new_place, first_place,
4654 KMP_DEBUG_ASSERT( place == masters_place );
4659 case proc_bind_spread:
4662 int n_th = team->t.t_nproc;
4664 if ( first_place <= last_place ) {
4665 n_places = last_place - first_place + 1;
4668 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4670 if ( n_th <= n_places ) {
4671 int place = masters_place;
4672 int S = n_places/n_th;
4673 int s_count, rem, gap, gap_ct;
4674 rem = n_places - n_th*S;
4675 gap = rem ? n_th/rem : 1;
4677 for ( f = 0; f < n_th; f++ ) {
4678 kmp_info_t *th = team->t.t_threads[f];
4679 KMP_DEBUG_ASSERT( th != NULL );
4681 th->th.th_first_place = place;
4682 th->th.th_new_place = place;
4684 while (s_count < S) {
4685 if ( place == last_place ) {
4686 place = first_place;
4688 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4696 if (rem && (gap_ct == gap)) {
4697 if ( place == last_place ) {
4698 place = first_place;
4700 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4709 th->th.th_last_place = place;
4712 if ( place == last_place ) {
4713 place = first_place;
4715 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4722 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4723 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4724 team->t.t_id, f, th->th.th_new_place,
4725 th->th.th_first_place, th->th.th_last_place ) );
4727 KMP_DEBUG_ASSERT( place == masters_place );
4730 int S, rem, gap, s_count;
4731 S = n_th / n_places;
4733 rem = n_th - ( S * n_places );
4734 gap = rem > 0 ? n_places/rem : n_places;
4735 int place = masters_place;
4737 for ( f = 0; f < n_th; f++ ) {
4738 kmp_info_t *th = team->t.t_threads[f];
4739 KMP_DEBUG_ASSERT( th != NULL );
4741 th->th.th_first_place = place;
4742 th->th.th_last_place = place;
4743 th->th.th_new_place = place;
4746 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4749 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4751 if ( place == last_place ) {
4752 place = first_place;
4754 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4764 else if (s_count == S) {
4765 if ( place == last_place ) {
4766 place = first_place;
4768 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4778 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4779 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4780 team->t.t_id, f, th->th.th_new_place,
4781 th->th.th_first_place, th->th.th_last_place) );
4783 KMP_DEBUG_ASSERT( place == masters_place );
4792 KA_TRACE( 20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4799 __kmp_allocate_team( kmp_root_t *root,
int new_nproc,
int max_nproc,
4801 ompt_parallel_id_t ompt_parallel_id,
4804 kmp_proc_bind_t new_proc_bind,
4806 kmp_internal_control_t *new_icvs,
4807 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
4814 int use_hot_team = ! root->r.r_active;
4817 KA_TRACE( 20, (
"__kmp_allocate_team: called\n"));
4818 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4819 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4822 #if KMP_NESTED_HOT_TEAMS
4823 kmp_hot_team_ptr_t *hot_teams;
4825 team = master->th.th_team;
4826 level = team->t.t_active_level;
4827 if( master->th.th_teams_microtask ) {
4828 if( master->th.th_teams_size.nteams > 1 && (
4829 team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
4830 master->th.th_teams_level < team->t.t_level ) ) {
4834 hot_teams = master->th.th_hot_teams;
4835 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4844 if( use_hot_team && new_nproc > 1 ) {
4845 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
4846 #if KMP_NESTED_HOT_TEAMS
4847 team = hot_teams[level].hot_team;
4849 team = root->r.r_hot_team;
4852 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4853 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4854 team->t.t_task_team[0], team->t.t_task_team[1] ));
4861 if (team->t.t_nproc == new_nproc) {
4862 KA_TRACE( 20, (
"__kmp_allocate_team: reusing hot team\n" ));
4865 if ( team->t.t_size_changed == -1 ) {
4866 team->t.t_size_changed = 1;
4868 team->t.t_size_changed = 0;
4872 team->t.t_sched = new_icvs->sched;
4874 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4876 KF_TRACE( 10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4877 0, team->t.t_threads[0], team ) );
4878 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4881 # if KMP_AFFINITY_SUPPORTED
4882 if ( ( team->t.t_size_changed == 0 )
4883 && ( team->t.t_proc_bind == new_proc_bind ) ) {
4884 KA_TRACE( 200, (
"__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4885 team->t.t_id, new_proc_bind, team->t.t_first_place,
4886 team->t.t_last_place ) );
4889 team->t.t_proc_bind = new_proc_bind;
4890 __kmp_partition_places( team );
4893 if ( team->t.t_proc_bind != new_proc_bind ) {
4894 team->t.t_proc_bind = new_proc_bind;
4899 else if( team->t.t_nproc > new_nproc ) {
4900 KA_TRACE( 20, (
"__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4902 team->t.t_size_changed = 1;
4903 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4907 for (tt_idx=0; tt_idx<2; ++tt_idx) {
4909 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
4910 if ( (task_team != NULL) && TCR_SYNC_4(task_team->tt.tt_active) ) {
4911 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
4912 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
4914 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team %p to NULL\n",
4915 &team->t.t_task_team[tt_idx]));
4916 team->t.t_task_team[tt_idx] = NULL;
4919 KMP_DEBUG_ASSERT( task_team == NULL );
4923 #if KMP_NESTED_HOT_TEAMS
4924 if( __kmp_hot_teams_mode == 0 ) {
4927 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4928 hot_teams[level].hot_team_nth = new_nproc;
4929 #endif // KMP_NESTED_HOT_TEAMS
4931 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4932 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4933 __kmp_free_thread( team->t.t_threads[ f ] );
4934 team->t.t_threads[ f ] = NULL;
4936 #if KMP_NESTED_HOT_TEAMS
4938 #endif // KMP_NESTED_HOT_TEAMS
4939 team->t.t_nproc = new_nproc;
4941 team->t.t_sched = new_icvs->sched;
4942 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4944 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4947 for (tt_idx=0; tt_idx<2; ++tt_idx) {
4948 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
4949 if ( task_team != NULL ) {
4950 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
4951 task_team->tt.tt_nproc = new_nproc;
4952 task_team->tt.tt_unfinished_threads = new_nproc;
4953 task_team->tt.tt_ref_ct = new_nproc - 1;
4960 team->t.t_threads[0]->th.th_team_nproc = new_nproc;
4961 for(f = 1; f < new_nproc; ++f) {
4962 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4963 team->t.t_threads[f]->th.th_task_state = 0;
4967 for(f = 0; f < new_nproc; ++f) {
4968 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4972 KF_TRACE( 10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4973 0, team->t.t_threads[0], team ) );
4975 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4978 for ( f = 0; f < team->t.t_nproc; f++ ) {
4979 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4980 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4985 team->t.t_proc_bind = new_proc_bind;
4986 # if KMP_AFFINITY_SUPPORTED
4987 __kmp_partition_places( team );
4992 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4993 kmp_affin_mask_t *old_mask;
4994 if ( KMP_AFFINITY_CAPABLE() ) {
4995 KMP_CPU_ALLOC(old_mask);
4999 KA_TRACE( 20, (
"__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
5001 team->t.t_size_changed = 1;
5004 #if KMP_NESTED_HOT_TEAMS
5005 int avail_threads = hot_teams[level].hot_team_nth;
5006 if( new_nproc < avail_threads )
5007 avail_threads = new_nproc;
5008 kmp_info_t **other_threads = team->t.t_threads;
5009 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
5013 kmp_balign_t * balign = other_threads[f]->th.th_bar;
5014 for ( b = 0; b < bs_last_barrier; ++ b ) {
5015 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5016 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5018 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5022 if( hot_teams[level].hot_team_nth >= new_nproc ) {
5025 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5026 team->t.t_nproc = new_nproc;
5029 team->t.t_nproc = hot_teams[level].hot_team_nth;
5030 hot_teams[level].hot_team_nth = new_nproc;
5031 #endif // KMP_NESTED_HOT_TEAMS
5032 if(team->t.t_max_nproc < new_nproc) {
5034 __kmp_reallocate_team_arrays(team, new_nproc);
5035 __kmp_reinitialize_team( team, new_icvs, NULL );
5038 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5045 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
5049 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
5050 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
5051 KMP_DEBUG_ASSERT( new_worker );
5052 team->t.t_threads[ f ] = new_worker;
5054 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
5055 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
5056 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5057 team->t.t_bar[bs_plain_barrier].b_arrived ) );
5061 kmp_balign_t * balign = new_worker->th.th_bar;
5062 for( b = 0; b < bs_last_barrier; ++ b ) {
5063 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5064 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5066 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5072 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5073 if ( KMP_AFFINITY_CAPABLE() ) {
5075 __kmp_set_system_affinity( old_mask, TRUE );
5076 KMP_CPU_FREE(old_mask);
5079 #if KMP_NESTED_HOT_TEAMS
5081 #endif // KMP_NESTED_HOT_TEAMS
5083 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
5085 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5089 for (tt_idx=0; tt_idx<2; ++tt_idx) {
5091 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5092 if ( (task_team != NULL) && TCR_SYNC_4(task_team->tt.tt_active) ) {
5093 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
5094 team->t.t_task_team[tt_idx] = NULL;
5100 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5102 int old_state = team->t.t_threads[0]->th.th_task_state;
5103 for (f=0; f < team->t.t_nproc; ++f)
5104 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5107 team->t.t_threads[0]->th.th_task_state = old_state;
5110 int old_state = team->t.t_threads[0]->th.th_task_state;
5111 for (f=0; f<team->t.t_nproc; ++f) {
5112 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5113 team->t.t_threads[f]->th.th_task_state = old_state;
5114 team->t.t_threads[f]->th.th_task_team = team->t.t_task_team[old_state];
5119 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5120 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5121 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5126 team->t.t_proc_bind = new_proc_bind;
5127 # if KMP_AFFINITY_SUPPORTED
5128 __kmp_partition_places( team );
5134 kmp_info_t *master = team->t.t_threads[0];
5135 if( master->th.th_teams_microtask ) {
5136 for( f = 1; f < new_nproc; ++f ) {
5138 kmp_info_t *thr = team->t.t_threads[f];
5139 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5140 thr->th.th_teams_level = master->th.th_teams_level;
5141 thr->th.th_teams_size = master->th.th_teams_size;
5145 #if KMP_NESTED_HOT_TEAMS
5148 for( f = 1; f < new_nproc; ++f ) {
5149 kmp_info_t *thr = team->t.t_threads[f];
5151 kmp_balign_t * balign = thr->th.th_bar;
5152 for( b = 0; b < bs_last_barrier; ++ b ) {
5153 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5154 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5156 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5161 #endif // KMP_NESTED_HOT_TEAMS
5164 __kmp_alloc_argv_entries( argc, team, TRUE );
5165 team->t.t_argc = argc;
5171 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5174 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5175 KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5176 team->t.t_task_team[0], team->t.t_task_team[1] ));
5181 __ompt_team_assign_id(team, ompt_parallel_id);
5191 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5194 if ( team->t.t_max_nproc >= max_nproc ) {
5196 __kmp_team_pool = team->t.t_next_pool;
5199 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5201 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5202 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5203 team->t.t_task_team[0] = NULL;
5204 team->t.t_task_team[1] = NULL;
5207 __kmp_alloc_argv_entries( argc, team, TRUE );
5208 team->t.t_argc = argc;
5210 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5211 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5214 for ( b = 0; b < bs_last_barrier; ++ b) {
5215 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5217 team->t.t_bar[ b ].b_master_arrived = 0;
5218 team->t.t_bar[ b ].b_team_arrived = 0;
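// A team taken from the pool keeps its arrays but not its synchronization
// state: each barrier slot is reset to KMP_INIT_BARRIER_STATE and the
// master/team arrival counters are cleared before the team is handed out.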
5224 team->t.t_proc_bind = new_proc_bind;
5227 KA_TRACE( 20, ( "__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
5230 __ompt_team_assign_id(team, ompt_parallel_id);
5241 team = __kmp_reap_team( team );
5242 __kmp_team_pool = team;
5247 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5250 team->t.t_max_nproc = max_nproc;
5254 __kmp_allocate_team_arrays( team, max_nproc );
5256 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
5257 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5259 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5260 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5261 team->t.t_task_team[0] = NULL;
5262 team->t.t_task_team[1] = NULL;
5264 if ( __kmp_storage_map ) {
5265 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5269 __kmp_alloc_argv_entries( argc, team, FALSE );
5270 team->t.t_argc = argc;
5272 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5273 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5276 for ( b = 0; b < bs_last_barrier; ++ b ) {
5277 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5279 team->t.t_bar[ b ].b_master_arrived = 0;
5280 team->t.t_bar[ b ].b_team_arrived = 0;
5286 team->t.t_proc_bind = new_proc_bind;
5290 __ompt_team_assign_id(team, ompt_parallel_id);
5291 team->t.ompt_serialized_team_info = NULL;
5296 KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5307 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
5310 KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5313 KMP_DEBUG_ASSERT( root );
5314 KMP_DEBUG_ASSERT( team );
5315 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5316 KMP_DEBUG_ASSERT( team->t.t_threads );
5318 int use_hot_team = team == root->r.r_hot_team;
5319 #if KMP_NESTED_HOT_TEAMS
5321 kmp_hot_team_ptr_t *hot_teams;
5323 level = team->t.t_active_level - 1;
5324 if( master->th.th_teams_microtask ) {
5325 if( master->th.th_teams_size.nteams > 1 ) {
5328 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5329 master->th.th_teams_level == team->t.t_level ) {
5333 hot_teams = master->th.th_hot_teams;
5334 if( level < __kmp_hot_teams_max_level ) {
5335 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5339 #endif // KMP_NESTED_HOT_TEAMS
5342 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5343 team->t.t_copyin_counter = 0;
5347 if( ! use_hot_team ) {
5348 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5350 for (tt_idx=0; tt_idx<2; ++tt_idx) {
5352 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5353 if ( task_team != NULL ) {
5357 KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
5358 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
5359 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
5361 team->t.t_task_team[tt_idx] = NULL;
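// For non-hot teams the task teams are deactivated (tt_active = FALSE) and
// detached here so threads no longer reference them once the team's workers
// are returned to the pool below.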
5367 team->t.t_parent = NULL;
5371 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5372 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5373 __kmp_free_thread( team->t.t_threads[ f ] );
5374 team->t.t_threads[ f ] = NULL;
5380 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
5381 __kmp_team_pool = (volatile kmp_team_t*) team;
5390 __kmp_reap_team( kmp_team_t *team )
5392 kmp_team_t *next_pool = team->t.t_next_pool;
5394 KMP_DEBUG_ASSERT( team );
5395 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5396 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5397 KMP_DEBUG_ASSERT( team->t.t_threads );
5398 KMP_DEBUG_ASSERT( team->t.t_argv );
5404 __kmp_free_team_arrays( team );
5405 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5406 __kmp_free( (void*) team->t.t_argv );
5441 __kmp_free_thread( kmp_info_t *this_th )
5446 KA_TRACE( 20, ( "__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5447 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5449 KMP_DEBUG_ASSERT( this_th );
5453 kmp_balign_t *balign = this_th->th.th_bar;
5454 for (b=0; b<bs_last_barrier; ++b) {
5455 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5456 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5457 balign[b].bb.team = NULL;
5462 TCW_PTR(this_th->th.th_team, NULL);
5463 TCW_PTR(this_th->th.th_root, NULL);
5464 TCW_PTR(this_th->th.th_dispatch, NULL);
5470 gtid = this_th->th.th_info.ds.ds_gtid;
5471 if ( __kmp_thread_pool_insert_pt != NULL ) {
5472 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5473 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5474 __kmp_thread_pool_insert_pt = NULL;
5485 if ( __kmp_thread_pool_insert_pt != NULL ) {
5486 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5489 scan = (kmp_info_t **)&__kmp_thread_pool;
5491 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5492 scan = &( (*scan)->th.th_next_pool ) );
5498 TCW_PTR(this_th->th.th_next_pool, *scan);
5499 __kmp_thread_pool_insert_pt = *scan = this_th;
5500 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5501 || ( this_th->th.th_info.ds.ds_gtid
5502 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5503 TCW_4(this_th->th.th_in_pool, TRUE);
5504 __kmp_thread_pool_nth++;
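// The free-thread pool is a singly linked list kept sorted by gtid;
// __kmp_thread_pool_insert_pt remembers the last insertion point so later
// insertions can start scanning from there instead of the list head.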
5506 TCW_4(__kmp_nth, __kmp_nth - 1);
5508 #ifdef KMP_ADJUST_BLOCKTIME
5511 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5512 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5513 if ( __kmp_nth <= __kmp_avail_proc ) {
5514 __kmp_zero_bt = FALSE;
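// With KMP_ADJUST_BLOCKTIME, once the active thread count drops back to or
// below the number of available processors, the forced-zero blocktime flag
// is cleared so spinning workers may again honor the configured blocktime.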
5526 __kmp_launch_thread( kmp_info_t *this_thr )
5528 int gtid = this_thr->th.th_info.ds.ds_gtid;
5530 kmp_team_t *(* volatile pteam);
5533 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d start\n", gtid ) );
5535 if( __kmp_env_consistency_check ) {
5536 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid );
5540 if (ompt_status & ompt_status_track) {
5541 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5542 this_thr->th.ompt_thread_info.wait_id = 0;
5543 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5544 if ((ompt_status == ompt_status_track_callback) &&
5545 ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5546 __ompt_thread_begin(ompt_thread_worker, gtid);
5552 while( ! TCR_4(__kmp_global.g.g_done) ) {
5553 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5557 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5560 if (ompt_status & ompt_status_track) {
5561 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5566 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
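// Worker main loop: the thread parks in __kmp_fork_barrier() until a master
// releases it with a team to run, invokes the team's microtask via
// (*pteam)->t.t_invoke, then meets the master again in __kmp_join_barrier()
// before waiting for the next parallel region (or shutdown).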
5569 if (ompt_status & ompt_status_track) {
5570 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5574 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5577 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
5579 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5581 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5582 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5584 updateHWFPControl (*pteam);
5587 if (ompt_status & ompt_status_track) {
5588 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5595 rc = (*pteam)->t.t_invoke( gtid );
5601 if (ompt_status & ompt_status_track) {
5603 int tid = __kmp_tid_from_gtid(gtid);
5604 (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
5606 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5610 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5611 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5614 __kmp_join_barrier( gtid );
5617 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5620 if ((ompt_status == ompt_status_track_callback) &&
5621 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5622 __ompt_thread_end(ompt_thread_worker, gtid);
5626 if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
5627 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
5630 __kmp_common_destroy_gtid( gtid );
5632 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d done\n", gtid ) );
5641 __kmp_internal_end_dest( void *specific_gtid )
5643 #if KMP_COMPILER_ICC
5644 #pragma warning( push )
5645 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5648 int gtid = (kmp_intptr_t)specific_gtid - 1;
5649 #if KMP_COMPILER_ICC
5650 #pragma warning( pop )
5653 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
5667 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5668 __kmp_gtid_set_specific( gtid );
5669 #ifdef KMP_TDATA_GTID
5672 __kmp_internal_end_thread( gtid );
5675 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5681 __attribute__(( destructor ))
5683 __kmp_internal_end_dtor( void )
5685 __kmp_internal_end_atexit();
5689 __kmp_internal_end_fini( void )
5691 __kmp_internal_end_atexit();
5698 __kmp_internal_end_atexit( void )
5700 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5722 __kmp_internal_end_library( -1 );
5724 __kmp_close_console();
5730 __kmp_reap_thread( kmp_info_t * thread, int is_root )
5738 KMP_DEBUG_ASSERT( thread != NULL );
5740 gtid = thread->th.th_info.ds.ds_gtid;
5744 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5746 KA_TRACE( 20, ( "__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5748 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5749 __kmp_release_64(&flag);
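// A worker parked on its fork-barrier go flag cannot notice shutdown on its
// own, so the reaper releases the flag explicitly before joining the thread
// in __kmp_reap_worker().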
5754 __kmp_reap_worker( thread );
5769 if ( thread->th.th_active_in_pool ) {
5770 thread->th.th_active_in_pool = FALSE;
5771 KMP_TEST_THEN_DEC32( (kmp_int32 *) &__kmp_thread_pool_active_nth );
5773 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5777 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5778 --__kmp_thread_pool_nth;
5783 __kmp_free_fast_memory( thread );
5786 __kmp_suspend_uninitialize_thread( thread );
5788 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5789 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5794 #ifdef KMP_ADJUST_BLOCKTIME
5797 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5798 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5799 if ( __kmp_nth <= __kmp_avail_proc ) {
5800 __kmp_zero_bt = FALSE;
5806 if( __kmp_env_consistency_check ) {
5807 if ( thread->th.th_cons ) {
5808 __kmp_free_cons_stack( thread->th.th_cons );
5809 thread->th.th_cons = NULL;
5813 if ( thread->th.th_pri_common != NULL ) {
5814 __kmp_free( thread->th.th_pri_common );
5815 thread->th.th_pri_common = NULL;
5818 if (thread->th.th_task_state_memo_stack != NULL) {
5819 __kmp_free(thread->th.th_task_state_memo_stack);
5820 thread->th.th_task_state_memo_stack = NULL;
5824 if ( thread->th.th_local.bget_data != NULL ) {
5825 __kmp_finalize_bget( thread );
5829 #if KMP_AFFINITY_SUPPORTED
5830 if ( thread->th.th_affin_mask != NULL ) {
5831 KMP_CPU_FREE( thread->th.th_affin_mask );
5832 thread->th.th_affin_mask = NULL;
5836 __kmp_reap_team( thread->th.th_serial_team );
5837 thread->th.th_serial_team = NULL;
5838 __kmp_free( thread );
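// __kmp_internal_end(): full runtime teardown. It unregisters the library,
// reclaims dead roots, reaps the monitor thread, then drains the thread and
// team pools and the task teams before marking the runtime uninitialized.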
5845 __kmp_internal_end( void )
5850 __kmp_unregister_library();
5858 __kmp_reclaim_dead_roots();
5861 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5863 if( __kmp_root[i]->r.r_active )
5866 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5868 if ( i < __kmp_threads_capacity ) {
5886 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5887 if ( TCR_4( __kmp_init_monitor ) ) {
5888 __kmp_reap_monitor( & __kmp_monitor );
5889 TCW_4( __kmp_init_monitor, 0 );
5891 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5892 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5897 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5898 if( __kmp_root[i] ) {
5900 KMP_ASSERT( ! __kmp_root[i]->r.r_active );
5909 while ( __kmp_thread_pool != NULL ) {
5911 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5912 __kmp_thread_pool = thread->th.th_next_pool;
5914 thread->th.th_next_pool = NULL;
5915 thread->th.th_in_pool = FALSE;
5916 __kmp_reap_thread( thread, 0 );
5918 __kmp_thread_pool_insert_pt = NULL;
5921 while ( __kmp_team_pool != NULL ) {
5923 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5924 __kmp_team_pool = team->t.t_next_pool;
5926 team->t.t_next_pool = NULL;
5927 __kmp_reap_team( team );
5930 __kmp_reap_task_teams( );
5932 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5939 TCW_SYNC_4(__kmp_init_common, FALSE);
5941 KA_TRACE( 10, ( "__kmp_internal_end: all workers reaped\n" ) );
5950 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5951 if ( TCR_4( __kmp_init_monitor ) ) {
5952 __kmp_reap_monitor( & __kmp_monitor );
5953 TCW_4( __kmp_init_monitor, 0 );
5955 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5956 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5959 TCW_4(__kmp_init_gtid, FALSE);
5970 __kmp_internal_end_library( int gtid_req )
5980 if( __kmp_global.g.g_abort ) {
5981 KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
5985 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5986 KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
5995 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5996 KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5997 if( gtid == KMP_GTID_SHUTDOWN ) {
5998 KA_TRACE( 10, ( "__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
6000 } else if( gtid == KMP_GTID_MONITOR ) {
6001 KA_TRACE( 10, ( "__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
6003 } else if( gtid == KMP_GTID_DNE ) {
6004 KA_TRACE( 10, ( "__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
6006 } else if( KMP_UBER_GTID( gtid )) {
6008 if( __kmp_root[gtid]->r.r_active ) {
6009 __kmp_global.g.g_abort = -1;
6010 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6011 KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
6014 KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
6015 __kmp_unregister_root_current_thread( gtid );
6022 #ifdef DUMP_DEBUG_ON_EXIT
6023 if ( __kmp_debug_buf )
6024 __kmp_dump_debug_buffer( );
6030 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6033 if( __kmp_global.g.g_abort ) {
6034 KA_TRACE( 10, ( "__kmp_internal_end_library: abort, exiting\n" ));
6036 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6039 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6040 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6050 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6053 __kmp_internal_end();
6055 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6056 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6058 KA_TRACE( 10, ( "__kmp_internal_end_library: exit\n" ) );
6060 #ifdef DUMP_DEBUG_ON_EXIT
6061 if ( __kmp_debug_buf )
6062 __kmp_dump_debug_buffer();
6066 __kmp_close_console();
6069 __kmp_fini_allocator();
6074 __kmp_internal_end_thread( int gtid_req )
6084 if( __kmp_global.g.g_abort ) {
6085 KA_TRACE( 11, ( "__kmp_internal_end_thread: abort, exiting\n" ));
6089 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6090 KA_TRACE( 10, ( "__kmp_internal_end_thread: already finished\n" ));
6098 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6099 KA_TRACE( 10, ( "__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6100 if( gtid == KMP_GTID_SHUTDOWN ) {
6101 KA_TRACE( 10, ( "__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6103 } else if( gtid == KMP_GTID_MONITOR ) {
6104 KA_TRACE( 10, ( "__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6106 } else if( gtid == KMP_GTID_DNE ) {
6107 KA_TRACE( 10, ( "__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6110 } else if( KMP_UBER_GTID( gtid )) {
6112 if( __kmp_root[gtid]->r.r_active ) {
6113 __kmp_global.g.g_abort = -1;
6114 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6115 KA_TRACE( 10, ( "__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6118 KA_TRACE( 10, ( "__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6119 __kmp_unregister_root_current_thread( gtid );
6123 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6126 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6127 if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
6128 __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
6132 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6136 #if defined KMP_DYNAMIC_LIB
6144 KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
6148 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6151 if( __kmp_global.g.g_abort ) {
6152 KA_TRACE( 10, ( "__kmp_internal_end_thread: abort, exiting\n" ));
6154 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6157 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6158 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6170 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6172 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6173 if ( KMP_UBER_GTID( i ) ) {
6174 KA_TRACE( 10, ( "__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6175 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6176 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6183 __kmp_internal_end();
6185 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6186 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6188 KA_TRACE( 10, ( "__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
6190 #ifdef DUMP_DEBUG_ON_EXIT
6191 if ( __kmp_debug_buf )
6192 __kmp_dump_debug_buffer();
6199 static long __kmp_registration_flag = 0;
6201 static char * __kmp_registration_str = NULL;
6207 __kmp_reg_status_name() {
6213 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6218 __kmp_register_library_startup( void )
6222 char * name = __kmp_reg_status_name();
6229 __kmp_initialize_system_tick();
6231 __kmp_read_system_time( & time.dtime );
6232 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
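// The registration value combines a 0xCAFE0000 tag with the low bits of the
// current time, producing a quasi-unique token that is published via an
// environment variable so a second copy of the runtime loaded into the same
// process can detect the first one (see the duplicate-library check below).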
6233 __kmp_registration_str =
6236 & __kmp_registration_flag,
6237 __kmp_registration_flag,
6241 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6245 char * value = NULL;
6248 __kmp_env_set( name, __kmp_registration_str, 0 );
6250 value = __kmp_env_get( name );
6251 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6260 char * tail = value;
6261 char * flag_addr_str = NULL;
6262 char * flag_val_str = NULL;
6263 char const * file_name = NULL;
6264 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6265 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6267 if ( tail != NULL ) {
6268 long * flag_addr = 0;
6270 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6271 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
6272 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6276 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6284 switch ( neighbor ) {
6289 file_name = "unknown library";
6293 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6294 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6298 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6299 KMP_HNT( DuplicateLibrary ),
6303 KMP_INTERNAL_FREE( duplicate_ok );
6304 __kmp_duplicate_library_ok = 1;
6309 __kmp_env_unset( name );
6312 KMP_DEBUG_ASSERT( 0 );
6317 KMP_INTERNAL_FREE( (void *) value );
6320 KMP_INTERNAL_FREE( (void *) name );
6326 __kmp_unregister_library( void ) {
6328 char * name = __kmp_reg_status_name();
6329 char * value = __kmp_env_get( name );
6331 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6332 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6333 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6335 __kmp_env_unset( name );
6338 KMP_INTERNAL_FREE( __kmp_registration_str );
6339 KMP_INTERNAL_FREE( value );
6340 KMP_INTERNAL_FREE( name );
6342 __kmp_registration_flag = 0;
6343 __kmp_registration_str = NULL;
6351 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6353 static void __kmp_check_mic_type()
6355 kmp_cpuid_t cpuid_state = {0};
6356 kmp_cpuid_t * cs_p = &cpuid_state;
6357 __kmp_x86_cpuid(1, 0, cs_p);
6359 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6360 __kmp_mic_type = mic2;
6361 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6362 __kmp_mic_type = mic3;
6364 __kmp_mic_type = non_mic;
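// CPUID-based detection: the masked EAX signature distinguishes MIC
// coprocessor generations (mic2/mic3) from ordinary hosts; later code uses
// __kmp_mic_type to pick barrier patterns and the reduction team-size cutoff.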
6373 __kmp_do_serial_initialize( void )
6378 KA_TRACE( 10, ( "__kmp_do_serial_initialize: enter\n" ) );
6380 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6381 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6382 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6383 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6384 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6386 __kmp_validate_locks();
6389 __kmp_init_allocator();
6395 __kmp_register_library_startup( );
6398 if( TCR_4(__kmp_global.g.g_done) ) {
6399 KA_TRACE( 10, ( "__kmp_do_serial_initialize: reinitialization of library\n" ) );
6402 __kmp_global.g.g_abort = 0;
6403 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6406 #if KMP_USE_ADAPTIVE_LOCKS
6407 #if KMP_DEBUG_ADAPTIVE_LOCKS
6408 __kmp_init_speculative_stats();
6411 __kmp_init_lock( & __kmp_global_lock );
6412 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6413 __kmp_init_lock( & __kmp_debug_lock );
6414 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6415 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6416 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6417 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6418 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6419 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6420 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6421 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6422 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6423 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6424 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6425 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6426 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6427 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6428 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6429 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6430 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6434 __kmp_runtime_initialize();
6436 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6437 __kmp_check_mic_type();
6444 __kmp_abort_delay = 0;
6448 __kmp_dflt_team_nth_ub = __kmp_xproc;
6449 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6450 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6452 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6453 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6455 __kmp_max_nth = __kmp_sys_max_nth;
6458 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6459 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6460 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6462 __kmp_library = library_throughput;
6464 __kmp_static = kmp_sch_static_balanced;
6470 #if KMP_FAST_REDUCTION_BARRIER
6471 #define kmp_reduction_barrier_gather_bb ((int)1)
6472 #define kmp_reduction_barrier_release_bb ((int)1)
6473 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6474 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6475 #endif // KMP_FAST_REDUCTION_BARRIER
6476 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6477 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6478 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6479 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6480 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6481 #if KMP_FAST_REDUCTION_BARRIER
6482 if( i == bs_reduction_barrier ) {
6483 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6484 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6485 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6486 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6488 #endif // KMP_FAST_REDUCTION_BARRIER
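// Every barrier type starts from the global branch-bit and pattern defaults;
// the reduction barrier (when KMP_FAST_REDUCTION_BARRIER is enabled) is then
// overridden with the tighter constants defined just above.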
6490 #if KMP_FAST_REDUCTION_BARRIER
6491 #undef kmp_reduction_barrier_release_pat
6492 #undef kmp_reduction_barrier_gather_pat
6493 #undef kmp_reduction_barrier_release_bb
6494 #undef kmp_reduction_barrier_gather_bb
6495 #endif // KMP_FAST_REDUCTION_BARRIER
6496 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6497 if( __kmp_mic_type != non_mic ) {
6499 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;
6500 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;
6501 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6502 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6504 #if KMP_FAST_REDUCTION_BARRIER
6505 if( __kmp_mic_type != non_mic ) {
6506 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6507 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6514 __kmp_env_checks = TRUE;
6516 __kmp_env_checks = FALSE;
6520 __kmp_foreign_tp = TRUE;
6522 __kmp_global.g.g_dynamic = FALSE;
6523 __kmp_global.g.g_dynamic_mode = dynamic_default;
6525 __kmp_env_initialize( NULL );
6530 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6531 if ( __kmp_str_match_true( val ) ) {
6532 kmp_str_buf_t buffer;
6533 __kmp_str_buf_init( & buffer );
6534 __kmp_i18n_dump_catalog( & buffer );
6535 __kmp_printf( "%s", buffer.str );
6536 __kmp_str_buf_free( & buffer );
6538 __kmp_env_free( & val );
6541 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
6543 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6548 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6549 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6550 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6551 __kmp_thread_pool = NULL;
6552 __kmp_thread_pool_insert_pt = NULL;
6553 __kmp_team_pool = NULL;
6558 size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * __kmp_threads_capacity + CACHE_LINE;
6559 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6560 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6563 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
6564 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
6569 gtid = __kmp_register_root( TRUE );
6570 KA_TRACE( 10, ( "__kmp_do_serial_initialize T#%d\n", gtid ));
6571 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6572 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6576 __kmp_common_initialize();
6580 __kmp_register_atfork();
6583 #if ! defined KMP_DYNAMIC_LIB
6588 int rc = atexit( __kmp_internal_end_atexit );
6590 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6595 #if KMP_HANDLE_SIGNALS
6602 __kmp_install_signals( FALSE );
6605 __kmp_install_signals( TRUE );
6610 __kmp_init_counter ++;
6612 __kmp_init_serial = TRUE;
6614 if (__kmp_settings) {
6619 if (__kmp_display_env || __kmp_display_env_verbose) {
6620 __kmp_env_print_2();
6622 #endif // OMP_40_ENABLED
6626 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
6633 __kmp_serial_initialize( void )
6635 if ( __kmp_init_serial ) {
6638 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6639 if ( __kmp_init_serial ) {
6640 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6643 __kmp_do_serial_initialize();
6644 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6648 __kmp_do_middle_initialize( void )
6651 int prev_dflt_team_nth;
6653 if( !__kmp_init_serial ) {
6654 __kmp_do_serial_initialize();
6657 KA_TRACE( 10, ( "__kmp_middle_initialize: enter\n" ) );
6663 prev_dflt_team_nth = __kmp_dflt_team_nth;
6665 #if KMP_AFFINITY_SUPPORTED
6670 __kmp_affinity_initialize();
6676 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6677 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6678 __kmp_affinity_set_init_mask( i, TRUE );
6683 KMP_ASSERT( __kmp_xproc > 0 );
6684 if ( __kmp_avail_proc == 0 ) {
6685 __kmp_avail_proc = __kmp_xproc;
6690 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
6691 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6695 if ( __kmp_dflt_team_nth == 0 ) {
6696 #ifdef KMP_DFLT_NTH_CORES
6700 __kmp_dflt_team_nth = __kmp_ncores;
6701 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6702 __kmp_dflt_team_nth ) );
6707 __kmp_dflt_team_nth = __kmp_avail_proc;
6708 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6709 __kmp_dflt_team_nth ) );
6713 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6714 __kmp_dflt_team_nth = KMP_MIN_NTH;
6716 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6717 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6724 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6726 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6733 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6734 kmp_info_t *thread = __kmp_threads[ i ];
6735 if ( thread == NULL ) continue;
6736 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
6738 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
6741 KA_TRACE( 20, ( "__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6742 __kmp_dflt_team_nth) );
6744 #ifdef KMP_ADJUST_BLOCKTIME
6747 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6748 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6749 if ( __kmp_nth > __kmp_avail_proc ) {
6750 __kmp_zero_bt = TRUE;
6756 TCW_SYNC_4(__kmp_init_middle, TRUE);
6758 KA_TRACE( 10, ( "__kmp_do_middle_initialize: exit\n" ) );
6762 __kmp_middle_initialize( void )
6764 if ( __kmp_init_middle ) {
6767 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6768 if ( __kmp_init_middle ) {
6769 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6772 __kmp_do_middle_initialize();
6773 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6780 __kmp_parallel_initialize( void )
6782 int gtid = __kmp_entry_gtid();
6785 if( TCR_4(__kmp_init_parallel) ) return;
6786 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6787 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6790 if( TCR_4(__kmp_global.g.g_done) ) {
6791 KA_TRACE( 10, ( "__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6792 __kmp_infinite_loop();
6798 if( !__kmp_init_middle ) {
6799 __kmp_do_middle_initialize();
6803 KA_TRACE( 10, ( "__kmp_parallel_initialize: enter\n" ) );
6804 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6806 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6811 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6812 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6813 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
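// The initial x87 control word and MXCSR are captured (and masked) here so
// that a consistent floating-point environment can later be restored via
// updateHWFPControl() before workers invoke user code.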
6817 # if KMP_HANDLE_SIGNALS
6819 __kmp_install_signals( TRUE );
6823 __kmp_suspend_initialize();
6825 # if defined(USE_LOAD_BALANCE)
6826 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6827 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6830 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6831 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6835 if ( __kmp_version ) {
6836 __kmp_print_version_2();
6840 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6843 KA_TRACE( 10, ( "__kmp_parallel_initialize: exit\n" ) );
6845 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6855 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team )
6858 kmp_disp_t *dispatch;
6863 this_thr->th.th_local.this_construct = 0;
6864 #if KMP_CACHE_MANAGE
6865 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
6867 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6868 KMP_DEBUG_ASSERT( dispatch );
6869 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6872 dispatch->th_disp_index = 0;
6874 if( __kmp_env_consistency_check )
6875 __kmp_push_parallel( gtid, team->t.t_ident );
6881 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team )
6884 if( __kmp_env_consistency_check )
6885 __kmp_pop_parallel( gtid, team->t.t_ident );
6889 __kmp_invoke_task_func( int gtid )
6892 int tid = __kmp_tid_from_gtid( gtid );
6893 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6894 kmp_team_t *team = this_thr->th.th_team;
6896 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6898 if ( __itt_stack_caller_create_ptr ) {
6899 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
6902 #if INCLUDE_SSC_MARKS
6903 SSC_MARK_INVOKING();
6908 void **exit_runtime_p;
6909 ompt_task_id_t my_task_id;
6910 ompt_parallel_id_t my_parallel_id;
6912 if (ompt_status & ompt_status_track) {
6913 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6914 ompt_task_info.frame.exit_runtime_frame);
6916 exit_runtime_p = &dummy;
6920 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6921 my_parallel_id = team->t.ompt_team_info.parallel_id;
6922 if ((ompt_status == ompt_status_track_callback) &&
6923 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6924 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6925 my_parallel_id, my_task_id);
6930 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6931 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
6937 #if OMPT_SUPPORT && OMPT_TRACE
6938 if (ompt_status & ompt_status_track) {
6939 if ((ompt_status == ompt_status_track_callback) &&
6940 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
6941 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
6942 my_parallel_id, my_task_id);
6945 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
6950 if ( __itt_stack_caller_create_ptr ) {
6951 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
6954 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6961 __kmp_teams_master( int gtid )
6964 kmp_info_t *thr = __kmp_threads[ gtid ];
6965 kmp_team_t *team = thr->th.th_team;
6966 ident_t *loc = team->t.t_ident;
6967 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6968 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6969 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
6970 KA_TRACE( 20, ( "__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
6971 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
6974 #if INCLUDE_SSC_MARKS
6977 __kmp_fork_call( loc, gtid, fork_context_intel,
6980 (void *)thr->th.th_teams_microtask,
6982 (microtask_t)thr->th.th_teams_microtask,
6983 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6985 #if INCLUDE_SSC_MARKS
6988 __kmp_join_call( loc, gtid, 1 );
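// __kmp_teams_master() runs on the master of each team in the league: it
// forks an inner parallel region of th_teams_size.nth threads around the
// user's teams microtask and joins it immediately afterwards.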
6993 __kmp_invoke_teams_master( int gtid )
6995 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6996 kmp_team_t *team = this_thr->th.th_team;
6998 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6999 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
7001 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
7002 __kmp_teams_master( gtid );
7003 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
7014 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
7016 kmp_info_t *thr = __kmp_threads[gtid];
7018 if( num_threads > 0 )
7019 thr->th.th_set_nproc = num_threads;
7027 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
7029 kmp_info_t *thr = __kmp_threads[gtid];
7030 KMP_DEBUG_ASSERT(num_teams >= 0);
7031 KMP_DEBUG_ASSERT(num_threads >= 0);
7032 if( num_teams == 0 ) {
7036 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7039 if( num_threads > 0 ) {
7040 thr->th.th_teams_size.nth = num_threads;
7042 if( !TCR_4(__kmp_init_middle) )
7043 __kmp_middle_initialize();
7044 thr->th.th_teams_size.nth = __kmp_avail_proc / num_teams;
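// If the user gave no thread count for the teams construct, each team is
// sized to an equal share of the available processors (__kmp_avail_proc
// divided by the number of teams).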
7053 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7055 kmp_info_t *thr = __kmp_threads[gtid];
7056 thr->th.th_set_proc_bind = proc_bind;
7064 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7066 kmp_info_t *this_thr = __kmp_threads[gtid];
7072 KMP_DEBUG_ASSERT( team );
7073 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7074 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7077 team->t.t_construct = 0;
7078 team->t.t_ordered.dt.t_value = 0;
7081 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
7082 if ( team->t.t_max_nproc > 1 ) {
7084 for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
7085 team->t.t_disp_buffer[ i ].buffer_index = i;
7087 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
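// Teams with more than one thread initialize all KMP_MAX_DISP_BUF dispatch
// buffers, each tagged with its own buffer_index; a single-thread team only
// needs the first buffer.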
7091 KMP_ASSERT( this_thr->th.th_team == team );
7094 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7095 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7096 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7101 __kmp_fork_barrier( gtid, 0 );
7106 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7108 kmp_info_t *this_thr = __kmp_threads[gtid];
7110 KMP_DEBUG_ASSERT( team );
7111 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7112 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7118 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7119 __kmp_printf( "GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7120 __kmp_printf( "__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7121 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7122 __kmp_print_structure();
7124 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7125 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7128 __kmp_join_barrier( gtid );
7131 KMP_ASSERT( this_thr->th.th_team == team );
7138 #ifdef USE_LOAD_BALANCE
7145 __kmp_active_hot_team_nproc( kmp_root_t *root )
7149 kmp_team_t *hot_team;
7151 if ( root->r.r_active ) {
7154 hot_team = root->r.r_hot_team;
7155 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7156 return hot_team->t.t_nproc - 1;
7163 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7164 if ( hot_team->t.t_threads[i]->th.th_active ) {
7176 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7180 int hot_team_active;
7181 int team_curr_active;
7184 KB_TRACE( 20, ( "__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7185 root, set_nproc ) );
7186 KMP_DEBUG_ASSERT( root );
7187 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
7188 KMP_DEBUG_ASSERT( set_nproc > 1 );
7190 if ( set_nproc == 1) {
7191 KB_TRACE( 20, ( "__kmp_load_balance_nproc: serial execution.\n" ) );
7202 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7203 hot_team_active = __kmp_active_hot_team_nproc( root );
7204 team_curr_active = pool_active + hot_team_active + 1;
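// team_curr_active counts this root's currently active threads: the active
// threads still counted in the pool, the active part of the hot team, plus
// the master itself; it is the baseline compared against the system load.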
7209 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7210 KB_TRACE( 30, ( "__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7211 system_active, pool_active, hot_team_active ) );
7213 if ( system_active < 0 ) {
7220 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7221 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7226 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7227 : root->r.r_hot_team->t.t_nproc);
7228 if ( retval > set_nproc ) {
7231 if ( retval < KMP_MIN_NTH ) {
7232 retval = KMP_MIN_NTH;
7235 KB_TRACE( 20, ( "__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7245 if ( system_active < team_curr_active ) {
7246 system_active = team_curr_active;
7248 retval = __kmp_avail_proc - system_active + team_curr_active;
7249 if ( retval > set_nproc ) {
7252 if ( retval < KMP_MIN_NTH ) {
7253 retval = KMP_MIN_NTH;
7256 KB_TRACE( 20, ( "__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7268 __kmp_cleanup( void )
7272 KA_TRACE( 10, ( "__kmp_cleanup: enter\n" ) );
7274 if (TCR_4(__kmp_init_parallel)) {
7275 #if KMP_HANDLE_SIGNALS
7276 __kmp_remove_signals();
7278 TCW_4(__kmp_init_parallel, FALSE);
7281 if (TCR_4(__kmp_init_middle)) {
7282 #if KMP_AFFINITY_SUPPORTED
7283 __kmp_affinity_uninitialize();
7285 __kmp_cleanup_hierarchy();
7286 TCW_4(__kmp_init_middle, FALSE);
7289 KA_TRACE( 10, ( "__kmp_cleanup: go serial cleanup\n" ) );
7291 if (__kmp_init_serial) {
7293 __kmp_runtime_destroy();
7295 __kmp_init_serial = FALSE;
7298 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7299 if ( __kmp_root[ f ] != NULL ) {
7300 __kmp_free( __kmp_root[ f ] );
7301 __kmp_root[ f ] = NULL;
7304 __kmp_free( __kmp_threads );
7307 __kmp_threads = NULL;
7309 __kmp_threads_capacity = 0;
7311 #if KMP_USE_DYNAMIC_LOCK
7312 __kmp_cleanup_indirect_user_locks();
7314 __kmp_cleanup_user_locks();
7317 #if KMP_AFFINITY_SUPPORTED
7318 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7319 __kmp_cpuinfo_file = NULL;
7322 #if KMP_USE_ADAPTIVE_LOCKS
7323 #if KMP_DEBUG_ADAPTIVE_LOCKS
7324 __kmp_print_speculative_stats();
7327 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7328 __kmp_nested_nth.nth = NULL;
7329 __kmp_nested_nth.size = 0;
7330 __kmp_nested_nth.used = 0;
7332 __kmp_i18n_catclose();
7334 #if KMP_STATS_ENABLED
7335 __kmp_accumulate_stats_at_exit();
7336 __kmp_stats_list.deallocate();
7339 KA_TRACE( 10, ( "__kmp_cleanup: exit\n" ) );
7346 __kmp_ignore_mppbeg( void )
7350 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7351 if (__kmp_str_match_false( env ))
7359 __kmp_ignore_mppend( void )
7363 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7364 if (__kmp_str_match_false( env ))
7372 __kmp_internal_begin( void )
7379 gtid = __kmp_entry_gtid();
7380 root = __kmp_threads[ gtid ]->th.th_root;
7381 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7383 if( root->r.r_begin ) return;
7384 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7385 if( root->r.r_begin ) {
7386 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7390 root->r.r_begin = TRUE;
7392 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7400 __kmp_user_set_library ( enum library_type arg)
7408 gtid = __kmp_entry_gtid();
7409 thread = __kmp_threads[ gtid ];
7411 root = thread->th.th_root;
7413 KA_TRACE( 20, ( "__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7414 if (root->r.r_in_parallel) {
7415 KMP_WARNING( SetLibraryIncorrectCall );
7420 case library_serial :
7421 thread->th.th_set_nproc = 0;
7422 set__nproc( thread, 1 );
7424 case library_turnaround :
7425 thread->th.th_set_nproc = 0;
7426 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7428 case library_throughput :
7429 thread->th.th_set_nproc = 0;
7430 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7433 KMP_FATAL( UnknownLibraryType, arg );
7436 __kmp_aux_set_library ( arg );
7440 __kmp_aux_set_stacksize( size_t arg )
7442 if (! __kmp_init_serial)
7443 __kmp_serial_initialize();
7446 if (arg & (0x1000 - 1)) {
7447 arg &= ~(0x1000 - 1);
7452 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7455 if (! TCR_4(__kmp_init_parallel)) {
7458 if (value < __kmp_sys_min_stksize )
7459 value = __kmp_sys_min_stksize ;
7460 else if (value > KMP_MAX_STKSIZE)
7461 value = KMP_MAX_STKSIZE;
7463 __kmp_stksize = value;
7465 __kmp_env_stksize = TRUE;
7468 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7474 __kmp_aux_set_library ( enum library_type arg)
7476 __kmp_library = arg;
7478 switch ( __kmp_library ) {
7479 case library_serial :
7481 KMP_INFORM( LibraryIsSerial );
7482 (void) __kmp_change_library( TRUE );
7485 case library_turnaround :
7486 (void) __kmp_change_library( TRUE );
7488 case library_throughput :
7489 (void) __kmp_change_library( FALSE );
7492 KMP_FATAL( UnknownLibraryType, arg );
7500 __kmp_aux_set_blocktime ( int arg, kmp_info_t *thread, int tid)
7502 int blocktime = arg;
7506 __kmp_save_internal_controls( thread );
7509 if (blocktime < KMP_MIN_BLOCKTIME)
7510 blocktime = KMP_MIN_BLOCKTIME;
7511 else if (blocktime > KMP_MAX_BLOCKTIME)
7512 blocktime = KMP_MAX_BLOCKTIME;
7514 set__blocktime_team( thread->th.th_team, tid, blocktime );
7515 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
7518 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7520 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7521 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
7526 set__bt_set_team( thread->th.th_team, tid, bt_set );
7527 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
7528 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7529 __kmp_gtid_from_tid(tid, thread->th.th_team),
7530 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7534 __kmp_aux_set_defaults( char const * str, int len )
7538 if ( ! __kmp_init_serial ) {
7539 __kmp_serial_initialize();
7541 __kmp_env_initialize( str );
7545 || __kmp_display_env || __kmp_display_env_verbose
7558 PACKED_REDUCTION_METHOD_T
7559 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7560 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7561 kmp_critical_name *lck )
7569 PACKED_REDUCTION_METHOD_T retval;
7573 int teamsize_cutoff = 4;
7575 KMP_DEBUG_ASSERT( loc );
7576 KMP_DEBUG_ASSERT( lck );
7578 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7579 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7581 retval = critical_reduce_block;
7583 team_size = __kmp_get_team_num_threads( global_tid );
7585 if( team_size == 1 ) {
7587 retval = empty_reduce_block;
7591 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7592 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
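// Method selection below: when the compiler generated reduce_data and
// reduce_func a tree reduction (with the dedicated reduction barrier) is
// preferred for teams above the cutoff, atomics are used for small teams or
// few variables, and the critical-section block remains the fallback.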
7594 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
7596 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7597 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7598 if( __kmp_mic_type != non_mic ) {
7599 teamsize_cutoff = 8;
7602 if( tree_available ) {
7603 if( team_size <= teamsize_cutoff ) {
7604 if ( atomic_available ) {
7605 retval = atomic_reduce_block;
7608 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7610 } else if ( atomic_available ) {
7611 retval = atomic_reduce_block;
7614 #error "Unknown or unsupported OS"
7615 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7617 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
7619 #if KMP_OS_LINUX || KMP_OS_WINDOWS
7623 if( atomic_available ) {
7624 if( num_vars <= 2 ) {
7625 retval = atomic_reduce_block;
7631 if( atomic_available && ( num_vars <= 3 ) ) {
7632 retval = atomic_reduce_block;
7633 } else if( tree_available ) {
7634 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7635 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7640 #error "Unknown or unsupported OS"
7644 #error "Unknown or unsupported architecture"
7653 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
7655 PACKED_REDUCTION_METHOD_T forced_retval;
7657 int atomic_available, tree_available;
7659 switch( ( forced_retval = __kmp_force_reduction_method ) )
7661 case critical_reduce_block:
7665 case atomic_reduce_block:
7666 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7667 KMP_ASSERT( atomic_available );
7670 case tree_reduce_block:
7671 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7672 KMP_ASSERT( tree_available );
7673 #if KMP_FAST_REDUCTION_BARRIER
7674 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7682 retval = forced_retval;
7685 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7687 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7688 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7695 __kmp_get_reduce_method( void ) {
7696 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );