#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h>
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffLL;
    static const unsigned long long mn = 0x0000000000000000LL;
};
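// Illustration (not part of the runtime): i_maxmin< T >::mx / ::mn serve the same purpose as
// std::numeric_limits< T >::max() / ::min() -- they are used further down, in
// __kmp_dist_get_bounds(), to saturate an upper bound that wrapped around during chunk
// arithmetic. A minimal standalone sketch of that use, with invented names:
#if 0
#include <cstdio>
#include <limits>

template< typename T >
static void saturate_on_wrap( T lower, T &upper, bool ascending ) {
    if ( ascending && upper < lower )
        upper = std::numeric_limits< T >::max();   // plays the role of i_maxmin< T >::mx
    else if ( !ascending && upper > lower )
        upper = std::numeric_limits< T >::min();   // plays the role of i_maxmin< T >::mn
}

int main() {
    int lo = 100, hi = -50;            // hi "wrapped" below lo
    saturate_on_wrap( lo, hi, true );
    std::printf( "%d\n", hi );         // prints 2147483647
    return 0;
}
#endif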
#ifdef KMP_STATIC_STEAL_ENABLED

// replaces the dispatch_private_info{32,64} structures
template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    UT count;                 // current chunk index (unsigned)
    T  ub, lb;                // loop bounds
    ST st;                    // signed stride
    UT tc;                    // unsigned trip count
    T  static_steal_counter;  // for static_steal only; incremented when a buffer is re-used
    struct KMP_ALIGN( 32 ) {  // keeps parm1-4 in one cache line, since they are used together
        T parm1, parm2, parm3, parm4;
    };
    UT ordered_lower, ordered_upper;
#if KMP_OS_WINDOWS
    T  last_upper;
#endif
};

#else /* KMP_STATIC_STEAL_ENABLED */

// same layout, minus the steal counter
template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    T  lb, ub;
    ST st;
    UT tc;
    T  parm1, parm2, parm3, parm4;
    UT count;
    UT ordered_lower, ordered_upper;
#if KMP_OS_WINDOWS
    T  last_upper;
#endif
};

#endif /* KMP_STATIC_STEAL_ENABLED */
template< typename T >
struct KMP_ALIGN_CACHE dispatch_private_info_template {
    // duplicate the alignment here so the structure size matches dispatch_private_info
    union KMP_ALIGN_CACHE private_info_tmpl {
        dispatch_private_infoXX_template< T > p;
        dispatch_private_info64_t             p64;
    } u;
    enum sched_type schedule;   /* scheduling algorithm */
    kmp_uint32      ordered;    /* ordered clause specified */
    kmp_uint32      ordered_bumped;
    kmp_int32       ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size
    dispatch_private_info * next;   /* stack of buffers for nest of serial regions */
    kmp_uint32      nomerge;    /* don't merge iters if serialized */
    kmp_uint32      type_size;
    enum cons_type  pushed_ws;
};
// replaces the dispatch_shared_info{32,64} structures
template< typename UT >
struct dispatch_shared_infoXX_template {
    /* chunk index under dynamic, number of idle threads under static-steal;
       iteration index otherwise */
    volatile UT     iteration;
    volatile UT     num_done;
    volatile UT     ordered_iteration;
    UT              ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size
};

// replaces the dispatch_shared_info structure
template< typename UT >
struct dispatch_shared_info_template {
    // we need a union here to keep the structure size
    union shared_info_tmpl {
        dispatch_shared_infoXX_template< UT >  s;
        dispatch_shared_info64_t               s64;
    } u;
    volatile kmp_uint32     buffer_index;
};
#undef USE_TEST_LOCKS

// test_then_add template (the general template should NOT be used)
template< typename T >
static __forceinline T
test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_ADD32( p, d );
    return r;
}

template<>
__forceinline kmp_int64
test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_ADD64( p, d );
    return r;
}

// test_then_inc_acq template (the general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC_ACQ32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC_ACQ64( p );
    return r;
}

// test_then_inc template (the general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc( volatile T *p ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC64( p );
    return r;
}

// compare_and_swap template (the general template should NOT be used)
template< typename T >
static __forceinline kmp_int32
compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
{
    return KMP_COMPARE_AND_STORE_REL32( p, c, s );
}

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
{
    return KMP_COMPARE_AND_STORE_REL64( p, c, s );
}
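// Illustration (not part of the runtime): the pattern above routes a typed call onto the matching
// 32- or 64-bit atomic primitive, while the unspecialized template asserts so that any unsupported
// width fails loudly. A minimal standalone sketch of the same dispatch pattern, using std::atomic
// instead of the KMP_* macros (all names here are invented for the sketch):
#if 0
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdio>

template< typename T >
T fetch_then_add( std::atomic<T> *p, T d ) { assert(0 && "unsupported width"); return T(); }

template<>
int32_t fetch_then_add< int32_t >( std::atomic<int32_t> *p, int32_t d ) {
    return p->fetch_add( d );      // stands in for KMP_TEST_THEN_ADD32
}

template<>
int64_t fetch_then_add< int64_t >( std::atomic<int64_t> *p, int64_t d ) {
    return p->fetch_add( d );      // stands in for KMP_TEST_THEN_ADD64
}

int main() {
    std::atomic<int32_t> counter{ 10 };
    int32_t old = fetch_then_add< int32_t >( &counter, 5 );
    std::printf( "old=%d new=%d\n", old, counter.load() );  // old=10 new=15
    return 0;
}
#endif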
template< typename UT >
static UT          // unsigned 4- or 8-byte type
__kmp_wait_yield( volatile UT * spinner,
                  UT            checker,
                  kmp_uint32 (* pred)( UT, UT )
                  USE_ITT_BUILD_ARG( void * obj )  // higher-level synchronization object, or NULL
                )
{
    // note: we may not belong to a team at this point
    register volatile UT         * spin  = spinner;
    register          UT           check = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( UT, UT ) = pred;
    register          UT           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while ( !f(r = *spin, check) )
    {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
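// Illustration (not part of the runtime): the spin/yield loop above, stripped of the ITT and
// yield macros, behaves like the standalone sketch below; all names here are invented:
#if 0
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

static uint32_t ge_u32( uint32_t value, uint32_t checker ) { return value >= checker; }

static uint32_t wait_until( std::atomic<uint32_t> *spinner, uint32_t checker,
                            uint32_t (*pred)( uint32_t, uint32_t ) ) {
    uint32_t r;
    while ( !pred( r = spinner->load(), checker ) )
        std::this_thread::yield();          // plays the role of KMP_YIELD / KMP_YIELD_SPIN
    return r;
}

int main() {
    std::atomic<uint32_t> flag{ 0 };
    std::thread producer( [&]{ flag.store( 7 ); } );
    uint32_t seen = wait_until( &flag, 5, ge_u32 );   // returns once flag >= 5
    producer.join();
    std::printf( "seen=%u\n", seen );
    return 0;
}
#endif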
template< typename UT >
static kmp_uint32 __kmp_eq( UT value, UT checker) {
    return value == checker;
}

template< typename UT >
static kmp_uint32 __kmp_neq( UT value, UT checker) {
    return value != checker;
}

template< typename UT >
static kmp_uint32 __kmp_lt( UT value, UT checker) {
    return value < checker;
}

template< typename UT >
static kmp_uint32 __kmp_ge( UT value, UT checker) {
    return value >= checker;
}

template< typename UT >
static kmp_uint32 __kmp_le( UT value, UT checker) {
    return value <= checker;
}
static void
__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( gtid_ref );

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_root -> r.r_active
          && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t    ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_sh_current );
        UT lower;

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                    ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }
        lower = pr->u.p.ordered_lower;

#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ] );
            }
        }
#endif /* !KMP_GOMP_COMPAT */

#ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
#endif

        __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                USE_ITT_BUILD_ARG( NULL ) );

#ifdef KMP_DEBUG
        {
            const char * buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
#endif
    }
    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
}
static void
__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
            __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t    ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_sh_current );

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                    ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }

        KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped != 0 ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ] );
            }
        }
#endif /* !KMP_GOMP_COMPAT */

        pr->ordered_bumped += 1;

        KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
                        gtid, pr->ordered_bumped ) );

        /* release the ordered section to the next iteration */
        test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
    }
    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
}
/* Computes and returns x to the power of y, where y must be a non-negative integer. */
template< typename UT >
static __forceinline long double
__kmp_pow( long double x, UT y ) {
    long double s = 1.0L;

    KMP_DEBUG_ASSERT( x > 0.0 && x < 1.0 );
    // exponentiation by squaring; y is unsigned
    while ( y ) {
        if ( y & 1 )
            s *= x;
        x *= x;
        y >>= 1;
    }
    return s;
}

/* Computes and returns the number of unassigned iterations after idx chunks have been
   assigned (the total number of unassigned iterations in chunks with index >= idx). */
template< typename T >
static __inline typename traits_t< T >::unsigned_t
__kmp_dispatch_guided_remaining(
    T                                  tc,
    typename traits_t< T >::floating_t base,
    typename traits_t< T >::unsigned_t idx
) {
    typedef typename traits_t< T >::unsigned_t  UT;

    long double x = tc * __kmp_pow< UT >( base, idx );
    UT r = (UT) x;
    if ( x == r )
        return r;
    return r + 1;
}

// Parameters of the guided-iterative algorithm:
//   parm2 = n * nproc * ( chunk + 1 )   // point of switching to dynamic
//   parm3 = 1 / ( n * nproc )           // remaining-iterations multiplier
// By default n = 2 (guided_int_param); guided_flt_param = 1.0 / guided_int_param.
static int    guided_int_param = 2;
static double guided_flt_param = 0.5;
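// Worked note (comment only, no new behavior): each guided chunk takes a fixed fraction of the
// remaining work, so after idx chunks the unassigned iterations follow a geometric decay:
//     remaining(idx) = ceil( tc * base^idx ),  with  base = 1 - guided_flt_param / nproc,
// which is exactly what __kmp_dispatch_guided_remaining() evaluates via __kmp_pow().
// Example: tc = 1000, nproc = 4, base = 1 - 0.5/4 = 0.875  =>  remaining(8) = ceil(1000 * 0.875^8) = 344.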
template< typename T >
static void
__kmp_dispatch_init(
    ident_t                        * loc,
    int                              gtid,
    enum sched_type                  schedule,
    T                                lb,
    T                                ub,
    typename traits_t< T >::signed_t st,
    typename traits_t< T >::signed_t chunk,
    int                              push_ws )
{
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );

    int               active;
    T                 tc;
    kmp_info_t *      th;
    kmp_team_t *      team;
    kmp_uint32        my_buffer_index;
    dispatch_private_info_template< T >          * pr;
    dispatch_shared_info_template< UT > volatile * sh;

    KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
    KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
            traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
        __kmp_str_free( &buff );
    }
#endif
    /* setup data */
    th     = __kmp_threads[ gtid ];
    team   = th -> th.th_team;
    active = ! team -> t.t_serialized;
    th->th.th_ident = loc;

#if USE_ITT_BUILD
    kmp_uint64 cur_chunk = chunk;
    int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1;
#endif
    if ( ! active ) {
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
    } else {
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        my_buffer_index = th->th.th_dispatch->th_disp_index ++;

        /* What happens when the number of threads changes; does the buffer need resizing? */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
        sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
            ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
    }
    pr->type_size = ___kmp_size_type;  // remember the size of variables
    if ( kmp_ord_lower & schedule ) {
        pr->ordered = TRUE;
        schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
    } else {
        pr->ordered = FALSE;
    }

    if ( schedule == kmp_sch_static ) {
        schedule = __kmp_static;
    } else {
        if ( schedule == kmp_sch_runtime ) {
            // Use the schedule specified by OMP_SCHEDULE (or the default if not specified)
            schedule = team -> t.t_sched.r_sched_type;
            // Detail the schedule if needed
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            } else if ( schedule == kmp_sch_static ) {
                schedule = __kmp_static;
            }
            // Use the chunk size specified by OMP_SCHEDULE (or the default if not specified)
            chunk = team -> t.t_sched.chunk;

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
                    traits_t< ST >::spec );
                KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
                __kmp_str_free( &buff );
            }
#endif
        } else {
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            }
            if ( chunk <= 0 ) {
                chunk = KMP_DEFAULT_CHUNK;
            }
        }

        if ( schedule == kmp_sch_auto ) {
            // the mapping is determined in __kmp_do_serial_initialize()
            schedule = __kmp_auto;
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
                    traits_t< ST >::spec );
                KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
                __kmp_str_free( &buff );
            }
#endif
        }

        /* guided analytical is not safe for too many threads */
        if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
            schedule = kmp_sch_guided_iterative_chunked;
            KMP_WARNING( DispatchManyThreads );
        }
        pr->u.p.parm1 = chunk;
    }
    KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper),
                 "unknown scheduling type" );

    if ( __kmp_env_consistency_check ) {
        if ( st == 0 ) {
            __kmp_error_construct(
                kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc );
        }
    }
    /* compute trip count */
    if ( st == 1 ) {   // most common case
        tc = ( ub - lb + st );
    } else if ( ub < lb ) {
        tc = 0;        // zero-trip
    }
    // ... (general stride cases elided; pr->u.p.lb/ub/st/tc are recorded here)
#if KMP_OS_WINDOWS
    pr->u.p.last_upper = ub + st;
#endif /* KMP_OS_WINDOWS */

    /* set up the ordered / unordered dispatch hooks */
    if ( pr->ordered == 0 ) {
        th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
        th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
    } else {
        pr->ordered_bumped = 0;

        pr->u.p.ordered_lower = 1;
        pr->u.p.ordered_upper = 0;

        th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
        th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
    }

    if ( __kmp_env_consistency_check ) {
        enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
        if ( push_ws ) {
            __kmp_push_workshare( gtid, ws, loc );
            pr->pushed_ws = ws;
        } else {
            __kmp_check_workshare( gtid, ws, loc );
            pr->pushed_ws = ct_none;
        }
    }
    /* initialize the scheduling-specific parameters */
    switch ( schedule ) {
#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
    case kmp_sch_static_steal:
        {
            T nproc = team->t.t_nproc;
            T ntc, init;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );

            ntc = (tc % chunk ? 1 : 0) + tc / chunk;
            if ( nproc > 1 && ntc >= nproc ) {
                T id = __kmp_tid_from_gtid(gtid);
                T small_chunk, extras;

                small_chunk = ntc / nproc;
                extras = ntc % nproc;

                init = id * small_chunk + ( id < extras ? id : extras );
                pr->u.p.count = init;
                pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );

                pr->u.p.parm2 = lb;
                pr->u.p.parm4 = id;
                pr->u.p.st = st;
                break;
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
                               gtid ) );
                schedule = kmp_sch_static_balanced;
                /* too few iterations: fall through to kmp_sch_static_balanced */
            }
        } // case
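        // Worked example (comment only, no new behavior): with tc = 103 iterations, chunk = 10 and
        // nproc = 4 threads, ntc = 11 chunks, small_chunk = 2, extras = 3, so threads 0..3 start at
        // chunk index 0, 3, 6, 9 and initially own 3, 3, 3 and 2 chunks respectively; later,
        // finished threads steal from the tail of other threads' ranges (see __kmp_dispatch_next).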
    case kmp_sch_static_balanced:
        {
            T nproc = team->t.t_nproc;
            T init, limit;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n", gtid ) );

            if ( nproc > 1 ) {
                T id = __kmp_tid_from_gtid(gtid);

                if ( tc < nproc ) {
                    if ( id < tc ) {
                        init = id;
                        limit = id;
                        pr->u.p.parm1 = ( id == tc - 1 );  /* parm1 stores *plastiter */
                    } else {
                        pr->u.p.count = 1;                 /* no more chunks to execute */
                        pr->u.p.parm1 = FALSE;
                        break;
                    }
                } else {
                    T small_chunk = tc / nproc;
                    T extras = tc % nproc;
                    init = id * small_chunk + ( id < extras ? id : extras);
                    limit = init + small_chunk - ( id < extras ? 0 : 1);
                    pr->u.p.parm1 = ( id == nproc - 1);
                }
            } else {
                if ( tc > 0 ) {
                    init = 0;
                    limit = tc - 1;
                    pr->u.p.parm1 = TRUE;
                } else {
                    // zero trip count
                    pr->u.p.count = 1;                     /* no more chunks to execute */
                    pr->u.p.parm1 = FALSE;
                    break;
                }
            }
#if USE_ITT_BUILD
            // calculate the chunk for the metadata report
            if ( itt_need_metadata_reporting )
                cur_chunk = limit - init + 1;
#endif
            if ( st == 1 ) {
                pr->u.p.lb = lb + init;
                pr->u.p.ub = lb + limit;
            } else {
                T ub_tmp = lb + limit * st;   // calculated upper bound; "ub" is the user-defined upper bound
                pr->u.p.lb = lb + init * st;
                // adjust the upper bound to "ub" if needed, so that lastprivate matches it exactly
                if ( st > 0 ) {
                    pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
                } else {
                    pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
                }
            }
            if ( pr->ordered ) {
                pr->u.p.ordered_lower = init;
                pr->u.p.ordered_upper = limit;
            }
            break;
        } // case
    case kmp_sch_guided_iterative_chunked :
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    // when the remaining iterations drop below parm2, switch to dynamic
                    pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
                    *(double*)&pr->u.p.parm3 = guided_flt_param / nproc;   // may occupy parm3 and parm4
                }
            } else {
                KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1: fall through to kmp_sch_static_greedy */
                KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
                pr->u.p.parm1 = tc;
            }
        }
        break;
    case kmp_sch_guided_analytical_chunked:
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    DBL x;

#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* On Windows* OS on IA-32 architecture, long double arithmetic defaults to
                       53-bit precision; the solver below wants 64-bit, so save the original FP
                       control word and raise the precision. It is restored further down. */
                    unsigned int oldFpcw = _control87(0,0);
                    _control87(_PC_64,_MCW_PC);
#endif
                    /* value used for comparison in the solver for the cross-over point */
                    long double target = ((long double)chunk * 2 + 1) * nproc / tc;

                    /* crossover point: chunk indexes >= this point use dynamic-style scheduling */
                    UT cross;

                    /* commonly used term: (1 - 1/(2*nproc)) */
                    x = (long double)1.0 - (long double)0.5 / nproc;

#ifdef KMP_DEBUG
                    { // test natural alignment of parm3
                        struct _test_a {
                            char a;
                            union { char b; DBL d; };
                        } t;
                        ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
                        KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
                    }
#endif
                    /* save the term in the thread-private dispatch structure */
                    *(DBL*)&pr->u.p.parm3 = x;

                    /* solve for the crossover point to the nearest integer */
                    {
                        UT          left, right, mid;
                        long double p;

                        /* estimate initial upper and lower bounds */
                        right = 229;
                        p = __kmp_pow< UT >(x,right);
                        if ( p > target ) {
                            do {
                                p *= p;
                                right <<= 1;
                            } while(p>target && right < (1<<27));
                            left = right >> 1;  /* lower bound is the previous (failed) upper-bound estimate */
                        } else {
                            left = 0;
                        }

                        /* bisection root-finding method */
                        while ( left + 1 < right ) {
                            mid = (left + right) / 2;
                            if ( __kmp_pow< UT >(x,mid) > target ) {
                                left = mid;
                            } else {
                                right = mid;
                            }
                        }
                        cross = right;
                    }
                    /* assert sanity of the computed crossover point */
                    KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);

                    /* save the crossover point in the thread-private dispatch structure */
                    pr->u.p.parm2 = cross;
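                    // Note (comment only): "cross" is the smallest chunk index k satisfying
                    //     (1 - 1/(2*nproc))^k <= (2*chunk + 1) * nproc / tc,
                    // i.e. the point at which the analytically shrinking guided chunks are no larger
                    // than the user chunk; from that index on the schedule behaves like dynamic,chunk.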
                    /* compute the number of iterations the first "cross" chunks execute */
#if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
#define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
#else
#define GUIDED_ANALYTICAL_WORKAROUND (x)
#endif
                    /* dynamic-style scheduling offset */
                    pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* restore the FP control word */
                    _control87(oldFpcw,_MCW_PC);
#endif
                }
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
                               gtid ) );
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1: fall through to kmp_sch_static_greedy */
                pr->u.p.parm1 = tc;
            }
        }
        break;
    case kmp_sch_static_greedy:
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
        pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
            ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
            tc;
        break;
    case kmp_sch_static_chunked :
    case kmp_sch_dynamic_chunked :
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
        break;
    case kmp_sch_trapezoidal :
        {
            /* TSS: trapezoid self-scheduling, minimum chunk size = parm1 */

            T parm1, parm2, parm3, parm4;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );

            parm1 = chunk;

            /* F : size of the first cycle */
            parm2 = ( tc / (2 * team->t.t_nproc) );

            if ( parm2 < 1 ) {
                parm2 = 1;
            }

            /* L : size of the last cycle.  Make sure the last cycle
                   is not larger than the first cycle. */
            if ( parm1 < 1 ) {
                parm1 = 1;
            } else if ( parm1 > parm2 ) {
                parm1 = parm2;
            }

            /* N : number of cycles */
            parm3 = ( parm2 + parm1 );
            parm3 = ( 2 * tc + parm3 - 1) / parm3;

            if ( parm3 < 2 ) {
                parm3 = 2;
            }

            /* sigma : decreasing increment of the trapezoid */
            parm4 = ( parm3 - 1 );
            parm4 = ( parm2 - parm1 ) / parm4;

            pr->u.p.parm1 = parm1;
            pr->u.p.parm2 = parm2;
            pr->u.p.parm3 = parm3;
            pr->u.p.parm4 = parm4;
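            // Worked note (comment only): chunk sizes shrink linearly from the first-cycle size
            // F = parm2 toward the last-cycle size L = parm1 over N = parm3 steps of
            // sigma = parm4 = (F - L)/(N - 1); N = ceil(2*tc/(F + L)) so that the decreasing
            // arithmetic series starting at F covers all tc iterations.
            // Example: tc = 1000, nproc = 4, chunk = 1  =>  F = 125, L = 1, N = 16, sigma = 8.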
        }
        break;
    default:
        {
            __kmp_msg(
                kmp_ms_fatal,
                KMP_MSG( UnknownSchedTypeDetected ),
                KMP_HNT( GetNewerLibrary ),
                __kmp_msg_null );
        }
        break;
    } // switch
    pr->schedule = schedule;

    if ( active ) {
        /* The buffer named my_buffer_index must be free before it can be used */

        KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );
        __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
                                        USE_ITT_BUILD_ARG( NULL ) );
        // Note: buffer_index and my_buffer_index are *always* 32-bit integers.
        KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );

        th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
        th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;
#if USE_ITT_BUILD
        if ( pr->ordered ) {
            __kmp_itt_ordered_init( gtid );
        }
        // report loop metadata (only by the master of an active team at level 1)
        if ( itt_need_metadata_reporting ) {
            kmp_uint64 schedtype = 0;
            switch ( schedule ) {
            case kmp_sch_static_chunked:
            case kmp_sch_static_balanced:   // chunk was calculated in the switch above
                break;
            case kmp_sch_static_greedy:
                cur_chunk = pr->u.p.parm1;
                break;
            case kmp_sch_dynamic_chunked:
                schedtype = 1;
                break;
            case kmp_sch_guided_iterative_chunked:
            case kmp_sch_guided_analytical_chunked:
                schedtype = 2;
                break;
            default:
                schedtype = 3;
                break;
            }
            __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
        }
#endif /* USE_ITT_BUILD */
    }

#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
            " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
            " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
            traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
            traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
        KD_TRACE(10, ( buff,
            gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
            pr->u.p.st, pr->u.p.tc, pr->u.p.count,
            pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
            pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
        __kmp_str_free( &buff );
    }
#endif
#if ( KMP_STATIC_STEAL_ENABLED )
    if ( ___kmp_size_type < 8 ) {
        // A dedicated flag is required because loops with other schedule kinds may leave
        // arbitrary values in parm3. 'static_steal_counter' signals to other threads that
        // this buffer is ready to be stolen from.
        if ( schedule == kmp_sch_static_steal ) {
            volatile T * p = &pr->u.p.static_steal_counter;
            *p = *p + 1;
        }
    }
#endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )

#if OMPT_SUPPORT && OMPT_TRACE
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif
}
/* For ordered loops, either __kmp_dispatch_finish() should be called after every iteration, or
   __kmp_dispatch_finish_chunk() after every chunk of iterations.  If the ordered section(s) were
   not executed for this iteration (or chunk), the ordered iteration counters must still be bumped
   so that the next thread can proceed. */
template< typename UT >
static void
__kmp_dispatch_finish( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        if ( pr->ordered_bumped ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
            {
                const char * buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
#endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL) );

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
#endif

            test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
}
#ifdef KMP_GOMP_COMPAT

template< typename UT >
static void
__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        UT lower = pr->u.p.ordered_lower;
        UT upper = pr->u.p.ordered_upper;
        UT inc = upper - lower + 1;

        if ( pr->ordered_bumped == inc ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
                    "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
                __kmp_str_free( &buff );
            }
#endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL) );

            KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
                    "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
                __kmp_str_free( &buff );
            }
#endif

            test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
}

#endif /* KMP_GOMP_COMPAT */
#if OMPT_SUPPORT && OMPT_TRACE
#define OMPT_LOOP_END                                                       \
    if (status == 0) {                                                      \
        if ((ompt_status == ompt_status_track_callback) &&                  \
            ompt_callbacks.ompt_callback(ompt_event_loop_end)) {            \
            ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);     \
            ompt_task_info_t *task_info = __ompt_get_taskinfo(0);           \
            ompt_callbacks.ompt_callback(ompt_event_loop_end)(              \
                team_info->parallel_id, task_info->task_id);                \
        }                                                                   \
    }
#else
#define OMPT_LOOP_END // no-op
#endif
template< typename T >
static int
__kmp_dispatch_next(
    ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );

    int                                   status;
    dispatch_private_info_template< T > * pr;
    kmp_info_t                          * th   = __kmp_threads[ gtid ];
    kmp_team_t                          * team = th -> th.th_team;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st ); // these cannot be NULL
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
        __kmp_str_free( &buff );
    }
#endif

    if ( team -> t.t_serialized ) {
        /* NOTE: serialize this dispatch because we are not at the active level */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
        KMP_DEBUG_ASSERT( pr );

        if ( (status = (pr->u.p.tc != 0)) == 0 ) {
            *p_lb = 0;
            *p_ub = 0;
            if ( p_st != NULL )
                *p_st = 0;
            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }
        } else if ( pr->nomerge ) {
            kmp_int32 last;
            T         start;
            UT        limit, trip, init;
            ST        incr;
            T         chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );

            init = chunk * pr->u.p.count++;
            trip = pr->u.p.tc - 1;

            if ( (status = (init <= trip)) == 0 ) {
                *p_lb = 0;
                *p_ub = 0;
                if ( p_st != NULL )
                    *p_st = 0;
                if ( __kmp_env_consistency_check ) {
                    if ( pr->pushed_ws != ct_none ) {
                        pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                    }
                }
            } else {
                start = pr->u.p.lb;
                limit = chunk + init - 1;
                incr  = pr->u.p.st;

                if ( (last = (limit >= trip)) != 0 ) {
                    limit = trip;
#if KMP_OS_WINDOWS
                    pr->u.p.last_upper = pr->u.p.ub;
#endif /* KMP_OS_WINDOWS */
                }
                if ( p_last != NULL )
                    *p_last = last;
                if ( p_st != NULL )
                    *p_st = incr;
                if ( incr == 1 ) {
                    *p_lb = start + init;
                    *p_ub = start + limit;
                } else {
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;
                }

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                    {
                        const char * buff;
                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
                    }
#endif
                }
            }
        } else {
            pr->u.p.tc = 0;
            *p_lb = pr->u.p.lb;
            *p_ub = pr->u.p.ub;
#if KMP_OS_WINDOWS
            pr->u.p.last_upper = *p_ub;
#endif /* KMP_OS_WINDOWS */
            if ( p_last != NULL )
                *p_last = TRUE;
            if ( p_st != NULL )
                *p_st = pr->u.p.st;
        }
#ifdef KMP_DEBUG
        {
            const char * buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
                "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) );
            __kmp_str_free( &buff );
        }
#endif
#if INCLUDE_SSC_MARKS
        SSC_MARK_DISPATCH_NEXT();
#endif
        OMPT_LOOP_END;
        return status;
    } else {
        kmp_int32 last = 0;
        dispatch_shared_info_template< UT > *sh;
        T   start;
        ST  incr;
        UT  limit, trip, init;

        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        KMP_DEBUG_ASSERT( pr );
        sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( sh );

        if ( pr->u.p.tc == 0 ) {
            // zero trip count
            status = 0;
        } else {
            switch (pr->schedule) {
#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            case kmp_sch_static_steal:
                {
                    T chunk = pr->u.p.parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );

                    trip = pr->u.p.tc - 1;

                    if ( ___kmp_size_type > 4 ) {
                        // Other threads do not look into the data of this thread,
                        // so volatile casting is not necessary here.
                        init   = ( pr->u.p.count )++;
                        status = ( init < (UT)pr->u.p.ub );
                    } else {
                        typedef union {
                            struct {
                                UT count;
                                T  ub;
                            } p;
                            kmp_int64 b;
                        } union_i4;
                        // All operations on 'count' and 'ub' must be combined atomically together;
                        // stealing is implemented for 4-byte indexes only.
                        {
                            union_i4 vold, vnew;
                            vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                            vnew = vold;
                            vnew.p.count++;
                            while( ! KMP_COMPARE_AND_STORE_ACQ64(
                                        ( volatile kmp_int64* )&pr->u.p.count,
                                        *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                        *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                                KMP_CPU_PAUSE();
                                vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                                vnew = vold;
                                vnew.p.count++;
                            }
                            init   = vnew.p.count;
                            status = ( init < (UT)vnew.p.ub ) ;
                        }

                        if( !status ) {
                            // this thread ran out of chunks; try to steal from a victim
                            kmp_info_t **other_threads = team->t.t_threads;
                            int while_limit = 10;
                            int while_index = 0;

                            while ( ( !status ) && ( while_limit != ++while_index ) ) {
                                union_i4  vold, vnew;
                                kmp_int32 remaining;  // kmp_int32 because stealing is 4-byte only
                                T         victimIdx    = pr->u.p.parm4;
                                T         oldVictimIdx = victimIdx;
                                dispatch_private_info_template< T > * victim;

                                do {
                                    if( !victimIdx ) {
                                        victimIdx = team->t.t_nproc - 1;
                                    } else {
                                        --victimIdx;
                                    }
                                    victim = reinterpret_cast< dispatch_private_info_template< T >* >
                                        ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
                                } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );
                                if( victim == NULL ||
                                    ( (*( volatile T * )&victim->u.p.static_steal_counter) !=
                                      (*( volatile T * )&pr->u.p.static_steal_counter) ) ) {
                                    // the victim is not ready to participate in stealing yet,
                                    // because it is still inside __kmp_dispatch_init()
                                    continue;
                                }
                                if ( oldVictimIdx == victimIdx ) {
                                    break;
                                }
                                pr->u.p.parm4 = victimIdx;

                                while( 1 ) {
                                    vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );
                                    vnew = vold;

                                    KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
                                    if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
                                        break;  // too few chunks left to be worth stealing
                                    }
                                    vnew.p.ub -= (remaining >> 2);
                                    KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                                    #pragma warning( push )
                                    // disable warning on pointless comparison of unsigned with 0
                                    #pragma warning( disable: 186 )
                                    KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
                                    #pragma warning( pop )
                                    if ( KMP_COMPARE_AND_STORE_ACQ64(
                                            ( volatile kmp_int64 * )&victim->u.p.count,
                                            *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                            *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                                        // stealing succeeded; now update own count and ub
                                        status = 1;
                                        while_index = 0;
                                        init = vold.p.count;
#if KMP_ARCH_X86
                                        // atomic 64-bit write is unavailable on IA-32, so do it in steps
                                        pr->u.p.count = init + 1;
                                        pr->u.p.ub = vnew.p.count;
#else
                                        vold.p.count = init + 1;
                                        *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
#endif // KMP_ARCH_X86
                                        break;
                                    }
                                    KMP_CPU_PAUSE();
                                } // while (1)
                            } // while
                        }
                    }
                    if ( !status ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.parm2;
                        init *= chunk;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        KMP_DEBUG_ASSERT(init <= trip);
                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;
                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    }
                    break;
                } // case
#endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            case kmp_sch_static_balanced:
                {
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
                    if ( (status = !pr->u.p.count) != 0 ) {  /* check if the thread has any iterations to do */
                        pr->u.p.count = 1;
                        *p_lb = pr->u.p.lb;
                        *p_ub = pr->u.p.ub;
                        last  = pr->u.p.parm1;
                        if ( p_st != NULL )
                            *p_st = pr->u.p.st;
                    } else {  /* no iterations to do */
                        pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
                    }
                    if ( pr->ordered ) {
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    }
                }
                break;
            case kmp_sch_static_greedy:
            case kmp_sch_static_chunked:
                {
                    T parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
                                   gtid ) );
                    parm1 = pr->u.p.parm1;

                    trip  = pr->u.p.tc - 1;
                    init  = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

                    if ( (status = (init <= trip)) != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        limit = parm1 + init - 1;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        pr->u.p.count += team->t.t_nproc;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    }
                }
                break;
            case kmp_sch_dynamic_chunked:
                {
                    T chunk = pr->u.p.parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                                   gtid ) );

                    init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                    trip = pr->u.p.tc - 1;

                    if ( (status = (init <= trip)) == 0 ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    }
                }
                break;
            case kmp_sch_guided_iterative_chunked:
                {
                    T chunkspec = pr->u.p.parm1;
                    KD_TRACE(100,
                        ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
                    trip = pr->u.p.tc;
                    // start atomic part of computations
                    while(1) {
                        ST remaining;              // signed, because it can be < 0
                        init = sh->u.s.iteration;  // shared value
                        remaining = trip - init;
                        if ( remaining <= 0 ) {
                            // nothing to do, don't try the atomic op
                            status = 0;
                            break;
                        }
                        if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default
                            // use dynamic-style scheduling: atomically increment iterations, get the old value
                            init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
                            remaining = trip - init;
                            if (remaining <= 0) {
                                status = 0;   // all iterations got taken by other threads
                            } else {
                                // got some iterations to work on
                                status = 1;
                                if ( (T)remaining > chunkspec ) {
                                    limit = init + chunkspec - 1;
                                } else {
                                    last = 1;   // the last chunk
                                    limit = init + remaining - 1;
                                }
                            }
                            break;
                        }
                        limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc
                        if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
                            // CAS was successful, chunk obtained
                            status = 1;
                            --limit;
                            break;
                        }
                    } // while
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        if ( p_st != NULL )
                            *p_st = incr;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    } else {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL )
                            *p_st = 0;
                    }
                }
                break;
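                // Worked note (comment only): while more than parm2 = 2*nproc*(chunk+1) iterations
                // remain, each grab takes remaining * parm3 = remaining/(2*nproc) iterations via CAS
                // on sh->u.s.iteration; once the remainder drops below parm2 the schedule degenerates
                // to plain dynamic,chunk using an atomic fetch-and-add.
                // Example: trip = 1000, nproc = 4, chunk = 1: successive chunk sizes start around
                // 1000/8 = 125, then 875/8 = 109, 95, 83, ... until fewer than parm2 = 16 remain.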
            case kmp_sch_guided_analytical_chunked:
                {
                    T  chunkspec = pr->u.p.parm1;
                    UT chunkIdx;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* for storing the original FPCW value (Windows* OS on IA-32 architecture only) */
                    unsigned int oldFpcw;
                    unsigned int fpcwSet = 0;
#endif
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
                                   gtid ) );

                    trip = pr->u.p.tc;

                    KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
                    KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip);

                    while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */
                        chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                        if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
                            --trip;
                            /* use dynamic-style scheduling */
                            init = chunkIdx * chunkspec + pr->u.p.count;
                            /* need to verify init > 0 in case of overflow in the above calculation */
                            if ( (status = (init > 0 && init <= trip)) != 0 ) {
                                limit = init + chunkspec -1;

                                if ( (last = (limit >= trip)) != 0 )
                                    limit = trip;
                            }
                            break;
                        } else {
                            /* use exponential-style scheduling */
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                            /* if not already done, save the original FPCW and set precision to 64-bit */
                            if ( !fpcwSet ) {
                                oldFpcw = _control87(0,0);
                                _control87(_PC_64,_MCW_PC);
                                fpcwSet = 0x30000;
                            }
#endif
                            if ( chunkIdx ) {
                                init = __kmp_dispatch_guided_remaining< T >(
                                           trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
                                KMP_DEBUG_ASSERT(init);
                                init = trip - init;
                            } else
                                init = 0;
                            limit = trip - __kmp_dispatch_guided_remaining< T >(
                                        trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
                            KMP_ASSERT(init <= limit);
                            if ( init < limit ) {
                                KMP_DEBUG_ASSERT(limit <= trip);
                                --limit;
                                status = 1;
                                break;
                            }
                        }
                    } // while (1)
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* restore FPCW if necessary; check fpcwSet first because oldFpcw may be uninitialized */
                    if ( fpcwSet && ( oldFpcw & fpcwSet ) )
                        _control87(oldFpcw,_MCW_PC);
#endif
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        if ( p_st != NULL )
                            *p_st = incr;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    } else {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL )
                            *p_st = 0;
                    }
                }
                break;
            case kmp_sch_trapezoidal:
                {
                    UT index;
                    T  parm2 = pr->u.p.parm2;
                    T  parm3 = pr->u.p.parm3;
                    T  parm4 = pr->u.p.parm4;
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                                   gtid ) );

                    index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );

                    init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
                    trip = pr->u.p.tc - 1;

                    if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    }
                }
                break;
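                // Worked note (comment only): chunk "index" has size parm2 - index*parm4, so its
                // first iteration is the partial sum
                //     init(index) = index*parm2 - parm4*(0 + 1 + ... + (index-1))
                //                 = ( index * (2*parm2 - (index-1)*parm4) ) / 2,
                // and limit(index) = init(index+1) - 1, which is exactly what is computed above.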
            default:
                {
                    status = 0;
                    __kmp_msg(
                        kmp_ms_fatal,
                        KMP_MSG( UnknownSchedTypeDetected ),
                        KMP_HNT( GetNewerLibrary ),
                        __kmp_msg_null );
                }
                break;
            } // switch
        } // if (tc == 0)

        if ( status == 0 ) {
            UT num_done;

            num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
                    traits_t< UT >::spec );
                KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
                __kmp_str_free( &buff );
            }
#endif

            if ( (ST)num_done == team->t.t_nproc-1 ) {
                /* NOTE: release this buffer to be reused */

                KMP_MB();       /* Flush all pending memory write invalidates. */

                sh->u.s.num_done = 0;
                sh->u.s.iteration = 0;

                if ( pr->ordered ) {
                    sh->u.s.ordered_iteration = 0;
                }

                KMP_MB();       /* Flush all pending memory write invalidates. */

                sh -> buffer_index += KMP_MAX_DISP_BUF;
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                               gtid, sh->buffer_index) );

                KMP_MB();       /* Flush all pending memory write invalidates. */
            }
            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }

            th -> th.th_dispatch -> th_deo_fcn = NULL;
            th -> th.th_dispatch -> th_dxo_fcn = NULL;
            th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
            th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
        } // if (status == 0)
#if KMP_OS_WINDOWS
        else if ( last ) {
            pr->u.p.last_upper = pr->u.p.ub;
        }
#endif /* KMP_OS_WINDOWS */
        if ( p_last != NULL && status != 0 )
            *p_last = last;
    } // if

#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d normal case: " \
            "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
        __kmp_str_free( &buff );
    }
#endif
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_END;
    return status;
}
template< typename T >
static void
__kmp_dist_get_bounds(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t  incr
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t          *th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper );
    KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) );
        __kmp_str_free( &buff );
    }
#endif

    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal (zero-trip loops maintained by the compiler pass this check).
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    team = th->th.th_team;
#if OMP_40_ENABLED
    nteams = th->th.th_teams_size.nteams;
#endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute the global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr < 0 case
    }
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // unknown static scheduling type
        // only some teams get a single iteration, the others get nothing
        if( team_id < trip_count ) {
            *pupper = *plower = *plower + team_id * incr;
        } else {
            *plower = *pupper + incr; // zero-trip loop
        }
        if( plastiter != NULL )
            *plastiter = ( team_id == trip_count - 1 );
    } else {
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunk = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) );
            *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
            *plower += team_id * chunk_inc_count;
            *pupper = *plower + chunk_inc_count - incr;
            // check/correct the bounds if needed
            if( incr > 0 ) {
                if( *pupper < *plower )
                    *pupper = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupper > upper - incr;
                if( *pupper > upper )
                    *pupper = upper;
            } else {
                if( *pupper > *plower )
                    *pupper = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupper < upper - incr;
                if( *pupper < upper )
                    *pupper = upper;
            }
        }
    }
}
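// Worked note (comment only): with incr == 1 the code above computes trip_count = *pupper - *plower + 1
// and, under kmp_sch_static_balanced, team i of nteams gets chunk = trip_count / nteams iterations
// plus one extra while i < trip_count % nteams.
// Example: 103 iterations over 4 teams -> the teams get 26, 26, 26 and 25 iterations, and only the
// last team sets *plastiter.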
void
__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                        kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                        kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                             kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                              kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                             kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                              kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

int
__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                        kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
{
    return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                         kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
{
    return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                        kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
{
    return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                         kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
{
    return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}
void
__kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
}

void
__kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
}

void
__kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
}

void
__kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
}
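// Illustration (not part of the runtime): a compiler lowers a dynamically scheduled loop into the
// init/next protocol shown below. The OpenMP-internal types come from kmp.h; the loop body and
// function name here are placeholders.
#if 0
extern "C" void example_lowered_loop( ident_t *loc, kmp_int32 gtid )
{
    kmp_int32 lb = 0, ub = 999, st = 1, last = 0;
    // schedule(dynamic, 4): kmp_sch_dynamic_chunked with chunk = 4
    __kmpc_dispatch_init_4( loc, gtid, kmp_sch_dynamic_chunked, lb, ub, st, 4 );
    while ( __kmpc_dispatch_next_4( loc, gtid, &last, &lb, &ub, &st ) ) {
        for ( kmp_int32 i = lb; i <= ub; i += st ) {
            /* loop body for iteration i */
        }
    }
    // For ordered loops the compiler also emits __kmpc_dispatch_fini_4() calls
    // (see __kmp_dispatch_finish above).
}
#endif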
kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value == checker;
}

kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value != checker;
}

kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
    return value < checker;
}

kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
    return value >= checker;
}

kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
    return value <= checker;
}

kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value == checker;
}

kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value != checker;
}

kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
    return value < checker;
}

kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
    return value >= checker;
}

kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
    return value <= checker;
}
kmp_uint32
__kmp_wait_yield_4( volatile kmp_uint32 * spinner,
                    kmp_uint32            checker,
                    kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 ),
                    void                * obj    // higher-level synchronization object, or NULL
                  )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint32         * spin  = spinner;
    register          kmp_uint32           check = checker;
    register          kmp_uint32           spins;
    register          kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
    register          kmp_uint32           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = TCR_4(*spin), check)) {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}

kmp_uint64
__kmp_wait_yield_8( volatile kmp_uint64 * spinner,
                    kmp_uint64            checker,
                    kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 ),
                    void                * obj    // higher-level synchronization object, or NULL
                  )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint64         * spin  = spinner;
    register          kmp_uint64           check = checker;
    register          kmp_uint32           spins;
    register          kmp_uint32 (*f) ( kmp_uint64, kmp_uint64 ) = pred;
    register          kmp_uint64           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = *spin, check))
    {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
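// Illustration (not part of the runtime): a typical call pairs one of the predicates above with
// the matching wait routine, e.g. block until *flag becomes exactly 3:
//     __kmp_wait_yield_4( &flag, 3, __kmp_eq_4, NULL );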
#ifdef KMP_GOMP_COMPAT

void
__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                           kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                            kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                           kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                            kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

#endif /* KMP_GOMP_COMPAT */