#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // Initialize the machine hierarchy on first use.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);

    // Grow the hierarchy if more threads are requested than it was built for.
    if (nproc > machine_hierarchy.skipPerLevel[depth - 1])
        machine_hierarchy.resize(nproc);

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
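// Illustrative note (not in the original source): with numPerLevel[0] == 4, a
// leaf barrier node sees base_leaf_kids == 3, and a later call with nproc
// exceeding skipPerLevel[depth-1] (e.g. 16 on a hierarchy built for 8)
// triggers machine_hierarchy.resize(nproc) before the fields are copied out.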
#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow.  A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print, for a total of 15 characters.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end - scan + 1, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, end - scan + 1, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
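// Example output (illustrative): procs {0,1,2,5} print as "{0,1,2,5}"; a set
// too large for the buffer is truncated to the form "{0,1,2,...}".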
void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}
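// Note (illustrative, not in the original source): with 64-bit group masks,
// OS proc IDs are flattened as group * (CHAR_BIT * sizeof(DWORD_PTR)) + i,
// so proc 3 of group 1 becomes flattened ID 67.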
//
// For each Address, record which child it is of its parent at every level.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os, int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
    __kmp_free(lastLabel);
    __kmp_free(counts);
}
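// Worked example (illustrative): for sorted labels (pkg,core) =
// (0,0),(0,1),(1,0), the entries receive childNums {0,0}, {0,1}, {1,0} -
// each field counts which sibling this node is under its parent.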
//
// The machine-wide affinity mask: all processors included in the machine
// model.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

//
// A topology is uniform when every package has the same number of cores and
// every core has the same number of hardware thread contexts.
//
static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
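// Example (illustrative): 2 packages x 8 cores x 2 threads = 32 procs; the
// topology is uniform iff __kmp_avail_proc == 32.  If some procs are masked
// out of the process affinity mask, the product test fails.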
//
// Print out the detailed machine topology map, i.e. the physical location
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;
    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}
//
// When the machine topology cannot be determined, form a "flat" map:
// every OS proc is modeled as its own package.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine is still
    // called to set __kmp_ncores, __kmp_nThreadsPerCore, nCoresPerPkg,
    // and nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned: one single-level address
    // (package id only) per available OS proc.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        // Skip this proc if it is not included in the machine model.
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled, so the number of granularity
        // levels is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}
# if KMP_GROUP_AFFINITY

//
// On Windows* OS with multiple processor groups, form a 2-level map:
// group at level 0, proc-within-group at level 1.
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If the process does not span multiple groups, fall back to the
    // flat map.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        return -1;
    }

    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        // Skip this proc if it is not included in the machine model.
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // This granularity isn't supported with the group topology
            // method; default to "thread" granularity.
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}
# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;
    while ((1 << r) < count) ++r;
    return r;
}
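// Example: __kmp_cpuid_mask_width(6) == 3, since distinguishing 6 core ids
// requires ceil(log2(6)) = 3 APIC ID bits.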
class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    // ""
    unsigned maxThreadsPerPkg;  // ""
    unsigned pkgId;             // inferred from the values above
    unsigned coreId;            // ""
    unsigned threadId;          // ""
};

static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}

static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}
//
// On IA-32 architecture and Intel(R) 64 architecture, cycle through the
// available OS procs, bind the current thread to each one in turn, and read
// the APIC id of each thread context with the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    // Check if cpuid leaf 4 is supported.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // Even if __kmp_affinity_type == affinity_none, this routine still sets
    // __kmp_ncores, __kmp_nThreadsPerCore, nCoresPerPkg, and nPackages.
    // Without the ability to bind, estimate them from cpuid on the current
    // thread only.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        // An upper bound on threads per package comes from cpuid(1).
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        // The number of cores per package comes from cpuid(4).
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        // Hyper-threading can't be detected without binding to each proc,
        // so assume one thread per core.
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }
    //
    // From here on it is safe to call __kmp_get_system_affinity() and
    // __kmp_set_system_affinity().  Save the current thread's mask so it
    // can be restored afterward.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    unsigned i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        // Skip this proc if it is not included in the machine model.
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        // The apic id and max threads per pkg come from cpuid(1).
        __kmp_x86_cpuid(1, 0, &buf);
        if (((buf.edx >> 9) & 1) == 0) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        // Max cores per pkg comes from cpuid(4), when supported.
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer pkgId / coreId / threadId by splitting the APIC id into
        // bit fields.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            // Inconsistent cpuid info; restore the mask before bailing out.
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }
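    // Worked example (illustrative): apicId 0b101101 with maxThreadsPerPkg 16
    // (widthCT 4) and maxCoresPerPkg 8 (widthC 3, widthT 1) splits into
    // pkgId = apicId >> 4 = 2, coreId = (apicId >> 1) & 7 = 6, and
    // threadId = apicId & 1 = 1.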
    //
    // All the info is collected; restore the old affinity mask.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there is only one thread context, form a depth-1 Address and return
    // immediately (or just return if affinity is off).
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);
    //
    // The table is now sorted by pkgId / coreId / threadId, but the radix of
    // each field is unknown (ids may be sparse).  Count distinct values, and
    // check that all threads bound to a package report consistent
    // maxCoresPerPkg / maxThreadsPerPkg values.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency check values
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            // New package: reset the consistency check values and skip the
            // checks for this iteration.
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        // maxCoresPerPkg and maxThreadsPerPkg must agree among all threads
        // bound to a given package.
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
    //
    // Even when affinity is off, this routine still sets __kmp_ncores,
    // __kmp_nThreadsPerCore, nCoresPerPkg, and nPackages.  Return now if
    // affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        }
        else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }
    //
    // Build the data structure to be returned: the package level is always
    // modeled; core and thread levels only if their radix exceeds 1.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        // Set the granularity level based on what levels are modeled in the
        // machine topology map.
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}
//
// Newer processors expose the x2APIC topology through cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    // Check to see if cpuid leaf 11 is supported.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology, and set defaults
    // for __kmp_nThreadsPerCore, nCoresPerPkg, and nPackages in case of an
    // early return.
    //
    int level;
    int pkgLevel = -1;
    int coreLevel = -1;
    int threadLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            // A level this large indicates invalid (possibly corrupted)
            // cpuid data; bail out.
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                // Infer nPackages from __kmp_xproc later.
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            // SMT level
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            // core level
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // Leaf 11 enumerates levels from the finest (usually thread) to the
    // coarsest.  The caller expects level 0 to be the coarsest, so reverse
    // the level indices.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
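    // Example (illustrative): if leaf 11 enumerates SMT at sub-leaf 0 and
    // core at sub-leaf 1, and stops at sub-leaf 2, then depth == 3 and the
    // reversal maps {threadLevel, coreLevel, pkgLevel} = {0, 1, 2} to
    // {2, 1, 0}, i.e. label 0 is the package, label 2 the SMT thread.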
    //
    // The algorithm binds to each available proc and reads cpuid there.  If
    // we cannot set/get affinity, fall back to the defaults computed above.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // Save the current thread's affinity mask so it can be restored.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    // Allocate the data structure to be returned.
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Bind to each proc in turn and extract its topology labels from cpuid
    // leaf 11.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        // Skip this proc if it is not included in the machine model.
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        // Extract the labels for each level in the machine topology map.
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // All the info is collected; restore the old affinity mask.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there is only one thread context, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        // Form an Address object which only includes the package level.
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // Scan the sorted table and count distinct labels at each level.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        int level;
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // Even when affinity is off, this routine still sets __kmp_ncores,
    // __kmp_nThreadsPerCore, nCoresPerPkg, and nPackages.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform.
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
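    // Example (illustrative): depth 3 with maxCt = {2, 8, 2} gives prod = 32;
    // the topology is uniform iff totals[depth-1] (the total number of thread
    // contexts seen) is also 32.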
    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        }
        else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If any levels are removed, allocate a new vector to return and copy
    // the surviving levels into it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        int newPkgLevel = -1;
        int newCoreLevel = -1;
        int newThreadLevel = -1;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1)
              && (level != pkgLevel)) {
                // Remove this level.  Never remove the package level.
                continue;
            }
            if (level == pkgLevel) {
                newPkgLevel = level;
            }
            if (level == coreLevel) {
                newCoreLevel = level;
            }
            if (level == threadLevel) {
                newThreadLevel = level;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                  = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
        pkgLevel = newPkgLevel;
        coreLevel = newCoreLevel;
        threadLevel = newThreadLevel;
    }

    if (__kmp_affinity_gran_levels < 0) {
        // Set the granularity level based on what levels are modeled in the
        // machine topology map.
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}
//
// Field indices within a /proc/cpuinfo record.
//
#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;
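// Illustrative /proc/cpuinfo record and the fields extracted from it:
//
//   processor   : 7     -> threadInfo[i][osIdIndex]
//   physical id : 1     -> threadInfo[i][pkgIdIndex]
//   core id     : 3     -> threadInfo[i][coreIdIndex]
//
// "node_<n> id" fields, when present, extend the record past pkgIdIndex,
// which is why maxIndex can grow above its initial value.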
static int
__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
{
    const unsigned *aa = (const unsigned *)a;
    const unsigned *bb = (const unsigned *)b;
    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
    return 0;
}

static int
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
{
    unsigned i;
    const unsigned *aa = *((const unsigned **)a);
    const unsigned *bb = *((const unsigned **)b);
    for (i = maxIndex; ; i--) {
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
    }
    return 0;
}
//
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain
// the machine topology map.
//
static int
__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
  kmp_i18n_id_t *const msg_id, FILE *f)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // First scan: count the "processor" records, and find the highest level
    // <n> of any "node_<n> id" fields.
    //
    char buf[256];
    unsigned num_records = 0;
    while (! feof(f)) {
        buf[sizeof(buf) - 1] = 1;
        if (! fgets(buf, sizeof(buf), f)) {
            // Read error, presumably EOF.
            break;
        }

        char s1[] = "processor";
        if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
            num_records++;
            continue;
        }

        unsigned level;
        if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
            if (nodeIdIndex + level >= maxIndex) {
                maxIndex = nodeIdIndex + level;
            }
        }
    }

    //
    // Check for an empty file, or for more records than the affinity mask
    // can possibly hold.
    //
    if (num_records == 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_NoProcRecords;
        return -1;
    }
    if (num_records > (unsigned)__kmp_xproc) {
        *line = 0;
        *msg_id = kmp_i18n_str_TooManyProcRecords;
        return -1;
    }

    //
    // Rewind the file so it can be fully parsed this time.
    //
    if (fseek(f, 0, SEEK_SET) != 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_CantRewindCpuinfo;
        return -1;
    }

    //
    // Allocate the array of records.  A dummy record at the end simplifies
    // the termination logic.
    //
    unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
      * sizeof(unsigned *));
    unsigned i;
    for (i = 0; i <= num_records; i++) {
        threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
          * sizeof(unsigned));
    }

#define CLEANUP_THREAD_INFO \
    for (i = 0; i <= num_records; i++) {  \
        __kmp_free(threadInfo[i]);        \
    }                                     \
    __kmp_free(threadInfo);

    //
    // A value of UINT_MAX means that the field was not found.
    //
    unsigned __index;

#define INIT_PROC_INFO(p) \
    for (__index = 0; __index <= maxIndex; __index++) {  \
        (p)[__index] = UINT_MAX;                         \
    }

    for (i = 0; i <= num_records; i++) {
        INIT_PROC_INFO(threadInfo[i]);
    }
    unsigned num_avail = 0;
    *line = 0;
    while (! feof(f)) {
        //
        // Create an inner scoping level, so that all the goto targets at the
        // end of the loop appear in an outer scoping level.  This avoids
        // warnings about jumping past an initialization to a target in the
        // same block.
        //
        {
            buf[sizeof(buf) - 1] = 1;
            bool long_line = false;
            if (! fgets(buf, sizeof(buf), f)) {
                //
                // Read error, presumably EOF.  If threadInfo[num_avail]
                // holds valid data, fake a blank line so that the last
                // record still gets processed.
                //
                bool valid = false;
                for (i = 0; i <= maxIndex; i++) {
                    if (threadInfo[num_avail][i] != UINT_MAX) {
                        valid = true;
                    }
                }
                if (! valid) {
                    break;
                }
                buf[0] = 0;
            } else if (!buf[sizeof(buf) - 1]) {
                //
                // The line is longer than the buffer.  Set a flag, and only
                // report an error if the line wouldn't be ignored anyway.
                //
                long_line = true;

#define CHECK_LINE \
    if (long_line) {                               \
        CLEANUP_THREAD_INFO;                       \
        *msg_id = kmp_i18n_str_LongLineCpuinfo;    \
        return -1;                                 \
    }
            }
            (*line)++;

            char s1[] = "processor";
            if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s1) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
                char path[256];
                KMP_SNPRINTF(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

                KMP_SNPRINTF(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/core_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
                continue;
#else
            }
            char s2[] = "physical id";
            if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s2) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][pkgIdIndex] = val;
                continue;
            }
            char s3[] = "core id";
            if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s3) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][coreIdIndex] = val;
                continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
            }
            char s4[] = "thread id";
            if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][threadIdIndex] = val;
                continue;
            }
            unsigned level;
            if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                KMP_ASSERT(nodeIdIndex + level <= maxIndex);
                if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][nodeIdIndex + level] = val;
                continue;
            }

            //
            // An unrecognized field: skip it, but treat a blank line as the
            // end of the current record.
            //
            if ((*buf != 0) && (*buf != '\n')) {
                // If the line was longer than the buffer, read characters
                // until we find a newline.
                if (long_line) {
                    int ch;
                    while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
                }
                continue;
            }

            //
            // A blank line marks the end of the current record.  Check that
            // there aren't too many records, and that required fields are
            // present.
            //
            if ((int)num_avail == __kmp_xproc) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_TooManyEntries;
                return -1;
            }
            if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingProcField;
                return -1;
            }
            if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingPhysicalIDField;
                return -1;
            }

            // Skip this proc if it is not included in the machine model.
            if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
                INIT_PROC_INFO(threadInfo[num_avail]);
                continue;
            }

            //
            // A successful parse of this proc's info: advance to the next.
            //
            num_avail++;
            KMP_ASSERT(num_avail <= num_records);
            INIT_PROC_INFO(threadInfo[num_avail]);
            continue;
        }

        no_val:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingValCpuinfo;
        return -1;

        dup_field:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
        return -1;
    }
# if KMP_MIC && REDUCE_TEAM_SIZE
    unsigned teamSize = 0;
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    KMP_ASSERT(num_avail > 0);
    KMP_ASSERT(num_avail <= num_records);

    //
    // If there is only one avail proc, form a single-level (package only)
    // Address and return immediately (or just return if affinity is off).
    //
    if (num_avail == 1) {
        __kmp_ncores = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
        if (__kmp_affinity_verbose) {
            if (! KMP_AFFINITY_CAPABLE()) {
                KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                char buf[KMP_AFFIN_MASK_PRINT_LEN];
                __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                  fullMask);
                KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
                if (__kmp_affinity_respect_mask) {
                    KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
                }
                else {
                    KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
                }
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            int index;
            kmp_str_buf_t buf;
            __kmp_str_buf_init(&buf);
            __kmp_str_buf_print(&buf, "1");
            for (index = maxIndex - 1; index > pkgIdIndex; index--) {
                __kmp_str_buf_print(&buf, " x 1");
            }
            KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
            __kmp_str_buf_free(&buf);
        }

        if (__kmp_affinity_type == affinity_none) {
            CLEANUP_THREAD_INFO;
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0][pkgIdIndex];
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        CLEANUP_THREAD_INFO;
        return 1;
    }
    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, num_avail, sizeof(*threadInfo),
      __kmp_affinity_cmp_ProcCpuInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but the radix of
    // each field is unknown.  Determine it, and detect records that lack the
    // thread id field; in that case thread ids are auto-assigned on a second
    // pass (restart_radix_check).
    //
    unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));

    bool assign_thread_ids = false;
    unsigned threadIdCt;
    unsigned index;

    restart_radix_check:
    threadIdCt = 0;

    //
    // Initialize the counters based on the first record.
    //
    if (assign_thread_ids) {
        if (threadInfo[0][threadIdIndex] == UINT_MAX) {
            threadInfo[0][threadIdIndex] = threadIdCt++;
        }
        else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
            threadIdCt = threadInfo[0][threadIdIndex] + 1;
        }
    }
    for (index = 0; index <= maxIndex; index++) {
        counts[index] = 1;
        maxCt[index] = 1;
        totals[index] = 1;
        lastId[index] = threadInfo[0][index];
    }
    //
    // Run through the rest of the records, counting distinct values at each
    // level of the map.
    //
    for (i = 1; i < num_avail; i++) {
        // Find the most significant index whose id differs from the id for
        // the previous record.
        for (index = maxIndex; index >= threadIdIndex; index--) {
            if (assign_thread_ids && (index == threadIdIndex)) {
                // Auto-assign the thread id field if it wasn't specified.
                if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                    threadInfo[i][threadIdIndex] = threadIdCt++;
                }
                // If thread ids were specified for only some records, start
                // the counter off at the next higher thread id.
                else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                    threadIdCt = threadInfo[i][threadIdIndex] + 1;
                }
            }
            if (threadInfo[i][index] != lastId[index]) {
                //
                // Reset the counts at all less significant levels to 1, and
                // update the totals and last ids at all levels up to and
                // including index.
                //
                unsigned index2;
                for (index2 = threadIdIndex; index2 < index; index2++) {
                    totals[index2]++;
                    if (counts[index2] > maxCt[index2]) {
                        maxCt[index2] = counts[index2];
                    }
                    counts[index2] = 1;
                    lastId[index2] = threadInfo[i][index2];
                }
                counts[index]++;
                totals[index]++;
                lastId[index] = threadInfo[i][index];

                if (assign_thread_ids && (index > threadIdIndex)) {

# if KMP_MIC && REDUCE_TEAM_SIZE
                    // The default team size is the total #threads in the
                    // machine minus 1 thread for every core that has 3 or
                    // more threads.
                    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

                    // Restart the thread counter; we are on a new core.
                    threadIdCt = 0;

                    // Auto-assign the thread id field if it wasn't specified.
                    if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                        threadInfo[i][threadIdIndex] = threadIdCt++;
                    }
                    // If thread ids were specified for only some records,
                    // start the counter off at the next higher thread id.
                    else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                        threadIdCt = threadInfo[i][threadIdIndex] + 1;
                    }
                }
                break;
            }
        }
        if (index < threadIdIndex) {
            //
            // If thread ids were specified, duplicate records are an error;
            // otherwise retry with auto-assigned thread ids.
            //
            if ((threadInfo[i][threadIdIndex] != UINT_MAX)
              || assign_thread_ids) {
                __kmp_free(lastId);
                __kmp_free(totals);
                __kmp_free(maxCt);
                __kmp_free(counts);
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
                return -1;
            }
            assign_thread_ids = true;
            goto restart_radix_check;
        }
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    // The default team size is the total #threads in the machine
    // minus 1 thread for every core that has 3 or more threads.
    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (counts[index] > maxCt[index]) {
            maxCt[index] = counts[index];
        }
    }

    __kmp_nThreadsPerCore = maxCt[threadIdIndex];
    nCoresPerPkg = maxCt[coreIdIndex];
    nPackages = totals[pkgIdIndex];
    //
    // Check to see if the machine topology is uniform.
    //
    unsigned prod = totals[maxIndex];
    for (index = threadIdIndex; index < maxIndex; index++) {
        prod *= maxCt[index];
    }
    bool uniform = (prod == totals[threadIdIndex]);

    //
    // Even when affinity is off, this routine still sets __kmp_ncores,
    // __kmp_nThreadsPerCore, nCoresPerPkg, and nPackages.
    //
    __kmp_ncores = totals[coreIdIndex];

    if (__kmp_affinity_verbose) {
        if (! KMP_AFFINITY_CAPABLE()) {
            KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        else {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
            KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
        for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
          maxCt[threadIdIndex], __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }
# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // Set the default team size.
    //
    if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
        __kmp_dflt_team_nth = teamSize;
        KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
          __kmp_dflt_team_nth));
    }
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        return 0;
    }

    //
    // A level is kept in the map only if it has more nodes than its parent
    // level, i.e. some node at this level has a sibling.  The package level
    // is always kept.
    //
    bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
    int depth = 0;
    for (index = threadIdIndex; index < maxIndex; index++) {
        KMP_ASSERT(totals[index] >= totals[index + 1]);
        inMap[index] = (totals[index] > totals[index + 1]);
    }
    inMap[maxIndex] = (totals[maxIndex] > 1);
    inMap[pkgIdIndex] = true;

    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (inMap[index]) {
            depth++;
        }
    }
    KMP_ASSERT(depth > 0);

    //
    // Construct the data structure that is to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
    int pkgLevel = -1;
    int coreLevel = -1;
    int threadLevel = -1;

    for (i = 0; i < num_avail; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i][osIdIndex];
        int src_index;
        int dst_index = 0;

        for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
            if (! inMap[src_index]) {
                continue;
            }
            addr.labels[dst_index] = threadInfo[i][src_index];
            if (src_index == pkgIdIndex) {
                pkgLevel = dst_index;
            }
            else if (src_index == coreIdIndex) {
                coreLevel = dst_index;
            }
            else if (src_index == threadIdIndex) {
                threadLevel = dst_index;
            }
            dst_index++;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        // Set the granularity level based on what levels are modeled in the
        // machine topology map.
        int src_index;
        __kmp_affinity_gran_levels = 0;
        for (src_index = threadIdIndex; src_index <= (int)maxIndex; src_index++) {
            if (! inMap[src_index]) {
                continue;
            }
            switch (src_index) {
                case threadIdIndex:
                if (__kmp_affinity_gran > affinity_gran_thread) {
                    __kmp_affinity_gran_levels++;
                }
                break;

                case coreIdIndex:
                if (__kmp_affinity_gran > affinity_gran_core) {
                    __kmp_affinity_gran_levels++;
                }
                break;

                case pkgIdIndex:
                if (__kmp_affinity_gran > affinity_gran_package) {
                    __kmp_affinity_gran_levels++;
                }
                break;
            }
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(inMap);
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return depth;
}
//
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
//
static kmp_affin_mask_t *
__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
  AddrUnsPair *address2os, unsigned numAddrs)
{
    //
    // First form a table of affinity masks in order of OS thread id.
    //
    unsigned depth;
    unsigned maxOsId;
    unsigned i;

    KMP_ASSERT(numAddrs > 0);
    depth = address2os[0].first.depth;

    maxOsId = 0;
    for (i = 0; i < numAddrs; i++) {
        unsigned osId = address2os[i].second;
        if (osId > maxOsId) {
            maxOsId = osId;
        }
    }
    kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
      (maxOsId + 1) * __kmp_affin_mask_size);

    //
    // Sort the address2os table according to physical order.  This puts all
    // threads on the same core/package/node in consecutive locations.
    //
    qsort(address2os, numAddrs, sizeof(*address2os),
      __kmp_affinity_cmp_Address_labels);

    KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
    if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
        KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
    }
    if (__kmp_affinity_gran_levels >= (int)depth) {
        if (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffThreadsMayMigrate);
        }
    }

    //
    // Form the masks: threads within __kmp_affinity_gran_levels of a group
    // leader share that group's mask, and appear consecutively after the
    // sort above.
    //
    unsigned unique = 0;
    unsigned j = 0;                             // index of 1st thread in group
    unsigned leader = 0;
    Address *leaderAddr = &(address2os[0].first);
    kmp_affin_mask_t *sum
      = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[0].second, sum);
    for (i = 1; i < numAddrs; i++) {
        //
        // If this thread is sufficiently close to the leader (within the
        // granularity setting), add its bit to the group's mask and move on.
        //
        if (leaderAddr->isClose(address2os[i].first,
          __kmp_affinity_gran_levels)) {
            KMP_CPU_SET(address2os[i].second, sum);
            continue;
        }

        //
        // Copy the group's mask to each member's entry in the osId2Mask
        // table, marking the first address as the leader.
        //
        for (; j < i; j++) {
            unsigned osId = address2os[j].second;
            KMP_DEBUG_ASSERT(osId <= maxOsId);
            kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
            KMP_CPU_COPY(mask, sum);
            address2os[j].first.leader = (j == leader);
        }
        unique++;

        // Start a new mask.
        leader = i;
        leaderAddr = &(address2os[i].first);
        KMP_CPU_ZERO(sum);
        KMP_CPU_SET(address2os[i].second, sum);
    }

    //
    // Copy the mask of the last group to its members' entries.
    //
    for (; j < i; j++) {
        unsigned osId = address2os[j].second;
        KMP_DEBUG_ASSERT(osId <= maxOsId);
        kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
        KMP_CPU_COPY(mask, sum);
        address2os[j].first.leader = (j == leader);
    }
    unique++;

    *maxIndex = maxOsId;
    *numUnique = unique;
    return osId2Mask;
}
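// Example (illustrative): with granularity=core and 2 threads per core, the
// two hardware threads of each core compare "close" via isClose(), so they
// share one mask with both bits set, and numUnique equals the core count.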
//
// File-static state for the proclist / placelist parsers; easier than
// threading these through the recursive-descent parsing routines.
//
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask) \
    {                                                                     \
        if (nextNewMask >= numNewMasks) {                                 \
            numNewMasks *= 2;                                             \
            newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
              numNewMasks * __kmp_affin_mask_size);                       \
        }                                                                 \
        KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));      \
        nextNewMask++;                                                    \
    }

#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
    {                                                                         \
        if (((_osId) > _maxOsId) ||                                           \
          (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings            \
              && (__kmp_affinity_type != affinity_none))) {                   \
                KMP_WARNING(AffIgnoreInvalidProcID, _osId);                   \
            }                                                                 \
        }                                                                     \
        else {                                                                \
            ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                     \
        }                                                                     \
    }
//
// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
//
static void
__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *proclist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    const char *scan = proclist;
    const char *next = proclist;

    // malloc() is used for the temporary mask vector so realloc() can
    // extend it.
    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;
    kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    int setSize = 0;

    for (;;) {
        int start, end, stride;

        SKIP_WS(scan);
        next = scan;
        if (*next == '\0') {
            break;
        }

        if (*next == '{') {
            int num;
            setSize = 0;
            next++;     // skip '{'
            SKIP_WS(next);
            scan = next;

            // Read the first integer in the set.
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad explicit proc list");
            SKIP_DIGITS(next);
            num = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(num >= 0, "bad explicit proc list");

            // Copy the mask for that osId to the sum (union) mask.
            if ((num > maxOsId) ||
              (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, num);
                }
                KMP_CPU_ZERO(sumMask);
            }
            else {
                KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                setSize = 1;
            }

            for (;;) {
                // Check for end of set.
                SKIP_WS(next);
                if (*next == '}') {
                    next++;     // skip '}'
                    break;
                }

                // Skip optional comma.
                if (*next == ',') {
                    next++;
                }
                SKIP_WS(next);

                // Read the next integer in the set.
                scan = next;
                KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                  "bad explicit proc list");
                SKIP_DIGITS(next);
                num = __kmp_str_to_int(scan, *next);
                KMP_ASSERT2(num >= 0, "bad explicit proc list");

                // Add the mask for that osId to the sum mask.
                if ((num > maxOsId) ||
                  (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, num);
                    }
                }
                else {
                    KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                    setSize++;
                }
            }
            if (setSize > 0) {
                ADD_MASK(sumMask);
            }

            SKIP_WS(next);
            if (*next == ',') {
                next++;
            }
            scan = next;
            continue;
        }

        // Read the first integer.
        KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(start >= 0, "bad explicit proc list");
        SKIP_WS(next);

        // If this isn't a range, add a mask to the list and go on.
        if (*next != '-') {
            ADD_MASK_OSID(start, osId2Mask, maxOsId);

            // Skip optional comma.
            if (*next == ',') {
                next++;
            }
            scan = next;
            continue;
        }

        // This is a range.  Skip over the '-' and read in the 2nd int.
        next++;         // skip '-'
        SKIP_WS(next);
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
        SKIP_DIGITS(next);
        end = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(end >= 0, "bad explicit proc list");

        // Check for a stride parameter.
        stride = 1;
        SKIP_WS(next);
        if (*next == ':') {
            // A stride is specified.  Skip the ':' and read the 3rd int.
            int sign = +1;
            next++;     // skip ':'
            SKIP_WS(next);
            scan = next;
            if (*next == '-') {
                sign = -1;
                next++;
                SKIP_WS(next);
                scan = next;
            }
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad explicit proc list");
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(stride >= 0, "bad explicit proc list");
            stride *= sign;
        }

        // Do some range checks.
        KMP_ASSERT2(stride != 0, "bad explicit proc list");
        if (stride > 0) {
            KMP_ASSERT2(start <= end, "bad explicit proc list");
        }
        else {
            KMP_ASSERT2(start >= end, "bad explicit proc list");
        }
        KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

        // Add the mask for each OS proc # to the list.
        if (stride > 0) {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            }
            while (start <= end);
        }
        else {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            }
            while (start >= end);
        }

        // Skip optional comma.
        SKIP_WS(next);
        if (*next == ',') {
            next++;
        }
        scan = next;
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(sumMask);
    KMP_INTERNAL_FREE(newMasks);
}
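// Example (illustrative): KMP_AFFINITY="explicit,proclist=[0,3,5-7,{8,9}]"
// yields six masks: singletons for procs 0, 3, 5, 6, and 7, plus one mask
// containing both 8 and 9 (braces group OS procs into a single mask).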
static void
__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
  int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
{
    const char *next;

    for (;;) {
        int start, count, stride, i;

        // Read in the starting proc id.
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(start >= 0);
        *scan = next;

        // Valid follow sets are ',' ':' and '}'.
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            if ((start > maxOsId) ||
              (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, start);
                }
            }
            else {
                KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                (*setSize)++;
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }
        KMP_ASSERT2(**scan == ':', "bad explicit places list");
        (*scan)++;      // skip ':'

        // Read count parameter.
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        count = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(count >= 0);
        *scan = next;

        // Valid follow sets are ',' ':' and '}'.
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            for (i = 0; i < count; i++) {
                if ((start > maxOsId) ||
                  (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, start);
                    }
                    break;  // don't proliferate warnings for large count
                }
                else {
                    KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                    start++;
                    (*setSize)++;
                }
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }
        KMP_ASSERT2(**scan == ':', "bad explicit places list");
        (*scan)++;      // skip ':'

        // Read stride parameter.
        int sign = +1;
        for (;;) {
            SKIP_WS(*scan);
            if (**scan == '+') {
                (*scan)++;      // skip '+'
                continue;
            }
            if (**scan == '-') {
                sign *= -1;
                (*scan)++;      // skip '-'
                continue;
            }
            break;
        }
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        stride = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(stride >= 0);
        *scan = next;
        stride *= sign;

        // Valid follow sets are ',' and '}'.
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            for (i = 0; i < count; i++) {
                if ((start > maxOsId) ||
                  (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, start);
                    }
                    break;  // don't proliferate warnings for large count
                }
                else {
                    KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                    start += stride;
                    (*setSize)++;
                }
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }
}
static void
__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
  int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
{
    // Valid follow sets are '{' '!' and num.
    SKIP_WS(*scan);
    if (**scan == '{') {
        (*scan)++;      // skip '{'
        __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask,
          setSize);
        KMP_ASSERT2(**scan == '}', "bad explicit places list");
        (*scan)++;      // skip '}'
    }
    else if (**scan == '!') {
        (*scan)++;      // skip '!'
        __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
        KMP_CPU_COMPLEMENT(tempMask);
    }
    else if ((**scan >= '0') && (**scan <= '9')) {
        const char *next = *scan;
        SKIP_DIGITS(next);
        int num = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(num >= 0);
        if ((num > maxOsId) ||
          (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffIgnoreInvalidProcID, num);
            }
        }
        else {
            KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
            (*setSize)++;
        }
        *scan = next;   // skip num
    }
    else {
        KMP_ASSERT2(0, "bad explicit places list");
    }
}
static void
__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *placelist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    const char *scan = placelist;
    const char *next = placelist;

    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;

    kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    KMP_CPU_ZERO(tempMask);
    int setSize = 0;

    for (;;) {
        int count, stride;

        __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

        // Valid follow sets are ',' ':' and EOL.
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            if (setSize > 0) {
                ADD_MASK(tempMask);
            }
            KMP_CPU_ZERO(tempMask);
            setSize = 0;
            if (*scan == '\0') {
                break;
            }
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(*scan == ':', "bad explicit places list");
        scan++;         // skip ':'

        // Read count parameter.
        SKIP_WS(scan);
        KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
          "bad explicit places list");
        next = scan;
        SKIP_DIGITS(next);
        count = __kmp_str_to_int(scan, *next);
        KMP_ASSERT(count >= 0);
        scan = next;

        // Valid follow sets are ',' ':' and EOL.
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            stride = +1;
        }
        else {
            KMP_ASSERT2(*scan == ':', "bad explicit places list");
            scan++;     // skip ':'

            // Read stride parameter.
            int sign = +1;
            for (;;) {
                SKIP_WS(scan);
                if (*scan == '+') {
                    scan++;     // skip '+'
                    continue;
                }
                if (*scan == '-') {
                    sign *= -1;
                    scan++;     // skip '-'
                    continue;
                }
                break;
            }
            SKIP_WS(scan);
            KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
              "bad explicit places list");
            next = scan;
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_DEBUG_ASSERT(stride >= 0);
            scan = next;
            stride *= sign;
        }

        if (stride > 0) {
            int i;
            for (i = 0; i < count; i++) {
                int j;
                if (setSize == 0) {
                    break;
                }
                ADD_MASK(tempMask);
                setSize = 0;

                // Shift the mask up by "stride" bits; procs shifted off the
                // end, or landing on invalid proc ids, are dropped.
                for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else if ((j > maxOsId) ||
                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
                        if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
                          && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
                            KMP_WARNING(AffIgnoreInvalidProcID, j);
                        }
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else {
                        KMP_CPU_SET(j, tempMask);
                        setSize++;
                    }
                }
                for (; j >= 0; j--) {
                    KMP_CPU_CLR(j, tempMask);
                }
            }
        }
        else {
            int i;
            for (i = 0; i < count; i++) {
                int j;
                if (setSize == 0) {
                    break;
                }
                ADD_MASK(tempMask);
                setSize = 0;

                // Shift the mask down by "-stride" bits.
                for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
                  j++) {
                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else if ((j > maxOsId) ||
                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
                        if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
                          && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
                            KMP_WARNING(AffIgnoreInvalidProcID, j);
                        }
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else {
                        KMP_CPU_SET(j, tempMask);
                        setSize++;
                    }
                }
                for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
                    KMP_CPU_CLR(j, tempMask);
                }
            }
        }
        KMP_CPU_ZERO(tempMask);
        setSize = 0;

        // Valid follow sets are ',' and EOL.
        SKIP_WS(scan);
        if (*scan == '\0') {
            break;
        }
        if (*scan == ',') {
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(tempMask);
    KMP_INTERNAL_FREE(newMasks);
}
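// Example (illustrative): OMP_PLACES="{0,1}:4:2" starts from the place {0,1}
// and builds 4 places by shifting the mask up 2 procs each time:
// {0,1},{2,3},{4,5},{6,7}.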
#undef ADD_MASK
#undef ADD_MASK_OSID
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
    if (__kmp_place_num_sockets == 0 &&
      __kmp_place_num_cores == 0 &&
      __kmp_place_num_threads_per_core == 0 )
        return;   // no topology limiting actions requested, exit
    if (__kmp_place_num_sockets == 0)
        __kmp_place_num_sockets = nPackages;      // use all available sockets
    if (__kmp_place_num_cores == 0)
        __kmp_place_num_cores = nCoresPerPkg;     // use all available cores
    if (__kmp_place_num_threads_per_core == 0 ||
      __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
        __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts

    if ( !__kmp_affinity_uniform_topology() ) {
        KMP_WARNING( AffThrPlaceNonUniform );
        return; // don't support non-uniform topology
    }
    if ( depth != 3 ) {
        KMP_WARNING( AffThrPlaceNonThreeLevel );
        return; // don't support non-3-level topology
    }
    if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
        KMP_WARNING(AffThrPlaceManySockets);
        return;
    }
    if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
        KMP_WARNING( AffThrPlaceManyCores );
        return;
    }

    AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
      __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);

    int i, j, k, n_old = 0, n_new = 0;
    for (i = 0; i < nPackages; ++i)
        if (i < __kmp_place_socket_offset ||
          i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
            n_old += nCoresPerPkg * __kmp_nThreadsPerCore;  // skip not-requested socket
        else
            for (j = 0; j < nCoresPerPkg; ++j)              // walk through requested socket
                if (j < __kmp_place_core_offset ||
                  j >= __kmp_place_core_offset + __kmp_place_num_cores)
                    n_old += __kmp_nThreadsPerCore;         // skip not-requested core
                else
                    for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
                        if (k < __kmp_place_num_threads_per_core) {
                            newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
                            n_new++;
                        }
                        n_old++;
                    }
    KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
      __kmp_place_num_threads_per_core);

    nPackages = __kmp_place_num_sockets;                      // correct nPackages
    nCoresPerPkg = __kmp_place_num_cores;                     // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct nThreadsPerCore
    __kmp_avail_proc = n_new;                                 // correct avail_proc
    __kmp_ncores = nPackages * __kmp_place_num_cores;         // correct ncores

    __kmp_free( *pAddr );
    *pAddr = newAddr;       // replace the old topology with the new one
}
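// Example (illustrative): on a uniform 2s x 8c x 2t machine, requesting
// 1 socket, 4 cores, and 1 thread per core (KMP_PLACE_THREADS-style limits)
// trims the map to 1 x 4 x 1 = 4 entries and updates nPackages,
// nCoresPerPkg, __kmp_nThreadsPerCore, __kmp_avail_proc, and __kmp_ncores.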
static AddrUnsPair *address2os = NULL;
static int * procarr = NULL;
static int __kmp_aff_depth = 0;

static void
__kmp_aux_affinity_initialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        KMP_ASSERT(fullMask != NULL);
        return;
    }

    //
    // Create the "full" mask - this defines all of the processors that we
    // consider to be in the machine model.  If respect is set, it is the
    // initialization thread's affinity mask; otherwise it is all processors
    // we know about on the machine.
    //
    if (fullMask == NULL) {
        fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
    }
    if (KMP_AFFINITY_CAPABLE()) {
        if (__kmp_affinity_respect_mask) {
            __kmp_get_system_affinity(fullMask, TRUE);

            // Count the available processors.
            unsigned i;
            __kmp_avail_proc = 0;
            for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
                if (! KMP_CPU_ISSET(i, fullMask)) {
                    continue;
                }
                __kmp_avail_proc++;
            }
            if (__kmp_avail_proc > __kmp_xproc) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(ErrorInitializeAffinity);
                }
                __kmp_affinity_type = affinity_none;
                KMP_AFFINITY_DISABLE();
                return;
            }
        }
        else {
            __kmp_affinity_entire_machine_mask(fullMask);
            __kmp_avail_proc = __kmp_xproc;
        }
    }
    int depth = -1;
    kmp_i18n_id_t msg_id = kmp_i18n_null;

    //
    // For backward compatibility, setting KMP_CPUINFO_FILE implies
    // KMP_TOPOLOGY_METHOD=cpuinfo.
    //
    if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
        __kmp_affinity_top_method = affinity_top_method_cpuinfo;
    }

    if (__kmp_affinity_top_method == affinity_top_method_all) {
        //
        // In the default code path, errors are not fatal - we just try using
        // another method.  We only emit a warning message if affinity is on,
        // or the verbose flag is set, and the nowarnings flag was not set.
        //
        const char *file_name = NULL;
        int line = 0;

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
        }

        file_name = NULL;
        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }

        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                if (msg_id != kmp_i18n_null) {
                    KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                      KMP_I18N_STR(DecodingLegacyAPIC));
                }
                else {
                    KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
                }
            }

            file_name = NULL;
            depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
        }

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

# if KMP_OS_LINUX

        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                if (msg_id != kmp_i18n_null) {
                    KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                      __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
                }
                else {
                    KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
                }
            }

            FILE *f = fopen("/proc/cpuinfo", "r");
            if (f == NULL) {
                msg_id = kmp_i18n_str_CantOpenCpuinfo;
            }
            else {
                file_name = "/proc/cpuinfo";
                depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
                fclose(f);
                if (depth == 0) {
                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
                    KMP_ASSERT(address2os == NULL);
                    return;
                }
            }
        }

# endif /* KMP_OS_LINUX */

# if KMP_GROUP_AFFINITY

        if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
            }

            depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
            KMP_ASSERT(depth != 0);
        }

# endif /* KMP_GROUP_AFFINITY */

        if (depth < 0) {
            if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
                if (file_name == NULL) {
                    KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
                }
                else if (line == 0) {
                    KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
                }
                else {
                    KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
                }
            }

            file_name = "";
            depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
            KMP_ASSERT(depth > 0);
            KMP_ASSERT(address2os != NULL);
        }
    }
    //
    // If the user has explicitly specified the topology method, deterministically
    // use it, and make any failure fatal.
    //
# if KMP_ARCH_X86 || KMP_ARCH_X86_64

    else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(Decodingx2APIC));
        }

        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }
    else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(DecodingLegacyAPIC));
        }

        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
        const char *filename;
        if (__kmp_cpuinfo_file != NULL) {
            filename = __kmp_cpuinfo_file;
        }
        else {
            filename = "/proc/cpuinfo";
        }

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
        }

        FILE *f = fopen(filename, "r");
        if (f == NULL) {
            int code = errno;
            if (__kmp_cpuinfo_file != NULL) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(CantOpenFileForReading, filename),
                    KMP_ERR(code),
                    KMP_HNT(NameComesFrom_CPUINFO_FILE),
                    __kmp_msg_null
                );
            }
            else {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(CantOpenFileForReading, filename),
                    KMP_ERR(code),
                    __kmp_msg_null
                );
            }
        }
        int line = 0;
        depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            if (line > 0) {
                KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
            }
            else {
                KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
            }
        }
        if (__kmp_affinity_type == affinity_none) {
            KMP_ASSERT(depth == 0);
            KMP_ASSERT(address2os == NULL);
            return;
        }
    }

# if KMP_GROUP_AFFINITY

    else if (__kmp_affinity_top_method == affinity_top_method_group) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
        KMP_ASSERT(depth != 0);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif /* KMP_GROUP_AFFINITY */

    else if (__kmp_affinity_top_method == affinity_top_method_flat) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        // The flat map should never fail.
        KMP_ASSERT(depth > 0);
        KMP_ASSERT(address2os != NULL);
    }
3462 __kmp_apply_thread_places(&address2os, depth);
    // Create the table of masks, indexed by OS proc id.
    unsigned maxIndex;
    unsigned numUnique;
    kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
      address2os, __kmp_avail_proc);
    if (__kmp_affinity_gran_levels == 0) {
        KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
    }

    // Set the childNums vectors before sorting with
    // __kmp_affinity_cmp_Address_child_num(), which reads __kmp_affinity_compact.
    __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
    switch (__kmp_affinity_type) {

    case affinity_explicit:
        KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
# if OMP_40_ENABLED
        if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
        {
            __kmp_affinity_process_proclist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# if OMP_40_ENABLED
        else {
            __kmp_affinity_process_placelist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# endif
        if (__kmp_affinity_num_masks == 0) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffNoValidProcID);
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        break;
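    //
    // The remaining types are implemented by permuting the levels of the
    // topology tree via __kmp_affinity_compact, then sorting address2os.
    // Illustrative example (not in the source): on a package/core/thread
    // machine (depth 3), scatter turns compact=0 into compact=2 below, so
    // the sort cycles through packages first and consecutive masks land on
    // different packages.
    //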
    case affinity_logical:
        __kmp_affinity_compact = 0;
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;
    case affinity_physical:
        if (__kmp_nThreadsPerCore > 1) {
            __kmp_affinity_compact = 1;
            if (__kmp_affinity_compact >= depth) {
                __kmp_affinity_compact = 0;
            }
        }
        else {
            __kmp_affinity_compact = 0;
        }
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;
    case affinity_scatter:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = 0;
        }
        else {
            __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
        }
        goto sortAddresses;
    case affinity_compact:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = depth - 1;
        }
        goto sortAddresses;
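    //
    // affinity_balanced spreads threads evenly over cores.  With more than
    // one package it degrades to affinity_none; for a non-uniform single
    // package, the procarr[] table built below is consumed later by
    // __kmp_balanced_affinity().
    //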
    case affinity_balanced:
        if( nPackages > 1 ) {
            if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        else if( __kmp_affinity_uniform_topology() ) {
            break;
        }
        else { // Non-uniform topology

            // Save the depth for later use.
            __kmp_aff_depth = depth;

            // Number of hardware thread contexts per core.
            int nth_per_core = __kmp_nThreadsPerCore;

            int core_level;
            if( nth_per_core > 1 ) { // HT machine
                core_level = depth - 2;
            }
            else { // No HT machine
                core_level = depth - 1;
            }
            int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
            int nproc = nth_per_core * ncores;

            procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                procarr[ i ] = -1;
            }

            for( int i = 0; i < __kmp_avail_proc; i++ ) {
                int proc = address2os[ i ].second;
                // With depth 3: level 0 is package, 1 is core, 2 is thread.
                // With one thread context per core, depth is 2 and level 1 is core.
                int level = depth - 1;

                int thread = 0;
                int core = address2os[ i ].first.labels[ level ];
                // If there is more than one thread context per core, the
                // thread label sits below the core label.
                if( nth_per_core > 1 ) {
                    thread = address2os[ i ].first.labels[ level ] % nth_per_core;
                    core = address2os[ i ].first.labels[ level - 1 ];
                }
                procarr[ core * nth_per_core + thread ] = proc;
            }

            break;
        }
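    //
    // Common tail for the sorting-based types: size the mask table, sort
    // address2os by the permuted topology, then copy one mask per retained
    // leader (or per OS proc when duplicate masks are requested).
    //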
    sortAddresses:
        // Allocate the gtid->affinity mask table.
        if (__kmp_affinity_dups) {
            __kmp_affinity_num_masks = __kmp_avail_proc;
        }
        else {
            __kmp_affinity_num_masks = numUnique;
        }

# if OMP_40_ENABLED
        if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
          && ( __kmp_affinity_num_places > 0 )
          && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
            __kmp_affinity_num_masks = __kmp_affinity_num_places;
        }
# endif

        __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
          __kmp_affinity_num_masks * __kmp_affin_mask_size);

        // Sort the address2os table according to the current setting of
        // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
        qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
        {
            int i;
            unsigned j;
            for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
                if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
                    continue;
                }
                unsigned osId = address2os[i].second;
                kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
                kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
                KMP_ASSERT(KMP_CPU_ISSET(osId, src));
                KMP_CPU_COPY(dest, src);
                if (++j >= __kmp_affinity_num_masks) {
                    break;
                }
            }
            KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
        }
        break;

    default:
        KMP_ASSERT2(0, "Unexpected affinity setting");
    }
    __kmp_free(osId2Mask);
    machine_hierarchy.init(address2os, __kmp_avail_proc);
}
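
//
// Public entry points.  __kmp_affinity_initialize() wraps the worker above
// so that an affinity_disabled setting survives the call unchanged.
//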
void
__kmp_affinity_initialize(void)
{
    // When affinity is disabled, temporarily slam the type to affinity_none,
    // run the real initialization routine, then restore affinity_disabled.
    int disabled = (__kmp_affinity_type == affinity_disabled);
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(disabled);
    }
    if (disabled) {
        __kmp_affinity_type = affinity_none;
    }
    __kmp_aux_affinity_initialize();
    if (disabled) {
        __kmp_affinity_type = affinity_disabled;
    }
}
void
__kmp_affinity_uninitialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        __kmp_free(__kmp_affinity_masks);
        __kmp_affinity_masks = NULL;
    }
    if (fullMask != NULL) {
        KMP_CPU_FREE(fullMask);
        fullMask = NULL;
    }
    __kmp_affinity_num_masks = 0;
# if OMP_40_ENABLED
    __kmp_affinity_num_places = 0;
# endif
    if (__kmp_affinity_proclist != NULL) {
        __kmp_free(__kmp_affinity_proclist);
        __kmp_affinity_proclist = NULL;
    }
    if( address2os != NULL ) {
        __kmp_free( address2os );
        address2os = NULL;
    }
    if( procarr != NULL ) {
        __kmp_free( procarr );
        procarr = NULL;
    }
}
void
__kmp_affinity_set_init_mask(int gtid, int isa_root)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
    if (th->th.th_affin_mask == NULL) {
        KMP_CPU_ALLOC(th->th.th_affin_mask);
    }
    else {
        KMP_CPU_ZERO(th->th.th_affin_mask);
    }

    // Copy the thread mask to the kmp_info_t structure.  If
    // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
    // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
    // is set, the mask of the initialization thread.
    kmp_affin_mask_t *mask;
    int i;

# if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
    {
        if ((__kmp_affinity_type == affinity_none)
          || (__kmp_affinity_type == affinity_balanced)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# if OMP_40_ENABLED
    else {
        if ((! isa_root)
          || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#  if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
#  endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# endif

# if OMP_40_ENABLED
    th->th.th_current_place = i;
    if (isa_root) {
        th->th.th_new_place = i;
        th->th.th_first_place = 0;
        th->th.th_last_place = __kmp_affinity_num_masks - 1;
    }

    if (i == KMP_PLACE_ALL) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
          gtid, i));
    }
# else
    if (i == -1) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
          gtid, i));
    }
# endif /* OMP_40_ENABLED */

    KMP_CPU_COPY(th->th.th_affin_mask, mask);

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
          buf);
    }

# if KMP_OS_WINDOWS
    // On Windows* OS, the process affinity mask might have changed.  If the
    // user didn't request affinity and this call fails, just continue silently.
    if ( __kmp_affinity_type == affinity_none ) {
        __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
    }
    else
# endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
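
//
// Rebind a thread to its new place within the OMP_PLACES partition.
//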
# if OMP_40_ENABLED

void
__kmp_affinity_set_place(int gtid)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

    KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
      gtid, th->th.th_new_place, th->th.th_current_place));

    // Check that the new place is within this thread's partition.
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    KMP_ASSERT(th->th.th_new_place >= 0);
    KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
    if (th->th.th_first_place <= th->th.th_last_place) {
        KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
          && (th->th.th_new_place <= th->th.th_last_place));
    }
    else {
        KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
          || (th->th.th_new_place >= th->th.th_last_place));
    }

    // Copy the thread mask to the kmp_info_t structure, and set this
    // thread's affinity.
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
      th->th.th_new_place);
    KMP_CPU_COPY(th->th.th_affin_mask, mask);
    th->th.th_current_place = th->th.th_new_place;

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
          gtid, buf);
    }
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

# endif /* OMP_40_ENABLED */
int
__kmp_aux_set_affinity(void **mask)
{
    int gtid;
    kmp_info_t *th;
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        else {
            unsigned proc;
            int num_procs = 0;

            for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
                if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                    continue;
                }
                num_procs++;
                if (! KMP_CPU_ISSET(proc, fullMask)) {
                    KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    break;
                }
            }
            if (num_procs == 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# if KMP_GROUP_AFFINITY
            if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# endif /* KMP_GROUP_AFFINITY */
        }
    }

    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    if (retval == 0) {
        KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
    }

# if OMP_40_ENABLED
    th->th.th_current_place = KMP_PLACE_UNDEFINED;
    th->th.th_new_place = KMP_PLACE_UNDEFINED;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;

    // Turn off 4.0 affinity for the current task.
    th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
# endif

    return retval;
}
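
//
// Service routine for kmp_get_affinity(): copy out the stored mask (or, on
// non-Windows systems, the live system mask) for the calling thread.
//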
int
__kmp_aux_get_affinity(void **mask)
{
    int gtid;
    int retval;
    kmp_info_t *th;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
        }
    }

# if !KMP_OS_WINDOWS
    retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
    });
    return retval;
# else
    retval = 0;
    KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
    return retval;
# endif /* KMP_OS_WINDOWS */
}
int
__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}
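
//
// The unset/get proc-in-mask services below mirror the setter above,
// differing only in the final KMP_CPU_* operation.
//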
int
__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}
int
__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return 0;
    }

    return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
void __kmp_balanced_affinity( int tid, int nthreads )
{
    if( __kmp_affinity_uniform_topology() ) {
        int coreID;
        int threadID;
        // Number of hardware thread contexts per core.
        int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
        // Number of cores.
        int ncores = __kmp_ncores;
        // How many threads are bound to each core.
        int chunk = nthreads / ncores;
        // How many cores get an additional thread bound to them - "big cores".
        int big_cores = nthreads % ncores;
        // Number of threads on the big cores.
        int big_nth = ( chunk + 1 ) * big_cores;
        if( tid < big_nth ) {
            coreID = tid / (chunk + 1 );
            threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
        } else { //tid >= big_nth
            coreID = ( tid - big_cores ) / chunk;
            threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
        }

        KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
          "Illegal set affinity operation when not capable");

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Granularity == thread
        if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
            int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
            KMP_CPU_SET( osID, mask);
        } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
            for( int i = 0; i < __kmp_nth_per_core; i++ ) {
                int osID;
                osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
                KMP_CPU_SET( osID, mask);
            }
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    }
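    //
    // Non-uniform topology: use the per-core procarr[] table built during
    // initialization, since cores may expose different numbers of contexts.
    //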
    else { // Non-uniform topology

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Number of hardware thread contexts per core.
        int nth_per_core = __kmp_nThreadsPerCore;
        int core_level;
        if( nth_per_core > 1 ) {
            core_level = __kmp_aff_depth - 2;
        } else {
            core_level = __kmp_aff_depth - 1;
        }

        // Upper bound on the number of cores; trailing cores with no
        // available contexts are not counted.
        int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;

        // Special case for performance: nthreads == __kmp_avail_proc.
        if( nthreads == __kmp_avail_proc ) {
            if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                int osID = address2os[ tid ].second;
                KMP_CPU_SET( osID, mask);
            } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
                int coreID = address2os[ tid ].first.labels[ core_level ];
                // Count the osIDs found for this core; because of the
                // non-uniform topology there can be fewer than nth_per_core.
                int cnt = 0;
                for( int i = 0; i < __kmp_avail_proc; i++ ) {
                    int osID = address2os[ i ].second;
                    int core = address2os[ i ].first.labels[ core_level ];
                    if( core == coreID ) {
                        KMP_CPU_SET( osID, mask);
                        cnt++;
                        if( cnt == nth_per_core ) {
                            break;
                        }
                    }
                }
            }
        }
        else if( nthreads <= __kmp_ncores ) {

            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Check whether this core from procarr[] has any available context.
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != - 1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // For fine granularity it is enough to set
                                // the first available osID for this core.
                                if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                                    break;
                                }
                            }
                        }
                        break;
                    } else {
                        core++;
                    }
                }
            }
        }
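        //
        // General case: threads are dealt to cores one context at a time.
        // Illustrative walk-through (not in the source): with two cores
        // holding { 2, 1 } usable contexts and nthreads == 3, the first
        // sweep (flag == 0) gives each context one thread; only later
        // sweeps (flag == 1) start doubling up contexts.
        //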
        else { // nthreads > __kmp_ncores

            // Array to save the number of processors at each core.
            int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
            // Array to save the number of cores with "x" available processors.
            int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
            // Array to save the number of cores with x to nth_per_core procs.
            int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }

            // Max number of processors.
            int nproc = nth_per_core * ncores;
            // An array to keep the number of threads for each context.
            int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with no available contexts.
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                } else {
                                    if( flag != 0 ) {
                                        newarr[ i * nth_per_core + k ] ++;
                                        cnt--;
                                        nth--;
                                        break;
                                    }
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }

            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    // Granularity == thread
                    if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask);
                    } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask);
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    }
}

#endif // KMP_AFFINITY_SUPPORTED