Intel® OpenMP* Runtime Library
kmp_affinity.h
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #ifndef KMP_AFFINITY_H
36 #define KMP_AFFINITY_H
37 
38 extern int __kmp_affinity_compact; /* Affinity 'compact' value */
39 
40 class Address {
41 public:
42  static const unsigned maxDepth = 32;
43  unsigned labels[maxDepth];
44  unsigned childNums[maxDepth];
45  unsigned depth;
46  unsigned leader;
47  Address(unsigned _depth)
48  : depth(_depth), leader(FALSE) {
49  }
50  Address &operator=(const Address &b) {
51  depth = b.depth;
52  for (unsigned i = 0; i < depth; i++) {
53  labels[i] = b.labels[i];
54  childNums[i] = b.childNums[i];
55  }
56  leader = FALSE;
57  return *this;
58  }
59  bool operator==(const Address &b) const {
60  if (depth != b.depth)
61  return false;
62  for (unsigned i = 0; i < depth; i++)
63  if(labels[i] != b.labels[i])
64  return false;
65  return true;
66  }
67  bool isClose(const Address &b, int level) const {
68  if (depth != b.depth)
69  return false;
70  if ((unsigned)level >= depth)
71  return true;
72  for (unsigned i = 0; i < (depth - level); i++)
73  if(labels[i] != b.labels[i])
74  return false;
75  return true;
76  }
77  bool operator!=(const Address &b) const {
78  return !operator==(b);
79  }
80 };
81 
82 class AddrUnsPair {
83 public:
84  Address first;
85  unsigned second;
86  AddrUnsPair(Address _first, unsigned _second)
87  : first(_first), second(_second) {
88  }
89  AddrUnsPair &operator=(const AddrUnsPair &b)
90  {
91  first = b.first;
92  second = b.second;
93  return *this;
94  }
95 };
96 
97 
98 static int
99 __kmp_affinity_cmp_Address_labels(const void *a, const void *b)
100 {
101  const Address *aa = (const Address *)&(((AddrUnsPair *)a)
102  ->first);
103  const Address *bb = (const Address *)&(((AddrUnsPair *)b)
104  ->first);
105  unsigned depth = aa->depth;
106  unsigned i;
107  KMP_DEBUG_ASSERT(depth == bb->depth);
108  for (i = 0; i < depth; i++) {
109  if (aa->labels[i] < bb->labels[i]) return -1;
110  if (aa->labels[i] > bb->labels[i]) return 1;
111  }
112  return 0;
113 }
114 
115 
116 static int
117 __kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
118 {
119  const Address *aa = (const Address *)&(((AddrUnsPair *)a)
120  ->first);
121  const Address *bb = (const Address *)&(((AddrUnsPair *)b)
122  ->first);
123  unsigned depth = aa->depth;
124  unsigned i;
125  KMP_DEBUG_ASSERT(depth == bb->depth);
126  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
127  KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
128  for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
129  int j = depth - i - 1;
130  if (aa->childNums[j] < bb->childNums[j]) return -1;
131  if (aa->childNums[j] > bb->childNums[j]) return 1;
132  }
133  for (; i < depth; i++) {
134  int j = i - __kmp_affinity_compact;
135  if (aa->childNums[j] < bb->childNums[j]) return -1;
136  if (aa->childNums[j] > bb->childNums[j]) return 1;
137  }
138  return 0;
139 }
140 
141 
148 public:
151  static const kmp_uint32 maxLeaves=4;
152  static const kmp_uint32 minBranch=4;
157  kmp_uint32 maxLevels;
158 
162  kmp_uint32 depth;
163  kmp_uint32 base_num_threads;
164  enum init_status { initialized=0, not_initialized=1, initializing=2 };
165  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
166  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
167 
171  kmp_uint32 *numPerLevel;
172  kmp_uint32 *skipPerLevel;
173 
174  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
175  int hier_depth = adr2os[0].first.depth;
176  int level = 0;
177  for (int i=hier_depth-1; i>=0; --i) {
178  int max = -1;
179  for (int j=0; j<num_addrs; ++j) {
180  int next = adr2os[j].first.childNums[i];
181  if (next > max) max = next;
182  }
183  numPerLevel[level] = max+1;
184  ++level;
185  }
186  }
187 
188  hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
189 
190  void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
191 
192  void init(AddrUnsPair *adr2os, int num_addrs)
193  {
194  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
195  if (bool_result == 0) { // Wait for initialization
196  while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
197  return;
198  }
199  KMP_DEBUG_ASSERT(bool_result==1);
200 
201  /* Added explicit initialization of the data fields here to prevent usage of dirty value
202  observed when static library is re-initialized multiple times (e.g. when
203  non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
204  depth = 1;
205  resizing = 0;
206  maxLevels = 7;
207  numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
208  skipPerLevel = &(numPerLevel[maxLevels]);
209  for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
210  numPerLevel[i] = 1;
211  skipPerLevel[i] = 1;
212  }
213 
214  // Sort table by physical ID
215  if (adr2os) {
216  qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
217  deriveLevels(adr2os, num_addrs);
218  }
219  else {
220  numPerLevel[0] = maxLeaves;
221  numPerLevel[1] = num_addrs/maxLeaves;
222  if (num_addrs%maxLeaves) numPerLevel[1]++;
223  }
224 
225  base_num_threads = num_addrs;
226  for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
227  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
228  depth++;
229 
230  kmp_uint32 branch = minBranch;
231  if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
232  if (branch<minBranch) branch=minBranch;
233  for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
234  while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
235  if (numPerLevel[d] & 1) numPerLevel[d]++;
236  numPerLevel[d] = numPerLevel[d] >> 1;
237  if (numPerLevel[d+1] == 1) depth++;
238  numPerLevel[d+1] = numPerLevel[d+1] << 1;
239  }
240  if(numPerLevel[0] == 1) {
241  branch = branch >> 1;
242  if (branch<4) branch = minBranch;
243  }
244  }
245 
246  for (kmp_uint32 i=1; i<depth; ++i)
247  skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
248  // Fill in hierarchy in the case of oversubscription
249  for (kmp_uint32 i=depth; i<maxLevels; ++i)
250  skipPerLevel[i] = 2*skipPerLevel[i-1];
251 
252  uninitialized = initialized; // One writer
253 
254  }
255 
256  // Resize the hierarchy if nproc changes to something larger than before
257  void resize(kmp_uint32 nproc)
258  {
259  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
260  if (bool_result == 0) { // Someone else is resizing
261  while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
262  return;
263  }
264  KMP_DEBUG_ASSERT(bool_result!=0);
265  KMP_DEBUG_ASSERT(nproc > base_num_threads);
266 
267  // Calculate new maxLevels
268  kmp_uint32 old_sz = skipPerLevel[depth-1];
269  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
270  // First see if old maxLevels is enough to contain new size
271  for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
272  skipPerLevel[i] = 2*skipPerLevel[i-1];
273  old_sz *= 2;
274  depth++;
275  }
276  if (nproc <= old_sz) // enough space already
277  return;
278  // Not enough space, need to expand hierarchy
279  while (nproc > old_sz) {
280  old_sz *=2;
281  incs++;
282  depth++;
283  }
284  maxLevels += incs;
285 
286  // Resize arrays
287  kmp_uint32 *old_numPerLevel = numPerLevel;
288  kmp_uint32 *old_skipPerLevel = skipPerLevel;
289  numPerLevel = skipPerLevel = NULL;
290  numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
291  skipPerLevel = &(numPerLevel[maxLevels]);
292 
293  // Copy old elements from old arrays
294  for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
295  numPerLevel[i] = old_numPerLevel[i];
296  skipPerLevel[i] = old_skipPerLevel[i];
297  }
298 
299  // Init new elements in arrays to 1
300  for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
301  numPerLevel[i] = 1;
302  skipPerLevel[i] = 1;
303  }
304 
305  // Free old arrays
306  __kmp_free(old_numPerLevel);
307 
308  // Fill in oversubscription levels of hierarchy
309  for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
310  skipPerLevel[i] = 2*skipPerLevel[i-1];
311 
312  base_num_threads = nproc;
313  resizing = 0; // One writer
314 
315  }
316 };
317 #endif // KMP_AFFINITY_H
kmp_uint32 depth
Definition: kmp_affinity.h:162
static const kmp_uint32 maxLeaves
Definition: kmp_affinity.h:151
kmp_uint32 * numPerLevel
Definition: kmp_affinity.h:171
kmp_uint32 maxLevels
Definition: kmp_affinity.h:157