1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #include <stddef.h>
36 
37 #include "kmp.h"
38 #include "kmp_itt.h"
39 #include "kmp_i18n.h"
40 #include "kmp_lock.h"
41 #include "kmp_io.h"
42 
43 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
44 # include <unistd.h>
45 # include <sys/syscall.h>
46 // We should really include <futex.h>, but that causes compatibility problems on different
47 // Linux* OS distributions, which either require that you also include <pci/types.h>, or
48 // break when you try to include it.
49 // Since all we need is the two macros below (which are part of the kernel ABI, and so cannot change),
50 // we just define the constants here and don't include <futex.h>.
51 # ifndef FUTEX_WAIT
52 # define FUTEX_WAIT 0
53 # endif
54 # ifndef FUTEX_WAKE
55 # define FUTEX_WAKE 1
56 # endif
57 #endif
58 
59 /* Implement spin locks for internal library use. */
60 /* The algorithm implemented is Lamport's bakery lock [1974]. */
61 
62 void
63 __kmp_validate_locks( void )
64 {
65  int i;
66  kmp_uint32 x, y;
67 
68  /* Check to make sure unsigned arithmetic wraps properly */
69  x = ~((kmp_uint32) 0) - 2;
70  y = x - 2;
71 
72  for (i = 0; i < 8; ++i, ++x, ++y) {
73  kmp_uint32 z = (x - y);
74  KMP_ASSERT( z == 2 );
75  }
76 
77  KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
78 }
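/* Editor's note (illustration only): the wraparound check above matters because the ticket
 * lock later in this file compares tickets with plain unsigned subtraction. For example, if
 * next_ticket has wrapped around to 1 while now_serving is still 0xFFFFFFFF, the 32-bit
 * difference (kmp_uint32)(1u - 0xFFFFFFFFu) == 2, so the computed queue distance is still
 * correct across the wrap. */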
79 
80 
81 /* ------------------------------------------------------------------------ */
82 /* test and set locks */
83 
84 //
85 // For the non-nested locks, we can only assume that the first 4 bytes were
86 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
87 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
88 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
89 //
90 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
91 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
92 //
93 
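// Editor's note (illustration only): for the simple test-and-set lock below, the 4-byte poll
// word is the entire lock state. It holds DYNA_LOCK_FREE(tas) when the lock is free and
// DYNA_LOCK_BUSY(gtid+1, tas) when held by global thread id gtid; __kmp_get_tas_lock_owner()
// recovers the owner by stripping any tag bits and subtracting 1, so -1 means "not owned".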
94 static kmp_int32
95 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
96 {
97  return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
98 }
99 
100 static inline bool
101 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
102 {
103  return lck->lk.depth_locked != -1;
104 }
105 
106 __forceinline static void
107 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
108 {
109  KMP_MB();
110 
111 #ifdef USE_LOCK_PROFILE
112  kmp_uint32 curr = TCR_4( lck->lk.poll );
113  if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
114  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
115  /* else __kmp_printf( "." );*/
116 #endif /* USE_LOCK_PROFILE */
117 
118  if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
119  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
120  KMP_FSYNC_ACQUIRED(lck);
121  return;
122  }
123 
124  kmp_uint32 spins;
125  KMP_FSYNC_PREPARE( lck );
126  KMP_INIT_YIELD( spins );
127  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
128  __kmp_xproc ) ) {
129  KMP_YIELD( TRUE );
130  }
131  else {
132  KMP_YIELD_SPIN( spins );
133  }
134 
135  while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
136  ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
137  //
138  // FIXME - use exponential backoff here
139  //
140  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
141  __kmp_xproc ) ) {
142  KMP_YIELD( TRUE );
143  }
144  else {
145  KMP_YIELD_SPIN( spins );
146  }
147  }
148  KMP_FSYNC_ACQUIRED( lck );
149 }
150 
151 void
152 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
153 {
154  __kmp_acquire_tas_lock_timed_template( lck, gtid );
155 }
156 
157 static void
158 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
159 {
160  char const * const func = "omp_set_lock";
161  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
162  && __kmp_is_tas_lock_nestable( lck ) ) {
163  KMP_FATAL( LockNestableUsedAsSimple, func );
164  }
165  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
166  KMP_FATAL( LockIsAlreadyOwned, func );
167  }
168  __kmp_acquire_tas_lock( lck, gtid );
169 }
170 
171 int
172 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
173 {
174  if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
175  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
176  KMP_FSYNC_ACQUIRED( lck );
177  return TRUE;
178  }
179  return FALSE;
180 }
181 
182 static int
183 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
184 {
185  char const * const func = "omp_test_lock";
186  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
187  && __kmp_is_tas_lock_nestable( lck ) ) {
188  KMP_FATAL( LockNestableUsedAsSimple, func );
189  }
190  return __kmp_test_tas_lock( lck, gtid );
191 }
192 
193 int
194 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
195 {
196  KMP_MB(); /* Flush all pending memory write invalidates. */
197 
198  KMP_FSYNC_RELEASING(lck);
199  KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
200  KMP_MB(); /* Flush all pending memory write invalidates. */
201 
202  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
203  __kmp_xproc ) );
204  return KMP_LOCK_RELEASED;
205 }
206 
207 static int
208 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
209 {
210  char const * const func = "omp_unset_lock";
211  KMP_MB(); /* in case another processor initialized lock */
212  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
213  && __kmp_is_tas_lock_nestable( lck ) ) {
214  KMP_FATAL( LockNestableUsedAsSimple, func );
215  }
216  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
217  KMP_FATAL( LockUnsettingFree, func );
218  }
219  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
220  && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
221  KMP_FATAL( LockUnsettingSetByAnother, func );
222  }
223  return __kmp_release_tas_lock( lck, gtid );
224 }
225 
226 void
227 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
228 {
229  TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
230 }
231 
232 static void
233 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
234 {
235  __kmp_init_tas_lock( lck );
236 }
237 
238 void
239 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
240 {
241  lck->lk.poll = 0;
242 }
243 
244 static void
245 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
246 {
247  char const * const func = "omp_destroy_lock";
248  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
249  && __kmp_is_tas_lock_nestable( lck ) ) {
250  KMP_FATAL( LockNestableUsedAsSimple, func );
251  }
252  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
253  KMP_FATAL( LockStillOwned, func );
254  }
255  __kmp_destroy_tas_lock( lck );
256 }
257 
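// Editor's illustrative sketch, excluded from the build with #if 0: how the simple
// test-and-set lock API above fits together for a caller that already knows its global
// thread id. The literal gtid used here is a placeholder for illustration; inside the
// runtime the value comes from the usual gtid bookkeeping.
#if 0
static void
__example_tas_lock_usage( void )
{
    kmp_tas_lock_t lock;
    kmp_int32 gtid = 0;                        /* placeholder gtid, illustration only */

    __kmp_init_tas_lock( &lock );              /* poll <- DYNA_LOCK_FREE(tas)          */
    __kmp_acquire_tas_lock( &lock, gtid );     /* spins until poll is CAS'ed to busy   */
    /* ... critical section ... */
    __kmp_release_tas_lock( &lock, gtid );     /* store-release of poll back to free   */
    __kmp_destroy_tas_lock( &lock );
}
#endif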
258 
259 //
260 // nested test and set locks
261 //
262 
263 void
264 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
265 {
266  KMP_DEBUG_ASSERT( gtid >= 0 );
267 
268  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
269  lck->lk.depth_locked += 1;
270  }
271  else {
272  __kmp_acquire_tas_lock_timed_template( lck, gtid );
273  lck->lk.depth_locked = 1;
274  }
275 }
276 
277 static void
278 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
279 {
280  char const * const func = "omp_set_nest_lock";
281  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
282  KMP_FATAL( LockSimpleUsedAsNestable, func );
283  }
284  __kmp_acquire_nested_tas_lock( lck, gtid );
285 }
286 
287 int
288 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
289 {
290  int retval;
291 
292  KMP_DEBUG_ASSERT( gtid >= 0 );
293 
294  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
295  retval = ++lck->lk.depth_locked;
296  }
297  else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
298  retval = 0;
299  }
300  else {
301  KMP_MB();
302  retval = lck->lk.depth_locked = 1;
303  }
304  return retval;
305 }
306 
307 static int
308 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
309 {
310  char const * const func = "omp_test_nest_lock";
311  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
312  KMP_FATAL( LockSimpleUsedAsNestable, func );
313  }
314  return __kmp_test_nested_tas_lock( lck, gtid );
315 }
316 
317 int
318 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
319 {
320  KMP_DEBUG_ASSERT( gtid >= 0 );
321 
322  KMP_MB();
323  if ( --(lck->lk.depth_locked) == 0 ) {
324  __kmp_release_tas_lock( lck, gtid );
325  return KMP_LOCK_RELEASED;
326  }
327  return KMP_LOCK_STILL_HELD;
328 }
329 
330 static int
331 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
332 {
333  char const * const func = "omp_unset_nest_lock";
334  KMP_MB(); /* in case another processor initialized lock */
335  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
336  KMP_FATAL( LockSimpleUsedAsNestable, func );
337  }
338  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
339  KMP_FATAL( LockUnsettingFree, func );
340  }
341  if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
342  KMP_FATAL( LockUnsettingSetByAnother, func );
343  }
344  return __kmp_release_nested_tas_lock( lck, gtid );
345 }
346 
347 void
348 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
349 {
350  __kmp_init_tas_lock( lck );
351  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
352 }
353 
354 static void
355 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
356 {
357  __kmp_init_nested_tas_lock( lck );
358 }
359 
360 void
361 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
362 {
363  __kmp_destroy_tas_lock( lck );
364  lck->lk.depth_locked = 0;
365 }
366 
367 static void
368 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
369 {
370  char const * const func = "omp_destroy_nest_lock";
371  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
372  KMP_FATAL( LockSimpleUsedAsNestable, func );
373  }
374  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
375  KMP_FATAL( LockStillOwned, func );
376  }
377  __kmp_destroy_nested_tas_lock( lck );
378 }
379 
380 
381 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
382 
383 /* ------------------------------------------------------------------------ */
384 /* futex locks */
385 
386 // futex locks are really just test and set locks, with a different method
387 // of handling contention. They take the same amount of space as test and
388 // set locks, and are allocated the same way (i.e. use the area allocated by
389 // the compiler for non-nested locks / allocate nested locks on the heap).
390 
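// Editor's note (illustration only) on the encoding used by the functions below: the poll
// word stores (gtid + 1) << 1 for the owning thread, and bit 0 is set by a contending thread
// just before it blocks in futex_wait, to tell the owner that a futex_wake is needed. The
// release path swaps the poll word back to free and, if the stripped old value had bit 0
// set, issues a futex_wake; __kmp_get_futex_lock_owner() undoes the shift to recover the
// owner's gtid.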
391 static kmp_int32
392 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
393 {
394  return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
395 }
396 
397 static inline bool
398 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
399 {
400  return lck->lk.depth_locked != -1;
401 }
402 
403 __forceinline static void
404 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
405 {
406  kmp_int32 gtid_code = ( gtid + 1 ) << 1;
407 
408  KMP_MB();
409 
410 #ifdef USE_LOCK_PROFILE
411  kmp_uint32 curr = TCR_4( lck->lk.poll );
412  if ( ( curr != 0 ) && ( curr != gtid_code ) )
413  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
414  /* else __kmp_printf( "." );*/
415 #endif /* USE_LOCK_PROFILE */
416 
417  KMP_FSYNC_PREPARE( lck );
418  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
419  lck, lck->lk.poll, gtid ) );
420 
421  kmp_int32 poll_val;
422 
423  while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
424  DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {
425 
426  kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
427  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
428  lck, gtid, poll_val, cond ) );
429 
430  //
431  // NOTE: if you try to use the following condition for this branch
432  //
433  // if ( poll_val & 1 == 0 )
434  //
435  // Then the 12.0 compiler has a bug where the following block will
436  // always be skipped, regardless of the value of the LSB of poll_val.
437  //
438  if ( ! cond ) {
439  //
440  // Try to set the lsb in the poll to indicate to the owner
441  // thread that they need to wake this thread up.
442  //
443  if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
444  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
445  lck, lck->lk.poll, gtid ) );
446  continue;
447  }
448  poll_val |= DYNA_LOCK_BUSY(1, futex);
449 
450  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
451  lck, lck->lk.poll, gtid ) );
452  }
453 
454  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
455  lck, gtid, poll_val ) );
456 
457  kmp_int32 rc;
458  if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
459  poll_val, NULL, NULL, 0 ) ) != 0 ) {
460  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
461  lck, gtid, poll_val, rc, errno ) );
462  continue;
463  }
464 
465  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
466  lck, gtid, poll_val ) );
467  //
468  // This thread has now done a successful futex wait call and was
469  // entered on the OS futex queue. We must now perform a futex
470  // wake call when releasing the lock, as we have no idea how many
471  // other threads are in the queue.
472  //
473  gtid_code |= 1;
474  }
475 
476  KMP_FSYNC_ACQUIRED( lck );
477  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
478  lck, lck->lk.poll, gtid ) );
479 }
480 
481 void
482 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
483 {
484  __kmp_acquire_futex_lock_timed_template( lck, gtid );
485 }
486 
487 static void
488 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
489 {
490  char const * const func = "omp_set_lock";
491  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
492  && __kmp_is_futex_lock_nestable( lck ) ) {
493  KMP_FATAL( LockNestableUsedAsSimple, func );
494  }
495  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
496  KMP_FATAL( LockIsAlreadyOwned, func );
497  }
498  __kmp_acquire_futex_lock( lck, gtid );
499 }
500 
501 int
502 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
503 {
504  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
505  KMP_FSYNC_ACQUIRED( lck );
506  return TRUE;
507  }
508  return FALSE;
509 }
510 
511 static int
512 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
513 {
514  char const * const func = "omp_test_lock";
515  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
516  && __kmp_is_futex_lock_nestable( lck ) ) {
517  KMP_FATAL( LockNestableUsedAsSimple, func );
518  }
519  return __kmp_test_futex_lock( lck, gtid );
520 }
521 
522 int
523 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
524 {
525  KMP_MB(); /* Flush all pending memory write invalidates. */
526 
527  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
528  lck, lck->lk.poll, gtid ) );
529 
530  KMP_FSYNC_RELEASING(lck);
531 
532  kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );
533 
534  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
535  lck, gtid, poll_val ) );
536 
537  if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
538  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
539  lck, gtid ) );
540  syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
541  }
542 
543  KMP_MB(); /* Flush all pending memory write invalidates. */
544 
545  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
546  lck, lck->lk.poll, gtid ) );
547 
548  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
549  __kmp_xproc ) );
550  return KMP_LOCK_RELEASED;
551 }
552 
553 static int
554 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
555 {
556  char const * const func = "omp_unset_lock";
557  KMP_MB(); /* in case another processor initialized lock */
558  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
559  && __kmp_is_futex_lock_nestable( lck ) ) {
560  KMP_FATAL( LockNestableUsedAsSimple, func );
561  }
562  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
563  KMP_FATAL( LockUnsettingFree, func );
564  }
565  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
566  && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
567  KMP_FATAL( LockUnsettingSetByAnother, func );
568  }
569  return __kmp_release_futex_lock( lck, gtid );
570 }
571 
572 void
573 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
574 {
575  TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
576 }
577 
578 static void
579 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
580 {
581  __kmp_init_futex_lock( lck );
582 }
583 
584 void
585 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
586 {
587  lck->lk.poll = 0;
588 }
589 
590 static void
591 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
592 {
593  char const * const func = "omp_destroy_lock";
594  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
595  && __kmp_is_futex_lock_nestable( lck ) ) {
596  KMP_FATAL( LockNestableUsedAsSimple, func );
597  }
598  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
599  KMP_FATAL( LockStillOwned, func );
600  }
601  __kmp_destroy_futex_lock( lck );
602 }
603 
604 
605 //
606 // nested futex locks
607 //
608 
609 void
610 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
611 {
612  KMP_DEBUG_ASSERT( gtid >= 0 );
613 
614  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
615  lck->lk.depth_locked += 1;
616  }
617  else {
618  __kmp_acquire_futex_lock_timed_template( lck, gtid );
619  lck->lk.depth_locked = 1;
620  }
621 }
622 
623 static void
624 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
625 {
626  char const * const func = "omp_set_nest_lock";
627  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
628  KMP_FATAL( LockSimpleUsedAsNestable, func );
629  }
630  __kmp_acquire_nested_futex_lock( lck, gtid );
631 }
632 
633 int
634 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
635 {
636  int retval;
637 
638  KMP_DEBUG_ASSERT( gtid >= 0 );
639 
640  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
641  retval = ++lck->lk.depth_locked;
642  }
643  else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
644  retval = 0;
645  }
646  else {
647  KMP_MB();
648  retval = lck->lk.depth_locked = 1;
649  }
650  return retval;
651 }
652 
653 static int
654 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
655 {
656  char const * const func = "omp_test_nest_lock";
657  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
658  KMP_FATAL( LockSimpleUsedAsNestable, func );
659  }
660  return __kmp_test_nested_futex_lock( lck, gtid );
661 }
662 
663 int
664 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
665 {
666  KMP_DEBUG_ASSERT( gtid >= 0 );
667 
668  KMP_MB();
669  if ( --(lck->lk.depth_locked) == 0 ) {
670  __kmp_release_futex_lock( lck, gtid );
671  return KMP_LOCK_RELEASED;
672  }
673  return KMP_LOCK_STILL_HELD;
674 }
675 
676 static int
677 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
678 {
679  char const * const func = "omp_unset_nest_lock";
680  KMP_MB(); /* in case another processor initialized lock */
681  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
682  KMP_FATAL( LockSimpleUsedAsNestable, func );
683  }
684  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
685  KMP_FATAL( LockUnsettingFree, func );
686  }
687  if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
688  KMP_FATAL( LockUnsettingSetByAnother, func );
689  }
690  return __kmp_release_nested_futex_lock( lck, gtid );
691 }
692 
693 void
694 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
695 {
696  __kmp_init_futex_lock( lck );
697  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
698 }
699 
700 static void
701 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
702 {
703  __kmp_init_nested_futex_lock( lck );
704 }
705 
706 void
707 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
708 {
709  __kmp_destroy_futex_lock( lck );
710  lck->lk.depth_locked = 0;
711 }
712 
713 static void
714 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
715 {
716  char const * const func = "omp_destroy_nest_lock";
717  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
718  KMP_FATAL( LockSimpleUsedAsNestable, func );
719  }
720  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
721  KMP_FATAL( LockStillOwned, func );
722  }
723  __kmp_destroy_nested_futex_lock( lck );
724 }
725 
726 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
727 
728 
729 /* ------------------------------------------------------------------------ */
730 /* ticket (bakery) locks */
731 
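// Editor's illustrative walk-through (added for clarity) of the bakery/ticket scheme
// implemented below: each acquirer atomically takes next_ticket++ as its ticket and then
// waits until now_serving equals that ticket; a release simply advances now_serving. For
// example, if threads A, B and C arrive at a free lock in that order they draw tickets
// 0, 1 and 2; A enters immediately because now_serving == 0, and each release increments
// now_serving so B and then C enter in FIFO order.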
732 static kmp_int32
733 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
734 {
735  return TCR_4( lck->lk.owner_id ) - 1;
736 }
737 
738 static inline bool
739 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
740 {
741  return lck->lk.depth_locked != -1;
742 }
743 
744 static kmp_uint32
745 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
746 {
747  register kmp_uint32 pause;
748 
749  if (value == checker) {
750  return TRUE;
751  }
752  for (pause = checker - value; pause != 0; --pause);
753  return FALSE;
754 }
755 
756 __forceinline static void
757 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
758 {
759  kmp_uint32 my_ticket;
760  KMP_MB();
761 
762  my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
763 
764 #ifdef USE_LOCK_PROFILE
765  if ( TCR_4( lck->lk.now_serving ) != my_ticket )
766  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
767  /* else __kmp_printf( "." );*/
768 #endif /* USE_LOCK_PROFILE */
769 
770  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
771  KMP_FSYNC_ACQUIRED(lck);
772  return;
773  }
774  KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
775  KMP_FSYNC_ACQUIRED(lck);
776 }
777 
778 void
779 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
780 {
781  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
782 }
783 
784 static void
785 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
786 {
787  char const * const func = "omp_set_lock";
788  if ( lck->lk.initialized != lck ) {
789  KMP_FATAL( LockIsUninitialized, func );
790  }
791  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
792  KMP_FATAL( LockNestableUsedAsSimple, func );
793  }
794  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
795  KMP_FATAL( LockIsAlreadyOwned, func );
796  }
797 
798  __kmp_acquire_ticket_lock( lck, gtid );
799 
800  lck->lk.owner_id = gtid + 1;
801 }
802 
803 int
804 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
805 {
806  kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
807  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
808  kmp_uint32 next_ticket = my_ticket + 1;
809  if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
810  my_ticket, next_ticket ) ) {
811  KMP_FSYNC_ACQUIRED( lck );
812  return TRUE;
813  }
814  }
815  return FALSE;
816 }
817 
818 static int
819 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
820 {
821  char const * const func = "omp_test_lock";
822  if ( lck->lk.initialized != lck ) {
823  KMP_FATAL( LockIsUninitialized, func );
824  }
825  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
826  KMP_FATAL( LockNestableUsedAsSimple, func );
827  }
828 
829  int retval = __kmp_test_ticket_lock( lck, gtid );
830 
831  if ( retval ) {
832  lck->lk.owner_id = gtid + 1;
833  }
834  return retval;
835 }
836 
837 int
838 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
839 {
840  kmp_uint32 distance;
841 
842  KMP_MB(); /* Flush all pending memory write invalidates. */
843 
844  KMP_FSYNC_RELEASING(lck);
845  distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
846 
847  KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
848 
849  KMP_MB(); /* Flush all pending memory write invalidates. */
850 
851  KMP_YIELD( distance
852  > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
853  return KMP_LOCK_RELEASED;
854 }
855 
856 static int
857 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
858 {
859  char const * const func = "omp_unset_lock";
860  KMP_MB(); /* in case another processor initialized lock */
861  if ( lck->lk.initialized != lck ) {
862  KMP_FATAL( LockIsUninitialized, func );
863  }
864  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
865  KMP_FATAL( LockNestableUsedAsSimple, func );
866  }
867  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
868  KMP_FATAL( LockUnsettingFree, func );
869  }
870  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
871  && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
872  KMP_FATAL( LockUnsettingSetByAnother, func );
873  }
874  lck->lk.owner_id = 0;
875  return __kmp_release_ticket_lock( lck, gtid );
876 }
877 
878 void
879 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
880 {
881  lck->lk.location = NULL;
882  TCW_4( lck->lk.next_ticket, 0 );
883  TCW_4( lck->lk.now_serving, 0 );
884  lck->lk.owner_id = 0; // no thread owns the lock.
885  lck->lk.depth_locked = -1; // -1 => not a nested lock.
886  lck->lk.initialized = (kmp_ticket_lock *)lck;
887 }
888 
889 static void
890 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
891 {
892  __kmp_init_ticket_lock( lck );
893 }
894 
895 void
896 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
897 {
898  lck->lk.initialized = NULL;
899  lck->lk.location = NULL;
900  lck->lk.next_ticket = 0;
901  lck->lk.now_serving = 0;
902  lck->lk.owner_id = 0;
903  lck->lk.depth_locked = -1;
904 }
905 
906 static void
907 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
908 {
909  char const * const func = "omp_destroy_lock";
910  if ( lck->lk.initialized != lck ) {
911  KMP_FATAL( LockIsUninitialized, func );
912  }
913  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
914  KMP_FATAL( LockNestableUsedAsSimple, func );
915  }
916  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
917  KMP_FATAL( LockStillOwned, func );
918  }
919  __kmp_destroy_ticket_lock( lck );
920 }
921 
922 
923 //
924 // nested ticket locks
925 //
926 
927 void
928 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
929 {
930  KMP_DEBUG_ASSERT( gtid >= 0 );
931 
932  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
933  lck->lk.depth_locked += 1;
934  }
935  else {
936  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
937  KMP_MB();
938  lck->lk.depth_locked = 1;
939  KMP_MB();
940  lck->lk.owner_id = gtid + 1;
941  }
942 }
943 
944 static void
945 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
946 {
947  char const * const func = "omp_set_nest_lock";
948  if ( lck->lk.initialized != lck ) {
949  KMP_FATAL( LockIsUninitialized, func );
950  }
951  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
952  KMP_FATAL( LockSimpleUsedAsNestable, func );
953  }
954  __kmp_acquire_nested_ticket_lock( lck, gtid );
955 }
956 
957 int
958 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
959 {
960  int retval;
961 
962  KMP_DEBUG_ASSERT( gtid >= 0 );
963 
964  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
965  retval = ++lck->lk.depth_locked;
966  }
967  else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
968  retval = 0;
969  }
970  else {
971  KMP_MB();
972  retval = lck->lk.depth_locked = 1;
973  KMP_MB();
974  lck->lk.owner_id = gtid + 1;
975  }
976  return retval;
977 }
978 
979 static int
980 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
981  kmp_int32 gtid )
982 {
983  char const * const func = "omp_test_nest_lock";
984  if ( lck->lk.initialized != lck ) {
985  KMP_FATAL( LockIsUninitialized, func );
986  }
987  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
988  KMP_FATAL( LockSimpleUsedAsNestable, func );
989  }
990  return __kmp_test_nested_ticket_lock( lck, gtid );
991 }
992 
993 int
994 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
995 {
996  KMP_DEBUG_ASSERT( gtid >= 0 );
997 
998  KMP_MB();
999  if ( --(lck->lk.depth_locked) == 0 ) {
1000  KMP_MB();
1001  lck->lk.owner_id = 0;
1002  __kmp_release_ticket_lock( lck, gtid );
1003  return KMP_LOCK_RELEASED;
1004  }
1005  return KMP_LOCK_STILL_HELD;
1006 }
1007 
1008 static int
1009 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1010 {
1011  char const * const func = "omp_unset_nest_lock";
1012  KMP_MB(); /* in case another processor initialized lock */
1013  if ( lck->lk.initialized != lck ) {
1014  KMP_FATAL( LockIsUninitialized, func );
1015  }
1016  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1017  KMP_FATAL( LockSimpleUsedAsNestable, func );
1018  }
1019  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1020  KMP_FATAL( LockUnsettingFree, func );
1021  }
1022  if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1023  KMP_FATAL( LockUnsettingSetByAnother, func );
1024  }
1025  return __kmp_release_nested_ticket_lock( lck, gtid );
1026 }
1027 
1028 void
1029 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1030 {
1031  __kmp_init_ticket_lock( lck );
1032  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1033 }
1034 
1035 static void
1036 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1037 {
1038  __kmp_init_nested_ticket_lock( lck );
1039 }
1040 
1041 void
1042 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1043 {
1044  __kmp_destroy_ticket_lock( lck );
1045  lck->lk.depth_locked = 0;
1046 }
1047 
1048 static void
1049 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1050 {
1051  char const * const func = "omp_destroy_nest_lock";
1052  if ( lck->lk.initialized != lck ) {
1053  KMP_FATAL( LockIsUninitialized, func );
1054  }
1055  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1056  KMP_FATAL( LockSimpleUsedAsNestable, func );
1057  }
1058  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1059  KMP_FATAL( LockStillOwned, func );
1060  }
1061  __kmp_destroy_nested_ticket_lock( lck );
1062 }
1063 
1064 
1065 //
1066 // access functions to fields which don't exist for all lock kinds.
1067 //
1068 
1069 static int
1070 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1071 {
1072  return lck == lck->lk.initialized;
1073 }
1074 
1075 static const ident_t *
1076 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1077 {
1078  return lck->lk.location;
1079 }
1080 
1081 static void
1082 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1083 {
1084  lck->lk.location = loc;
1085 }
1086 
1087 static kmp_lock_flags_t
1088 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1089 {
1090  return lck->lk.flags;
1091 }
1092 
1093 static void
1094 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1095 {
1096  lck->lk.flags = flags;
1097 }
1098 
1099 /* ------------------------------------------------------------------------ */
1100 /* queuing locks */
1101 
1102 /*
1103  * First the states
1104  * (head,tail) =              0, 0   means lock is unheld, nobody on queue
1105  *                UINT_MAX or -1, 0   means lock is held, nobody on queue
1106  *                             h, h   means lock is held or about to transition, 1 element on queue
1107  *                             h, t   h <> t, means lock is held or about to transition, >1 elements on queue
1108  *
1109  * Now the transitions
1110  *    Acquire(0,0)  = -1 ,0
1111  *    Release(0,0)  = Error
1112  *    Acquire(-1,0) =  h ,h    h > 0
1113  *    Release(-1,0) =  0 ,0
1114  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1115  *    Release(h,h)  = -1 ,0    h > 0
1116  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1117  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1118  *
1119  * And pictorially
1120  *
1121  *
1122  *          +-----+
1123  *          | 0, 0|------- release -------> Error
1124  *          +-----+
1125  *            |  ^
1126  *     acquire|  |release
1127  *            |  |
1128  *            |  |
1129  *            v  |
1130  *          +-----+
1131  *          |-1, 0|
1132  *          +-----+
1133  *            |  ^
1134  *     acquire|  |release
1135  *            |  |
1136  *            |  |
1137  *            v  |
1138  *          +-----+
1139  *          | h, h|
1140  *          +-----+
1141  *            |  ^
1142  *     acquire|  |release
1143  *            |  |
1144  *            |  |
1145  *            v  |
1146  *          +-----+
1147  *          | h, t|----- acquire, release loopback ---+
1148  *          +-----+                                   |
1149  *             ^                                      |
1150  *             |                                      |
1151  *             +--------------------------------------+
1152  *
1153  */
1154 
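/* Editor's illustrative trace of the transitions above (added for clarity; queue entries
 * store gtid+1, so threads T0, T1 and T2 appear as ids 1, 2 and 3):
 *
 *   T0 acquires a free lock:      (0,0)  -> (-1,0)   T0 holds, queue empty
 *   T1 arrives while T0 holds:    (-1,0) -> (2,2)    T1 is the only queued waiter
 *   T2 arrives while T1 waits:    (2,2)  -> (2,3)    tail now points at T2
 *   T0 releases:                  (2,3)  -> (3,3)    T1 is dequeued and now holds; T2 still queued
 *   T1 releases:                  (3,3)  -> (-1,0)   T2 is dequeued and now holds, queue empty
 *   T2 releases:                  (-1,0) -> (0,0)    lock is free again
 */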
1155 #ifdef DEBUG_QUEUING_LOCKS
1156 
1157 /* Stuff for circular trace buffer */
1158 #define TRACE_BUF_ELE 1024
1159 static char traces[TRACE_BUF_ELE][128] = { 0 };
1160 static int tc = 0;
1161 #define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
1162 #define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1163 #define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1164 
1165 static void
1166 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1167  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1168 {
1169  kmp_int32 t, i;
1170 
1171  __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1172 
1173  i = tc % TRACE_BUF_ELE;
1174  __kmp_printf_no_lock( "%s\n", traces[i] );
1175  i = (i+1) % TRACE_BUF_ELE;
1176  while ( i != (tc % TRACE_BUF_ELE) ) {
1177  __kmp_printf_no_lock( "%s", traces[i] );
1178  i = (i+1) % TRACE_BUF_ELE;
1179  }
1180  __kmp_printf_no_lock( "\n" );
1181 
1182  __kmp_printf_no_lock(
1183  "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1184  gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1185  head_id, tail_id );
1186 
1187  __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1188 
1189  if ( lck->lk.head_id >= 1 ) {
1190  t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1191  while (t > 0) {
1192  __kmp_printf_no_lock( "-> %d ", t );
1193  t = __kmp_threads[t-1]->th.th_next_waiting;
1194  }
1195  }
1196  __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1197  __kmp_printf_no_lock( "\n\n" );
1198 }
1199 
1200 #endif /* DEBUG_QUEUING_LOCKS */
1201 
1202 static kmp_int32
1203 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1204 {
1205  return TCR_4( lck->lk.owner_id ) - 1;
1206 }
1207 
1208 static inline bool
1209 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1210 {
1211  return lck->lk.depth_locked != -1;
1212 }
1213 
1214 /* Acquire a lock using the queuing lock implementation */
1215 template <bool takeTime>
1216 /* [TLW] The unused template above is left behind because of what BEB believes is a
1217  potential compiler problem with __forceinline. */
1218 __forceinline static void
1219 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1220  kmp_int32 gtid )
1221 {
1222  register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1223  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1224  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1225  volatile kmp_uint32 *spin_here_p;
1226  kmp_int32 need_mf = 1;
1227 
1228 #if OMPT_SUPPORT
1229  ompt_state_t prev_state = ompt_state_undefined;
1230 #endif
1231 
1232  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1233 
1234  KMP_FSYNC_PREPARE( lck );
1235  KMP_DEBUG_ASSERT( this_thr != NULL );
1236  spin_here_p = & this_thr->th.th_spin_here;
1237 
1238 #ifdef DEBUG_QUEUING_LOCKS
1239  TRACE_LOCK( gtid+1, "acq ent" );
1240  if ( *spin_here_p )
1241  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1242  if ( this_thr->th.th_next_waiting != 0 )
1243  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1244 #endif
1245  KMP_DEBUG_ASSERT( !*spin_here_p );
1246  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1247 
1248 
1249  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1250  that may follow, not just in execution order, but also in visibility order. This way,
1251  when a releasing thread observes the changes to the queue by this thread, it can
1252  rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1253  spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1254  to FALSE before this thread sets it to TRUE, this thread will hang.
1255  */
1256  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1257 
1258  while( 1 ) {
1259  kmp_int32 enqueued;
1260  kmp_int32 head;
1261  kmp_int32 tail;
1262 
1263  head = *head_id_p;
1264 
1265  switch ( head ) {
1266 
1267  case -1:
1268  {
1269 #ifdef DEBUG_QUEUING_LOCKS
1270  tail = *tail_id_p;
1271  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1272 #endif
1273  tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1274  this assignment prevents us from entering the if ( t > 0 )
1275  condition in the enqueued case below, which is not necessary for
1276  this state transition */
1277 
1278  need_mf = 0;
1279  /* try (-1,0)->(tid,tid) */
1280  enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1281  KMP_PACK_64( -1, 0 ),
1282  KMP_PACK_64( gtid+1, gtid+1 ) );
1283 #ifdef DEBUG_QUEUING_LOCKS
1284  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1285 #endif
1286  }
1287  break;
1288 
1289  default:
1290  {
1291  tail = *tail_id_p;
1292  KMP_DEBUG_ASSERT( tail != gtid + 1 );
1293 
1294 #ifdef DEBUG_QUEUING_LOCKS
1295  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1296 #endif
1297 
1298  if ( tail == 0 ) {
1299  enqueued = FALSE;
1300  }
1301  else {
1302  need_mf = 0;
1303  /* try (h,t) or (h,h)->(h,tid) */
1304  enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1305 
1306 #ifdef DEBUG_QUEUING_LOCKS
1307  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1308 #endif
1309  }
1310  }
1311  break;
1312 
1313  case 0: /* empty queue */
1314  {
1315  kmp_int32 grabbed_lock;
1316 
1317 #ifdef DEBUG_QUEUING_LOCKS
1318  tail = *tail_id_p;
1319  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1320 #endif
1321  /* try (0,0)->(-1,0) */
1322 
1323  /* only legal transition out of head = 0 is head = -1 with no change to tail */
1324  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1325 
1326  if ( grabbed_lock ) {
1327 
1328  *spin_here_p = FALSE;
1329 
1330  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1331  lck, gtid ));
1332 #ifdef DEBUG_QUEUING_LOCKS
1333  TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1334 #endif
1335 
1336 #if OMPT_SUPPORT
1337  if ((ompt_status & ompt_status_track) &&
1338  prev_state != ompt_state_undefined) {
1339  /* change the state before clearing wait_id */
1340  this_thr->th.ompt_thread_info.state = prev_state;
1341  this_thr->th.ompt_thread_info.wait_id = 0;
1342  }
1343 #endif
1344 
1345  KMP_FSYNC_ACQUIRED( lck );
1346  return; /* lock holder cannot be on queue */
1347  }
1348  enqueued = FALSE;
1349  }
1350  break;
1351  }
1352 
1353 #if OMPT_SUPPORT
1354  if ((ompt_status & ompt_status_track) &&
1355  prev_state == ompt_state_undefined) {
1356  /* this thread will spin; set wait_id before entering wait state */
1357  prev_state = this_thr->th.ompt_thread_info.state;
1358  this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1359  this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1360  }
1361 #endif
1362 
1363  if ( enqueued ) {
1364  if ( tail > 0 ) {
1365  kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1366  KMP_ASSERT( tail_thr != NULL );
1367  tail_thr->th.th_next_waiting = gtid+1;
1368  /* corresponding wait for this write in release code */
1369  }
1370  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1371 
1372 
1373  /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1374  * throughput only here.
1375  */
1376  KMP_MB();
1377  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1378 
1379 #ifdef DEBUG_QUEUING_LOCKS
1380  TRACE_LOCK( gtid+1, "acq spin" );
1381 
1382  if ( this_thr->th.th_next_waiting != 0 )
1383  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1384 #endif
1385  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1386  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1387  lck, gtid ));
1388 
1389 #ifdef DEBUG_QUEUING_LOCKS
1390  TRACE_LOCK( gtid+1, "acq exit 2" );
1391 #endif
1392 
1393 #if OMPT_SUPPORT
1394  /* change the state before clearing wait_id */
1395  this_thr->th.ompt_thread_info.state = prev_state;
1396  this_thr->th.ompt_thread_info.wait_id = 0;
1397 #endif
1398 
1399  /* got lock, we were dequeued by the thread that released lock */
1400  return;
1401  }
1402 
1403  /* Yield if number of threads > number of logical processors */
1404  /* ToDo: Not sure why this should only be in oversubscription case,
1405  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1406  KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1407  __kmp_xproc ) );
1408 #ifdef DEBUG_QUEUING_LOCKS
1409  TRACE_LOCK( gtid+1, "acq retry" );
1410 #endif
1411 
1412  }
1413  KMP_ASSERT2( 0, "should not get here" );
1414 }
1415 
1416 void
1417 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1418 {
1419  KMP_DEBUG_ASSERT( gtid >= 0 );
1420 
1421  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1422 }
1423 
1424 static void
1425 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1426  kmp_int32 gtid )
1427 {
1428  char const * const func = "omp_set_lock";
1429  if ( lck->lk.initialized != lck ) {
1430  KMP_FATAL( LockIsUninitialized, func );
1431  }
1432  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1433  KMP_FATAL( LockNestableUsedAsSimple, func );
1434  }
1435  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1436  KMP_FATAL( LockIsAlreadyOwned, func );
1437  }
1438 
1439  __kmp_acquire_queuing_lock( lck, gtid );
1440 
1441  lck->lk.owner_id = gtid + 1;
1442 }
1443 
1444 int
1445 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1446 {
1447  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1448  kmp_int32 head;
1449 #ifdef KMP_DEBUG
1450  kmp_info_t *this_thr;
1451 #endif
1452 
1453  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1454  KMP_DEBUG_ASSERT( gtid >= 0 );
1455 #ifdef KMP_DEBUG
1456  this_thr = __kmp_thread_from_gtid( gtid );
1457  KMP_DEBUG_ASSERT( this_thr != NULL );
1458  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1459 #endif
1460 
1461  head = *head_id_p;
1462 
1463  if ( head == 0 ) { /* nobody on queue, nobody holding */
1464 
1465  /* try (0,0)->(-1,0) */
1466 
1467  if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1468  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1469  KMP_FSYNC_ACQUIRED(lck);
1470  return TRUE;
1471  }
1472  }
1473 
1474  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1475  return FALSE;
1476 }
1477 
1478 static int
1479 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1480 {
1481  char const * const func = "omp_test_lock";
1482  if ( lck->lk.initialized != lck ) {
1483  KMP_FATAL( LockIsUninitialized, func );
1484  }
1485  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1486  KMP_FATAL( LockNestableUsedAsSimple, func );
1487  }
1488 
1489  int retval = __kmp_test_queuing_lock( lck, gtid );
1490 
1491  if ( retval ) {
1492  lck->lk.owner_id = gtid + 1;
1493  }
1494  return retval;
1495 }
1496 
1497 int
1498 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1499 {
1500  register kmp_info_t *this_thr;
1501  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1502  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1503 
1504  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1505  KMP_DEBUG_ASSERT( gtid >= 0 );
1506  this_thr = __kmp_thread_from_gtid( gtid );
1507  KMP_DEBUG_ASSERT( this_thr != NULL );
1508 #ifdef DEBUG_QUEUING_LOCKS
1509  TRACE_LOCK( gtid+1, "rel ent" );
1510 
1511  if ( this_thr->th.th_spin_here )
1512  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1513  if ( this_thr->th.th_next_waiting != 0 )
1514  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1515 #endif
1516  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1517  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1518 
1519  KMP_FSYNC_RELEASING(lck);
1520 
1521  while( 1 ) {
1522  kmp_int32 dequeued;
1523  kmp_int32 head;
1524  kmp_int32 tail;
1525 
1526  head = *head_id_p;
1527 
1528 #ifdef DEBUG_QUEUING_LOCKS
1529  tail = *tail_id_p;
1530  TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1531  if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1532 #endif
1533  KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1534 
1535  if ( head == -1 ) { /* nobody on queue */
1536 
1537  /* try (-1,0)->(0,0) */
1538  if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1539  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1540  lck, gtid ));
1541 #ifdef DEBUG_QUEUING_LOCKS
1542  TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1543 #endif
1544 
1545 #if OMPT_SUPPORT
1546  /* nothing to do - no other thread is trying to shift blame */
1547 #endif
1548 
1549  return KMP_LOCK_RELEASED;
1550  }
1551  dequeued = FALSE;
1552  }
1553  else {
1554 
1555  tail = *tail_id_p;
1556  if ( head == tail ) { /* only one thread on the queue */
1557 
1558 #ifdef DEBUG_QUEUING_LOCKS
1559  if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1560 #endif
1561  KMP_DEBUG_ASSERT( head > 0 );
1562 
1563  /* try (h,h)->(-1,0) */
1564  dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1565  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1566 #ifdef DEBUG_QUEUING_LOCKS
1567  TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1568 #endif
1569 
1570  }
1571  else {
1572  volatile kmp_int32 *waiting_id_p;
1573  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1574  KMP_DEBUG_ASSERT( head_thr != NULL );
1575  waiting_id_p = & head_thr->th.th_next_waiting;
1576 
1577  /* Does this require synchronous reads? */
1578 #ifdef DEBUG_QUEUING_LOCKS
1579  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1580 #endif
1581  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1582 
1583  /* try (h,t)->(h',t) or (t,t) */
1584 
1585  KMP_MB();
1586  /* make sure enqueuing thread has time to update next waiting thread field */
1587  *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1588 #ifdef DEBUG_QUEUING_LOCKS
1589  TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1590 #endif
1591  dequeued = TRUE;
1592  }
1593  }
1594 
1595  if ( dequeued ) {
1596  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1597  KMP_DEBUG_ASSERT( head_thr != NULL );
1598 
1599  /* Does this require synchronous reads? */
1600 #ifdef DEBUG_QUEUING_LOCKS
1601  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1602 #endif
1603  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1604 
1605  /* For clean code only.
1606  * Thread not released until next statement prevents race with acquire code.
1607  */
1608  head_thr->th.th_next_waiting = 0;
1609 #ifdef DEBUG_QUEUING_LOCKS
1610  TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1611 #endif
1612 
1613  KMP_MB();
1614  /* reset spin value */
1615  head_thr->th.th_spin_here = FALSE;
1616 
1617  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1618  lck, gtid ));
1619 #ifdef DEBUG_QUEUING_LOCKS
1620  TRACE_LOCK( gtid+1, "rel exit 2" );
1621 #endif
1622  return KMP_LOCK_RELEASED;
1623  }
1624  /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1625 
1626 #ifdef DEBUG_QUEUING_LOCKS
1627  TRACE_LOCK( gtid+1, "rel retry" );
1628 #endif
1629 
1630  } /* while */
1631  KMP_ASSERT2( 0, "should not get here" );
1632  return KMP_LOCK_RELEASED;
1633 }
1634 
1635 static int
1636 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1637  kmp_int32 gtid )
1638 {
1639  char const * const func = "omp_unset_lock";
1640  KMP_MB(); /* in case another processor initialized lock */
1641  if ( lck->lk.initialized != lck ) {
1642  KMP_FATAL( LockIsUninitialized, func );
1643  }
1644  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1645  KMP_FATAL( LockNestableUsedAsSimple, func );
1646  }
1647  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1648  KMP_FATAL( LockUnsettingFree, func );
1649  }
1650  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1651  KMP_FATAL( LockUnsettingSetByAnother, func );
1652  }
1653  lck->lk.owner_id = 0;
1654  return __kmp_release_queuing_lock( lck, gtid );
1655 }
1656 
1657 void
1658 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1659 {
1660  lck->lk.location = NULL;
1661  lck->lk.head_id = 0;
1662  lck->lk.tail_id = 0;
1663  lck->lk.next_ticket = 0;
1664  lck->lk.now_serving = 0;
1665  lck->lk.owner_id = 0; // no thread owns the lock.
1666  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1667  lck->lk.initialized = lck;
1668 
1669  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1670 }
1671 
1672 static void
1673 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1674 {
1675  __kmp_init_queuing_lock( lck );
1676 }
1677 
1678 void
1679 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1680 {
1681  lck->lk.initialized = NULL;
1682  lck->lk.location = NULL;
1683  lck->lk.head_id = 0;
1684  lck->lk.tail_id = 0;
1685  lck->lk.next_ticket = 0;
1686  lck->lk.now_serving = 0;
1687  lck->lk.owner_id = 0;
1688  lck->lk.depth_locked = -1;
1689 }
1690 
1691 static void
1692 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1693 {
1694  char const * const func = "omp_destroy_lock";
1695  if ( lck->lk.initialized != lck ) {
1696  KMP_FATAL( LockIsUninitialized, func );
1697  }
1698  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1699  KMP_FATAL( LockNestableUsedAsSimple, func );
1700  }
1701  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1702  KMP_FATAL( LockStillOwned, func );
1703  }
1704  __kmp_destroy_queuing_lock( lck );
1705 }
1706 
1707 
1708 //
1709 // nested queuing locks
1710 //
1711 
1712 void
1713 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1714 {
1715  KMP_DEBUG_ASSERT( gtid >= 0 );
1716 
1717  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1718  lck->lk.depth_locked += 1;
1719  }
1720  else {
1721  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1722  KMP_MB();
1723  lck->lk.depth_locked = 1;
1724  KMP_MB();
1725  lck->lk.owner_id = gtid + 1;
1726  }
1727 }
1728 
1729 static void
1730 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1731 {
1732  char const * const func = "omp_set_nest_lock";
1733  if ( lck->lk.initialized != lck ) {
1734  KMP_FATAL( LockIsUninitialized, func );
1735  }
1736  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1737  KMP_FATAL( LockSimpleUsedAsNestable, func );
1738  }
1739  __kmp_acquire_nested_queuing_lock( lck, gtid );
1740 }
1741 
1742 int
1743 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1744 {
1745  int retval;
1746 
1747  KMP_DEBUG_ASSERT( gtid >= 0 );
1748 
1749  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1750  retval = ++lck->lk.depth_locked;
1751  }
1752  else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1753  retval = 0;
1754  }
1755  else {
1756  KMP_MB();
1757  retval = lck->lk.depth_locked = 1;
1758  KMP_MB();
1759  lck->lk.owner_id = gtid + 1;
1760  }
1761  return retval;
1762 }
1763 
1764 static int
1765 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1766  kmp_int32 gtid )
1767 {
1768  char const * const func = "omp_test_nest_lock";
1769  if ( lck->lk.initialized != lck ) {
1770  KMP_FATAL( LockIsUninitialized, func );
1771  }
1772  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1773  KMP_FATAL( LockSimpleUsedAsNestable, func );
1774  }
1775  return __kmp_test_nested_queuing_lock( lck, gtid );
1776 }
1777 
1778 int
1779 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1780 {
1781  KMP_DEBUG_ASSERT( gtid >= 0 );
1782 
1783  KMP_MB();
1784  if ( --(lck->lk.depth_locked) == 0 ) {
1785  KMP_MB();
1786  lck->lk.owner_id = 0;
1787  __kmp_release_queuing_lock( lck, gtid );
1788  return KMP_LOCK_RELEASED;
1789  }
1790  return KMP_LOCK_STILL_HELD;
1791 }
1792 
1793 static int
1794 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1795 {
1796  char const * const func = "omp_unset_nest_lock";
1797  KMP_MB(); /* in case another processor initialized lock */
1798  if ( lck->lk.initialized != lck ) {
1799  KMP_FATAL( LockIsUninitialized, func );
1800  }
1801  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1802  KMP_FATAL( LockSimpleUsedAsNestable, func );
1803  }
1804  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1805  KMP_FATAL( LockUnsettingFree, func );
1806  }
1807  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1808  KMP_FATAL( LockUnsettingSetByAnother, func );
1809  }
1810  return __kmp_release_nested_queuing_lock( lck, gtid );
1811 }
1812 
1813 void
1814 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1815 {
1816  __kmp_init_queuing_lock( lck );
1817  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1818 }
1819 
1820 static void
1821 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1822 {
1823  __kmp_init_nested_queuing_lock( lck );
1824 }
1825 
1826 void
1827 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1828 {
1829  __kmp_destroy_queuing_lock( lck );
1830  lck->lk.depth_locked = 0;
1831 }
1832 
1833 static void
1834 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1835 {
1836  char const * const func = "omp_destroy_nest_lock";
1837  if ( lck->lk.initialized != lck ) {
1838  KMP_FATAL( LockIsUninitialized, func );
1839  }
1840  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1841  KMP_FATAL( LockSimpleUsedAsNestable, func );
1842  }
1843  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1844  KMP_FATAL( LockStillOwned, func );
1845  }
1846  __kmp_destroy_nested_queuing_lock( lck );
1847 }
1848 
1849 
1850 //
1851 // access functions to fields which don't exist for all lock kinds.
1852 //
1853 
1854 static int
1855 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1856 {
1857  return lck == lck->lk.initialized;
1858 }
1859 
1860 static const ident_t *
1861 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1862 {
1863  return lck->lk.location;
1864 }
1865 
1866 static void
1867 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1868 {
1869  lck->lk.location = loc;
1870 }
1871 
1872 static kmp_lock_flags_t
1873 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1874 {
1875  return lck->lk.flags;
1876 }
1877 
1878 static void
1879 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1880 {
1881  lck->lk.flags = flags;
1882 }
1883 
1884 #if KMP_USE_ADAPTIVE_LOCKS
1885 
1886 /*
1887  RTM Adaptive locks
1888 */
1889 
1890 // TODO: Use the header for intrinsics below with the compiler 13.0
1891 //#include <immintrin.h>
1892 
1893 // Values from the status register after failed speculation.
1894 #define _XBEGIN_STARTED (~0u)
1895 #define _XABORT_EXPLICIT (1 << 0)
1896 #define _XABORT_RETRY (1 << 1)
1897 #define _XABORT_CONFLICT (1 << 2)
1898 #define _XABORT_CAPACITY (1 << 3)
1899 #define _XABORT_DEBUG (1 << 4)
1900 #define _XABORT_NESTED (1 << 5)
1901 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1902 
1903 // Aborts for which it's worth trying again immediately
1904 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1905 
1906 #define STRINGIZE_INTERNAL(arg) #arg
1907 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1908 
1909 // Access to RTM instructions
1910 
1911 /*
1912  A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1913  This is the same definition as the compiler intrinsic that will be supported at some point.
1914 */
1915 static __inline int _xbegin()
1916 {
1917  int res = -1;
1918 
1919 #if KMP_OS_WINDOWS
1920 #if KMP_ARCH_X86_64
1921  _asm {
1922  _emit 0xC7
1923  _emit 0xF8
1924  _emit 2
1925  _emit 0
1926  _emit 0
1927  _emit 0
1928  jmp L2
1929  mov res, eax
1930  L2:
1931  }
1932 #else /* IA32 */
1933  _asm {
1934  _emit 0xC7
1935  _emit 0xF8
1936  _emit 2
1937  _emit 0
1938  _emit 0
1939  _emit 0
1940  jmp L2
1941  mov res, eax
1942  L2:
1943  }
1944 #endif // KMP_ARCH_X86_64
1945 #else
1946  /* Note that %eax must be noted as killed (clobbered), because
1947  * the XSR is returned in %eax(%rax) on abort. Other register
1948  * values are restored, so don't need to be killed.
1949  *
1950  * We must also mark 'res' as an input and an output, since otherwise
1951  * 'res=-1' may be dropped as being dead, whereas we do need the
1952  * assignment on the successful (i.e., non-abort) path.
1953  */
1954  __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1955  " .long 1f-1b-6\n"
1956  " jmp 2f\n"
1957  "1: movl %%eax,%0\n"
1958  "2:"
1959  :"+r"(res)::"memory","%eax");
1960 #endif // KMP_OS_WINDOWS
1961  return res;
1962 }
1963 
1964 /*
1965  Transaction end
1966 */
1967 static __inline void _xend()
1968 {
1969 #if KMP_OS_WINDOWS
1970  __asm {
1971  _emit 0x0f
1972  _emit 0x01
1973  _emit 0xd5
1974  }
1975 #else
1976  __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1977 #endif
1978 }
1979 
1980 /*
1981  This is a macro, the argument must be a single byte constant which
1982  can be evaluated by the inline assembler, since it is emitted as a
1983  byte into the assembly code.
1984 */
1985 #if KMP_OS_WINDOWS
1986 #define _xabort(ARG) \
1987  _asm _emit 0xc6 \
1988  _asm _emit 0xf8 \
1989  _asm _emit ARG
1990 #else
1991 #define _xabort(ARG) \
1992  __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1993 #endif
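
//
// Illustrative sketch (not part of the library): how the helpers above are
// typically combined into a speculative critical section with a bounded
// number of retries.  Everything below other than _xbegin/_xend/
// _XBEGIN_STARTED/SOFT_ABORT_MASK is hypothetical and shown only to make the
// control flow concrete.
//
#if 0
static int example_speculative_region( int retries )
{
    do {
        kmp_uint32 status = _xbegin();
        if ( status == _XBEGIN_STARTED ) {
            // Transactional path: memory operations are buffered until
            // _xend() commits them atomically.
            /* ... speculative work ... */
            _xend();
            return 1;                       // committed
        }
        // Aborted: status holds the abort reason returned in %eax.
        if ( ! ( status & SOFT_ABORT_MASK ) )
            break;                          // hard failure, don't bother retrying
    } while ( retries-- > 0 );
    return 0;                               // caller falls back to a real lock
}
#endif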
1994 
1995 //
1996 // Statistics are collected for testing purposes
1997 //
1998 #if KMP_DEBUG_ADAPTIVE_LOCKS
1999 
2000 // We accumulate speculative lock statistics when the lock is destroyed.
2001 // We keep locks that haven't been destroyed in the liveLocks list
2002 // so that we can grab their statistics too.
2003 static kmp_adaptive_lock_statistics_t destroyedStats;
2004 
2005 // To hold the list of live locks.
2006 static kmp_adaptive_lock_info_t liveLocks;
2007 
2008 // A lock so we can safely update the list of locks.
2009 static kmp_bootstrap_lock_t chain_lock;
2010 
2011 // Initialize the list of stats.
2012 void
2013 __kmp_init_speculative_stats()
2014 {
2015  kmp_adaptive_lock_info_t *lck = &liveLocks;
2016 
2017  memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2018  lck->stats.next = lck;
2019  lck->stats.prev = lck;
2020 
2021  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2022  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2023 
2024  __kmp_init_bootstrap_lock( &chain_lock );
2025 
2026 }
2027 
2028 // Insert the lock into the circular list
2029 static void
2030 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2031 {
2032  __kmp_acquire_bootstrap_lock( &chain_lock );
2033 
2034  lck->stats.next = liveLocks.stats.next;
2035  lck->stats.prev = &liveLocks;
2036 
2037  liveLocks.stats.next = lck;
2038  lck->stats.next->stats.prev = lck;
2039 
2040  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2041  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2042 
2043  __kmp_release_bootstrap_lock( &chain_lock );
2044 }
2045 
2046 static void
2047 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2048 {
2049  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2050  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2051 
2052  kmp_adaptive_lock_info_t * n = lck->stats.next;
2053  kmp_adaptive_lock_info_t * p = lck->stats.prev;
2054 
2055  n->stats.prev = p;
2056  p->stats.next = n;
2057 }
2058 
2059 static void
2060 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2061 {
2062  memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2063  __kmp_remember_lock( lck );
2064 }
2065 
2066 static void
2067 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2068 {
2069  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2070 
2071  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2072  t->successfulSpeculations += s->successfulSpeculations;
2073  t->hardFailedSpeculations += s->hardFailedSpeculations;
2074  t->softFailedSpeculations += s->softFailedSpeculations;
2075  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2076  t->lemmingYields += s->lemmingYields;
2077 }
2078 
2079 static void
2080 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2081 {
2082  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2083 
2084  __kmp_acquire_bootstrap_lock( &chain_lock );
2085 
2086  __kmp_add_stats( &destroyedStats, lck );
2087  __kmp_forget_lock( lck );
2088 
2089  __kmp_release_bootstrap_lock( &chain_lock );
2090 }
2091 
2092 static float
2093 percent (kmp_uint32 count, kmp_uint32 total)
2094 {
2095  return (total == 0) ? 0.0: (100.0 * count)/total;
2096 }
2097 
2098 static
2099 FILE * __kmp_open_stats_file()
2100 {
2101  if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2102  return stdout;
2103 
2104  size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2105  char buffer[buffLen];
2106  KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2107  (kmp_int32)getpid());
2108  FILE * result = fopen(&buffer[0], "w");
2109 
2110  // Maybe we should issue a warning here...
2111  return result ? result : stdout;
2112 }
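
//
// Informal note: __kmp_speculative_statsfile is used as the printf format
// string here, so a setting such as "adaptive-%d.log" (hypothetical) gets the
// process id substituted into the file name, while "-" sends the output to
// stdout.
//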
2113 
2114 void
2115 __kmp_print_speculative_stats()
2116 {
2117  if (__kmp_user_lock_kind != lk_adaptive)
2118  return;
2119 
2120  FILE * statsFile = __kmp_open_stats_file();
2121 
2122  kmp_adaptive_lock_statistics_t total = destroyedStats;
2123  kmp_adaptive_lock_info_t *lck;
2124 
2125  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2126  __kmp_add_stats( &total, lck );
2127  }
2128  kmp_adaptive_lock_statistics_t *t = &total;
2129  kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2130  kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2131  t->softFailedSpeculations;
2132 
2133  fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2134  fprintf ( statsFile, " Lock parameters: \n"
2135  " max_soft_retries : %10d\n"
2136  " max_badness : %10d\n",
2137  __kmp_adaptive_backoff_params.max_soft_retries,
2138  __kmp_adaptive_backoff_params.max_badness);
2139  fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2140  fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2141  fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2142  t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2143  fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2144  t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2145  fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2146 
2147  fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2148  fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2149  t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2150  fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2151  t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2152  fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2153  t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2154 
2155  if (statsFile != stdout)
2156  fclose( statsFile );
2157 }
2158 
2159 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2160 #else
2161 # define KMP_INC_STAT(lck,stat)
2162 
2163 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2164 
2165 static inline bool
2166 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2167 {
2168  // It is enough to check that the head_id is zero.
2169  // We don't also need to check the tail.
2170  bool res = lck->lk.head_id == 0;
2171 
2172  // We need a fence here, since we must ensure that no memory operations
2173  // from later in this thread float above that read.
2174 #if KMP_COMPILER_ICC
2175  _mm_mfence();
2176 #else
2177  __sync_synchronize();
2178 #endif
2179 
2180  return res;
2181 }
2182 
2183 // Functions for manipulating the badness
2184 static __inline void
2185 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2186 {
2187  // Reset the badness to zero so we eagerly try to speculate again
2188  lck->lk.adaptive.badness = 0;
2189  KMP_INC_STAT(lck,successfulSpeculations);
2190 }
2191 
2192 // Create a bit mask with one more set bit.
2193 static __inline void
2194 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2195 {
2196  kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2197  if ( newBadness > lck->lk.adaptive.max_badness) {
2198  return;
2199  } else {
2200  lck->lk.adaptive.badness = newBadness;
2201  }
2202 }
2203 
2204 // Check whether speculation should be attempted.
2205 static __inline int
2206 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2207 {
2208  kmp_uint32 badness = lck->lk.adaptive.badness;
2209  kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2210  int res = (attempts & badness) == 0;
2211  return res;
2212 }
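
//
// Worked example (informal): badness is a mask of low-order set bits
// (0, 1, 3, 7, ...).  __kmp_step_badness() widens the mask after an
// unsuccessful round of speculation, and __kmp_update_badness_after_success()
// resets it to zero.  Because __kmp_should_speculate() tests
// (acquire_attempts & badness) == 0, a badness of 7 means speculation is only
// retried when the attempt counter is a multiple of 8 (attempts 0, 8, 16, ...)
// and the attempts in between go straight to the underlying queuing lock --
// an exponential backoff on speculation for locks that keep aborting.
//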
2213 
2214 // Attempt to acquire only the speculative lock.
2215 // Does not back off to the non-speculative lock.
2216 //
2217 static int
2218 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2219 {
2220  int retries = lck->lk.adaptive.max_soft_retries;
2221 
2222 // We don't explicitly count the start of speculation; rather, we record
2223 // the results (success, hard fail, soft fail). The sum of all of those
2224 // is the total number of times we started speculation, since all
2225 // speculations must end in one of those ways.
2226  do
2227  {
2228  kmp_uint32 status = _xbegin();
2229  // Switch this in to disable actual speculation but exercise
2230  // at least some of the rest of the code. Useful for debugging...
2231  // kmp_uint32 status = _XABORT_NESTED;
2232 
2233  if (status == _XBEGIN_STARTED )
2234  { /* We have successfully started speculation
2235  * Check that no-one acquired the lock for real between when we last looked
2236  * and now. This also gets the lock cache line into our read-set,
2237  * which we need so that we'll abort if anyone later claims it for real.
2238  */
2239  if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2240  {
2241  // Lock is now visibly acquired, so someone beat us to it.
2242  // Abort the transaction so we'll restart from _xbegin with the
2243  // failure status.
2244  _xabort(0x01)
2245  KMP_ASSERT2( 0, "should not get here" );
2246  }
2247  return 1; // Lock has been acquired (speculatively)
2248  } else {
2249  // We have aborted, update the statistics
2250  if ( status & SOFT_ABORT_MASK)
2251  {
2252  KMP_INC_STAT(lck,softFailedSpeculations);
2253  // and loop round to retry.
2254  }
2255  else
2256  {
2257  KMP_INC_STAT(lck,hardFailedSpeculations);
2258  // Give up if we had a hard failure.
2259  break;
2260  }
2261  }
2262  } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2263 
2264  // Either we had a hard failure or we didn't succeed softly after
2265  // the full set of attempts, so back off the badness.
2266  __kmp_step_badness( lck );
2267  return 0;
2268 }
2269 
2270 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2271 // if the speculative lock cannot be acquired.
2272 // We can succeed speculatively, non-speculatively, or fail.
2273 static int
2274 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2275 {
2276  // First try to acquire the lock speculatively
2277  if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2278  return 1;
2279 
2280  // Speculative acquisition failed, so try to acquire it non-speculatively.
2281  // Count the non-speculative acquire attempt
2282  lck->lk.adaptive.acquire_attempts++;
2283 
2284  // Use base, non-speculative lock.
2285  if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2286  {
2287  KMP_INC_STAT(lck,nonSpeculativeAcquires);
2288  return 1; // Lock is acquired (non-speculatively)
2289  }
2290  else
2291  {
2292  return 0; // Failed to acquire the lock, it's already visibly locked.
2293  }
2294 }
2295 
2296 static int
2297 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2298 {
2299  char const * const func = "omp_test_lock";
2300  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2301  KMP_FATAL( LockIsUninitialized, func );
2302  }
2303 
2304  int retval = __kmp_test_adaptive_lock( lck, gtid );
2305 
2306  if ( retval ) {
2307  lck->lk.qlk.owner_id = gtid + 1;
2308  }
2309  return retval;
2310 }
2311 
2312 // Block until we can acquire a speculative, adaptive lock.
2313 // We check whether we should be trying to speculate.
2314 // If we should be, we check the real lock to see if it is free,
2315 // and, if not, pause without attempting to acquire it until it is.
2316 // Then we try the speculative acquire.
2317 // This means that although we suffer from lemmings a little
2318 // (because we can't acquire the lock speculatively until
2319 // the queue of waiting threads has cleared), we don't get into a
2320 // state where we can never acquire the lock speculatively (because we
2321 // force the queue to clear by preventing new arrivals from entering the
2322 // queue).
2323 // This does mean that when we're trying to break lemmings, the lock
2324 // is no longer fair. However OpenMP makes no guarantee that its
2325 // locks are fair, so this isn't a real problem.
2326 static void
2327 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2328 {
2329  if ( __kmp_should_speculate( lck, gtid ) )
2330  {
2331  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2332  {
2333  if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2334  return;
2335  // We tried speculation and failed, so give up.
2336  }
2337  else
2338  {
2339  // We can't try speculation until the lock is free, so we
2340 // pause here (without suspending on the queuing lock)
2341 // to allow it to drain, then try again.
2342  // All other threads will also see the same result for
2343 // __kmp_should_speculate(), so will be doing the same if they
2344  // try to claim the lock from now on.
2345  while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2346  {
2347  KMP_INC_STAT(lck,lemmingYields);
2348  __kmp_yield (TRUE);
2349  }
2350 
2351  if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2352  return;
2353  }
2354  }
2355 
2356  // Speculative acquisition failed, so acquire it non-speculatively.
2357  // Count the non-speculative acquire attempt
2358  lck->lk.adaptive.acquire_attempts++;
2359 
2360  __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2361  // We have acquired the base lock, so count that.
2362  KMP_INC_STAT(lck,nonSpeculativeAcquires );
2363 }
2364 
2365 static void
2366 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2367 {
2368  char const * const func = "omp_set_lock";
2369  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2370  KMP_FATAL( LockIsUninitialized, func );
2371  }
2372  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2373  KMP_FATAL( LockIsAlreadyOwned, func );
2374  }
2375 
2376  __kmp_acquire_adaptive_lock( lck, gtid );
2377 
2378  lck->lk.qlk.owner_id = gtid + 1;
2379 }
2380 
2381 static int
2382 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2383 {
2384  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2385  { // If the lock doesn't look claimed we must be speculating.
2386  // (Or the user's code is buggy and they're releasing without locking;
2387  // if we had XTEST we'd be able to check that case...)
2388  _xend(); // Exit speculation
2389  __kmp_update_badness_after_success( lck );
2390  }
2391  else
2392  { // Since the lock *is* visibly locked we're not speculating,
2393  // so should use the underlying lock's release scheme.
2394  __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2395  }
2396  return KMP_LOCK_RELEASED;
2397 }
2398 
2399 static int
2400 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2401 {
2402  char const * const func = "omp_unset_lock";
2403  KMP_MB(); /* in case another processor initialized lock */
2404  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2405  KMP_FATAL( LockIsUninitialized, func );
2406  }
2407  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2408  KMP_FATAL( LockUnsettingFree, func );
2409  }
2410  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2411  KMP_FATAL( LockUnsettingSetByAnother, func );
2412  }
2413  lck->lk.qlk.owner_id = 0;
2414  __kmp_release_adaptive_lock( lck, gtid );
2415  return KMP_LOCK_RELEASED;
2416 }
2417 
2418 static void
2419 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2420 {
2421  __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2422  lck->lk.adaptive.badness = 0;
2423  lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2424  lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2425  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2426 #if KMP_DEBUG_ADAPTIVE_LOCKS
2427  __kmp_zero_speculative_stats( &lck->lk.adaptive );
2428 #endif
2429  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2430 }
2431 
2432 static void
2433 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2434 {
2435  __kmp_init_adaptive_lock( lck );
2436 }
2437 
2438 static void
2439 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2440 {
2441 #if KMP_DEBUG_ADAPTIVE_LOCKS
2442  __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2443 #endif
2444  __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2445  // Nothing needed for the speculative part.
2446 }
2447 
2448 static void
2449 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2450 {
2451  char const * const func = "omp_destroy_lock";
2452  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2453  KMP_FATAL( LockIsUninitialized, func );
2454  }
2455  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2456  KMP_FATAL( LockStillOwned, func );
2457  }
2458  __kmp_destroy_adaptive_lock( lck );
2459 }
2460 
2461 
2462 #endif // KMP_USE_ADAPTIVE_LOCKS
2463 
2464 
2465 /* ------------------------------------------------------------------------ */
2466 /* DRDPA ticket locks */
2467 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2468 
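//
// Informal sketch of the idea (simplified; the real implementation below adds
// dynamic reconfiguration of the polling area and the memory-ordering macros):
// each acquirer takes a ticket and spins on its own slot of a polling array,
// and the releaser publishes the next ticket into the slot that the next
// waiter is watching, so waiters do not all hammer a single cache line.
// NPOLLS and the struct/function names are hypothetical.
//
#if 0
#define NPOLLS 8                          // must be a power of two

struct simple_drdpa {
    volatile kmp_uint64 polls[NPOLLS];
    volatile kmp_uint64 next_ticket;
    kmp_uint64          now_serving;
};

static void simple_drdpa_acquire( struct simple_drdpa *l )
{
    kmp_uint64 t = KMP_TEST_THEN_INC64( (kmp_int64 *)&l->next_ticket );
    while ( l->polls[ t & (NPOLLS - 1) ] < t )
        ;                                 // spin on this thread's own slot
    l->now_serving = t;
}

static void simple_drdpa_release( struct simple_drdpa *l )
{
    kmp_uint64 t = l->now_serving + 1;
    l->polls[ t & (NPOLLS - 1) ] = t;     // wake exactly the next waiter's slot
}
#endif
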
2469 static kmp_int32
2470 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2471 {
2472  return TCR_4( lck->lk.owner_id ) - 1;
2473 }
2474 
2475 static inline bool
2476 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2477 {
2478  return lck->lk.depth_locked != -1;
2479 }
2480 
2481 __forceinline static void
2482 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2483 {
2484  kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2485  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2486  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2487  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2488  TCR_PTR(lck->lk.polls); // volatile load
2489 
2490 #ifdef USE_LOCK_PROFILE
2491  if (TCR_8(polls[ticket & mask].poll) != ticket)
2492  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2493  /* else __kmp_printf( "." );*/
2494 #endif /* USE_LOCK_PROFILE */
2495 
2496  //
2497  // Now spin-wait, but reload the polls pointer and mask, in case the
2498  // polling area has been reconfigured. Unless it is reconfigured, the
2499  // reloads stay in L1 cache and are cheap.
2500  //
2501  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2502  //
2503  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2504  // and poll to be re-read every spin iteration.
2505  //
2506  kmp_uint32 spins;
2507 
2508  KMP_FSYNC_PREPARE(lck);
2509  KMP_INIT_YIELD(spins);
2510  while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
2511  // If we are oversubscribed,
2512  // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2513  // CPU Pause is in the macros for yield.
2514  //
2515  KMP_YIELD(TCR_4(__kmp_nth)
2516  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2517  KMP_YIELD_SPIN(spins);
2518 
2519  // Re-read the mask and the poll pointer from the lock structure.
2520  //
2521  // Make certain that "mask" is read before "polls" !!!
2522  //
2523 // If another thread reconfigures the polling area and updates
2524 // those values, and we get the new value of mask and the old polls
2525  // pointer, we could access memory beyond the end of the old polling
2526  // area.
2527  //
2528  mask = TCR_8(lck->lk.mask); // volatile load
2529  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2530  TCR_PTR(lck->lk.polls); // volatile load
2531  }
2532 
2533  //
2534  // Critical section starts here
2535  //
2536  KMP_FSYNC_ACQUIRED(lck);
2537  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2538  ticket, lck));
2539  lck->lk.now_serving = ticket; // non-volatile store
2540 
2541  //
2542  // Deallocate a garbage polling area if we know that we are the last
2543  // thread that could possibly access it.
2544  //
2545  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2546  // ticket.
2547  //
2548  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2549  __kmp_free((void *)lck->lk.old_polls);
2550  lck->lk.old_polls = NULL;
2551  lck->lk.cleanup_ticket = 0;
2552  }
2553 
2554  //
2555  // Check to see if we should reconfigure the polling area.
2556  // If there is still a garbage polling area to be deallocated from a
2557  // previous reconfiguration, let a later thread reconfigure it.
2558  //
2559  if (lck->lk.old_polls == NULL) {
2560  bool reconfigure = false;
2561  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2562  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2563 
2564  if (TCR_4(__kmp_nth)
2565  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2566  //
2567  // We are in oversubscription mode. Contract the polling area
2568  // down to a single location, if that hasn't been done already.
2569  //
2570  if (num_polls > 1) {
2571  reconfigure = true;
2572  num_polls = TCR_4(lck->lk.num_polls);
2573  mask = 0;
2574  num_polls = 1;
2575  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2576  __kmp_allocate(num_polls * sizeof(*polls));
2577  polls[0].poll = ticket;
2578  }
2579  }
2580  else {
2581  //
2582  // We are in under/fully subscribed mode. Check the number of
2583  // threads waiting on the lock. The size of the polling area
2584  // should be at least the number of threads waiting.
2585  //
2586  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2587  if (num_waiting > num_polls) {
2588  kmp_uint32 old_num_polls = num_polls;
2589  reconfigure = true;
2590  do {
2591  mask = (mask << 1) | 1;
2592  num_polls *= 2;
2593  } while (num_polls <= num_waiting);
2594 
2595  //
2596  // Allocate the new polling area, and copy the relevant portion
2597  // of the old polling area to the new area. __kmp_allocate()
2598  // zeroes the memory it allocates, and most of the old area is
2599  // just zero padding, so we only copy the release counters.
2600  //
2601  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2602  __kmp_allocate(num_polls * sizeof(*polls));
2603  kmp_uint32 i;
2604  for (i = 0; i < old_num_polls; i++) {
2605  polls[i].poll = old_polls[i].poll;
2606  }
2607  }
2608  }
2609 
2610  if (reconfigure) {
2611  //
2612  // Now write the updated fields back to the lock structure.
2613  //
2614  // Make certain that "polls" is written before "mask" !!!
2615  //
2616  // If another thread picks up the new value of mask and the old
2617 // polls pointer, it could access memory beyond the end of the
2618  // old polling area.
2619  //
2620  // On x86, we need memory fences.
2621  //
2622  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2623  ticket, lck, num_polls));
2624 
2625  lck->lk.old_polls = old_polls; // non-volatile store
2626  lck->lk.polls = polls; // volatile store
2627 
2628  KMP_MB();
2629 
2630  lck->lk.num_polls = num_polls; // non-volatile store
2631  lck->lk.mask = mask; // volatile store
2632 
2633  KMP_MB();
2634 
2635  //
2636  // Only after the new polling area and mask have been flushed
2637  // to main memory can we update the cleanup ticket field.
2638  //
2639  // volatile load / non-volatile store
2640  //
2641  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2642  }
2643  }
2644 }
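
//
// Worked example of the expansion path above (informal): with num_polls == 1,
// mask == 0 and, say, five threads already waiting (num_waiting == 5), the
// do/while loop doubles num_polls 1 -> 2 -> 4 -> 8 and grows mask 1 -> 3 -> 7,
// so eight slots are allocated and each waiter eventually spins on its own
// polls[ticket & 7] entry.  Under oversubscription the opposite happens: the
// polling area is contracted back to a single slot (num_polls == 1, mask == 0).
//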
2645 
2646 void
2647 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2648 {
2649  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2650 }
2651 
2652 static void
2653 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2654 {
2655  char const * const func = "omp_set_lock";
2656  if ( lck->lk.initialized != lck ) {
2657  KMP_FATAL( LockIsUninitialized, func );
2658  }
2659  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2660  KMP_FATAL( LockNestableUsedAsSimple, func );
2661  }
2662  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2663  KMP_FATAL( LockIsAlreadyOwned, func );
2664  }
2665 
2666  __kmp_acquire_drdpa_lock( lck, gtid );
2667 
2668  lck->lk.owner_id = gtid + 1;
2669 }
2670 
2671 int
2672 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2673 {
2674  //
2675  // First get a ticket, then read the polls pointer and the mask.
2676  // The polls pointer must be read before the mask!!! (See above)
2677  //
2678  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2679  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2680  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2681  TCR_PTR(lck->lk.polls); // volatile load
2682  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2683  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2684  kmp_uint64 next_ticket = ticket + 1;
2685  if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2686  ticket, next_ticket)) {
2687  KMP_FSYNC_ACQUIRED(lck);
2688  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2689  ticket, lck));
2690  lck->lk.now_serving = ticket; // non-volatile store
2691 
2692  //
2693  // Since no threads are waiting, there is no possibility that
2694  // we would want to reconfigure the polling area. We might
2695  // have the cleanup ticket value (which says that it is now
2696  // safe to deallocate old_polls), but we'll let a later thread
2697  // which calls __kmp_acquire_lock do that - this routine
2698  // isn't supposed to block, and we would risk blocks if we
2699  // called __kmp_free() to do the deallocation.
2700  //
2701  return TRUE;
2702  }
2703  }
2704  return FALSE;
2705 }
2706 
2707 static int
2708 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2709 {
2710  char const * const func = "omp_test_lock";
2711  if ( lck->lk.initialized != lck ) {
2712  KMP_FATAL( LockIsUninitialized, func );
2713  }
2714  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2715  KMP_FATAL( LockNestableUsedAsSimple, func );
2716  }
2717 
2718  int retval = __kmp_test_drdpa_lock( lck, gtid );
2719 
2720  if ( retval ) {
2721  lck->lk.owner_id = gtid + 1;
2722  }
2723  return retval;
2724 }
2725 
2726 int
2727 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2728 {
2729  //
2730  // Read the ticket value from the lock data struct, then the polls
2731  // pointer and the mask. The polls pointer must be read before the
2732  // mask!!! (See above)
2733  //
2734  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2735  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2736  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2737  TCR_PTR(lck->lk.polls); // volatile load
2738  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2739  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2740  ticket - 1, lck));
2741  KMP_FSYNC_RELEASING(lck);
2742  KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2743  return KMP_LOCK_RELEASED;
2744 }
2745 
2746 static int
2747 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2748 {
2749  char const * const func = "omp_unset_lock";
2750  KMP_MB(); /* in case another processor initialized lock */
2751  if ( lck->lk.initialized != lck ) {
2752  KMP_FATAL( LockIsUninitialized, func );
2753  }
2754  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2755  KMP_FATAL( LockNestableUsedAsSimple, func );
2756  }
2757  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2758  KMP_FATAL( LockUnsettingFree, func );
2759  }
2760  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2761  && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2762  KMP_FATAL( LockUnsettingSetByAnother, func );
2763  }
2764  lck->lk.owner_id = 0;
2765  return __kmp_release_drdpa_lock( lck, gtid );
2766 }
2767 
2768 void
2769 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2770 {
2771  lck->lk.location = NULL;
2772  lck->lk.mask = 0;
2773  lck->lk.num_polls = 1;
2774  lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2775  __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2776  lck->lk.cleanup_ticket = 0;
2777  lck->lk.old_polls = NULL;
2778  lck->lk.next_ticket = 0;
2779  lck->lk.now_serving = 0;
2780  lck->lk.owner_id = 0; // no thread owns the lock.
2781  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2782  lck->lk.initialized = lck;
2783 
2784  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2785 }
2786 
2787 static void
2788 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2789 {
2790  __kmp_init_drdpa_lock( lck );
2791 }
2792 
2793 void
2794 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2795 {
2796  lck->lk.initialized = NULL;
2797  lck->lk.location = NULL;
2798  if (lck->lk.polls != NULL) {
2799  __kmp_free((void *)lck->lk.polls);
2800  lck->lk.polls = NULL;
2801  }
2802  if (lck->lk.old_polls != NULL) {
2803  __kmp_free((void *)lck->lk.old_polls);
2804  lck->lk.old_polls = NULL;
2805  }
2806  lck->lk.mask = 0;
2807  lck->lk.num_polls = 0;
2808  lck->lk.cleanup_ticket = 0;
2809  lck->lk.next_ticket = 0;
2810  lck->lk.now_serving = 0;
2811  lck->lk.owner_id = 0;
2812  lck->lk.depth_locked = -1;
2813 }
2814 
2815 static void
2816 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2817 {
2818  char const * const func = "omp_destroy_lock";
2819  if ( lck->lk.initialized != lck ) {
2820  KMP_FATAL( LockIsUninitialized, func );
2821  }
2822  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2823  KMP_FATAL( LockNestableUsedAsSimple, func );
2824  }
2825  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2826  KMP_FATAL( LockStillOwned, func );
2827  }
2828  __kmp_destroy_drdpa_lock( lck );
2829 }
2830 
2831 
2832 //
2833 // nested drdpa ticket locks
2834 //
2835 
2836 void
2837 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2838 {
2839  KMP_DEBUG_ASSERT( gtid >= 0 );
2840 
2841  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2842  lck->lk.depth_locked += 1;
2843  }
2844  else {
2845  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2846  KMP_MB();
2847  lck->lk.depth_locked = 1;
2848  KMP_MB();
2849  lck->lk.owner_id = gtid + 1;
2850  }
2851 }
2852 
2853 static void
2854 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2855 {
2856  char const * const func = "omp_set_nest_lock";
2857  if ( lck->lk.initialized != lck ) {
2858  KMP_FATAL( LockIsUninitialized, func );
2859  }
2860  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2861  KMP_FATAL( LockSimpleUsedAsNestable, func );
2862  }
2863  __kmp_acquire_nested_drdpa_lock( lck, gtid );
2864 }
2865 
2866 int
2867 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2868 {
2869  int retval;
2870 
2871  KMP_DEBUG_ASSERT( gtid >= 0 );
2872 
2873  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2874  retval = ++lck->lk.depth_locked;
2875  }
2876  else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2877  retval = 0;
2878  }
2879  else {
2880  KMP_MB();
2881  retval = lck->lk.depth_locked = 1;
2882  KMP_MB();
2883  lck->lk.owner_id = gtid + 1;
2884  }
2885  return retval;
2886 }
2887 
2888 static int
2889 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2890 {
2891  char const * const func = "omp_test_nest_lock";
2892  if ( lck->lk.initialized != lck ) {
2893  KMP_FATAL( LockIsUninitialized, func );
2894  }
2895  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2896  KMP_FATAL( LockSimpleUsedAsNestable, func );
2897  }
2898  return __kmp_test_nested_drdpa_lock( lck, gtid );
2899 }
2900 
2901 int
2902 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2903 {
2904  KMP_DEBUG_ASSERT( gtid >= 0 );
2905 
2906  KMP_MB();
2907  if ( --(lck->lk.depth_locked) == 0 ) {
2908  KMP_MB();
2909  lck->lk.owner_id = 0;
2910  __kmp_release_drdpa_lock( lck, gtid );
2911  return KMP_LOCK_RELEASED;
2912  }
2913  return KMP_LOCK_STILL_HELD;
2914 }
2915 
2916 static int
2917 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2918 {
2919  char const * const func = "omp_unset_nest_lock";
2920  KMP_MB(); /* in case another processor initialized lock */
2921  if ( lck->lk.initialized != lck ) {
2922  KMP_FATAL( LockIsUninitialized, func );
2923  }
2924  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2925  KMP_FATAL( LockSimpleUsedAsNestable, func );
2926  }
2927  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2928  KMP_FATAL( LockUnsettingFree, func );
2929  }
2930  if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2931  KMP_FATAL( LockUnsettingSetByAnother, func );
2932  }
2933  return __kmp_release_nested_drdpa_lock( lck, gtid );
2934 }
2935 
2936 void
2937 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2938 {
2939  __kmp_init_drdpa_lock( lck );
2940  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2941 }
2942 
2943 static void
2944 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2945 {
2946  __kmp_init_nested_drdpa_lock( lck );
2947 }
2948 
2949 void
2950 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2951 {
2952  __kmp_destroy_drdpa_lock( lck );
2953  lck->lk.depth_locked = 0;
2954 }
2955 
2956 static void
2957 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2958 {
2959  char const * const func = "omp_destroy_nest_lock";
2960  if ( lck->lk.initialized != lck ) {
2961  KMP_FATAL( LockIsUninitialized, func );
2962  }
2963  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2964  KMP_FATAL( LockSimpleUsedAsNestable, func );
2965  }
2966  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2967  KMP_FATAL( LockStillOwned, func );
2968  }
2969  __kmp_destroy_nested_drdpa_lock( lck );
2970 }
2971 
2972 
2973 //
2974 // access functions to fields which don't exist for all lock kinds.
2975 //
2976 
2977 static int
2978 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2979 {
2980  return lck == lck->lk.initialized;
2981 }
2982 
2983 static const ident_t *
2984 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2985 {
2986  return lck->lk.location;
2987 }
2988 
2989 static void
2990 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2991 {
2992  lck->lk.location = loc;
2993 }
2994 
2995 static kmp_lock_flags_t
2996 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2997 {
2998  return lck->lk.flags;
2999 }
3000 
3001 static void
3002 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3003 {
3004  lck->lk.flags = flags;
3005 }
3006 
3007 #if KMP_USE_DYNAMIC_LOCK
3008 
3009 // Definitions of lock hints.
3010 # ifndef __OMP_H
3011 typedef enum kmp_lock_hint_t {
3012  kmp_lock_hint_none = 0,
3013  kmp_lock_hint_contended,
3014  kmp_lock_hint_uncontended,
3015  kmp_lock_hint_nonspeculative,
3016  kmp_lock_hint_speculative,
3017  kmp_lock_hint_adaptive,
3018 } kmp_lock_hint_t;
3019 # endif
3020 
3021 // Direct lock initializers. Each one simply writes a tag into the low 8 bits of the lock word.
3022 #define expand_init_lock(l, a) \
3023 static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
3024  *lck = DYNA_LOCK_FREE(l); \
3025  KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \
3026 }
3027 FOREACH_D_LOCK(expand_init_lock, 0)
3028 #undef expand_init_lock
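
//
// For illustration only: assuming FOREACH_D_LOCK() applies its first argument
// to each direct lock kind (tas, plus futex/hle where available),
// expand_init_lock(tas, 0) above expands to roughly:
//
//     static void init_tas_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) {
//         *lck = DYNA_LOCK_FREE(tas);   // store the "free" word carrying the tas tag
//         KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck));
//     }
//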
3029 
3030 #if DYNA_HAS_HLE
3031 
3032 // HLE lock functions - imported from the testbed runtime.
3033 #if KMP_MIC
3034 # define machine_pause() _mm_delay_32(10) // TODO: find the right argument
3035 #else
3036 # define machine_pause() _mm_pause()
3037 #endif
3038 #define HLE_ACQUIRE ".byte 0xf2;"
3039 #define HLE_RELEASE ".byte 0xf3;"
3040 
3041 static inline kmp_uint32
3042 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3043 {
3044  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3045  : "+r"(v), "+m"(*p)
3046  :
3047  : "memory");
3048  return v;
3049 }
3050 
3051 static void
3052 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3053 {
3054  *lck = 0;
3055 }
3056 
3057 static void
3058 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3059 {
3060  // Use gtid for DYNA_LOCK_BUSY if necessary
3061  if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) {
3062  int delay = 1;
3063  do {
3064  while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) {
3065  for (int i = delay; i != 0; --i)
3066  machine_pause();
3067  delay = ((delay << 1) | 1) & 7;
3068  }
3069  } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle));
3070  }
3071 }
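
//
// Note on the loop above: the inner while() is a plain read spin, so the
// elision-aborting exchange in swap4() is only retried once the lock word
// looks free again.  The pause count between re-reads grows 1, 3, 7 and then
// stays capped at 7 by the "& 7" mask -- a small, bounded exponential backoff.
//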
3072 
3073 static void
3074 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3075 {
3076  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3077 }
3078 
3079 static void
3080 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3081 {
3082  __asm__ volatile(HLE_RELEASE "movl %1,%0"
3083  : "=m"(*lck)
3084  : "r"(DYNA_LOCK_FREE(hle))
3085  : "memory");
3086 }
3087 
3088 static void
3089 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3090 {
3091  __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3092 }
3093 
3094 static int
3095 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3096 {
3097  return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle);
3098 }
3099 
3100 static int
3101 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3102 {
3103  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3104 }
3105 
3106 #endif // DYNA_HAS_HLE
3107 
3108 // Entry functions for indirect locks (first element of direct_*_ops[]).
3109 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3110 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3111 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3112 static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3113 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3114 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3115 static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3116 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3117 
3118 //
3119 // Jump tables for the indirect lock functions.
3120 // Only fill in the odd entries; that avoids the need to shift out the low bit.
3121 //
3122 #define expand_func0(l, op) 0,op##_##l##_##lock,
3123 void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3124  = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
3125 
3126 #define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
3127 void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
3128  = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
3129 
3130 // Differentiates *lock and *lock_with_checks.
3131 #define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3132 #define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3133 static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3134  = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) },
3135  { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
3136 static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3137  = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) },
3138  { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
3139 
3140 #define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3141 #define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3142 static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3143  = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) },
3144  { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
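
//
// Indexing convention for the tables above (informal): a direct lock keeps its
// tag in the low bits of the lock word itself and that stored value is odd,
// while an indirect lock word is an even table index (__kmp_allocate_indirect_lock
// shifts the index left by one).  The odd entries filled in by FOREACH_D_LOCK are
// the direct-lock functions, entry 0 is the indirect-lock entry function, and the
// dispatch macros in kmp_lock.h are expected to map any even lock word to index 0.
//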
3145 
3146 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3147 void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3148 void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3149 int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3150 
3151 //
3152 // Jump tables for the indirect lock functions.
3153 //
3154 #define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3155 void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
3156  = { FOREACH_I_LOCK(expand_func4, init) };
3157 void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
3158  = { FOREACH_I_LOCK(expand_func4, destroy) };
3159 
3160 // Differentiates *lock and *lock_with_checks.
3161 #define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3162 #define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3163 static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3164  = { { FOREACH_I_LOCK(expand_func5, acquire) },
3165  { FOREACH_I_LOCK(expand_func5c, acquire) } };
3166 static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3167  = { { FOREACH_I_LOCK(expand_func5, release) },
3168  { FOREACH_I_LOCK(expand_func5c, release) } };
3169 
3170 #define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3171 #define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3172 static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3173  = { { FOREACH_I_LOCK(expand_func6, test) },
3174  { FOREACH_I_LOCK(expand_func6c, test) } };
3175 
3176 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3177 void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
3178 void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
3179 int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
3180 
3181 // Lock index table.
3182 kmp_indirect_lock_t **__kmp_indirect_lock_table;
3183 kmp_lock_index_t __kmp_indirect_lock_table_size;
3184 kmp_lock_index_t __kmp_indirect_lock_table_next;
3185 
3186 // Size of indirect locks.
3187 static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = {
3188  sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3189 #if KMP_USE_ADAPTIVE_LOCKS
3190  sizeof(kmp_adaptive_lock_t),
3191 #endif
3192  sizeof(kmp_drdpa_lock_t),
3193  sizeof(kmp_tas_lock_t),
3194 #if DYNA_HAS_FUTEX
3195  sizeof(kmp_futex_lock_t),
3196 #endif
3197  sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3198  sizeof(kmp_drdpa_lock_t)
3199 };
3200 
3201 // Jump tables for lock accessor/modifier.
3202 void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3203 void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3204 const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3205 kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3206 
3207 // Use different lock pools for different lock types.
3208 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 };
3209 
3210 // Inserts the given lock ptr to the lock table.
3211 kmp_lock_index_t
3212 __kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
3213 {
3214  kmp_lock_index_t next = __kmp_indirect_lock_table_next;
3215  // Check capacity and double the size if required
3216  if (next >= __kmp_indirect_lock_table_size) {
3217  kmp_lock_index_t i;
3218  kmp_lock_index_t size = __kmp_indirect_lock_table_size;
3219  kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
3220  __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
3221  KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
3222  __kmp_free(old_table);
3223  __kmp_indirect_lock_table_size = 2*next;
3224  }
3225  // Insert lck to the table and return the index.
3226  __kmp_indirect_lock_table[next] = lck;
3227  __kmp_indirect_lock_table_next++;
3228  return next;
3229 }
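
//
// Informal note: the table starts at 1024 entries (see
// __kmp_init_indirect_lock_table below) and doubles each time it fills, so
// inserting the 1025th lock reallocates it to 2048 slots and copies the
// existing 1024 pointers across.
//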
3230 
3231 // User lock allocator for dynamically dispatched locks.
3232 kmp_indirect_lock_t *
3233 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3234 {
3235  kmp_indirect_lock_t *lck;
3236  kmp_lock_index_t idx;
3237 
3238  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3239 
3240  if (__kmp_indirect_lock_pool[tag] != NULL) {
3241  lck = __kmp_indirect_lock_pool[tag];
3242  if (OMP_LOCK_T_SIZE < sizeof(void *))
3243  idx = lck->lock->pool.index;
3244  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3245  } else {
3246  lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
3247  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3248  if (OMP_LOCK_T_SIZE < sizeof(void *))
3249  idx = __kmp_insert_indirect_lock(lck);
3250  }
3251 
3252  __kmp_release_lock(&__kmp_global_lock, gtid);
3253 
3254  lck->type = tag;
3255 
3256  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3257  *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3258  } else {
3259  *((kmp_indirect_lock_t **)user_lock) = lck;
3260  }
3261 
3262  return lck;
3263 }
3264 
3265 // User lock lookup for dynamically dispatched locks.
3266 static __forceinline
3267 kmp_indirect_lock_t *
3268 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3269 {
3270  if (__kmp_env_consistency_check) {
3271  kmp_indirect_lock_t *lck = NULL;
3272  if (user_lock == NULL) {
3273  KMP_FATAL(LockIsUninitialized, func);
3274  }
3275  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3276  kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);
3277  if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
3278  KMP_FATAL(LockIsUninitialized, func);
3279  }
3280  lck = __kmp_indirect_lock_table[idx];
3281  } else {
3282  lck = *((kmp_indirect_lock_t **)user_lock);
3283  }
3284  if (lck == NULL) {
3285  KMP_FATAL(LockIsUninitialized, func);
3286  }
3287  return lck;
3288  } else {
3289  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3290  return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)];
3291  } else {
3292  return *((kmp_indirect_lock_t **)user_lock);
3293  }
3294  }
3295 }
3296 
3297 static void
3298 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3299 {
3300 #if KMP_USE_ADAPTIVE_LOCKS
3301  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3302  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3303  seq = lockseq_queuing;
3304  }
3305 #endif
3306  kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
3307  kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3308  DYNA_I_LOCK_FUNC(l, init)(l->lock);
3309  KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
3310 }
3311 
3312 static void
3313 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3314 {
3315  kmp_uint32 gtid = __kmp_entry_gtid();
3316  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3317  DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3318  kmp_indirect_locktag_t tag = l->type;
3319 
3320  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3321 
3322  // Use the base lock's space to keep the pool chain.
3323  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3324  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3325  l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock);
3326  }
3327  __kmp_indirect_lock_pool[tag] = l;
3328 
3329  __kmp_release_lock(&__kmp_global_lock, gtid);
3330 }
3331 
3332 static void
3333 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3334 {
3335  kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3336  DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3337 }
3338 
3339 static void
3340 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3341 {
3342  kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3343  DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3344 }
3345 
3346 static int
3347 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3348 {
3349  kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3350  return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3351 }
3352 
3353 static void
3354 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3355 {
3356  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3357  DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3358 }
3359 
3360 static void
3361 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3362 {
3363  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3364  DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3365 }
3366 
3367 static int
3368 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3369 {
3370  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3371  return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3372 }
3373 
3374 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3375 
3376 // Initialize a hinted lock.
3377 void
3378 __kmp_init_lock_hinted(void **lock, int hint)
3379 {
3380  kmp_dyna_lockseq_t seq;
3381  switch (hint) {
3382  case kmp_lock_hint_uncontended:
3383  seq = lockseq_tas;
3384  break;
3385  case kmp_lock_hint_speculative:
3386 #if DYNA_HAS_HLE
3387  seq = lockseq_hle;
3388 #else
3389  seq = lockseq_tas;
3390 #endif
3391  break;
3392  case kmp_lock_hint_adaptive:
3393 #if KMP_USE_ADAPTIVE_LOCKS
3394  seq = lockseq_adaptive;
3395 #else
3396  seq = lockseq_queuing;
3397 #endif
3398  break;
3399  // Defaults to queuing locks.
3400  case kmp_lock_hint_contended:
3401  case kmp_lock_hint_nonspeculative:
3402  default:
3403  seq = lockseq_queuing;
3404  break;
3405  }
3406  if (DYNA_IS_D_LOCK(seq)) {
3407  DYNA_INIT_D_LOCK(lock, seq);
3408 #if USE_ITT_BUILD
3409  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
3410 #endif
3411  } else {
3412  DYNA_INIT_I_LOCK(lock, seq);
3413 #if USE_ITT_BUILD
3414  kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3415  __kmp_itt_lock_creating(ilk->lock, NULL);
3416 #endif
3417  }
3418 }
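
//
// Illustrative use (hypothetical caller, not part of the library): the hint
// only selects the underlying lock sequence; once initialized, the lock is
// driven through the normal dynamic dispatch tables.
//
#if 0
static void example_hinted_lock_use( void )
{
    omp_lock_t lck;
    __kmp_init_lock_hinted( (void **)&lck, kmp_lock_hint_speculative );
    // The lock is subsequently set/unset/tested via __kmp_direct_*_ops or the
    // indirect tables, exactly as for a lock initialized without a hint.
}
#endif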
3419 
3420 // This is used only in kmp_error.c when consistency checking is on.
3421 kmp_int32
3422 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3423 {
3424  switch (seq) {
3425  case lockseq_tas:
3426  case lockseq_nested_tas:
3427  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3428 #if DYNA_HAS_FUTEX
3429  case lockseq_futex:
3430  case lockseq_nested_futex:
3431  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3432 #endif
3433  case lockseq_ticket:
3434  case lockseq_nested_ticket:
3435  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3436  case lockseq_queuing:
3437  case lockseq_nested_queuing:
3438 #if KMP_USE_ADAPTIVE_LOCKS
3439  case lockseq_adaptive:
3440  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3441 #endif
3442  case lockseq_drdpa:
3443  case lockseq_nested_drdpa:
3444  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3445  default:
3446  return 0;
3447  }
3448 }
3449 
3450 // The value initialized from KMP_LOCK_KIND needs to be translated to its
3451 // nested version.
3452 void
3453 __kmp_init_nest_lock_hinted(void **lock, int hint)
3454 {
3455  kmp_dyna_lockseq_t seq;
3456  switch (hint) {
3457  case kmp_lock_hint_uncontended:
3458  seq = lockseq_nested_tas;
3459  break;
3460  // Defaults to queuing locks.
3461  case kmp_lock_hint_contended:
3462  case kmp_lock_hint_nonspeculative:
3463  default:
3464  seq = lockseq_nested_queuing;
3465  break;
3466  }
3467  DYNA_INIT_I_LOCK(lock, seq);
3468 #if USE_ITT_BUILD
3469  kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3470  __kmp_itt_lock_creating(ilk->lock, NULL);
3471 #endif
3472 }
3473 
3474 // Initializes the lock table for indirect locks.
3475 static void
3476 __kmp_init_indirect_lock_table()
3477 {
3478  __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
3479  __kmp_indirect_lock_table_size = 1024;
3480  __kmp_indirect_lock_table_next = 0;
3481 }
3482 
3483 #if KMP_USE_ADAPTIVE_LOCKS
3484 # define init_lock_func(table, expand) { \
3485  table[locktag_ticket] = expand(ticket); \
3486  table[locktag_queuing] = expand(queuing); \
3487  table[locktag_adaptive] = expand(queuing); \
3488  table[locktag_drdpa] = expand(drdpa); \
3489  table[locktag_nested_ticket] = expand(ticket); \
3490  table[locktag_nested_queuing] = expand(queuing); \
3491  table[locktag_nested_drdpa] = expand(drdpa); \
3492 }
3493 #else
3494 # define init_lock_func(table, expand) { \
3495  table[locktag_ticket] = expand(ticket); \
3496  table[locktag_queuing] = expand(queuing); \
3497  table[locktag_drdpa] = expand(drdpa); \
3498  table[locktag_nested_ticket] = expand(ticket); \
3499  table[locktag_nested_queuing] = expand(queuing); \
3500  table[locktag_nested_drdpa] = expand(drdpa); \
3501 }
3502 #endif // KMP_USE_ADAPTIVE_LOCKS
3503 
3504 // Initializes data for dynamic user locks.
3505 void
3506 __kmp_init_dynamic_user_locks()
3507 {
3508  // Initialize jump table location
3509  int offset = (__kmp_env_consistency_check)? 1: 0;
3510  __kmp_direct_set_ops = direct_set_tab[offset];
3511  __kmp_direct_unset_ops = direct_unset_tab[offset];
3512  __kmp_direct_test_ops = direct_test_tab[offset];
3513  __kmp_indirect_set_ops = indirect_set_tab[offset];
3514  __kmp_indirect_unset_ops = indirect_unset_tab[offset];
3515  __kmp_indirect_test_ops = indirect_test_tab[offset];
3516  __kmp_init_indirect_lock_table();
3517 
3518  // Initialize lock accessor/modifier
3519  // Could have used a designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
3520 #define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3521  init_lock_func(__kmp_indirect_set_location, expand_func);
3522 #undef expand_func
3523 #define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3524  init_lock_func(__kmp_indirect_set_flags, expand_func);
3525 #undef expand_func
3526 #define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3527  init_lock_func(__kmp_indirect_get_location, expand_func);
3528 #undef expand_func
3529 #define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3530  init_lock_func(__kmp_indirect_get_flags, expand_func);
3531 #undef expand_func
3532 
3533  __kmp_init_user_locks = TRUE;
3534 }
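
// For reference, one entry of the macro expansion performed just above
// (ticket locks, location modifier); the remaining tables are filled in the
// same pattern by init_lock_func():
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;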
3535 
3536 // Clean up the lock table.
3537 void
3538 __kmp_cleanup_indirect_user_locks()
3539 {
3540  kmp_lock_index_t i;
3541  int k;
3542 
3543  // Clean up locks in the pools first (they were already destroyed before going into the pools).
3544  for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) {
3545  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3546  while (l != NULL) {
3547  kmp_indirect_lock_t *ll = l;
3548  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3549  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3550  __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
3551  }
3552  __kmp_free(ll->lock);
3553  __kmp_free(ll);
3554  }
3555  }
3556  // Clean up the remaining undestroyed locks.
3557  for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
3558  kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
3559  if (l != NULL) {
3560  // Locks not destroyed explicitly need to be destroyed here.
3561  DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3562  __kmp_free(l->lock);
3563  __kmp_free(l);
3564  }
3565  }
3566  // Free the table
3567  __kmp_free(__kmp_indirect_lock_table);
3568 
3569  __kmp_init_user_locks = FALSE;
3570 }
3571 
3572 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3573 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3574 
3575 #else // KMP_USE_DYNAMIC_LOCK
3576 
3577 /* ------------------------------------------------------------------------ */
3578 /* user locks
3579  *
3580  * They are implemented as a table of function pointers which are set to the
3581  * lock functions of the appropriate kind, once that has been determined.
3582  */
3583 
3584 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3585 
3586 size_t __kmp_base_user_lock_size = 0;
3587 size_t __kmp_user_lock_size = 0;
3588 
3589 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3590 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3591 
3592 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3593 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3594 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3595 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3596 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3597 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3598 
3599 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3600 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3601 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3602 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3603 
3604 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3605 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3606 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3607 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3608 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3609 
3610 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3611 {
3612  switch ( user_lock_kind ) {
3613  case lk_default:
3614  default:
3615  KMP_ASSERT( 0 );
3616 
3617  case lk_tas: {
3618  __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3619  __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3620 
3621  __kmp_get_user_lock_owner_ =
3622  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3623  ( &__kmp_get_tas_lock_owner );
3624 
3625  if ( __kmp_env_consistency_check ) {
3626  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3627  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3628  }
3629  else {
3630  KMP_BIND_USER_LOCK(tas);
3631  KMP_BIND_NESTED_USER_LOCK(tas);
3632  }
3633 
3634  __kmp_destroy_user_lock_ =
3635  ( void ( * )( kmp_user_lock_p ) )
3636  ( &__kmp_destroy_tas_lock );
3637 
3638  __kmp_is_user_lock_initialized_ =
3639  ( int ( * )( kmp_user_lock_p ) ) NULL;
3640 
3641  __kmp_get_user_lock_location_ =
3642  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3643 
3644  __kmp_set_user_lock_location_ =
3645  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3646 
3647  __kmp_get_user_lock_flags_ =
3648  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3649 
3650  __kmp_set_user_lock_flags_ =
3651  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3652  }
3653  break;
3654 
3655 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3656 
3657  case lk_futex: {
3658  __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3659  __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3660 
3661  __kmp_get_user_lock_owner_ =
3662  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3663  ( &__kmp_get_futex_lock_owner );
3664 
3665  if ( __kmp_env_consistency_check ) {
3666  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3667  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3668  }
3669  else {
3670  KMP_BIND_USER_LOCK(futex);
3671  KMP_BIND_NESTED_USER_LOCK(futex);
3672  }
3673 
3674  __kmp_destroy_user_lock_ =
3675  ( void ( * )( kmp_user_lock_p ) )
3676  ( &__kmp_destroy_futex_lock );
3677 
3678  __kmp_is_user_lock_initialized_ =
3679  ( int ( * )( kmp_user_lock_p ) ) NULL;
3680 
3681  __kmp_get_user_lock_location_ =
3682  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3683 
3684  __kmp_set_user_lock_location_ =
3685  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3686 
3687  __kmp_get_user_lock_flags_ =
3688  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3689 
3690  __kmp_set_user_lock_flags_ =
3691  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3692  }
3693  break;
3694 
3695 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3696 
3697  case lk_ticket: {
3698  __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3699  __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3700 
3701  __kmp_get_user_lock_owner_ =
3702  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3703  ( &__kmp_get_ticket_lock_owner );
3704 
3705  if ( __kmp_env_consistency_check ) {
3706  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3707  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3708  }
3709  else {
3710  KMP_BIND_USER_LOCK(ticket);
3711  KMP_BIND_NESTED_USER_LOCK(ticket);
3712  }
3713 
3714  __kmp_destroy_user_lock_ =
3715  ( void ( * )( kmp_user_lock_p ) )
3716  ( &__kmp_destroy_ticket_lock );
3717 
3718  __kmp_is_user_lock_initialized_ =
3719  ( int ( * )( kmp_user_lock_p ) )
3720  ( &__kmp_is_ticket_lock_initialized );
3721 
3722  __kmp_get_user_lock_location_ =
3723  ( const ident_t * ( * )( kmp_user_lock_p ) )
3724  ( &__kmp_get_ticket_lock_location );
3725 
3726  __kmp_set_user_lock_location_ =
3727  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3728  ( &__kmp_set_ticket_lock_location );
3729 
3730  __kmp_get_user_lock_flags_ =
3731  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3732  ( &__kmp_get_ticket_lock_flags );
3733 
3734  __kmp_set_user_lock_flags_ =
3735  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3736  ( &__kmp_set_ticket_lock_flags );
3737  }
3738  break;
3739 
3740  case lk_queuing: {
3741  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3742  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3743 
3744  __kmp_get_user_lock_owner_ =
3745  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3746  ( &__kmp_get_queuing_lock_owner );
3747 
3748  if ( __kmp_env_consistency_check ) {
3749  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3750  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3751  }
3752  else {
3753  KMP_BIND_USER_LOCK(queuing);
3754  KMP_BIND_NESTED_USER_LOCK(queuing);
3755  }
3756 
3757  __kmp_destroy_user_lock_ =
3758  ( void ( * )( kmp_user_lock_p ) )
3759  ( &__kmp_destroy_queuing_lock );
3760 
3761  __kmp_is_user_lock_initialized_ =
3762  ( int ( * )( kmp_user_lock_p ) )
3763  ( &__kmp_is_queuing_lock_initialized );
3764 
3765  __kmp_get_user_lock_location_ =
3766  ( const ident_t * ( * )( kmp_user_lock_p ) )
3767  ( &__kmp_get_queuing_lock_location );
3768 
3769  __kmp_set_user_lock_location_ =
3770  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3771  ( &__kmp_set_queuing_lock_location );
3772 
3773  __kmp_get_user_lock_flags_ =
3774  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3775  ( &__kmp_get_queuing_lock_flags );
3776 
3777  __kmp_set_user_lock_flags_ =
3778  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3779  ( &__kmp_set_queuing_lock_flags );
3780  }
3781  break;
3782 
3783 #if KMP_USE_ADAPTIVE_LOCKS
3784  case lk_adaptive: {
3785  __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3786  __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3787 
3788  __kmp_get_user_lock_owner_ =
3789  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3790  ( &__kmp_get_queuing_lock_owner );
3791 
3792  if ( __kmp_env_consistency_check ) {
3793  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3794  }
3795  else {
3796  KMP_BIND_USER_LOCK(adaptive);
3797  }
3798 
3799  __kmp_destroy_user_lock_ =
3800  ( void ( * )( kmp_user_lock_p ) )
3801  ( &__kmp_destroy_adaptive_lock );
3802 
3803  __kmp_is_user_lock_initialized_ =
3804  ( int ( * )( kmp_user_lock_p ) )
3805  ( &__kmp_is_queuing_lock_initialized );
3806 
3807  __kmp_get_user_lock_location_ =
3808  ( const ident_t * ( * )( kmp_user_lock_p ) )
3809  ( &__kmp_get_queuing_lock_location );
3810 
3811  __kmp_set_user_lock_location_ =
3812  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3813  ( &__kmp_set_queuing_lock_location );
3814 
3815  __kmp_get_user_lock_flags_ =
3816  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3817  ( &__kmp_get_queuing_lock_flags );
3818 
3819  __kmp_set_user_lock_flags_ =
3820  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3821  ( &__kmp_set_queuing_lock_flags );
3822 
3823  }
3824  break;
3825 #endif // KMP_USE_ADAPTIVE_LOCKS
3826 
3827  case lk_drdpa: {
3828  __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3829  __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3830 
3831  __kmp_get_user_lock_owner_ =
3832  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3833  ( &__kmp_get_drdpa_lock_owner );
3834 
3835  if ( __kmp_env_consistency_check ) {
3836  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3837  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3838  }
3839  else {
3840  KMP_BIND_USER_LOCK(drdpa);
3841  KMP_BIND_NESTED_USER_LOCK(drdpa);
3842  }
3843 
3844  __kmp_destroy_user_lock_ =
3845  ( void ( * )( kmp_user_lock_p ) )
3846  ( &__kmp_destroy_drdpa_lock );
3847 
3848  __kmp_is_user_lock_initialized_ =
3849  ( int ( * )( kmp_user_lock_p ) )
3850  ( &__kmp_is_drdpa_lock_initialized );
3851 
3852  __kmp_get_user_lock_location_ =
3853  ( const ident_t * ( * )( kmp_user_lock_p ) )
3854  ( &__kmp_get_drdpa_lock_location );
3855 
3856  __kmp_set_user_lock_location_ =
3857  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3858  ( &__kmp_set_drdpa_lock_location );
3859 
3860  __kmp_get_user_lock_flags_ =
3861  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3862  ( &__kmp_get_drdpa_lock_flags );
3863 
3864  __kmp_set_user_lock_flags_ =
3865  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3866  ( &__kmp_set_drdpa_lock_flags );
3867  }
3868  break;
3869  }
3870 }
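
/*
 * Illustrative only -- once __kmp_set_user_lock_vptrs() has bound the
 * function pointers above, the rest of the runtime dispatches through them
 * without knowing the concrete lock kind. A minimal sketch with a
 * hypothetical helper name:
 */
#if 0
static kmp_int32
__kmp_example_query_owner( kmp_user_lock_p lck )
{
    __kmp_set_user_lock_vptrs( lk_ticket );          // bind once, e.g. for ticket locks
    return ( *__kmp_get_user_lock_owner_ )( lck );   // then call through the table
}
#endif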
3871 
3872 
3873 // ----------------------------------------------------------------------------
3874 // User lock table & lock allocation
3875 
3876 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3877 kmp_user_lock_p __kmp_lock_pool = NULL;
3878 
3879 // Lock block-allocation support.
3880 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3881 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3882 
3883 static kmp_lock_index_t
3884 __kmp_lock_table_insert( kmp_user_lock_p lck )
3885 {
3886  // Assume that kmp_global_lock is held upon entry/exit.
3887  kmp_lock_index_t index;
3888  if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3889  kmp_lock_index_t size;
3890  kmp_user_lock_p *table;
3891  kmp_lock_index_t i;
3892  // Reallocate lock table.
3893  if ( __kmp_user_lock_table.allocated == 0 ) {
3894  size = 1024;
3895  }
3896  else {
3897  size = __kmp_user_lock_table.allocated * 2;
3898  }
3899  table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3900  KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3901  table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3902  // We cannot free the previous table now, since it may be in use by other
3903  // threads. So save the pointer to the previous table in the first element of the
3904  // new table. All the tables are organized into a list and can be freed when the
3905  // library is shutting down.
3906  __kmp_user_lock_table.table = table;
3907  __kmp_user_lock_table.allocated = size;
3908  }
3909  KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3910  index = __kmp_user_lock_table.used;
3911  __kmp_user_lock_table.table[ index ] = lck;
3912  ++ __kmp_user_lock_table.used;
3913  return index;
3914 }
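
/*
 * Illustrative only -- a self-contained sketch of the growth idiom used
 * above, with generic (hypothetical) names. Slot [0] of every table links to
 * the previous, smaller table, so readers still holding an old pointer stay
 * valid and all tables can be walked and freed at library shutdown.
 */
#if 0
typedef struct example_table {
    void **table;       /* slot [0] links to the previous table           */
    size_t used;        /* next free slot; starts at 1, slot [0] reserved */
    size_t allocated;
} example_table_t;

static void
example_table_grow( example_table_t *t )
{
    size_t   size  = ( t->allocated == 0 ) ? 1024 : t->allocated * 2;
    void   **table = (void **)__kmp_allocate( sizeof( void * ) * size );
    if ( t->used > 1 ) {
        KMP_MEMCPY( table + 1, t->table + 1, sizeof( void * ) * ( t->used - 1 ) );
    }
    table[ 0 ]   = (void *)t->table;   /* keep the old table reachable */
    t->table     = table;
    t->allocated = size;
}
#endif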
3915 
3916 static kmp_user_lock_p
3917 __kmp_lock_block_allocate()
3918 {
3919  // Assume that kmp_global_lock is held upon entry/exit.
3920  static int last_index = 0;
3921  if ( ( last_index >= __kmp_num_locks_in_block )
3922  || ( __kmp_lock_blocks == NULL ) ) {
3923  // Restart the index.
3924  last_index = 0;
3925  // Need to allocate a new block.
3926  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3927  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3928  char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3929  // Set up the new block.
3930  kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3931  new_block->next_block = __kmp_lock_blocks;
3932  new_block->locks = (void *)buffer;
3933  // Publish the new block.
3934  KMP_MB();
3935  __kmp_lock_blocks = new_block;
3936  }
3937  kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3938  [ last_index * __kmp_user_lock_size ] ) );
3939  last_index++;
3940  return ret;
3941 }
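
// Layout of one block allocated above: space_for_locks bytes of lock storage
// followed by the kmp_block_of_locks header that describes the block.
//
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//             ^ new_block->locks                 ^ new_block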
3942 
3943 //
3944 // Get memory for a lock. It may be freshly allocated memory or reused memory
3945 // from the lock pool.
3946 //
3947 kmp_user_lock_p
3948 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3949  kmp_lock_flags_t flags )
3950 {
3951  kmp_user_lock_p lck;
3952  kmp_lock_index_t index;
3953  KMP_DEBUG_ASSERT( user_lock );
3954 
3955  __kmp_acquire_lock( &__kmp_global_lock, gtid );
3956 
3957  if ( __kmp_lock_pool == NULL ) {
3958  // Lock pool is empty. Allocate new memory.
3959  if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3960  lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3961  }
3962  else {
3963  lck = __kmp_lock_block_allocate();
3964  }
3965 
3966  // Insert the lock in the table so that it can be freed in __kmp_cleanup,
3967  // and the debugger has info on all allocated locks.
3968  index = __kmp_lock_table_insert( lck );
3969  }
3970  else {
3971  // Pick up lock from pool.
3972  lck = __kmp_lock_pool;
3973  index = __kmp_lock_pool->pool.index;
3974  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3975  }
3976 
3977  //
3978  // We could potentially differentiate between nested and regular locks
3979  // here, and do the lock table lookup for regular locks only.
3980  //
3981  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3982  * ( (kmp_lock_index_t *) user_lock ) = index;
3983  }
3984  else {
3985  * ( (kmp_user_lock_p *) user_lock ) = lck;
3986  }
3987 
3988  // Mark the lock if it is a critical section lock.
3989  __kmp_set_user_lock_flags( lck, flags );
3990 
3991  __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3992 
3993  return lck;
3994 }
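
/*
 * Illustrative only -- how allocation and release pair up (hypothetical
 * caller, not part of the runtime). In the real entry points the lock is
 * initialized through the bound vptrs between these two calls.
 */
#if 0
static void
__kmp_example_lock_lifetime( void **user_lock, kmp_int32 gtid )
{
    kmp_user_lock_p lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
    /* ... initialize, use, and destroy the lock here ... */
    __kmp_user_lock_free( user_lock, gtid, lck );    // return memory to the pool
}
#endif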
3995 
3996 // Return the lock's memory to the pool for reuse.
3997 void
3998 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3999 {
4000  kmp_lock_pool_t * lock_pool;
4001 
4002  KMP_DEBUG_ASSERT( user_lock != NULL );
4003  KMP_DEBUG_ASSERT( lck != NULL );
4004 
4005  __kmp_acquire_lock( & __kmp_global_lock, gtid );
4006 
4007  lck->pool.next = __kmp_lock_pool;
4008  __kmp_lock_pool = lck;
4009  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4010  kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4011  KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4012  lck->pool.index = index;
4013  }
4014 
4015  __kmp_release_lock( & __kmp_global_lock, gtid );
4016 }
4017 
4018 kmp_user_lock_p
4019 __kmp_lookup_user_lock( void **user_lock, char const *func )
4020 {
4021  kmp_user_lock_p lck = NULL;
4022 
4023  if ( __kmp_env_consistency_check ) {
4024  if ( user_lock == NULL ) {
4025  KMP_FATAL( LockIsUninitialized, func );
4026  }
4027  }
4028 
4029  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4030  kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4031  if ( __kmp_env_consistency_check ) {
4032  if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4033  KMP_FATAL( LockIsUninitialized, func );
4034  }
4035  }
4036  KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4037  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4038  lck = __kmp_user_lock_table.table[index];
4039  }
4040  else {
4041  lck = *( (kmp_user_lock_p *)user_lock );
4042  }
4043 
4044  if ( __kmp_env_consistency_check ) {
4045  if ( lck == NULL ) {
4046  KMP_FATAL( LockIsUninitialized, func );
4047  }
4048  }
4049 
4050  return lck;
4051 }
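
// The two representations resolved above, side by side:
//
//   OMP_LOCK_T_SIZE <  sizeof(void *):  *user_lock holds a kmp_lock_index_t,
//                                       resolved via __kmp_user_lock_table.table[index]
//   OMP_LOCK_T_SIZE >= sizeof(void *):  *user_lock holds the kmp_user_lock_p itself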
4052 
4053 void
4054 __kmp_cleanup_user_locks( void )
4055 {
4056  //
4057  // Reset the lock pool. Do not worry about the locks in the pool -- we will
4058  // free them when iterating through the lock table (it includes all the locks,
4059  // dead or alive).
4060  //
4061  __kmp_lock_pool = NULL;
4062 
4063 #define IS_CRITICAL(lck) \
4064  ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4065  ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4066 
4067  //
4068  // Loop through lock table, free all locks.
4069  //
4070  // Do not free item [0]; it is reserved for the lock tables list.
4071  //
4072  // FIXME - we are iterating through a list of (pointers to) objects of
4073  // type union kmp_user_lock, but we have no way of knowing whether the
4074  // base type is currently "pool" or whatever the global user lock type
4075  // is.
4076  //
4077  // We are relying on the fact that for all of the user lock types
4078  // (except "tas"), the first field in the lock struct is the "initialized"
4079  // field, which is set to the address of the lock object itself when
4080  // the lock is initialized. When the union is of type "pool", the
4081  // first field is a pointer to the next object in the free list, which
4082  // will not be the same address as the object itself.
4083  //
4084  // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4085  // will fail for "pool" objects on the free list. This must happen as
4086  // the "location" field of real user locks overlaps the "index" field
4087  // of "pool" objects.
4088  //
4089  // It would be better to run through the free list, and remove all "pool"
4090  // objects from the lock table before executing this loop. However,
4091  // "pool" objects do not always have their index field set (only on
4092  // lin_32e), and I don't want to search the lock table for the address
4093  // of every "pool" object on the free list.
4094  //
4095  while ( __kmp_user_lock_table.used > 1 ) {
4096  const ident *loc;
4097 
4098  //
4099  // reduce __kmp_user_lock_table.used before freeing the lock,
4100  // so that the state of the locks is consistent
4101  //
4102  kmp_user_lock_p lck = __kmp_user_lock_table.table[
4103  --__kmp_user_lock_table.used ];
4104 
4105  if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4106  ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4107  //
4108  // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4109  // initialized AND it is NOT a critical section (user is not
4110  // responsible for destroying criticals) AND we know the source
4111  // location to report.
4112  //
4113  if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4114  ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4115  ( loc->psource != NULL ) ) {
4116  kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4117  KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4118  __kmp_str_loc_free( &str_loc);
4119  }
4120 
4121 #ifdef KMP_DEBUG
4122  if ( IS_CRITICAL( lck ) ) {
4123  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4124  }
4125  else {
4126  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4127  }
4128 #endif // KMP_DEBUG
4129 
4130  //
4131  // Cleanup internal lock dynamic resources
4132  // (for drdpa locks particularly).
4133  //
4134  __kmp_destroy_user_lock( lck );
4135  }
4136 
4137  //
4138  // Free the lock if block allocation of locks is not used.
4139  //
4140  if ( __kmp_lock_blocks == NULL ) {
4141  __kmp_free( lck );
4142  }
4143  }
4144 
4145 #undef IS_CRITICAL
4146 
4147  //
4148  // delete lock table(s).
4149  //
4150  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4151  __kmp_user_lock_table.table = NULL;
4152  __kmp_user_lock_table.allocated = 0;
4153 
4154  while ( table_ptr != NULL ) {
4155  //
4156  // In the first element we saved the pointer to the previous
4157  // (smaller) lock table.
4158  //
4159  kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4160  __kmp_free( table_ptr );
4161  table_ptr = next;
4162  }
4163 
4164  //
4165  // Free buffers allocated for blocks of locks.
4166  //
4167  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4168  __kmp_lock_blocks = NULL;
4169 
4170  while ( block_ptr != NULL ) {
4171  kmp_block_of_locks_t *next = block_ptr->next_block;
4172  __kmp_free( block_ptr->locks );
4173  //
4174  // *block_ptr itself was allocated at the end of the locks vector.
4175  //
4176  block_ptr = next;
4177  }
4178 
4179  TCW_4(__kmp_init_user_locks, FALSE);
4180 }
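
// Illustrative summary of the live-lock vs. pool-entry discrimination relied
// on in the loop above:
//
//   live lock : for the lock kinds that define it, the first field
//               ("initialized") holds the lock's own address, so
//               ( *__kmp_is_user_lock_initialized_ )( lck ) succeeds and the
//               lock is destroyed before its memory is released.
//   pool entry: the first field is pool.next (another lock or NULL), so the
//               check fails and the entry is freed without a destroy call.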
4181 
4182 #endif // KMP_USE_DYNAMIC_LOCK