/*
 * kmp.h -- KPTS runtime header file.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_H
#define KMP_H

#include "kmp_config.h"

/* #define BUILD_PARALLEL_ORDERED 1 */

/* This fix replaces gettimeofday with clock_gettime for better scalability on
   the Altix. Requires user code to be linked with -lrt. */
//#define FIX_SGI_CLOCK

/* Defines for OpenMP 3.0 tasking and auto scheduling */

#ifndef KMP_STATIC_STEAL_ENABLED
#define KMP_STATIC_STEAL_ENABLED 1
#endif

#define TASK_CURRENT_NOT_QUEUED 0
#define TASK_CURRENT_QUEUED 1

#ifdef BUILD_TIED_TASK_STACK
#define TASK_STACK_EMPTY 0 // entries when the stack is empty
#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
// Number of entries in each task stack array
#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
// Mask for determining index into stack block
#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
#endif // BUILD_TIED_TASK_STACK

#define TASK_NOT_PUSHED 1
#define TASK_SUCCESSFULLY_PUSHED 0
#define TASK_TIED 1
#define TASK_UNTIED 0
#define TASK_EXPLICIT 1
#define TASK_IMPLICIT 0
#define TASK_PROXY 1
#define TASK_FULL 0
#define TASK_DETACHABLE 1
#define TASK_UNDETACHABLE 0

#define KMP_CANCEL_THREADS
#define KMP_THREAD_ATTR

// Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS when
// building for Android.
#if defined(__ANDROID__)
#undef KMP_CANCEL_THREADS
#endif

#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits>
#include <type_traits>
/* Do not include <ctype.h>; it causes problems with /MD on Windows* OS NT due
   to a bad Microsoft library. Some macros provided below replace these
   functions. */
#ifndef __ABSOFT_WIN
#include <sys/types.h>
#endif
#include <limits.h>
#include <time.h>

#include <errno.h>

#include "kmp_os.h"

#include "kmp_safe_c_api.h"

#if KMP_STATS_ENABLED
class kmp_stats_list;
#endif

#if KMP_USE_HIER_SCHED
// Only include hierarchical scheduling if affinity is supported
#undef KMP_USE_HIER_SCHED
#define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED
#endif

#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED
#include "hwloc.h"
#ifndef HWLOC_OBJ_NUMANODE
#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
#endif
#ifndef HWLOC_OBJ_PACKAGE
#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
#endif
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#include <xmmintrin.h>
#endif

// The below have to be defined before including "kmp_barrier.h".
#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
#define KMP_INTERNAL_FREE(p) free(p)
#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))

#include "kmp_debug.h"
#include "kmp_lock.h"
#include "kmp_version.h"
#include "kmp_barrier.h"
#if USE_DEBUGGER
#include "kmp_debugger.h"
#endif
#include "kmp_i18n.h"

#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)

#include "kmp_wrapper_malloc.h"
#if KMP_OS_UNIX
#include <unistd.h>
#if !defined NSIG && defined _NSIG
#define NSIG _NSIG
#endif
#endif

#if KMP_OS_LINUX
#pragma weak clock_gettime
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#endif

#if OMPD_SUPPORT
#include "ompd-specific.h"
#endif

#ifndef UNLIKELY
#define UNLIKELY(x) (x)
#endif

// Affinity format function
#include "kmp_str.h"

// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
// 3 - fast allocation using sync, non-sync free lists of any size, non-self
// free lists of limited size.
#ifndef USE_FAST_MEMORY
#define USE_FAST_MEMORY 3
#endif

#ifndef KMP_NESTED_HOT_TEAMS
#define KMP_NESTED_HOT_TEAMS 0
#define USE_NESTED_HOT_ARG(x)
#else
#if KMP_NESTED_HOT_TEAMS
#define USE_NESTED_HOT_ARG(x) , x
#else
#define USE_NESTED_HOT_ARG(x)
#endif
#endif

// By default, assume BGET uses a compare_exchange instruction instead of a
// lock.
#ifndef USE_CMP_XCHG_FOR_BGET
#define USE_CMP_XCHG_FOR_BGET 1
#endif

// Test to see if queuing lock is better than bootstrap lock for bget
// #ifndef USE_QUEUING_LOCK_FOR_BGET
// #define USE_QUEUING_LOCK_FOR_BGET
// #endif

#define KMP_NSEC_PER_SEC 1000000000L
#define KMP_USEC_PER_SEC 1000000L

/* Values for bit flags used in the ident_t to describe the fields. (The
   flags with descriptive comments were dropped from this listing; they are
   restored here from the LLVM source.) */
enum {
  /* Use trampoline for internal microtasks */
  KMP_IDENT_IMB = 0x01,
  /* Use c-style ident structure */
  KMP_IDENT_KMPC = 0x02,
  /* 0x04 is no longer used */
  /* Entry point generated by auto-parallelization */
  KMP_IDENT_AUTOPAR = 0x08,
  /* Compiler generates atomic reduction option for kmp_reduce* */
  KMP_IDENT_ATOMIC_REDUCE = 0x10,
  /* To mark a 'barrier' directive in user code */
  KMP_IDENT_BARRIER_EXPL = 0x20,
  /* To mark implicit barriers */
  KMP_IDENT_BARRIER_IMPL = 0x0040,
  KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
  KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
  KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,

  KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
  KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,

  /* To mark a static loop in OMPT callbacks */
  KMP_IDENT_WORK_LOOP = 0x200,
  /* To mark a sections directive in OMPT callbacks */
  KMP_IDENT_WORK_SECTIONS = 0x400,
  /* To mark a distribute construct in OMPT callbacks */
  KMP_IDENT_WORK_DISTRIBUTE = 0x800,
  /* Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
     not currently used. */
  KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,
  KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
  KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
  KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
  KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
  KMP_IDENT_OPENMP_SPEC_VERSION_MASK = 0xFF000000
};

/* The ident structure that describes a source location. */
typedef struct ident {
  kmp_int32 reserved_1;
  kmp_int32 flags; // KMP_IDENT_xxx flags
  kmp_int32 reserved_2;
#if USE_ITT_BUILD
/* but currently used for storing region-specific ITT */
/* contextual information. */
#endif /* USE_ITT_BUILD */
  kmp_int32 reserved_3;
  char const *psource; // String describing the source location
  // Returns the OpenMP version in form major*10+minor (e.g., 50 for 5.0)
  kmp_int32 get_openmp_version() {
    return (((flags & KMP_IDENT_OPENMP_SPEC_VERSION_MASK) >> 24) & 0xFF);
  }
} ident_t;

// Some forward declarations.
typedef union kmp_team kmp_team_t;
typedef struct kmp_taskdata kmp_taskdata_t;
typedef union kmp_task_team kmp_task_team_t;
typedef union kmp_team kmp_team_p;
typedef union kmp_info kmp_info_p;
typedef union kmp_root kmp_root_p;

template <bool C = false, bool S = true> class kmp_flag_32;
template <bool C = false, bool S = true> class kmp_flag_64;
template <bool C = false, bool S = true> class kmp_atomic_flag_64;
class kmp_flag_oncore;

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------------------------------------------------------------ */

/* Pack two 32-bit signed integers into a 64-bit signed integer */
/* ToDo: Fix word ordering for big-endian machines. */
#define KMP_PACK_64(HIGH_32, LOW_32) \
  ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
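
// Illustrative sketch (not part of the runtime, not compiled): packing two
// 32-bit halves with KMP_PACK_64 and recovering them by shift/mask. The
// values are hypothetical, chosen for demonstration only.
#if 0
static inline void __kmp_pack_64_example(void) {
  kmp_int64 packed = KMP_PACK_64(0x12345678, 0x9ABCDEF0);
  // High half occupies bits 63..32, low half bits 31..0.
  kmp_int32 high = (kmp_int32)(((kmp_uint64)packed) >> 32);        // 0x12345678
  kmp_int32 low = (kmp_int32)(((kmp_uint64)packed) & 0xFFFFFFFF);  // 0x9ABCDEF0
  (void)high;
  (void)low;
}
#endif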

// Generic string manipulation macros. Assume that _x is of type char *
#define SKIP_WS(_x) \
  { \
    while (*(_x) == ' ' || *(_x) == '\t') \
      (_x)++; \
  }
#define SKIP_DIGITS(_x) \
  { \
    while (*(_x) >= '0' && *(_x) <= '9') \
      (_x)++; \
  }
#define SKIP_TOKEN(_x) \
  { \
    while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \
           (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \
      (_x)++; \
  }
#define SKIP_TO(_x, _c) \
  { \
    while (*(_x) != '\0' && *(_x) != (_c)) \
      (_x)++; \
  }
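
// Illustrative sketch (not part of the runtime, not compiled): how the SKIP_*
// macros advance a char* cursor when parsing a hypothetical setting string.
#if 0
static inline void __kmp_skip_macros_example(void) {
  char buf[] = "  granularity = core";
  char *p = buf;
  SKIP_WS(p); // p -> "granularity = core"
  SKIP_TOKEN(p); // consumed alphanumerics and '_'; p -> " = core"
  SKIP_TO(p, '='); // p -> "= core"
}
#endif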

/* ------------------------------------------------------------------------ */

#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y))
#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */
/* Enumeration types */

enum kmp_state_timer {
  ts_stop,
  ts_start,
  ts_pause,

  ts_last_state
};

enum dynamic_mode {
  dynamic_default,
#ifdef USE_LOAD_BALANCE
  dynamic_load_balance,
#endif /* USE_LOAD_BALANCE */
  dynamic_random,
  dynamic_thread_limit,
  dynamic_max
};

/* external schedule constants, duplicate enum omp_sched in omp.h in order to
 * not include it here */
#ifndef KMP_SCHED_TYPE_DEFINED
#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
  kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
  // Note: need to adjust __kmp_sch_map global array in case enum is changed
  kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
  kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
  kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
  kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
  kmp_sched_upper_std = 5, // upper bound for standard schedules
  kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
  kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
#if KMP_STATIC_STEAL_ENABLED
  kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
#endif
  kmp_sched_upper,
  kmp_sched_default = kmp_sched_static, // default scheduling
  kmp_sched_monotonic = 0x80000000
} kmp_sched_t;
#endif

/* Describes the loop schedule to be used for a parallel for loop. (Members
   that carried descriptive comments were dropped from this listing; they are
   restored here from the LLVM source.) */
enum sched_type : kmp_int32 {
  kmp_sch_lower = 32, /* lower bound for unordered values */
  kmp_sch_static_chunked = 33,
  kmp_sch_static = 34, /* static unspecialized */
  kmp_sch_dynamic_chunked = 35,
  kmp_sch_guided_chunked = 36, /* guided unspecialized */
  kmp_sch_runtime = 37,
  kmp_sch_auto = 38, /* auto */
  kmp_sch_trapezoidal = 39,

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_static_greedy = 40,
  kmp_sch_static_balanced = 41,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_guided_iterative_chunked = 42,
  kmp_sch_guided_analytical_chunked = 43,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_static_steal = 44,

  /* static with chunk adjustment (e.g., simd) */
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46, /* guided with chunk adjustment */
  kmp_sch_runtime_simd = 47, /* runtime with chunk adjustment */
  kmp_sch_upper, /* upper bound for unordered values */

  kmp_ord_lower = 64, /* lower bound for ordered values, must be power of 2 */
  kmp_ord_static_chunked = 65,
  kmp_ord_static = 66, /* ordered static unspecialized */
  kmp_ord_dynamic_chunked = 67,
  kmp_ord_guided_chunked = 68,
  kmp_ord_runtime = 69,
  kmp_ord_auto = 70, /* ordered auto */
  kmp_ord_trapezoidal = 71,
  kmp_ord_upper, /* upper bound for ordered values */

  /* Schedules for Distribute construct */
  kmp_distribute_static_chunked = 91, /* distribute static chunked */
  kmp_distribute_static = 92, /* distribute static unspecialized */

  /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
     single iteration/chunk, even if the loop is serialized. For the schedule
     types listed above, the entire iteration vector is returned if the loop is
     serialized. This doesn't work for gcc/gcomp sections. */
  kmp_nm_lower = 160, /* lower bound for nomerge values */

  kmp_nm_static_chunked =
      (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
  kmp_nm_static = 162, /* static unspecialized */
  kmp_nm_dynamic_chunked = 163,
  kmp_nm_guided_chunked = 164, /* guided unspecialized */
  kmp_nm_runtime = 165,
  kmp_nm_auto = 166, /* auto */
  kmp_nm_trapezoidal = 167,

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_nm_static_greedy = 168,
  kmp_nm_static_balanced = 169,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_nm_guided_iterative_chunked = 170,
  kmp_nm_guided_analytical_chunked = 171,
  kmp_nm_static_steal =
      172, /* accessible only through OMP_SCHEDULE environment variable */

  kmp_nm_ord_lower = 192, /* lower bound for nomerge ordered values */
  kmp_nm_ord_static_chunked = 193,
  kmp_nm_ord_static = 194, /* ordered static unspecialized */
  kmp_nm_ord_dynamic_chunked = 195,
  kmp_nm_ord_guided_chunked = 196,
  kmp_nm_ord_runtime = 197,
  kmp_nm_ord_auto = 198, /* ordered auto */
  kmp_nm_ord_trapezoidal = 199,
  kmp_nm_upper, /* upper bound for nomerge values */

  /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
     Since we need to distinguish the three possible cases (no modifier,
     monotonic modifier, nonmonotonic modifier), we need separate bits for
     each modifier. The absence of monotonic does not imply nonmonotonic,
     especially since 4.5 says the behaviour of the "no modifier" case is
     implementation defined, while 5.0 makes it "nonmonotonic".

     Since we're passing a full 32 bit value, we can use a couple of high bits
     for these flags; out of paranoia we avoid the sign bit.

     These modifiers can be or-ed into non-static schedules by the compiler to
     pass the additional information. They will be stripped early in the
     processing in __kmp_dispatch_init when setting up schedules, so most of
     the code won't ever see schedules with these bits set. */
  kmp_sch_modifier_nonmonotonic =
      (1 << 29), /* Set if the nonmonotonic schedule modifier was present */
  kmp_sch_modifier_monotonic =
      (1 << 30), /* Set if the monotonic schedule modifier was present */

#define SCHEDULE_WITHOUT_MODIFIERS(s) \
  (enum sched_type)( \
      (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
#define SCHEDULE_HAS_NO_MODIFIERS(s) \
  (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
#define SCHEDULE_GET_MODIFIERS(s) \
  ((enum sched_type)( \
      (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
#define SCHEDULE_SET_MODIFIERS(s, m) \
  (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
#define SCHEDULE_NONMONOTONIC 0
#define SCHEDULE_MONOTONIC 1

  kmp_sch_default = kmp_sch_static /* default scheduling algorithm */
};

// Apply modifiers on internal kind to standard kind
static inline void
__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
                               enum sched_type internal_kind) {
  if (SCHEDULE_HAS_MONOTONIC(internal_kind)) {
    *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
  }
}

// Apply modifiers on standard kind to internal kind
static inline void
__kmp_sched_apply_mods_intkind(kmp_sched_t kind,
                               enum sched_type *internal_kind) {
  if ((int)kind & (int)kmp_sched_monotonic) {
    *internal_kind = (enum sched_type)((int)*internal_kind |
                                       (int)kmp_sch_modifier_monotonic);
  }
}

// Get standard schedule without modifiers
static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
  return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
}
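
// Illustrative sketch (not part of the runtime, not compiled): round-tripping
// the monotonic modifier between the internal sched_type encoding and the
// standard kmp_sched_t encoding using the macros and helpers above.
#if 0
static inline void __kmp_sched_mods_example(void) {
  enum sched_type internal =
      (enum sched_type)(kmp_sch_dynamic_chunked | kmp_sch_modifier_monotonic);
  KMP_DEBUG_ASSERT(SCHEDULE_HAS_MONOTONIC(internal));
  KMP_DEBUG_ASSERT(SCHEDULE_WITHOUT_MODIFIERS(internal) ==
                   kmp_sch_dynamic_chunked);
  kmp_sched_t std = kmp_sched_dynamic;
  __kmp_sched_apply_mods_stdkind(&std, internal); // sets kmp_sched_monotonic
  KMP_DEBUG_ASSERT(__kmp_sched_without_mods(std) == kmp_sched_dynamic);
}
#endif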

/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef union kmp_r_sched {
  struct {
    enum sched_type r_sched_type;
    int chunk;
  };
  kmp_int64 sched;
} kmp_r_sched_t;

extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our
                                        // internal schedule types

enum library_type {
  library_none,
  library_serial,
  library_turnaround,
  library_throughput
};

#if KMP_OS_LINUX
enum clock_function_type {
  clock_function_gettimeofday,
  clock_function_clock_gettime
};
#endif /* KMP_OS_LINUX */

#if KMP_MIC_SUPPORTED
enum mic_type { non_mic, mic1, mic2, mic3, dummy };
#endif

/* -- fast reduction stuff ------------------------------------------------ */

#undef KMP_FAST_REDUCTION_BARRIER
#define KMP_FAST_REDUCTION_BARRIER 1

#undef KMP_FAST_REDUCTION_CORE_DUO
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_FAST_REDUCTION_CORE_DUO 1
#endif

enum _reduction_method {
  reduction_method_not_defined = 0,
  critical_reduce_block = (1 << 8),
  atomic_reduce_block = (2 << 8),
  tree_reduce_block = (3 << 8),
  empty_reduce_block = (4 << 8)
};

// Description of the packed_reduction_method variable:
// The packed_reduction_method variable consists of two enum-typed values
// packed into byte 0 and byte 1:
// 0: (packed_reduction_method & 0x000000FF) is an 'enum barrier_type' value
// for the barrier that will be used in fast reduction: bs_plain_barrier or
// bs_reduction_barrier
// 1: (packed_reduction_method & 0x0000FF00) is the reduction method that
// will be used in fast reduction;
// The reduction method is of 'enum _reduction_method' type and is defined so
// that the bits of byte 0 are empty; no shift instruction is needed when
// packing/unpacking.

#if KMP_FAST_REDUCTION_BARRIER
#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
  ((reduction_method) | (barrier_type))

#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
  ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))

#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
  ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
#else
#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
  (reduction_method)

#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
  (packed_reduction_method)

#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)
#endif

#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \
  ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \
   (which_reduction_block))

#if KMP_FAST_REDUCTION_BARRIER
#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))

#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
#endif

typedef int PACKED_REDUCTION_METHOD_T;

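// Illustrative sketch (not part of the runtime, not compiled): packing a
// reduction method with a barrier type and unpacking both, per the byte
// layout described above. Assumes the KMP_FAST_REDUCTION_BARRIER variants of
// the macros and bs_reduction_barrier from the 'enum barrier_type'.
#if 0
static inline void __kmp_packed_reduction_example(void) {
  PACKED_REDUCTION_METHOD_T prm = PACK_REDUCTION_METHOD_AND_BARRIER(
      tree_reduce_block, bs_reduction_barrier); // 0x300 | barrier id
  KMP_DEBUG_ASSERT(UNPACK_REDUCTION_METHOD(prm) == tree_reduce_block);
  KMP_DEBUG_ASSERT(UNPACK_REDUCTION_BARRIER(prm) == bs_reduction_barrier);
  KMP_DEBUG_ASSERT(TEST_REDUCTION_METHOD(prm, tree_reduce_block));
}
#endif
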
/* -- end of fast reduction stuff ----------------------------------------- */

#if KMP_OS_WINDOWS
#define USE_CBLKDATA
#if KMP_MSVC_COMPAT
#pragma warning(push)
#pragma warning(disable : 271 310)
#endif
#include <windows.h>
#if KMP_MSVC_COMPAT
#pragma warning(pop)
#endif
#endif

#if KMP_OS_UNIX
#include <dlfcn.h>
#include <pthread.h>
#endif

enum kmp_hw_t : int {
  KMP_HW_UNKNOWN = -1,
  KMP_HW_SOCKET = 0,
  KMP_HW_PROC_GROUP,
  KMP_HW_NUMA,
  KMP_HW_DIE,
  KMP_HW_LLC,
  KMP_HW_L3,
  KMP_HW_TILE,
  KMP_HW_MODULE,
  KMP_HW_L2,
  KMP_HW_L1,
  KMP_HW_CORE,
  KMP_HW_THREAD,
  KMP_HW_LAST
};

typedef enum kmp_hw_core_type_t {
  KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  KMP_HW_CORE_TYPE_ATOM = 0x20,
  KMP_HW_CORE_TYPE_CORE = 0x40,
  KMP_HW_MAX_NUM_CORE_TYPES = 3,
#else
  KMP_HW_MAX_NUM_CORE_TYPES = 1,
#endif
} kmp_hw_core_type_t;

#define KMP_HW_MAX_NUM_CORE_EFFS 8

#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type) \
  KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
#define KMP_ASSERT_VALID_HW_TYPE(type) \
  KMP_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)

#define KMP_FOREACH_HW_TYPE(type) \
  for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \
       type = (kmp_hw_t)((int)type + 1))

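// Illustrative sketch (not part of the runtime, not compiled): walking every
// topology layer with KMP_FOREACH_HW_TYPE and printing its keyword (e.g.,
// "socket", "core", "thread") via the accessor declared below.
#if 0
static inline void __kmp_hw_types_example(void) {
  KMP_FOREACH_HW_TYPE(type) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    printf("%s\n", __kmp_hw_get_keyword(type, /*plural=*/false));
  }
}
#endif
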
const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type);

/* Only Linux* OS and Windows* OS support thread affinity. */
#if KMP_AFFINITY_SUPPORTED

// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
#if KMP_OS_WINDOWS
#if _MSC_VER < 1600 && KMP_MSVC_COMPAT
typedef struct GROUP_AFFINITY {
  KAFFINITY Mask;
  WORD Group;
  WORD Reserved[3];
} GROUP_AFFINITY;
#endif /* _MSC_VER < 1600 */
#if KMP_GROUP_AFFINITY
extern int __kmp_num_proc_groups;
#else
static const int __kmp_num_proc_groups = 1;
#endif /* KMP_GROUP_AFFINITY */
typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;

typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;

typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;

typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
                                             GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
#endif /* KMP_OS_WINDOWS */

#if KMP_USE_HWLOC
extern hwloc_topology_t __kmp_hwloc_topology;
extern int __kmp_hwloc_error;
#endif

extern size_t __kmp_affin_mask_size;
#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
#define KMP_CPU_SET_ITERATE(i, mask) \
  for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
#define KMP_CPU_SET(i, mask) (mask)->set(i)
#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
#define KMP_CPU_ZERO(mask) (mask)->zero()
#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
#define KMP_CPU_ALLOC_ARRAY(arr, n) \
  (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
#define KMP_CPU_FREE_ARRAY(arr, n) \
  __kmp_affinity_dispatch->deallocate_mask_array(arr)
#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
#define __kmp_get_system_affinity(mask, abort_bool) \
  (mask)->get_system_affinity(abort_bool)
#define __kmp_set_system_affinity(mask, abort_bool) \
  (mask)->set_system_affinity(abort_bool)
#define __kmp_get_proc_group(mask) (mask)->get_proc_group()

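// Illustrative sketch (not part of the runtime, not compiled): the typical
// lifecycle of an affinity mask using the macros above -- allocate, populate,
// iterate, free. Assumes affinity has been initialized so that
// __kmp_affinity_dispatch is valid; the OS proc ids are hypothetical.
#if 0
static inline void __kmp_affin_mask_example(void) {
  kmp_affin_mask_t *mask;
  KMP_CPU_ALLOC(mask);
  KMP_CPU_ZERO(mask);
  KMP_CPU_SET(0, mask);
  KMP_CPU_SET(2, mask);
  int i;
  KMP_CPU_SET_ITERATE(i, mask) {
    KMP_DEBUG_ASSERT(KMP_CPU_ISSET(i, mask));
  }
  KMP_CPU_FREE(mask);
}
#endif
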
class KMPAffinity {
public:
  class Mask {
  public:
    void *operator new(size_t n);
    void operator delete(void *p);
    void *operator new[](size_t n);
    void operator delete[](void *p);
    virtual ~Mask() {}
    // Set bit i to 1
    virtual void set(int i) {}
    // Return bit i
    virtual bool is_set(int i) const { return false; }
    // Set bit i to 0
    virtual void clear(int i) {}
    // Zero out entire mask
    virtual void zero() {}
    // Copy src into this mask
    virtual void copy(const Mask *src) {}
    // this &= rhs
    virtual void bitwise_and(const Mask *rhs) {}
    // this |= rhs
    virtual void bitwise_or(const Mask *rhs) {}
    // this = ~this
    virtual void bitwise_not() {}
    // API for iterating over an affinity mask
    // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
    virtual int begin() const { return 0; }
    virtual int end() const { return 0; }
    virtual int next(int previous) const { return 0; }
#if KMP_OS_WINDOWS
    virtual int set_process_affinity(bool abort_on_error) const { return -1; }
#endif
    // Set the system's affinity to this affinity mask's value
    virtual int set_system_affinity(bool abort_on_error) const { return -1; }
    // Set this affinity mask to the current system affinity
    virtual int get_system_affinity(bool abort_on_error) { return -1; }
    // Only 1 DWORD in the mask should have any procs set.
    // Return the appropriate index, or -1 for an invalid mask.
    virtual int get_proc_group() const { return -1; }
  };
  void *operator new(size_t n);
  void operator delete(void *p);
  // Need virtual destructor
  virtual ~KMPAffinity() = default;
  // Determine if affinity is capable
  virtual void determine_capable(const char *env_var) {}
  // Bind the current thread to os proc
  virtual void bind_thread(int proc) {}
  // Factory functions to allocate/deallocate a mask
  virtual Mask *allocate_mask() { return nullptr; }
  virtual void deallocate_mask(Mask *m) {}
  virtual Mask *allocate_mask_array(int num) { return nullptr; }
  virtual void deallocate_mask_array(Mask *m) {}
  virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
  static void pick_api();
  static void destroy_api();
  enum api_type {
    NATIVE_OS
#if KMP_USE_HWLOC
    ,
    HWLOC
#endif
  };
  virtual api_type get_api_type() const {
    KMP_ASSERT(0);
    return NATIVE_OS;
  }

private:
  static bool picked_api;
};

typedef KMPAffinity::Mask kmp_affin_mask_t;
extern KMPAffinity *__kmp_affinity_dispatch;

// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().
#define KMP_AFFIN_MASK_PRINT_LEN 1024

enum affinity_type {
  affinity_none = 0,
  affinity_physical,
  affinity_logical,
  affinity_compact,
  affinity_scatter,
  affinity_explicit,
  affinity_balanced,
  affinity_disabled, // not used outside the env var parser
  affinity_default
};

enum affinity_top_method {
  affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  affinity_top_method_apicid,
  affinity_top_method_x2apicid,
  affinity_top_method_x2apicid_1f,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_GROUP_AFFINITY
  affinity_top_method_group,
#endif /* KMP_GROUP_AFFINITY */
  affinity_top_method_flat,
#if KMP_USE_HWLOC
  affinity_top_method_hwloc,
#endif
  affinity_top_method_default
};

#define affinity_respect_mask_default (2)

typedef struct kmp_affinity_flags_t {
  unsigned dups : 1;
  unsigned verbose : 1;
  unsigned warnings : 1;
  unsigned respect : 2;
  unsigned reset : 1;
  unsigned initialized : 1;
  unsigned reserved : 25;
} kmp_affinity_flags_t;
KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);

typedef struct kmp_affinity_t {
  char *proclist;
  enum affinity_type type;
  kmp_hw_t gran;
  int gran_levels;
  int compact;
  int offset;
  kmp_affinity_flags_t flags;
  unsigned num_masks;
  kmp_affin_mask_t *masks;
  unsigned num_os_id_masks;
  kmp_affin_mask_t *os_id_masks;
  const char *env_var;
} kmp_affinity_t;

#define KMP_AFFINITY_INIT(env) \
  { \
    nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
        {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
        nullptr, 0, nullptr, env \
  }

extern enum affinity_top_method __kmp_affinity_top_method;
extern kmp_affinity_t __kmp_affinity;
extern kmp_affinity_t __kmp_hh_affinity;
extern kmp_affinity_t *__kmp_affinities[2];

extern void __kmp_affinity_bind_thread(int which);

extern kmp_affin_mask_t *__kmp_affin_fullMask;
extern kmp_affin_mask_t *__kmp_affin_origMask;
extern char *__kmp_cpuinfo_file;

#endif /* KMP_AFFINITY_SUPPORTED */

// This needs to be kept in sync with the values in omp.h !!!
typedef enum kmp_proc_bind_t {
  proc_bind_false = 0,
  proc_bind_true,
  proc_bind_primary,
  proc_bind_close,
  proc_bind_spread,
  proc_bind_intel, // use KMP_AFFINITY interface
  proc_bind_default
} kmp_proc_bind_t;

typedef struct kmp_nested_proc_bind_t {
  kmp_proc_bind_t *bind_types;
  int size;
  int used;
} kmp_nested_proc_bind_t;

extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
extern kmp_proc_bind_t __kmp_teams_proc_bind;

extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
#if OMPT_SUPPORT
extern int __kmp_tool;
extern char *__kmp_tool_libraries;
#endif // OMPT_SUPPORT

#if KMP_AFFINITY_SUPPORTED
#define KMP_PLACE_ALL (-1)
#define KMP_PLACE_UNDEFINED (-2)
// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
#define KMP_AFFINITY_NON_PROC_BIND \
  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \
    __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \
   (__kmp_affinity.num_masks > 0 || __kmp_affinity.type == affinity_balanced))
#endif /* KMP_AFFINITY_SUPPORTED */

extern int __kmp_affinity_num_places;

typedef enum kmp_cancel_kind_t {
  cancel_noreq = 0,
  cancel_parallel = 1,
  cancel_loop = 2,
  cancel_sections = 3,
  cancel_taskgroup = 4
} kmp_cancel_kind_t;

// KMP_HW_SUBSET support:
typedef struct kmp_hws_item {
  int num;
  int offset;
} kmp_hws_item_t;

extern kmp_hws_item_t __kmp_hws_socket;
extern kmp_hws_item_t __kmp_hws_die;
extern kmp_hws_item_t __kmp_hws_node;
extern kmp_hws_item_t __kmp_hws_tile;
extern kmp_hws_item_t __kmp_hws_core;
extern kmp_hws_item_t __kmp_hws_proc;
extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested

/* ------------------------------------------------------------------------ */

#define KMP_PAD(type, sz) \
  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
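
// Illustrative sketch (not part of the runtime, not compiled): KMP_PAD rounds
// sizeof(type) up to a multiple of sz. For a hypothetical 12-byte struct and
// a 64-byte boundary: 12 + (64 - (11 % 64) - 1) = 64.
#if 0
struct __kmp_pad_example_s {
  int a, b, c; // 12 bytes
};
static_assert(KMP_PAD(struct __kmp_pad_example_s, 64) == 64,
              "12 bytes padded to a 64-byte boundary");
#endif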

// We need to avoid using -1 as a GTID as +1 is added to the gtid
// when storing it in a lock, and the value 0 is reserved.
#define KMP_GTID_DNE (-2) /* Does not exist */
#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
#define KMP_GTID_UNKNOWN (-5) /* Is not known */
#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */

/* OpenMP 5.0 Memory Management support */

#ifndef __OMP_H
// Duplicate type definitions from omp.h
typedef uintptr_t omp_uintptr_t;

typedef enum {
  omp_atk_sync_hint = 1,
  omp_atk_alignment = 2,
  omp_atk_access = 3,
  omp_atk_pool_size = 4,
  omp_atk_fallback = 5,
  omp_atk_fb_data = 6,
  omp_atk_pinned = 7,
  omp_atk_partition = 8
} omp_alloctrait_key_t;

typedef enum {
  omp_atv_false = 0,
  omp_atv_true = 1,
  omp_atv_contended = 3,
  omp_atv_uncontended = 4,
  omp_atv_serialized = 5,
  omp_atv_sequential = omp_atv_serialized, // (deprecated)
  omp_atv_private = 6,
  omp_atv_all = 7,
  omp_atv_thread = 8,
  omp_atv_pteam = 9,
  omp_atv_cgroup = 10,
  omp_atv_default_mem_fb = 11,
  omp_atv_null_fb = 12,
  omp_atv_abort_fb = 13,
  omp_atv_allocator_fb = 14,
  omp_atv_environment = 15,
  omp_atv_nearest = 16,
  omp_atv_blocked = 17,
  omp_atv_interleaved = 18
} omp_alloctrait_value_t;
#define omp_atv_default ((omp_uintptr_t)-1)

typedef void *omp_memspace_handle_t;
extern omp_memspace_handle_t const omp_default_mem_space;
extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
extern omp_memspace_handle_t const omp_high_bw_mem_space;
extern omp_memspace_handle_t const omp_low_lat_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;

typedef struct {
  omp_alloctrait_key_t key;
  omp_uintptr_t value;
} omp_alloctrait_t;

typedef void *omp_allocator_handle_t;
extern omp_allocator_handle_t const omp_null_allocator;
extern omp_allocator_handle_t const omp_default_mem_alloc;
extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
extern omp_allocator_handle_t const omp_const_mem_alloc;
extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern omp_allocator_handle_t const omp_pteam_mem_alloc;
extern omp_allocator_handle_t const omp_thread_mem_alloc;
extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
extern omp_allocator_handle_t const kmp_max_mem_alloc;
extern omp_allocator_handle_t __kmp_def_allocator;

// end of duplicate type definitions from omp.h
#endif

extern int __kmp_memkind_available;

typedef omp_memspace_handle_t kmp_memspace_t; // placeholder

typedef struct kmp_allocator_t {
  omp_memspace_handle_t memspace;
  void **memkind; // pointer to memkind
  size_t alignment;
  omp_alloctrait_value_t fb;
  kmp_allocator_t *fb_data;
  kmp_uint64 pool_size;
  kmp_uint64 pool_used;
} kmp_allocator_t;

extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
                                                    omp_memspace_handle_t,
                                                    int ntraits,
                                                    omp_alloctrait_t traits[]);
extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
// external interfaces, may be used by compiler
extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
extern void *__kmpc_aligned_alloc(int gtid, size_t align, size_t sz,
                                  omp_allocator_handle_t al);
extern void *__kmpc_calloc(int gtid, size_t nmemb, size_t sz,
                           omp_allocator_handle_t al);
extern void *__kmpc_realloc(int gtid, void *ptr, size_t sz,
                            omp_allocator_handle_t al,
                            omp_allocator_handle_t free_al);
extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
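
// Illustrative sketch (not part of the runtime, not compiled): how a compiler
// or tool might create an allocator with traits and allocate through it using
// the entry points declared above. The gtid value 0 stands in for the calling
// thread's global thread id.
#if 0
static inline void __kmp_allocator_example(size_t sz) {
  omp_alloctrait_t traits[2] = {{omp_atk_alignment, 64},
                                {omp_atk_fallback, omp_atv_default_mem_fb}};
  omp_allocator_handle_t al =
      __kmpc_init_allocator(0, omp_default_mem_space, 2, traits);
  void *p = __kmpc_alloc(0, sz, al); // 64-byte aligned allocation
  __kmpc_free(0, p, al);
  __kmpc_destroy_allocator(0, al);
}
#endif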
// internal interfaces, contain real implementation
extern void *__kmp_alloc(int gtid, size_t align, size_t sz,
                         omp_allocator_handle_t al);
extern void *__kmp_calloc(int gtid, size_t align, size_t nmemb, size_t sz,
                          omp_allocator_handle_t al);
extern void *__kmp_realloc(int gtid, void *ptr, size_t sz,
                           omp_allocator_handle_t al,
                           omp_allocator_handle_t free_al);
extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);

extern void __kmp_init_memkind();
extern void __kmp_fini_memkind();
extern void __kmp_init_target_mem();

/* ------------------------------------------------------------------------ */

#define KMP_UINT64_MAX \
  (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))

#define KMP_MIN_NTH 1

#ifndef KMP_MAX_NTH
#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
#define KMP_MAX_NTH PTHREAD_THREADS_MAX
#else
#define KMP_MAX_NTH INT_MAX
#endif
#endif /* KMP_MAX_NTH */

#ifdef PTHREAD_STACK_MIN
#define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
#else
#define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
#endif

#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))

#if KMP_ARCH_X86
#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
#elif KMP_ARCH_X86_64
#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
#else
#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
#endif

#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
#define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))
#define KMP_MAX_MALLOC_POOL_INCR \
  (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))

#define KMP_MIN_STKOFFSET (0)
#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
#if KMP_OS_DARWIN
#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
#else
#define KMP_DEFAULT_STKOFFSET CACHE_LINE
#endif

#define KMP_MIN_STKPADDING (0)
#define KMP_MAX_STKPADDING (2 * 1024 * 1024)

#define KMP_BLOCKTIME_MULTIPLIER \
  (1000) /* number of blocktime units per second */
#define KMP_MIN_BLOCKTIME (0)
#define KMP_MAX_BLOCKTIME \
  (INT_MAX) /* Must be this value for the "infinite" setting to work */

/* __kmp_blocktime is in milliseconds */
#define KMP_DEFAULT_BLOCKTIME (__kmp_is_hybrid_cpu() ? (0) : (200))

#if KMP_USE_MONITOR
#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec

/* Calculate new number of monitor wakeups for a specific block time based on
   previous monitor_wakeups. Only allow increasing number of wakeups */
#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
  (((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) \
   : ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS \
   : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \
       ? (monitor_wakeups) \
       : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))

/* Calculate number of intervals for a specific block time based on
   monitor_wakeups */
#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
  (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \
   (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))
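
/* Worked example (illustrative): with blocktime = 200 ms and a previous
   monitor_wakeups = 4, KMP_BLOCKTIME_MULTIPLIER / blocktime = 1000 / 200 = 5,
   so KMP_WAKEUPS_FROM_BLOCKTIME raises the rate to 5 wakeups per second (one
   every 200 ms). KMP_INTERVALS_FROM_BLOCKTIME(200, 5) then gives
   (200 + 200 - 1) / 200 = 1 monitor interval per blocktime. */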
#else
#define KMP_BLOCKTIME(team, tid) \
  (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
extern kmp_uint64 __kmp_ticks_per_msec;
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#define KMP_NOW() ((kmp_uint64)_rdtsc())
#else
#define KMP_NOW() __kmp_hardware_timestamp()
#endif
#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
#define KMP_BLOCKTIME_INTERVAL(team, tid) \
  (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
#else
// System time is retrieved sporadically while blocking.
extern kmp_uint64 __kmp_now_nsec();
#define KMP_NOW() __kmp_now_nsec()
#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
#define KMP_BLOCKTIME_INTERVAL(team, tid) \
  (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
#endif
#endif // KMP_USE_MONITOR

#define KMP_MIN_STATSCOLS 40
#define KMP_MAX_STATSCOLS 4096
#define KMP_DEFAULT_STATSCOLS 80

#define KMP_MIN_INTERVAL 0
#define KMP_MAX_INTERVAL (INT_MAX - 1)
#define KMP_DEFAULT_INTERVAL 0

#define KMP_MIN_CHUNK 1
#define KMP_MAX_CHUNK (INT_MAX - 1)
#define KMP_DEFAULT_CHUNK 1

#define KMP_MIN_DISP_NUM_BUFF 1
#define KMP_DFLT_DISP_NUM_BUFF 7
#define KMP_MAX_DISP_NUM_BUFF 4096

#define KMP_MAX_ORDERED 8

#define KMP_MAX_FIELDS 32

#define KMP_MAX_BRANCH_BITS 31

#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX

#define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX

#define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX

/* Minimum number of threads before switch to TLS gtid (experimentally
   determined) */
/* josh TODO: what about OS X* tuning? */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_TLS_GTID_MIN 5
#else
#define KMP_TLS_GTID_MIN INT_MAX
#endif

#define KMP_MASTER_TID(tid) (0 == (tid))
#define KMP_WORKER_TID(tid) (0 != (tid))

#define KMP_MASTER_GTID(gtid) (0 == __kmp_tid_from_gtid((gtid)))
#define KMP_WORKER_GTID(gtid) (0 != __kmp_tid_from_gtid((gtid)))
#define KMP_INITIAL_GTID(gtid) (0 == (gtid))

#ifndef TRUE
#define FALSE 0
#define TRUE (!FALSE)
#endif

/* NOTE: all of the following constants must be even */

#if KMP_OS_WINDOWS
#define KMP_INIT_WAIT 64U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
#elif KMP_OS_LINUX
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_DARWIN
/* TODO: tune for KMP_OS_DARWIN */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_DRAGONFLY
/* TODO: tune for KMP_OS_DRAGONFLY */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_FREEBSD
/* TODO: tune for KMP_OS_FREEBSD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_NETBSD
/* TODO: tune for KMP_OS_NETBSD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_HURD
/* TODO: tune for KMP_OS_HURD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_OPENBSD
/* TODO: tune for KMP_OS_OPENBSD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
typedef struct kmp_cpuid {
  kmp_uint32 eax;
  kmp_uint32 ebx;
  kmp_uint32 ecx;
  kmp_uint32 edx;
} kmp_cpuid_t;

typedef struct kmp_cpuinfo_flags_t {
  unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
  unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
  unsigned hybrid : 1;
  unsigned reserved : 29; // Ensure size of 32 bits
} kmp_cpuinfo_flags_t;

typedef struct kmp_cpuinfo {
  int initialized; // If 0, other fields are not initialized.
  int signature; // CPUID(1).EAX
  int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
  int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
             // Model << 4 ) + Model)
  int stepping; // CPUID(1).EAX[3:0] ( Stepping )
  kmp_cpuinfo_flags_t flags;
  int apic_id;
  int physical_id;
  int logical_id;
  kmp_uint64 frequency; // Nominal CPU frequency in Hz.
  char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
} kmp_cpuinfo_t;

extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);

#if KMP_OS_UNIX
// subleaf is only needed for cache and topology discovery and can be set to
// zero in most cases
static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
  __asm__ __volatile__("cpuid"
                       : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
                       : "a"(leaf), "c"(subleaf));
}
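
// Illustrative sketch (not part of the runtime, not compiled): querying the
// maximum supported CPUID leaf via __kmp_x86_cpuid with leaf 0, subleaf 0.
#if 0
static inline kmp_uint32 __kmp_cpuid_example(void) {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(0, 0, &buf);
  // buf.eax holds the highest supported leaf; ebx/edx/ecx spell the vendor
  // string, e.g., "Genu" "ineI" "ntel".
  return buf.eax;
}
#endif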
// Load p into FPU control word
static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
  __asm__ __volatile__("fldcw %0" : : "m"(*p));
}
// Store FPU control word into p
static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
  __asm__ __volatile__("fstcw %0" : "=m"(*p));
}
static inline void __kmp_clear_x87_fpu_status_word() {
#if KMP_MIC
  // 32-bit protected mode x87 FPU state
  struct x87_fpu_state {
    unsigned cw;
    unsigned sw;
    unsigned tw;
    unsigned fip;
    unsigned fips;
    unsigned fdp;
    unsigned fds;
  };
  struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
  __asm__ __volatile__("fstenv %0\n\t" // store FP env
                       "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
                       "fldenv %0\n\t" // load FP env back
                       : "+m"(fpu_state), "+m"(fpu_state.sw));
#else
  __asm__ __volatile__("fnclex");
#endif // KMP_MIC
}
#if __SSE__
static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
#else
static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
#endif
#else
// Windows still has these as external functions in assembly file
extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
extern void __kmp_clear_x87_fpu_status_word();
static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
#endif // KMP_OS_UNIX

#define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */

// User-level Monitor/Mwait
#if KMP_HAVE_UMWAIT
// We always try for UMWAIT first
#if KMP_HAVE_WAITPKG_INTRINSICS
#if KMP_HAVE_IMMINTRIN_H
#include <immintrin.h>
#elif KMP_HAVE_INTRIN_H
#include <intrin.h>
#endif
#endif // KMP_HAVE_WAITPKG_INTRINSICS

KMP_ATTRIBUTE_TARGET_WAITPKG
static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
#if !KMP_HAVE_WAITPKG_INTRINSICS
  uint32_t timeHi = uint32_t(counter >> 32);
  uint32_t timeLo = uint32_t(counter & 0xffffffff);
  char flag;
  __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
                   "setb %0"
                   // The "=q" constraint means any register accessible as rl
                   // in 32-bit mode: a, b, c, and d;
                   // in 64-bit mode: any integer register
1364 : "=q"(flag)
1365 : "a"(timeLo), "d"(timeHi), "c"(hint)
1366 :);
1367 return flag;
1368#else
1369 return _tpause(hint, counter);
1370#endif
1371}
1372KMP_ATTRIBUTE_TARGET_WAITPKG
1373static inline void __kmp_umonitor(void *cacheline) {
1374#if !KMP_HAVE_WAITPKG_INTRINSICS
1375 __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
1376 :
1377 : "a"(cacheline)
1378 :);
1379#else
1380 _umonitor(cacheline);
1381#endif
1382}
1383KMP_ATTRIBUTE_TARGET_WAITPKG
1384static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
1385#if !KMP_HAVE_WAITPKG_INTRINSICS
1386 uint32_t timeHi = uint32_t(counter >> 32);
1387 uint32_t timeLo = uint32_t(counter & 0xffffffff);
1388 char flag;
1389 __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
1390 "setb %0"
                   // The "=q" constraint means any register accessible as rl
                   // in 32-bit mode: a, b, c, and d;
                   // in 64-bit mode: any integer register
1394 : "=q"(flag)
1395 : "a"(timeLo), "d"(timeHi), "c"(hint)
1396 :);
1397 return flag;
1398#else
1399 return _umwait(hint, counter);
1400#endif
1401}
1402#elif KMP_HAVE_MWAIT
1403#if KMP_OS_UNIX
1404#include <pmmintrin.h>
1405#else
1406#include <intrin.h>
1407#endif
1408#if KMP_OS_UNIX
1409__attribute__((target("sse3")))
1410#endif
1411static inline void
1412__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
1413 _mm_monitor(cacheline, extensions, hints);
1414}
1415#if KMP_OS_UNIX
1416__attribute__((target("sse3")))
1417#endif
1418static inline void
1419__kmp_mm_mwait(unsigned extensions, unsigned hints) {
1420 _mm_mwait(extensions, hints);
1421}
1422#endif // KMP_HAVE_UMWAIT
1423
1424#if KMP_ARCH_X86
1425extern void __kmp_x86_pause(void);
1426#elif KMP_MIC
1427// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
1428// regression after removal of extra PAUSE from spin loops. Changing
1429// the delay from 100 to 300 showed even better performance than double PAUSE
1430// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
1431static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
1432#else
1433static inline void __kmp_x86_pause(void) { _mm_pause(); }
1434#endif
1435#define KMP_CPU_PAUSE() __kmp_x86_pause()
1436#elif KMP_ARCH_PPC64
1437#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
1438#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
1439#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
1440#define KMP_CPU_PAUSE() \
1441 do { \
1442 KMP_PPC64_PRI_LOW(); \
1443 KMP_PPC64_PRI_MED(); \
1444 KMP_PPC64_PRI_LOC_MB(); \
1445 } while (0)
1446#else
1447#define KMP_CPU_PAUSE() /* nothing to do */
1448#endif
1449
1450#define KMP_INIT_YIELD(count) \
1451 { (count) = __kmp_yield_init; }
1452
1453#define KMP_INIT_BACKOFF(time) \
1454 { (time) = __kmp_pause_init; }
1455
1456#define KMP_OVERSUBSCRIBED \
1457 (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
1458
1459#define KMP_TRY_YIELD \
1460 ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
1461
1462#define KMP_TRY_YIELD_OVERSUB \
1463 ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
1464
1465#define KMP_YIELD(cond) \
1466 { \
1467 KMP_CPU_PAUSE(); \
1468 if ((cond) && (KMP_TRY_YIELD)) \
1469 __kmp_yield(); \
1470 }
1471
1472#define KMP_YIELD_OVERSUB() \
1473 { \
1474 KMP_CPU_PAUSE(); \
1475 if ((KMP_TRY_YIELD_OVERSUB)) \
1476 __kmp_yield(); \
1477 }
1478
// Note the decrement of 2 in the following macros. With KMP_LIBRARY=turnaround,
// there should be no yielding since the initial value from KMP_INIT_YIELD() is
// odd.
#define KMP_YIELD_SPIN(count) \
  { \
    KMP_CPU_PAUSE(); \
    if (KMP_TRY_YIELD) { \
      (count) -= 2; \
      if (!(count)) { \
        __kmp_yield(); \
        (count) = __kmp_yield_next; \
      } \
    } \
  }
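
// Illustrative sketch (not part of the runtime, not compiled): a typical
// spin-wait loop built from these macros. __kmp_spin_condition() is a
// hypothetical predicate standing in for a real flag check.
#if 0
static inline void __kmp_spin_wait_example(void) {
  kmp_uint32 spins;
  KMP_INIT_YIELD(spins); // seed the counter from __kmp_yield_init
  while (!__kmp_spin_condition()) {
    KMP_YIELD_SPIN(spins); // pause; yield whenever the counter reaches zero
  }
}
#endif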

// If TPAUSE is available & enabled, use it. If oversubscribed, use the slower
// (C0.2) state, which improves performance of other SMT threads on the same
// core, otherwise, use the fast (C0.1) default state, or whatever the user has
// requested. Uses a timed TPAUSE, and exponential backoff. If TPAUSE isn't
// available, fall back to the regular CPU pause and yield combination.
#if KMP_HAVE_UMWAIT
#define KMP_TPAUSE_MAX_MASK ((kmp_uint64)0xFFFF)
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
  { \
    if (__kmp_tpause_enabled) { \
      if (KMP_OVERSUBSCRIBED) { \
        __kmp_tpause(0, (time)); \
      } else { \
        __kmp_tpause(__kmp_tpause_hint, (time)); \
      } \
      (time) = (time << 1 | 1) & KMP_TPAUSE_MAX_MASK; \
    } else { \
      KMP_CPU_PAUSE(); \
      if ((KMP_TRY_YIELD_OVERSUB)) { \
        __kmp_yield(); \
      } else if (__kmp_use_yield == 1) { \
        (count) -= 2; \
        if (!(count)) { \
          __kmp_yield(); \
          (count) = __kmp_yield_next; \
        } \
      } \
    } \
  }
#else
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
  { \
    KMP_CPU_PAUSE(); \
    if ((KMP_TRY_YIELD_OVERSUB)) \
      __kmp_yield(); \
    else if (__kmp_use_yield == 1) { \
      (count) -= 2; \
      if (!(count)) { \
        __kmp_yield(); \
        (count) = __kmp_yield_next; \
      } \
    } \
  }
#endif // KMP_HAVE_UMWAIT

/* ------------------------------------------------------------------------ */
/* Support datatypes for the orphaned construct nesting checks.              */
/* ------------------------------------------------------------------------ */

/* When adding to this enum, add its corresponding string in cons_text_c[]
 * array in kmp_error.cpp */
enum cons_type {
  ct_none,
  ct_parallel,
  ct_pdo,
  ct_pdo_ordered,
  ct_psections,
  ct_psingle,
  ct_critical,
  ct_ordered_in_parallel,
  ct_ordered_in_pdo,
  ct_master,
  ct_reduce,
  ct_barrier,
  ct_masked
};

#define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)

struct cons_data {
  ident_t const *ident;
  enum cons_type type;
  int prev;
  kmp_user_lock_p
      name; /* address exclusively for critical section name comparison */
};

struct cons_header {
  int p_top, w_top, s_top;
  int stack_size, stack_top;
  struct cons_data *stack_data;
};

struct kmp_region_info {
  char *text;
  int offset[KMP_MAX_FIELDS];
  int length[KMP_MAX_FIELDS];
};

/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */

#if KMP_OS_WINDOWS
typedef HANDLE kmp_thread_t;
typedef DWORD kmp_key_t;
#endif /* KMP_OS_WINDOWS */

#if KMP_OS_UNIX
typedef pthread_t kmp_thread_t;
typedef pthread_key_t kmp_key_t;
#endif

extern kmp_key_t __kmp_gtid_threadprivate_key;

typedef struct kmp_sys_info {
  long maxrss; /* the maximum resident set size utilized (in kilobytes) */
  long minflt; /* the number of page faults serviced without any I/O */
  long majflt; /* the number of page faults serviced that required I/O */
  long nswap; /* the number of times a process was "swapped" out of memory */
  long inblock; /* the number of times the file system had to perform input */
  long oublock; /* the number of times the file system had to perform output */
  long nvcsw; /* the number of times a context switch was voluntarily performed */
  long nivcsw; /* the number of times a context switch was forced */
} kmp_sys_info_t;

#if USE_ITT_BUILD
// We cannot include "kmp_itt.h" due to circular dependency. Declare the only
// required type here. Later we will check the type meets requirements.
typedef int kmp_itt_mark_t;
#define KMP_ITT_DEBUG 0
#endif /* USE_ITT_BUILD */

typedef kmp_int32 kmp_critical_name[8];

// Microtask entry points, as passed to __kmpc_fork_call() and friends.
typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
                                 ...);

/* ------------------------------------------------------------------------ */
/* Threadprivate initialization/finalization function declarations */

/* for non-array objects: __kmpc_threadprivate_register() */

// Pointer to the constructor function; the first argument is the "this"
// pointer.
typedef void *(*kmpc_ctor)(void *);

// Pointer to the destructor function; the first argument is the "this"
// pointer.
typedef void (*kmpc_dtor)(
    void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
                              compiler */
// Pointer to the copy constructor; the first argument is the "this" pointer,
// the second points to the object to copy from.
typedef void *(*kmpc_cctor)(void *, void *);

/* for array objects: __kmpc_threadprivate_register_vec() */
/* First arg: "this" pointer */
/* Last arg: number of array elements */
typedef void *(*kmpc_ctor_vec)(void *, size_t);
typedef void (*kmpc_dtor_vec)(void *, size_t);
typedef void *(*kmpc_cctor_vec)(void *, void *,
                                size_t); /* function unused by compiler */
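
// Illustrative sketch (not part of the runtime, not compiled): constructor
// and destructor functions matching the scalar and vector typedefs above, as
// a compiler might emit for a threadprivate object. All names are
// hypothetical.
#if 0
static void *__kmp_tp_ctor_example(void *this_ptr) {
  // default-construct the thread-local copy in place
  return this_ptr;
}
static void __kmp_tp_dtor_example(void *this_ptr) {
  // destroy the thread-local copy
}
static void *__kmp_tp_ctor_vec_example(void *this_ptr, size_t nelem) {
  // construct nelem array elements in place
  return this_ptr;
}
#endif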

/* keeps track of threadprivate cache allocations for cleanup later */
typedef struct kmp_cached_addr {
  void **addr; /* address of allocated cache */
  void ***compiler_cache; /* pointer to compiler's cache */
  void *data; /* pointer to global data */
  struct kmp_cached_addr *next; /* pointer to next cached address */
} kmp_cached_addr_t;

struct private_data {
  struct private_data *next; /* The next descriptor in the list */
  void *data; /* The data buffer for this descriptor */
  int more; /* The repeat count for this descriptor */
  size_t size; /* The data size for this descriptor */
};

struct private_common {
  struct private_common *next;
  struct private_common *link;
  void *gbl_addr;
  void *par_addr; /* par_addr == gbl_addr for PRIMARY thread */
  size_t cmn_size;
};

struct shared_common {
  struct shared_common *next;
  struct private_data *pod_init;
  void *obj_init;
  void *gbl_addr;
  union {
    kmpc_ctor ctor;
    kmpc_ctor_vec ctorv;
  } ct;
  union {
    kmpc_cctor cctor;
    kmpc_cctor_vec cctorv;
  } cct;
  union {
    kmpc_dtor dtor;
    kmpc_dtor_vec dtorv;
  } dt;
  size_t vec_len;
  int is_vec;
  size_t cmn_size;
};

#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
#define KMP_HASH_TABLE_SIZE \
  (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
#define KMP_HASH(x) \
  ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))

struct common_table {
  struct private_common *data[KMP_HASH_TABLE_SIZE];
};

struct shared_table {
  struct shared_common *data[KMP_HASH_TABLE_SIZE];
};

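// Illustrative sketch (not part of the runtime, not compiled): KMP_HASH
// discards the low KMP_HASH_SHIFT address bits (usually zero because of
// alignment) and masks the result into the 512-entry table, so a lookup walks
// the chain at data[KMP_HASH(addr)].
#if 0
static inline struct private_common *
__kmp_hash_lookup_example(struct common_table *tbl, void *gbl_addr) {
  struct private_common *tn;
  for (tn = tbl->data[KMP_HASH(gbl_addr)]; tn != NULL; tn = tn->next) {
    if (tn->gbl_addr == gbl_addr)
      return tn;
  }
  return NULL;
}
#endif
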
/* ------------------------------------------------------------------------ */

#if KMP_USE_HIER_SCHED
// Shared barrier data that exists inside a single unit of the scheduling
// hierarchy
typedef struct kmp_hier_private_bdata_t {
  kmp_int32 num_active;
  kmp_uint64 index;
  kmp_uint64 wait_val[2];
} kmp_hier_private_bdata_t;
#endif

typedef struct kmp_sched_flags {
  unsigned ordered : 1;
  unsigned nomerge : 1;
  unsigned contains_last : 1;
#if KMP_USE_HIER_SCHED
  unsigned use_hier : 1;
  unsigned unused : 28;
#else
  unsigned unused : 29;
#endif
} kmp_sched_flags_t;

KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);

#if KMP_STATIC_STEAL_ENABLED
typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
  kmp_int32 count;
  kmp_int32 ub;
  /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
  kmp_int32 lb;
  kmp_int32 st;
  kmp_int32 tc;
  kmp_lock_t *steal_lock; // lock used for chunk stealing
  // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
  // a) parm3 is properly aligned and
  // b) all parm1-4 are on the same cache line.
  // Because parm1-4 are used together, performance seems to be better
  // if they are on the same cache line (not measured though).
1786
1787 struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
1788 kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
1789 kmp_int32 parm2; // make no real change at least while padding is off.
1790 kmp_int32 parm3;
1791 kmp_int32 parm4;
1792 };
1793
1794 kmp_uint32 ordered_lower;
1795 kmp_uint32 ordered_upper;
1796#if KMP_OS_WINDOWS
1797 kmp_int32 last_upper;
1798#endif /* KMP_OS_WINDOWS */
1799} dispatch_private_info32_t;
1800
1801typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1802 kmp_int64 count; // current chunk number for static & static-steal scheduling
1803 kmp_int64 ub; /* upper-bound */
1804 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1805 kmp_int64 lb; /* lower-bound */
1806 kmp_int64 st; /* stride */
1807 kmp_int64 tc; /* trip count (number of iterations) */
1808 kmp_lock_t *steal_lock; // lock used for chunk stealing
1809 /* parm[1-4] are used in different ways by different scheduling algorithms */
1810
1811 // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
1812 // a) parm3 is properly aligned and
1813 // b) all parm1-4 are in the same cache line.
1814 // Because parm1-4 are used together, performance seems to be better
1815 // if they are in the same cache line (not measured, though).
1816
1817 struct KMP_ALIGN(32) {
1818 kmp_int64 parm1;
1819 kmp_int64 parm2;
1820 kmp_int64 parm3;
1821 kmp_int64 parm4;
1822 };
1823
1824 kmp_uint64 ordered_lower;
1825 kmp_uint64 ordered_upper;
1826#if KMP_OS_WINDOWS
1827 kmp_int64 last_upper;
1828#endif /* KMP_OS_WINDOWS */
1829} dispatch_private_info64_t;
1830#else /* KMP_STATIC_STEAL_ENABLED */
1831typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1832 kmp_int32 lb;
1833 kmp_int32 ub;
1834 kmp_int32 st;
1835 kmp_int32 tc;
1836
1837 kmp_int32 parm1;
1838 kmp_int32 parm2;
1839 kmp_int32 parm3;
1840 kmp_int32 parm4;
1841
1842 kmp_int32 count;
1843
1844 kmp_uint32 ordered_lower;
1845 kmp_uint32 ordered_upper;
1846#if KMP_OS_WINDOWS
1847 kmp_int32 last_upper;
1848#endif /* KMP_OS_WINDOWS */
1849} dispatch_private_info32_t;
1850
1851typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1852 kmp_int64 lb; /* lower-bound */
1853 kmp_int64 ub; /* upper-bound */
1854 kmp_int64 st; /* stride */
1855 kmp_int64 tc; /* trip count (number of iterations) */
1856
1857 /* parm[1-4] are used in different ways by different scheduling algorithms */
1858 kmp_int64 parm1;
1859 kmp_int64 parm2;
1860 kmp_int64 parm3;
1861 kmp_int64 parm4;
1862
1863 kmp_int64 count; /* current chunk number for static scheduling */
1864
1865 kmp_uint64 ordered_lower;
1866 kmp_uint64 ordered_upper;
1867#if KMP_OS_WINDOWS
1868 kmp_int64 last_upper;
1869#endif /* KMP_OS_WINDOWS */
1870} dispatch_private_info64_t;
1871#endif /* KMP_STATIC_STEAL_ENABLED */
1872
1873typedef struct KMP_ALIGN_CACHE dispatch_private_info {
1874 union private_info {
1875 dispatch_private_info32_t p32;
1876 dispatch_private_info64_t p64;
1877 } u;
1878 enum sched_type schedule; /* scheduling algorithm */
1879 kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
1880 std::atomic<kmp_uint32> steal_flag; // static_steal only, state of a buffer
1881 kmp_int32 ordered_bumped;
1882 // Stack of buffers for nest of serial regions
1883 struct dispatch_private_info *next;
1884 kmp_int32 type_size; /* the size of types in private_info */
1885#if KMP_USE_HIER_SCHED
1886 kmp_int32 hier_id;
1887 void *parent; /* hierarchical scheduling parent pointer */
1888#endif
1889 enum cons_type pushed_ws;
1890} dispatch_private_info_t;
1891
1892typedef struct dispatch_shared_info32 {
1893 /* chunk index under dynamic, number of idle threads under static-steal;
1894 iteration index otherwise */
1895 volatile kmp_uint32 iteration;
1896 volatile kmp_int32 num_done;
1897 volatile kmp_uint32 ordered_iteration;
1898 // Dummy to retain the structure size after making ordered_iteration scalar
1899 kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
1900} dispatch_shared_info32_t;
1901
1902typedef struct dispatch_shared_info64 {
1903 /* chunk index under dynamic, number of idle threads under static-steal;
1904 iteration index otherwise */
1905 volatile kmp_uint64 iteration;
1906 volatile kmp_int64 num_done;
1907 volatile kmp_uint64 ordered_iteration;
1908 // Dummy to retain the structure size after making ordered_iteration scalar
1909 kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
1910} dispatch_shared_info64_t;
1911
1912typedef struct dispatch_shared_info {
1913 union shared_info {
1914 dispatch_shared_info32_t s32;
1915 dispatch_shared_info64_t s64;
1916 } u;
1917 volatile kmp_uint32 buffer_index;
1918 volatile kmp_int32 doacross_buf_idx; // teamwise index
1919 volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
1920 kmp_int32 doacross_num_done; // count finished threads
1921#if KMP_USE_HIER_SCHED
1922 void *hier;
1923#endif
1924#if KMP_USE_HWLOC
1925 // When linking with libhwloc, the ORDERED EPCC test slows down on big
1926 // machines (> 48 cores). Performance analysis showed that a cache thrash
1927 // was occurring and this padding helps alleviate the problem.
1928 char padding[64];
1929#endif
1930} dispatch_shared_info_t;
1931
1932typedef struct kmp_disp {
1933 /* Vector for ORDERED SECTION */
1934 void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
1935 /* Vector for END ORDERED SECTION */
1936 void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
1937
1938 dispatch_shared_info_t *th_dispatch_sh_current;
1939 dispatch_private_info_t *th_dispatch_pr_current;
1940
1941 dispatch_private_info_t *th_disp_buffer;
1942 kmp_uint32 th_disp_index;
1943 kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
1944 volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
1945 kmp_int64 *th_doacross_info; // info on loop bounds
1946#if KMP_USE_INTERNODE_ALIGNMENT
1947 char more_padding[INTERNODE_CACHE_LINE];
1948#endif
1949} kmp_disp_t;
1950
1951/* ------------------------------------------------------------------------ */
1952/* Barrier stuff */
1953
1954/* constants for barrier state update */
1955#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
1956#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
1957#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
1958#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
1959
1960#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
1961#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
1962#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
1963
1964#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
1965#error "Barrier sleep bit must be smaller than barrier bump bit"
1966#endif
1967#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
1968#error "Barrier unused bit must be smaller than barrier bump bit"
1969#endif
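/* Editor's illustrative sketch (not part of the runtime): the state word is
   advanced in units of KMP_BARRIER_STATE_BUMP, leaving the low bits free for
   the sleep flag; the helper name is hypothetical. */
static inline kmp_uint64 __example_barrier_bump(kmp_uint64 state) {
  state &= ~((kmp_uint64)KMP_BARRIER_SLEEP_STATE); // clear the sleep bit
  return state + KMP_BARRIER_STATE_BUMP; // next go/arrived generation
}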
1970
1971// Constants for release barrier wait state: currently, hierarchical only
1972#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
1973#define KMP_BARRIER_OWN_FLAG \
1974 1 // Normal state; worker waiting on own b_go flag in release
1975#define KMP_BARRIER_PARENT_FLAG \
1976 2 // Special state; worker waiting on parent's b_go flag in release
1977#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \
1978 3 // Special state; tells worker to shift from parent to own b_go
1979#define KMP_BARRIER_SWITCHING \
1980 4 // Special state; worker resets appropriate flag on wake-up
1981
1982#define KMP_NOT_SAFE_TO_REAP \
1983 0 // Thread th_reap_state: not safe to reap (tasking)
1984#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
1985
1986// The flag_type describes the storage used for the flag.
1987enum flag_type {
1988 flag32,
1989 flag64,
1990 atomic_flag64,
1991 flag_oncore,
1992 flag_unset
1993};
1994
1995enum barrier_type {
1996 bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
1997 barriers if enabled) */
1998 bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
1999#if KMP_FAST_REDUCTION_BARRIER
2000 bs_reduction_barrier, /* 2, All barriers that are used in reduction */
2001#endif // KMP_FAST_REDUCTION_BARRIER
2002 bs_last_barrier /* Just a placeholder to mark the end */
2003};
2004
2005// to work with reduction barriers just like with plain barriers
2006#if !KMP_FAST_REDUCTION_BARRIER
2007#define bs_reduction_barrier bs_plain_barrier
2008#endif // KMP_FAST_REDUCTION_BARRIER
2009
2010typedef enum kmp_bar_pat { /* Barrier communication patterns */
2011 bp_linear_bar =
2012 0, /* Single level (degenerate) tree */
2013 bp_tree_bar =
2014 1, /* Balanced tree with branching factor 2^n */
2015 bp_hyper_bar = 2, /* Hypercube-embedded tree with min
2016 branching factor 2^n */
2017 bp_hierarchical_bar = 3, /* Machine hierarchy tree */
2018 bp_dist_bar = 4, /* Distributed barrier */
2019 bp_last_bar /* Placeholder to mark the end */
2020} kmp_bar_pat_e;
2021
2022#define KMP_BARRIER_ICV_PUSH 1
2023
2024/* Record for holding the values of the internal control stack entries */
2025typedef struct kmp_internal_control {
2026 int serial_nesting_level; /* corresponds to the value of the
2027 th_team_serialized field */
2028 kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
2029 thread) */
2030 kmp_int8
2031 bt_set; /* internal control for whether blocktime is explicitly set */
2032 int blocktime; /* internal control for blocktime */
2033#if KMP_USE_MONITOR
2034 int bt_intervals; /* internal control for blocktime intervals */
2035#endif
2036 int nproc; /* internal control for #threads for next parallel region (per
2037 thread) */
2038 int thread_limit; /* internal control for thread-limit-var */
2039 int max_active_levels; /* internal control for max_active_levels */
2040 kmp_r_sched_t
2041 sched; /* internal control for runtime schedule {sched,chunk} pair */
2042 kmp_proc_bind_t proc_bind; /* internal control for affinity */
2043 kmp_int32 default_device; /* internal control for default device */
2044 struct kmp_internal_control *next;
2045} kmp_internal_control_t;
2046
2047static inline void copy_icvs(kmp_internal_control_t *dst,
2048 kmp_internal_control_t *src) {
2049 *dst = *src;
2050}
2051
2052/* Thread barrier needs volatile barrier fields */
2053typedef struct KMP_ALIGN_CACHE kmp_bstate {
2054 // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
2055 // uses of it). It is not explicitly aligned below, because we *don't* want
2056 // it to be padded -- instead, we fit b_go into the same cache line with
2057 // th_fixed_icvs, enabling NGO cache-line stores in the hierarchical barrier.
2058 kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
2059 // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
2060 // same NGO store
2061 volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
2062 KMP_ALIGN_CACHE volatile kmp_uint64
2063 b_arrived; // STATE => task reached synch point.
2064 kmp_uint32 *skip_per_level;
2065 kmp_uint32 my_level;
2066 kmp_int32 parent_tid;
2067 kmp_int32 old_tid;
2068 kmp_uint32 depth;
2069 struct kmp_bstate *parent_bar;
2070 kmp_team_t *team;
2071 kmp_uint64 leaf_state;
2072 kmp_uint32 nproc;
2073 kmp_uint8 base_leaf_kids;
2074 kmp_uint8 leaf_kids;
2075 kmp_uint8 offset;
2076 kmp_uint8 wait_flag;
2077 kmp_uint8 use_oncore_barrier;
2078#if USE_DEBUGGER
2079 // The following field is intended solely for the debugger. Only the worker
2080 // thread itself accesses this field: the worker increases it by 1 when it
2081 // arrives at a barrier.
2082 KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
2083#endif /* USE_DEBUGGER */
2084} kmp_bstate_t;
2085
2086union KMP_ALIGN_CACHE kmp_barrier_union {
2087 double b_align; /* use worst case alignment */
2088 char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
2089 kmp_bstate_t bb;
2090};
2091
2092typedef union kmp_barrier_union kmp_balign_t;
2093
2094/* Team barrier needs only non-volatile arrived counter */
2095union KMP_ALIGN_CACHE kmp_barrier_team_union {
2096 double b_align; /* use worst case alignment */
2097 char b_pad[CACHE_LINE];
2098 struct {
2099 kmp_uint64 b_arrived; /* STATE => task reached synch point. */
2100#if USE_DEBUGGER
2101 // The following two fields are intended solely for the debugger. Only the
2102 // primary thread of the team accesses these fields: the first one is
2103 // increased by 1 when the primary thread arrives at a barrier, and the
2104 // second is increased by 1 when all the threads have arrived.
2105 kmp_uint b_master_arrived;
2106 kmp_uint b_team_arrived;
2107#endif
2108 };
2109};
2110
2111typedef union kmp_barrier_team_union kmp_balign_team_t;
2112
2113/* Padding for Linux* OS pthreads condition variables and mutexes used to signal
2114 threads when a condition changes. This is to work around an NPTL bug where
2115 padding was added to pthread_cond_t, which caused the initialization routine
2116 to write outside of the structure if compiled on pre-NPTL threads. */
2117#if KMP_OS_WINDOWS
2118typedef struct kmp_win32_mutex {
2119 /* The Lock */
2120 CRITICAL_SECTION cs;
2121} kmp_win32_mutex_t;
2122
2123typedef struct kmp_win32_cond {
2124 /* Count of the number of waiters. */
2125 int waiters_count_;
2126
2127 /* Serialize access to <waiters_count_> */
2128 kmp_win32_mutex_t waiters_count_lock_;
2129
2130 /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
2131 int release_count_;
2132
2133 /* Keeps track of the current "generation" so that we don't allow */
2134 /* one thread to steal all the "releases" from the broadcast. */
2135 int wait_generation_count_;
2136
2137 /* A manual-reset event that's used to block and release waiting threads. */
2138 HANDLE event_;
2139} kmp_win32_cond_t;
2140#endif
2141
2142#if KMP_OS_UNIX
2143
2144union KMP_ALIGN_CACHE kmp_cond_union {
2145 double c_align;
2146 char c_pad[CACHE_LINE];
2147 pthread_cond_t c_cond;
2148};
2149
2150typedef union kmp_cond_union kmp_cond_align_t;
2151
2152union KMP_ALIGN_CACHE kmp_mutex_union {
2153 double m_align;
2154 char m_pad[CACHE_LINE];
2155 pthread_mutex_t m_mutex;
2156};
2157
2158typedef union kmp_mutex_union kmp_mutex_align_t;
2159
2160#endif /* KMP_OS_UNIX */
2161
2162typedef struct kmp_desc_base {
2163 void *ds_stackbase;
2164 size_t ds_stacksize;
2165 int ds_stackgrow;
2166 kmp_thread_t ds_thread;
2167 volatile int ds_tid;
2168 int ds_gtid;
2169#if KMP_OS_WINDOWS
2170 volatile int ds_alive;
2171 DWORD ds_thread_id;
2172/* ds_thread keeps the thread handle on Windows* OS. It is enough for RTL
2173 purposes. However, debugger support (libomp_db) cannot work with handles,
2174 because they are not comparable. For example, the debugger requests info
2175 about a thread with handle h. h is valid within the debugger process, but
2176 meaningless within the debuggee process. Even if h is duplicated by a call
2177 to DuplicateHandle() so that the result h' is valid within the debuggee
2178 process, h' is a *new* handle which does *not* equal any other handle in
2179 the debuggee... The only way to compare handles is to convert them to
2180 system-wide ids. The GetThreadId() function is available only on Longhorn
2181 and Server 2003. :-( In contrast, GetCurrentThreadId() is available on all
2182 Windows* OS flavours (including Windows* 95). Thus, we have to get the
2183 thread id by calling GetCurrentThreadId() from within the thread and save it to let libomp_db identify threads. */
2184#endif /* KMP_OS_WINDOWS */
2185} kmp_desc_base_t;
2186
2187typedef union KMP_ALIGN_CACHE kmp_desc {
2188 double ds_align; /* use worst case alignment */
2189 char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
2190 kmp_desc_base_t ds;
2191} kmp_desc_t;
2192
2193typedef struct kmp_local {
2194 volatile int this_construct; /* count of single's encountered by thread */
2195 void *reduce_data;
2196#if KMP_USE_BGET
2197 void *bget_data;
2198 void *bget_list;
2199#if !USE_CMP_XCHG_FOR_BGET
2200#ifdef USE_QUEUING_LOCK_FOR_BGET
2201 kmp_lock_t bget_lock; /* Lock for accessing bget free list */
2202#else
2203 kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
2204// bootstrap lock so we can use it at library
2205// shutdown.
2206#endif /* USE_QUEUING_LOCK_FOR_BGET */
2207#endif /* ! USE_CMP_XCHG_FOR_BGET */
2208#endif /* KMP_USE_BGET */
2209
2210 PACKED_REDUCTION_METHOD_T
2211 packed_reduction_method; /* stored by __kmpc_reduce*(), used by
2212 __kmpc_end_reduce*() */
2213
2214} kmp_local_t;
2215
2216#define KMP_CHECK_UPDATE(a, b) \
2217 if ((a) != (b)) \
2218 (a) = (b)
2219#define KMP_CHECK_UPDATE_SYNC(a, b) \
2220 if ((a) != (b)) \
2221 TCW_SYNC_PTR((a), (b))
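// Editor's note (illustrative; the variable names are hypothetical): the
// conditional store avoids dirtying a shared cache line when the value is
// already current, e.g.
//   KMP_CHECK_UPDATE(team->t.t_argc, argc);      // plain conditional store
//   KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, pkfn); // synchronized pointer store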
2222
2223#define get__blocktime(xteam, xtid) \
2224 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
2225#define get__bt_set(xteam, xtid) \
2226 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
2227#if KMP_USE_MONITOR
2228#define get__bt_intervals(xteam, xtid) \
2229 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
2230#endif
2231
2232#define get__dynamic_2(xteam, xtid) \
2233 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
2234#define get__nproc_2(xteam, xtid) \
2235 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
2236#define get__sched_2(xteam, xtid) \
2237 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
2238
2239#define set__blocktime_team(xteam, xtid, xval) \
2240 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \
2241 (xval))
2242
2243#if KMP_USE_MONITOR
2244#define set__bt_intervals_team(xteam, xtid, xval) \
2245 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \
2246 (xval))
2247#endif
2248
2249#define set__bt_set_team(xteam, xtid, xval) \
2250 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
2251
2252#define set__dynamic(xthread, xval) \
2253 (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
2254#define get__dynamic(xthread) \
2255 (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))
2256
2257#define set__nproc(xthread, xval) \
2258 (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
2259
2260#define set__thread_limit(xthread, xval) \
2261 (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))
2262
2263#define set__max_active_levels(xthread, xval) \
2264 (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
2265
2266#define get__max_active_levels(xthread) \
2267 ((xthread)->th.th_current_task->td_icvs.max_active_levels)
2268
2269#define set__sched(xthread, xval) \
2270 (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
2271
2272#define set__proc_bind(xthread, xval) \
2273 (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
2274#define get__proc_bind(xthread) \
2275 ((xthread)->th.th_current_task->td_icvs.proc_bind)
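// Editor's note (illustrative; "thread" is a hypothetical kmp_info_t *):
// these accessors read and write ICVs on the thread's current task, e.g.
//   set__nproc(thread, 8);      // request 8 threads for the next region
//   int lvl = get__max_active_levels(thread);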
2276
2277// OpenMP tasking data structures
2278
2279typedef enum kmp_tasking_mode {
2280 tskm_immediate_exec = 0,
2281 tskm_extra_barrier = 1,
2282 tskm_task_teams = 2,
2283 tskm_max = 2
2284} kmp_tasking_mode_t;
2285
2286extern kmp_tasking_mode_t
2287 __kmp_tasking_mode; /* determines how/when to execute tasks */
2288extern int __kmp_task_stealing_constraint;
2289extern int __kmp_enable_task_throttling;
2290extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
2291// specified, defaults to 0 otherwise
2292// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
2293extern kmp_int32 __kmp_max_task_priority;
2294// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
2295extern kmp_uint64 __kmp_taskloop_min_tasks;
2296
2297/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with
2298 taskdata first */
2299#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)
2300#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)
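// Editor's note (illustrative): the two macros above are inverse pointer
// adjustments over that single block, so for any task allocated this way:
//   kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); // header just before
//   KMP_DEBUG_ASSERT(KMP_TASKDATA_TO_TASK(td) == task); // round-trips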
2301
2302// The tt_found_tasks flag is a signal to all threads in the team that tasks
2303// were spawned and queued since the previous barrier release.
2304#define KMP_TASKING_ENABLED(task_team) \
2305 (TRUE == TCR_SYNC_4((task_team)->tt.tt_found_tasks))
2313typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
2314
2315typedef union kmp_cmplrdata {
2316 kmp_int32 priority;
2317 kmp_routine_entry_t
2318 destructors; /* pointer to function to invoke deconstructors of
2319 firstprivate C++ objects */
2320 /* future data */
2321} kmp_cmplrdata_t;
2322
2323/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
2326typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
2327 void *shareds;
2328 kmp_routine_entry_t
2329 routine;
2330 kmp_int32 part_id;
2331 kmp_cmplrdata_t
2332 data1; /* Two known optional additions: destructors and priority */
2333 kmp_cmplrdata_t data2; /* Process destructors first, priority second */
2334 /* future data */
2335 /* private vars */
2336} kmp_task_t;
2337
2342typedef struct kmp_taskgroup {
2343 std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
2344 std::atomic<kmp_int32>
2345 cancel_request; // request for cancellation of this taskgroup
2346 struct kmp_taskgroup *parent; // parent taskgroup
2347 // Block of data to perform task reduction
2348 void *reduce_data; // reduction related info
2349 kmp_int32 reduce_num_data; // number of data items to reduce
2350 uintptr_t *gomp_data; // gomp reduction data
2351} kmp_taskgroup_t;
2352
2353// forward declarations
2354typedef union kmp_depnode kmp_depnode_t;
2355typedef struct kmp_depnode_list kmp_depnode_list_t;
2356typedef struct kmp_dephash_entry kmp_dephash_entry_t;
2357
2358// macros for checking dep flag as an integer
2359#define KMP_DEP_IN 0x1
2360#define KMP_DEP_OUT 0x2
2361#define KMP_DEP_INOUT 0x3
2362#define KMP_DEP_MTX 0x4
2363#define KMP_DEP_SET 0x8
2364#define KMP_DEP_ALL 0x80
2365// Compiler sends us this info:
2366typedef struct kmp_depend_info {
2367 kmp_intptr_t base_addr;
2368 size_t len;
2369 union {
2370 kmp_uint8 flag; // flag as an unsigned char
2371 struct { // flag as a set of 8 bits
2372 unsigned in : 1;
2373 unsigned out : 1;
2374 unsigned mtx : 1;
2375 unsigned set : 1;
2376 unsigned unused : 3;
2377 unsigned all : 1;
2378 } flags;
2379 };
2380} kmp_depend_info_t;
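/* Editor's illustrative sketch (not part of the runtime): the integer view
   and the bit-field view alias the same byte, so dependence kinds can be
   tested either way; the helper name is hypothetical. */
static inline bool __example_dep_is_inout(const kmp_depend_info_t *d) {
  // KMP_DEP_INOUT == (KMP_DEP_IN | KMP_DEP_OUT), i.e. both bits set
  return (d->flag & KMP_DEP_INOUT) == KMP_DEP_INOUT;
}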
2381
2382// Internal structures to work with task dependencies:
2383struct kmp_depnode_list {
2384 kmp_depnode_t *node;
2385 kmp_depnode_list_t *next;
2386};
2387
2388// Max number of mutexinoutset dependencies per node
2389#define MAX_MTX_DEPS 4
2390
2391typedef struct kmp_base_depnode {
2392 kmp_depnode_list_t *successors; /* used under lock */
2393 kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
2394 kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
2395 kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
2396 kmp_lock_t lock; /* guards shared fields: task, successors */
2397#if KMP_SUPPORT_GRAPH_OUTPUT
2398 kmp_uint32 id;
2399#endif
2400 std::atomic<kmp_int32> npredecessors;
2401 std::atomic<kmp_int32> nrefs;
2402} kmp_base_depnode_t;
2403
2404union KMP_ALIGN_CACHE kmp_depnode {
2405 double dn_align; /* use worst case alignment */
2406 char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
2407 kmp_base_depnode_t dn;
2408};
2409
2410struct kmp_dephash_entry {
2411 kmp_intptr_t addr;
2412 kmp_depnode_t *last_out;
2413 kmp_depnode_list_t *last_set;
2414 kmp_depnode_list_t *prev_set;
2415 kmp_uint8 last_flag;
2416 kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
2417 kmp_dephash_entry_t *next_in_bucket;
2418};
2419
2420typedef struct kmp_dephash {
2421 kmp_dephash_entry_t **buckets;
2422 size_t size;
2423 kmp_depnode_t *last_all;
2424 size_t generation;
2425 kmp_uint32 nelements;
2426 kmp_uint32 nconflicts;
2427} kmp_dephash_t;
2428
2429typedef struct kmp_task_affinity_info {
2430 kmp_intptr_t base_addr;
2431 size_t len;
2432 struct {
2433 bool flag1 : 1;
2434 bool flag2 : 1;
2435 kmp_int32 reserved : 30;
2436 } flags;
2437} kmp_task_affinity_info_t;
2438
2439typedef enum kmp_event_type_t {
2440 KMP_EVENT_UNINITIALIZED = 0,
2441 KMP_EVENT_ALLOW_COMPLETION = 1
2442} kmp_event_type_t;
2443
2444typedef struct {
2445 kmp_event_type_t type;
2446 kmp_tas_lock_t lock;
2447 union {
2448 kmp_task_t *task;
2449 } ed;
2450} kmp_event_t;
2451
2452#ifdef BUILD_TIED_TASK_STACK
2453
2454/* Tied Task stack definitions */
2455typedef struct kmp_stack_block {
2456 kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
2457 struct kmp_stack_block *sb_next;
2458 struct kmp_stack_block *sb_prev;
2459} kmp_stack_block_t;
2460
2461typedef struct kmp_task_stack {
2462 kmp_stack_block_t ts_first_block; // first block of stack entries
2463 kmp_taskdata_t **ts_top; // pointer to the top of stack
2464 kmp_int32 ts_entries; // number of entries on the stack
2465} kmp_task_stack_t;
2466
2467#endif // BUILD_TIED_TASK_STACK
2468
2469typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
2470 /* Compiler flags */ /* Total compiler flags must be 16 bits */
2471 unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
2472 unsigned final : 1; /* task is final(1) so execute immediately */
2473 unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
2474 code path */
2475 unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
2476 invoke destructors from the runtime */
2477 unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
2478 context of the RTL) */
2479 unsigned priority_specified : 1; /* set if the compiler provides priority
2480 setting for the task */
2481 unsigned detachable : 1; /* 1 == can detach */
2482 unsigned hidden_helper : 1; /* 1 == hidden helper task */
2483 unsigned reserved : 8; /* reserved for compiler use */
2484
2485 /* Library flags */ /* Total library flags must be 16 bits */
2486 unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
2487 unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
2488 unsigned tasking_ser : 1; // all tasks in team are either executed immediately
2489 // (1) or may be deferred (0)
2490 unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
2491 // (0) [>= 2 threads]
2492 /* If either team_serial or tasking_ser is set, task team may be NULL */
2493 /* Task State Flags: */
2494 unsigned started : 1; /* 1==started, 0==not started */
2495 unsigned executing : 1; /* 1==executing, 0==not executing */
2496 unsigned complete : 1; /* 1==complete, 0==not complete */
2497 unsigned freed : 1; /* 1==freed, 0==allocated */
2498 unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
2499 unsigned reserved31 : 7; /* reserved for library use */
2500
2501} kmp_tasking_flags_t;
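/* Editor's note: the comment above requires the compiler half and the
   library half to total exactly 32 bits; an illustrative build-time check
   (not in the original) would be: */
KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);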
2502
2503struct kmp_taskdata { /* aligned during dynamic allocation */
2504 kmp_int32 td_task_id; /* id, assigned by debugger */
2505 kmp_tasking_flags_t td_flags; /* task flags */
2506 kmp_team_t *td_team; /* team for this task */
2507 kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
2508 /* Currently not used except for perhaps IDB */
2509 kmp_taskdata_t *td_parent; /* parent task */
2510 kmp_int32 td_level; /* task nesting level */
2511 std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
2512 ident_t *td_ident; /* task identifier */
2513 // Taskwait data.
2514 ident_t *td_taskwait_ident;
2515 kmp_uint32 td_taskwait_counter;
2516 kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
2517 KMP_ALIGN_CACHE kmp_internal_control_t
2518 td_icvs; /* Internal control variables for the task */
2519 KMP_ALIGN_CACHE std::atomic<kmp_int32>
2520 td_allocated_child_tasks; /* Child tasks (+ current task) not yet
2521 deallocated */
2522 std::atomic<kmp_int32>
2523 td_incomplete_child_tasks; /* Child tasks not yet complete */
2524 kmp_taskgroup_t
2525 *td_taskgroup; // Each task keeps pointer to its current taskgroup
2526 kmp_dephash_t
2527 *td_dephash; // Dependencies for children tasks are tracked from here
2528 kmp_depnode_t
2529 *td_depnode; // Pointer to graph node if this task has dependencies
2530 kmp_task_team_t *td_task_team;
2531 size_t td_size_alloc; // Size of task structure, including shareds etc.
2532#if defined(KMP_GOMP_COMPAT)
2533 // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
2534 kmp_int32 td_size_loop_bounds;
2535#endif
2536 kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
2537#if defined(KMP_GOMP_COMPAT)
2538 // GOMP sends in a copy function for copy constructors
2539 void (*td_copy_func)(void *, void *);
2540#endif
2541 kmp_event_t td_allow_completion_event;
2542#if OMPT_SUPPORT
2543 ompt_task_info_t ompt_task_info;
2544#endif
2545}; // struct kmp_taskdata
2546
2547// Make sure padding above worked
2548KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
2549
2550// Data for task team but per thread
2551typedef struct kmp_base_thread_data {
2552 kmp_info_p *td_thr; // Pointer back to thread info
2553 // Used only in __kmp_execute_tasks_template, maybe not avail until task is
2554 // queued?
2555 kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
2556 kmp_taskdata_t *
2557 *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
2558 kmp_int32 td_deque_size; // Size of deque
2559 kmp_uint32 td_deque_head; // Head of deque (will wrap)
2560 kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
2561 kmp_int32 td_deque_ntasks; // Number of tasks in deque
2562 // GEH: shouldn't this be volatile since used in while-spin?
2563 kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
2564#ifdef BUILD_TIED_TASK_STACK
2565 kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
2566// scheduling constraint
2567#endif // BUILD_TIED_TASK_STACK
2568} kmp_base_thread_data_t;
2569
2570#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
2571#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
2572
2573#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
2574#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
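/* Editor's illustrative sketch (not part of the runtime): head and tail are
   free-running counters; masking with the power-of-two size minus one maps a
   counter to a slot, so indices wrap naturally. Helper name hypothetical. */
static inline kmp_uint32 __example_deque_slot(kmp_base_thread_data_t *td,
                                              kmp_uint32 counter) {
  return counter & TASK_DEQUE_MASK(*td); // valid while size stays 2^k
}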
2575
2576typedef union KMP_ALIGN_CACHE kmp_thread_data {
2577 kmp_base_thread_data_t td;
2578 double td_align; /* use worst case alignment */
2579 char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
2580} kmp_thread_data_t;
2581
2582typedef struct kmp_task_pri {
2583 kmp_thread_data_t td;
2584 kmp_int32 priority;
2585 kmp_task_pri *next;
2586} kmp_task_pri_t;
2587
2588// Data for task teams which are used when tasking is enabled for the team
2589typedef struct kmp_base_task_team {
2590 kmp_bootstrap_lock_t
2591 tt_threads_lock; /* Lock used to allocate per-thread part of task team */
2592 /* must be bootstrap lock since used at library shutdown*/
2593
2594 // TODO: check performance vs kmp_tas_lock_t
2595 kmp_bootstrap_lock_t tt_task_pri_lock; /* Lock to access priority tasks */
2596 kmp_task_pri_t *tt_task_pri_list;
2597
2598 kmp_task_team_t *tt_next; /* For linking the task team free list */
2599 kmp_thread_data_t
2600 *tt_threads_data; /* Array of per-thread structures for task team */
2601 /* Data survives task team deallocation */
2602 kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
2603 executing this team? */
2604 /* TRUE means tt_threads_data is set up and initialized */
2605 kmp_int32 tt_nproc; /* #threads in team */
2606 kmp_int32 tt_max_threads; // # entries allocated for threads_data array
2607 kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
2608 kmp_int32 tt_untied_task_encountered;
2609 std::atomic<kmp_int32> tt_num_task_pri; // number of priority tasks enqueued
2610 // A hidden helper task was encountered in this task team, so we must
2611 // wait when waiting on the task team
2612 kmp_int32 tt_hidden_helper_task_encountered;
2613
2614 KMP_ALIGN_CACHE
2615 std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
2616
2617 KMP_ALIGN_CACHE
2618 volatile kmp_uint32
2619 tt_active; /* is the team still actively executing tasks */
2620} kmp_base_task_team_t;
2621
2622union KMP_ALIGN_CACHE kmp_task_team {
2623 kmp_base_task_team_t tt;
2624 double tt_align; /* use worst case alignment */
2625 char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
2626};
2627
2628#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
2629// Free lists keep same-size free memory slots for fast memory allocation
2630// routines
2631typedef struct kmp_free_list {
2632 void *th_free_list_self; // Self-allocated tasks free list
2633 void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
2634 // threads
2635 void *th_free_list_other; // Non-self free list (to be returned to owner's
2636 // sync list)
2637} kmp_free_list_t;
2638#endif
2639#if KMP_NESTED_HOT_TEAMS
2640// The hot teams array keeps hot teams and their sizes for a given thread. Hot
2641// teams are not put in the teams pool, and they don't put threads in the threads pool.
2642typedef struct kmp_hot_team_ptr {
2643 kmp_team_p *hot_team; // pointer to hot_team of given nesting level
2644 kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
2645} kmp_hot_team_ptr_t;
2646#endif
2647typedef struct kmp_teams_size {
2648 kmp_int32 nteams; // number of teams in a league
2649 kmp_int32 nth; // number of threads in each team of the league
2650} kmp_teams_size_t;
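// Editor's note (illustrative): e.g. "#pragma omp teams num_teams(4)
// thread_limit(8)" would record nteams == 4 and nth == 8 for the league.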
2651
2652// This struct stores a thread that acts as a "root" for a contention
2653// group. Contention groups are rooted at kmp_root threads, but also at
2654// each primary thread of each team created in the teams construct.
2655// This struct therefore also stores a thread_limit associated with
2656// that contention group, and a counter to track the number of threads
2657// active in that contention group. Each thread has a list of these: CG
2658// root threads have an entry in their list in which cg_root refers to
2659// the thread itself, whereas other workers in the CG will have a
2660// single entry whose cg_root points at their CG root. When a thread
2661// encounters a teams construct, it will add a new
2662// entry to the front of its list, because it now roots a new CG.
2663typedef struct kmp_cg_root {
2664 kmp_info_p *cg_root; // "root" thread for a contention group
2665 // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
2666 // thread_limit clause for teams primary threads
2667 kmp_int32 cg_thread_limit;
2668 kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
2669 struct kmp_cg_root *up; // pointer to higher level CG root in list
2670} kmp_cg_root_t;
2671
2672// OpenMP thread data structures
2673
2674typedef struct KMP_ALIGN_CACHE kmp_base_info {
2675 /* Start with the readonly data which is cache aligned and padded. This is
2676 written by the primary thread before the thread starts working. Uber
2677 masters may update themselves later. Usage does not consider serialized
2678 regions. */
2679 kmp_desc_t th_info;
2680 kmp_team_p *th_team; /* team we belong to */
2681 kmp_root_p *th_root; /* pointer to root of task hierarchy */
2682 kmp_info_p *th_next_pool; /* next available thread in the pool */
2683 kmp_disp_t *th_dispatch; /* thread's dispatch data */
2684 int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2685
2686 /* The following are cached from the team info structure */
2687 /* TODO use these in more places as determined to be needed via profiling */
2688 int th_team_nproc; /* number of threads in a team */
2689 kmp_info_p *th_team_master; /* the team's primary thread */
2690 int th_team_serialized; /* team is serialized */
2691 microtask_t th_teams_microtask; /* save entry address for teams construct */
2692 int th_teams_level; /* save initial level of teams construct */
2693/* it is 0 on device but may be any value on host */
2694
2695/* The blocktime info is copied from the team struct to the thread struct */
2696/* at the start of a barrier, and the values stored in the team are used */
2697/* at points in the code where the team struct is no longer guaranteed */
2698/* to exist (from the POV of worker threads). */
2699#if KMP_USE_MONITOR
2700 int th_team_bt_intervals;
2701 int th_team_bt_set;
2702#else
2703 kmp_uint64 th_team_bt_intervals;
2704#endif
2705
2706#if KMP_AFFINITY_SUPPORTED
2707 kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2708#endif
2709 omp_allocator_handle_t th_def_allocator; /* default allocator */
2710 /* The data set by the primary thread at reinit, then R/W by the worker */
2711 KMP_ALIGN_CACHE int
2712 th_set_nproc; /* if > 0, then only use this request for the next fork */
2713#if KMP_NESTED_HOT_TEAMS
2714 kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2715#endif
2716 kmp_proc_bind_t
2717 th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2718 kmp_teams_size_t
2719 th_teams_size; /* number of teams/threads in teams construct */
2720#if KMP_AFFINITY_SUPPORTED
2721 int th_current_place; /* place currently bound to */
2722 int th_new_place; /* place to bind to in par reg */
2723 int th_first_place; /* first place in partition */
2724 int th_last_place; /* last place in partition */
2725#endif
2726 int th_prev_level; /* previous level for affinity format */
2727 int th_prev_num_threads; /* previous num_threads for affinity format */
2728#if USE_ITT_BUILD
2729 kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2730 kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2731 kmp_uint64 th_frame_time; /* frame timestamp */
2732#endif /* USE_ITT_BUILD */
2733 kmp_local_t th_local;
2734 struct private_common *th_pri_head;
2735
2736 /* Now the data only used by the worker (after initial allocation) */
2737 /* TODO: the first serial team should actually be stored in the info_t
2738 structure. This will help reduce initial allocation overhead */
2739 KMP_ALIGN_CACHE kmp_team_p
2740 *th_serial_team; /*serialized team held in reserve*/
2741
2742#if OMPT_SUPPORT
2743 ompt_thread_info_t ompt_thread_info;
2744#endif
2745
2746 /* The following are also read by the primary thread during reinit */
2747 struct common_table *th_pri_common;
2748
2749 volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2750 /* while awaiting queuing lock acquire */
2751
2752 volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2753 flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc
2754
2755 ident_t *th_ident;
2756 unsigned th_x; // Random number generator data
2757 unsigned th_a; // Random number generator data
2758
2759 /* Tasking-related data for the thread */
2760 kmp_task_team_t *th_task_team; // Task team struct
2761 kmp_taskdata_t *th_current_task; // Innermost Task being executed
2762 kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2763 kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
2764 // at nested levels
2765 kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2766 kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2767 kmp_uint32 th_reap_state; // Non-zero indicates thread is not
2768 // tasking, thus safe to reap
2769
2770 /* More stuff for keeping track of active/sleeping threads (this part is
2771 written by the worker thread) */
2772 kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
2773 int th_active; // ! sleeping; 32 bits for TCR/TCW
2774 std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
2775 // 0 = not used in team; 1 = used in team;
2776 // 2 = transitioning to not used in team; 3 = transitioning to used in team
2777 struct cons_header *th_cons; // used for consistency check
2778#if KMP_USE_HIER_SCHED
2779 // used for hierarchical scheduling
2780 kmp_hier_private_bdata_t *th_hier_bar_data;
2781#endif
2782
2783 /* Add the synchronizing data which is cache aligned and padded. */
2784 KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
2785
2786 KMP_ALIGN_CACHE volatile kmp_int32
2787 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2788
2789#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
2790#define NUM_LISTS 4
2791 kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
2792// allocation routines
2793#endif
2794
2795#if KMP_OS_WINDOWS
2796 kmp_win32_cond_t th_suspend_cv;
2797 kmp_win32_mutex_t th_suspend_mx;
2798 std::atomic<int> th_suspend_init;
2799#endif
2800#if KMP_OS_UNIX
2801 kmp_cond_align_t th_suspend_cv;
2802 kmp_mutex_align_t th_suspend_mx;
2803 std::atomic<int> th_suspend_init_count;
2804#endif
2805
2806#if USE_ITT_BUILD
2807 kmp_itt_mark_t th_itt_mark_single;
2808// alignment ???
2809#endif /* USE_ITT_BUILD */
2810#if KMP_STATS_ENABLED
2811 kmp_stats_list *th_stats;
2812#endif
2813#if KMP_OS_UNIX
2814 std::atomic<bool> th_blocking;
2815#endif
2816 kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
2817} kmp_base_info_t;
2818
2819typedef union KMP_ALIGN_CACHE kmp_info {
2820 double th_align; /* use worst case alignment */
2821 char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
2822 kmp_base_info_t th;
2823} kmp_info_t;
2824
2825// OpenMP thread team data structures
2826
2827typedef struct kmp_base_data {
2828 volatile kmp_uint32 t_value;
2829} kmp_base_data_t;
2830
2831typedef union KMP_ALIGN_CACHE kmp_sleep_team {
2832 double dt_align; /* use worst case alignment */
2833 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2834 kmp_base_data_t dt;
2835} kmp_sleep_team_t;
2836
2837typedef union KMP_ALIGN_CACHE kmp_ordered_team {
2838 double dt_align; /* use worst case alignment */
2839 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2840 kmp_base_data_t dt;
2841} kmp_ordered_team_t;
2842
2843typedef int (*launch_t)(int gtid);
2844
2845/* Minimum number of ARGV entries to malloc if necessary */
2846#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
2847
2848// Set up how many argv pointers will fit in cache lines containing
2849// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
2850// larger value to put more space between the primary-write/worker-read section
2851// and the read/write-by-all section seems to buy more performance on EPCC PARALLEL.
2852#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2853#define KMP_INLINE_ARGV_BYTES \
2854 (4 * CACHE_LINE - \
2855 ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \
2856 sizeof(kmp_int16) + sizeof(kmp_uint32)) % \
2857 CACHE_LINE))
2858#else
2859#define KMP_INLINE_ARGV_BYTES \
2860 (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
2861#endif
2862#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
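// Editor's worked example (assuming CACHE_LINE == 64 and KMP_PTR_SKIP == 8
// on x86-64): the fixed fields take 3*8 + 2*4 + 2*1 + 2 + 4 = 40 bytes, so
// KMP_INLINE_ARGV_BYTES = 4*64 - (40 % 64) = 216 and
// KMP_INLINE_ARGV_ENTRIES = 216 / 8 = 27 inline argv slots.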
2863
2864typedef struct KMP_ALIGN_CACHE kmp_base_team {
2865 // Synchronization Data
2866 // ---------------------------------------------------------------------------
2867 KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
2868 kmp_balign_team_t t_bar[bs_last_barrier];
2869 std::atomic<int> t_construct; // count of single directive encountered by team
2870 char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
2871
2872 // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
2873 std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
2874 std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
2875
2876 // Primary thread only
2877 // ---------------------------------------------------------------------------
2878 KMP_ALIGN_CACHE int t_master_tid; // tid of primary thread in parent team
2879 int t_master_this_cons; // "this_construct" single counter of primary thread
2880 // in parent team
2881 ident_t *t_ident; // if volatile, have to change too much other crud to
2882 // volatile too
2883 kmp_team_p *t_parent; // parent team
2884 kmp_team_p *t_next_pool; // next free team in the team pool
2885 kmp_disp_t *t_dispatch; // thread's dispatch data
2886 kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
2887 kmp_proc_bind_t t_proc_bind; // bind type for par region
2888#if USE_ITT_BUILD
2889 kmp_uint64 t_region_time; // region begin timestamp
2890#endif /* USE_ITT_BUILD */
2891
2892 // Primary thread write, workers read
2893 // --------------------------------------------------------------------------
2894 KMP_ALIGN_CACHE void **t_argv;
2895 int t_argc;
2896 int t_nproc; // number of threads in team
2897 microtask_t t_pkfn;
2898 launch_t t_invoke; // procedure to launch the microtask
2899
2900#if OMPT_SUPPORT
2901 ompt_team_info_t ompt_team_info;
2902 ompt_lw_taskteam_t *ompt_serialized_team_info;
2903#endif
2904
2905#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2906 kmp_int8 t_fp_control_saved;
2907 kmp_int8 t_pad2b;
2908 kmp_int16 t_x87_fpu_control_word; // FP control regs
2909 kmp_uint32 t_mxcsr;
2910#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2911
2912 void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
2913
2914 KMP_ALIGN_CACHE kmp_info_t **t_threads;
2915 kmp_taskdata_t
2916 *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
2917 int t_level; // nested parallel level
2918
2919 KMP_ALIGN_CACHE int t_max_argc;
2920 int t_max_nproc; // max threads this team can handle (dynamically expandable)
2921 int t_serialized; // levels deep of serialized teams
2922 dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
2923 int t_id; // team's id, assigned by debugger.
2924 int t_active_level; // nested active parallel level
2925 kmp_r_sched_t t_sched; // run-time schedule for the team
2926#if KMP_AFFINITY_SUPPORTED
2927 int t_first_place; // first & last place in parent thread's partition.
2928 int t_last_place; // Restore these values to primary thread after par region.
2929#endif // KMP_AFFINITY_SUPPORTED
2930 int t_display_affinity;
2931 int t_size_changed; // was team size changed? 0: no, 1: yes, -1: changed via
2932 // omp_set_num_threads() call
2933 omp_allocator_handle_t t_def_allocator; /* default allocator */
2934
2935// Read/write by workers as well
2936#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
2937 // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
2938 // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
2939 // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
2940 // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
2941 char dummy_padding[1024];
2942#endif
2943 // Internal control stack for additional nested teams.
2944 KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
2945 // for SERIALIZED teams nested 2 or more levels deep
2946 // typed flag to store request state of cancellation
2947 std::atomic<kmp_int32> t_cancel_request;
2948 int t_master_active; // save on fork, restore on join
2949 void *t_copypriv_data; // team specific pointer to copyprivate data array
2950#if KMP_OS_WINDOWS
2951 std::atomic<kmp_uint32> t_copyin_counter;
2952#endif
2953#if USE_ITT_BUILD
2954 void *t_stack_id; // team specific stack stitching id (for ittnotify)
2955#endif /* USE_ITT_BUILD */
2956 distributedBarrier *b; // Distributed barrier data associated with team
2957} kmp_base_team_t;
2958
2959union KMP_ALIGN_CACHE kmp_team {
2960 kmp_base_team_t t;
2961 double t_align; /* use worst case alignment */
2962 char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
2963};
2964
2965typedef union KMP_ALIGN_CACHE kmp_time_global {
2966 double dt_align; /* use worst case alignment */
2967 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2968 kmp_base_data_t dt;
2969} kmp_time_global_t;
2970
2971typedef struct kmp_base_global {
2972 /* cache-aligned */
2973 kmp_time_global_t g_time;
2974
2975 /* non cache-aligned */
2976 volatile int g_abort;
2977 volatile int g_done;
2978
2979 int g_dynamic;
2980 enum dynamic_mode g_dynamic_mode;
2981} kmp_base_global_t;
2982
2983typedef union KMP_ALIGN_CACHE kmp_global {
2984 kmp_base_global_t g;
2985 double g_align; /* use worst case alignment */
2986 char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
2987} kmp_global_t;
2988
2989typedef struct kmp_base_root {
2990 // TODO: GEH - combine r_active with r_in_parallel, then r_active ==
2991 // (r_in_parallel >= 0)
2992 // TODO: GEH - then replace r_active with t_active_levels if we can, to
2993 // reduce the synch overhead of keeping r_active
2994 volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
2995 // keeps a count of active parallel regions per root
2996 std::atomic<int> r_in_parallel;
2997 // GEH: This is misnamed, should be r_active_levels
2998 kmp_team_t *r_root_team;
2999 kmp_team_t *r_hot_team;
3000 kmp_info_t *r_uber_thread;
3001 kmp_lock_t r_begin_lock;
3002 volatile int r_begin;
3003 int r_blocktime; /* blocktime for this root and descendants */
3004#if KMP_AFFINITY_SUPPORTED
3005 int r_affinity_assigned;
3006#endif // KMP_AFFINITY_SUPPORTED
3007} kmp_base_root_t;
3008
3009typedef union KMP_ALIGN_CACHE kmp_root {
3010 kmp_base_root_t r;
3011 double r_align; /* use worst case alignment */
3012 char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
3013} kmp_root_t;
3014
3015struct fortran_inx_info {
3016 kmp_int32 data;
3017};
3018
3019// This list type exists to hold old __kmp_threads arrays so that
3020// old references to them may complete while reallocation takes place when
3021// expanding the array. The items in this list are kept alive until library
3022// shutdown.
3023typedef struct kmp_old_threads_list_t {
3024 kmp_info_t **threads;
3025 struct kmp_old_threads_list_t *next;
3026} kmp_old_threads_list_t;
3027
3028/* ------------------------------------------------------------------------ */
3029
3030extern int __kmp_settings;
3031extern int __kmp_duplicate_library_ok;
3032#if USE_ITT_BUILD
3033extern int __kmp_forkjoin_frames;
3034extern int __kmp_forkjoin_frames_mode;
3035#endif
3036extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
3037extern int __kmp_determ_red;
3038
3039#ifdef KMP_DEBUG
3040extern int kmp_a_debug;
3041extern int kmp_b_debug;
3042extern int kmp_c_debug;
3043extern int kmp_d_debug;
3044extern int kmp_e_debug;
3045extern int kmp_f_debug;
3046#endif /* KMP_DEBUG */
3047
3048/* For debug information logging using rotating buffer */
3049#define KMP_DEBUG_BUF_LINES_INIT 512
3050#define KMP_DEBUG_BUF_LINES_MIN 1
3051
3052#define KMP_DEBUG_BUF_CHARS_INIT 128
3053#define KMP_DEBUG_BUF_CHARS_MIN 2
3054
3055extern int
3056 __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
3057extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
3058extern int
3059 __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
3060extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
3061 entry pointer */
3062
3063extern char *__kmp_debug_buffer; /* Debug buffer itself */
3064extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
3065 printed in buffer so far */
3066extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
3067 recommended in warnings */
3068/* end rotating debug buffer */
3069
3070#ifdef KMP_DEBUG
3071extern int __kmp_par_range; /* +1 => only go par for constructs in range */
3072
3073#define KMP_PAR_RANGE_ROUTINE_LEN 1024
3074extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
3075#define KMP_PAR_RANGE_FILENAME_LEN 1024
3076extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
3077extern int __kmp_par_range_lb;
3078extern int __kmp_par_range_ub;
3079#endif
3080
3081/* For printing out dynamic storage map for threads and teams */
3082extern int
3083 __kmp_storage_map; /* True means print storage map for threads and teams */
3084extern int __kmp_storage_map_verbose; /* True means storage map includes
3085 placement info */
3086extern int __kmp_storage_map_verbose_specified;
3087
3088#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3089extern kmp_cpuinfo_t __kmp_cpuinfo;
3090static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
3091#elif KMP_OS_DARWIN && KMP_ARCH_AARCH64
3092static inline bool __kmp_is_hybrid_cpu() { return true; }
3093#else
3094static inline bool __kmp_is_hybrid_cpu() { return false; }
3095#endif
3096
3097extern volatile int __kmp_init_serial;
3098extern volatile int __kmp_init_gtid;
3099extern volatile int __kmp_init_common;
3100extern volatile int __kmp_need_register_serial;
3101extern volatile int __kmp_init_middle;
3102extern volatile int __kmp_init_parallel;
3103#if KMP_USE_MONITOR
3104extern volatile int __kmp_init_monitor;
3105#endif
3106extern volatile int __kmp_init_user_locks;
3107extern volatile int __kmp_init_hidden_helper_threads;
3108extern int __kmp_init_counter;
3109extern int __kmp_root_counter;
3110extern int __kmp_version;
3111
3112/* list of address of allocated caches for commons */
3113extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
3114
3115/* Barrier algorithm types and options */
3116extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
3117extern kmp_uint32 __kmp_barrier_release_bb_dflt;
3118extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
3119extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
3120extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
3121extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
3122extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
3123extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
3124extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
3125extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
3126extern char const *__kmp_barrier_type_name[bs_last_barrier];
3127extern char const *__kmp_barrier_pattern_name[bp_last_bar];
3128
3129/* Global Locks */
3130extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
3131extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
3132extern kmp_bootstrap_lock_t __kmp_task_team_lock;
3133extern kmp_bootstrap_lock_t
3134 __kmp_exit_lock; /* exit() is not always thread-safe */
3135#if KMP_USE_MONITOR
3136extern kmp_bootstrap_lock_t
3137 __kmp_monitor_lock; /* control monitor thread creation */
3138#endif
3139extern kmp_bootstrap_lock_t
3140 __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
3141 __kmp_threads expansion to co-exist */
3142
3143extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
3144extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
3145extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
3146
3147extern enum library_type __kmp_library;
3148
3149extern enum sched_type __kmp_sched; /* default runtime scheduling */
3150extern enum sched_type __kmp_static; /* default static scheduling method */
3151extern enum sched_type __kmp_guided; /* default guided scheduling method */
3152extern enum sched_type __kmp_auto; /* default auto scheduling method */
3153extern int __kmp_chunk; /* default runtime chunk size */
3154extern int __kmp_force_monotonic; /* whether monotonic scheduling forced */
3155
3156extern size_t __kmp_stksize; /* stack size per thread */
3157#if KMP_USE_MONITOR
3158extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
3159#endif
3160extern size_t __kmp_stkoffset; /* stack offset per thread */
3161extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
3162
3163extern size_t
3164 __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
3165extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
3166extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
3167extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
3168extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
3169extern int __kmp_generate_warnings; /* should we issue warnings? */
3170extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
3171
3172#ifdef DEBUG_SUSPEND
3173extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
3174#endif
3175
3176extern kmp_int32 __kmp_use_yield;
3177extern kmp_int32 __kmp_use_yield_exp_set;
3178extern kmp_uint32 __kmp_yield_init;
3179extern kmp_uint32 __kmp_yield_next;
3180extern kmp_uint64 __kmp_pause_init;
3181
3182/* ------------------------------------------------------------------------- */
3183extern int __kmp_allThreadsSpecified;
3184
3185extern size_t __kmp_align_alloc;
3186/* following data protected by initialization routines */
3187extern int __kmp_xproc; /* number of processors in the system */
3188extern int __kmp_avail_proc; /* number of processors available to the process */
3189extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
3190extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
3191// maximum total number of concurrently-existing threads on device
3192extern int __kmp_max_nth;
3193// maximum total number of concurrently-existing threads in a contention group
3194extern int __kmp_cg_max_nth;
3195extern int __kmp_teams_max_nth; // max threads used in a teams construct
3196extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
3197 __kmp_root */
3198extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
3199 region a la OMP_NUM_THREADS */
3200extern int __kmp_dflt_team_nth_ub; /* upper bound on __kmp_dflt_team_nth,
3201 determined at serial initialization */
3202extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
3203 used (fixed) */
3204extern int __kmp_tp_cached; /* whether threadprivate cache has been created
3205 (__kmpc_threadprivate_cached()) */
3206extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
3207 blocking (env setting) */
3208extern bool __kmp_wpolicy_passive; /* explicitly set passive wait policy */
3209#if KMP_USE_MONITOR
3210extern int
3211 __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
3212extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
3213 blocking */
3214#endif
3215#ifdef KMP_ADJUST_BLOCKTIME
3216extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
3217#endif /* KMP_ADJUST_BLOCKTIME */
3218#ifdef KMP_DFLT_NTH_CORES
3219extern int __kmp_ncores; /* Total number of cores for threads placement */
3220#endif
3221/* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
3222extern int __kmp_abort_delay;
3223
3224extern int __kmp_need_register_atfork_specified;
3225extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
3226 to install fork handler */
3227extern int __kmp_gtid_mode; /* Method of getting gtid, values:
3228 0 - not set, will be set at runtime
3229 1 - using stack search
3230 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
3231 X*) or TlsGetValue(Windows* OS))
3232 3 - static TLS (__declspec(thread) __kmp_gtid),
3233 Linux* OS .so only. */
3234extern int
3235 __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
3236#ifdef KMP_TDATA_GTID
3237extern KMP_THREAD_LOCAL int __kmp_gtid;
3238#endif
3239extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
3240extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
3241#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3242extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
3243extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
3244extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
3245#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3246
3247// max_active_levels for nested parallelism enabled by default via
3248// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
3249extern int __kmp_dflt_max_active_levels;
3250// Indicates whether value of __kmp_dflt_max_active_levels was already
3251// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
3252extern bool __kmp_dflt_max_active_levels_set;
3253extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
3254 concurrent execution per team */
3255#if KMP_NESTED_HOT_TEAMS
3256extern int __kmp_hot_teams_mode;
3257extern int __kmp_hot_teams_max_level;
3258#endif
3259
3260#if KMP_OS_LINUX
3261extern enum clock_function_type __kmp_clock_function;
3262extern int __kmp_clock_function_param;
3263#endif /* KMP_OS_LINUX */
3264
3265#if KMP_MIC_SUPPORTED
3266extern enum mic_type __kmp_mic_type;
3267#endif
3268
3269#ifdef USE_LOAD_BALANCE
3270extern double __kmp_load_balance_interval; // load balance algorithm interval
3271#endif /* USE_LOAD_BALANCE */
3272
3273// OpenMP 3.1 - Nested num threads array
3274typedef struct kmp_nested_nthreads_t {
3275 int *nth;
3276 int size;
3277 int used;
3278} kmp_nested_nthreads_t;
3279
3280extern kmp_nested_nthreads_t __kmp_nested_nth;
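
// Usage sketch (illustrative, not from this header): a nesting list such as
// OMP_NUM_THREADS="4,3,2" would populate this structure roughly as follows;
// the variable names below are hypothetical:
//
//   static int levels[] = {4, 3, 2}; // threads at nesting depths 0, 1, 2
//   kmp_nested_nthreads_t nn = {levels, /*size=*/3, /*used=*/3};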
3281
3282#if KMP_USE_ADAPTIVE_LOCKS
3283
3284// Parameters for the speculative lock backoff system.
3285struct kmp_adaptive_backoff_params_t {
3286 // Number of soft retries before it counts as a hard retry.
3287 kmp_uint32 max_soft_retries;
3288 // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to
3289 // the right
3290 kmp_uint32 max_badness;
3291};
3292
3293extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
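
// Sketch of the badness progression described above; the real update logic
// lives in the lock implementation, so treat this helper as illustrative
// only:
//
//   static inline kmp_uint32 example_step_badness(kmp_uint32 badness,
//                                                 kmp_uint32 max_badness) {
//     kmp_uint32 next = (badness << 1) | 1; // 0 -> 1 -> 3 -> 7 -> 15 -> ...
//     return next > max_badness ? badness : next;
//   }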
3294
3295#if KMP_DEBUG_ADAPTIVE_LOCKS
3296extern const char *__kmp_speculative_statsfile;
3297#endif
3298
3299#endif // KMP_USE_ADAPTIVE_LOCKS
3300
3301extern int __kmp_display_env; /* TRUE or FALSE */
3302extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
3303extern int __kmp_omp_cancellation; /* TRUE or FALSE */
3304extern int __kmp_nteams;
3305extern int __kmp_teams_thread_limit;
3306
3307/* ------------------------------------------------------------------------- */
3308
3309/* the following are protected by the fork/join lock */
3310/* write: lock read: anytime */
3311extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
3312/* Holds old arrays of __kmp_threads until library shutdown */
3313extern kmp_old_threads_list_t *__kmp_old_threads_list;
3314/* read/write: lock */
3315extern volatile kmp_team_t *__kmp_team_pool;
3316extern volatile kmp_info_t *__kmp_thread_pool;
3317extern kmp_info_t *__kmp_thread_pool_insert_pt;
3318
3319// total num threads reachable from some root thread including all root threads
3320extern volatile int __kmp_nth;
3321/* total number of threads reachable from some root thread including all root
3322 threads, and those in the thread pool */
3323extern volatile int __kmp_all_nth;
3324extern std::atomic<int> __kmp_thread_pool_active_nth;
3325
3326extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
3327/* end data protected by fork/join lock */
3328/* ------------------------------------------------------------------------- */
3329
3330#define __kmp_get_gtid() __kmp_get_global_thread_id()
3331#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
3332#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
3333#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
3334#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
3335
3336// AT: Which way is correct?
3337// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
3338// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
3339#define __kmp_get_team_num_threads(gtid) \
3340 (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
3341
3342static inline bool KMP_UBER_GTID(int gtid) {
3343 KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
3344 KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
3345 return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
3346 __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
3347}
3348
3349static inline int __kmp_tid_from_gtid(int gtid) {
3350 KMP_DEBUG_ASSERT(gtid >= 0);
3351 return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
3352}
3353
3354static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
3355 KMP_DEBUG_ASSERT(tid >= 0 && team);
3356 return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
3357}
3358
3359static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
3360 KMP_DEBUG_ASSERT(thr);
3361 return thr->th.th_info.ds.ds_gtid;
3362}
3363
3364static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
3365 KMP_DEBUG_ASSERT(gtid >= 0);
3366 return __kmp_threads[gtid];
3367}
3368
3369static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
3370 KMP_DEBUG_ASSERT(gtid >= 0);
3371 return __kmp_threads[gtid]->th.th_team;
3372}
3373
3374static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) {
3375 if (UNLIKELY(gtid < 0 || gtid >= __kmp_threads_capacity))
3376 KMP_FATAL(ThreadIdentInvalid);
3377}
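
// How the accessors above compose; the wrapping function is hypothetical,
// not part of this header:
//
//   static void example_report_identity() {
//     int gtid = __kmp_entry_gtid(); // registers the thread if necessary
//     __kmp_assert_valid_gtid(gtid);
//     kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
//     kmp_team_t *team = __kmp_team_from_gtid(gtid);
//     int tid = __kmp_tid_from_gtid(gtid); // rank within the current team
//     (void)thr; (void)team; (void)tid;
//   }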
3378
3379#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
3380extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT
3381extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled
3382extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
3383extern int __kmp_mwait_hints; // Hints to pass in to mwait
3384#endif
3385
3386#if KMP_HAVE_UMWAIT
3387extern int __kmp_waitpkg_enabled; // Runtime check if waitpkg exists
3388extern int __kmp_tpause_state; // 0 (default), 1=C0.1, 2=C0.2; from KMP_TPAUSE
3389extern int __kmp_tpause_hint; // 1=C0.1 (default), 0=C0.2; from KMP_TPAUSE
3390extern int __kmp_tpause_enabled; // 0 (default), 1 (KMP_TPAUSE is non-zero)
3391#endif
3392
3393/* ------------------------------------------------------------------------- */
3394
3395extern kmp_global_t __kmp_global; /* global status */
3396
3397extern kmp_info_t __kmp_monitor;
3398// For Debugging Support Library
3399extern std::atomic<kmp_int32> __kmp_team_counter;
3400// For Debugging Support Library
3401extern std::atomic<kmp_int32> __kmp_task_counter;
3402
3403#if USE_DEBUGGER
3404#define _KMP_GEN_ID(counter) \
3405 (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
3406#else
3407#define _KMP_GEN_ID(counter) (~0)
3408#endif /* USE_DEBUGGER */
3409
3410#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
3411#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
3412
3413/* ------------------------------------------------------------------------ */
3414
3415extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
3416 size_t size, char const *format, ...);
3417
3418extern void __kmp_serial_initialize(void);
3419extern void __kmp_middle_initialize(void);
3420extern void __kmp_parallel_initialize(void);
3421
3422extern void __kmp_internal_begin(void);
3423extern void __kmp_internal_end_library(int gtid);
3424extern void __kmp_internal_end_thread(int gtid);
3425extern void __kmp_internal_end_atexit(void);
3426extern void __kmp_internal_end_dtor(void);
3427extern void __kmp_internal_end_dest(void *);
3428
3429extern int __kmp_register_root(int initial_thread);
3430extern void __kmp_unregister_root(int gtid);
3431extern void __kmp_unregister_library(void); // called by __kmp_internal_end()
3432
3433extern int __kmp_ignore_mppbeg(void);
3434extern int __kmp_ignore_mppend(void);
3435
3436extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
3437extern void __kmp_exit_single(int gtid);
3438
3439extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3440extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3441
3442#ifdef USE_LOAD_BALANCE
3443extern int __kmp_get_load_balance(int);
3444#endif
3445
3446extern int __kmp_get_global_thread_id(void);
3447extern int __kmp_get_global_thread_id_reg(void);
3448extern void __kmp_exit_thread(int exit_status);
3449extern void __kmp_abort(char const *format, ...);
3450extern void __kmp_abort_thread(void);
3451KMP_NORETURN extern void __kmp_abort_process(void);
3452extern void __kmp_warn(char const *format, ...);
3453
3454extern void __kmp_set_num_threads(int new_nth, int gtid);
3455
3456// Returns current thread (pointer to kmp_info_t). Current thread *must* be
3457// registered.
3458static inline kmp_info_t *__kmp_entry_thread() {
3459 int gtid = __kmp_entry_gtid();
3460
3461 return __kmp_threads[gtid];
3462}
3463
3464extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
3465extern int __kmp_get_max_active_levels(int gtid);
3466extern int __kmp_get_ancestor_thread_num(int gtid, int level);
3467extern int __kmp_get_team_size(int gtid, int level);
3468extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
3469extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
3470
3471extern unsigned short __kmp_get_random(kmp_info_t *thread);
3472extern void __kmp_init_random(kmp_info_t *thread);
3473
3474extern kmp_r_sched_t __kmp_get_schedule_global(void);
3475extern void __kmp_adjust_num_threads(int new_nproc);
3476extern void __kmp_check_stksize(size_t *val);
3477
3478extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
3479extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
3480extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
3481#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
3482#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
3483#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
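
// The macros above splice the caller's source location into the allocator
// (KMP_SRC_LOC_CURR supplies __FILE__/__LINE__ arguments in debug builds);
// a minimal, hypothetical use:
//
//   int *scratch = (int *)__kmp_allocate(16 * sizeof(int));
//   /* ... use scratch ... */
//   __kmp_free(scratch);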
3484
3485#if USE_FAST_MEMORY
3486extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
3487 size_t size KMP_SRC_LOC_DECL);
3488extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
3489extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
3490extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
3491#define __kmp_fast_allocate(this_thr, size) \
3492 ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
3493#define __kmp_fast_free(this_thr, ptr) \
3494 ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
3495#endif
3496
3497extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
3498extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
3499 size_t elsize KMP_SRC_LOC_DECL);
3500extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
3501 size_t size KMP_SRC_LOC_DECL);
3502extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
3503#define __kmp_thread_malloc(th, size) \
3504 ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
3505#define __kmp_thread_calloc(th, nelem, elsize) \
3506 ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
3507#define __kmp_thread_realloc(th, ptr, size) \
3508 ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
3509#define __kmp_thread_free(th, ptr) \
3510 ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
3511
3512extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
3513
3514extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
3515 kmp_proc_bind_t proc_bind);
3516extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
3517 int num_threads);
3518extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
3519 int num_teams_ub, int num_threads);
3520
3521extern void __kmp_yield();
3522
3523extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3524 enum sched_type schedule, kmp_int32 lb,
3525 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
3526extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3527 enum sched_type schedule, kmp_uint32 lb,
3528 kmp_uint32 ub, kmp_int32 st,
3529 kmp_int32 chunk);
3530extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3531 enum sched_type schedule, kmp_int64 lb,
3532 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
3533extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3534 enum sched_type schedule, kmp_uint64 lb,
3535 kmp_uint64 ub, kmp_int64 st,
3536 kmp_int64 chunk);
3537
3538extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
3539 kmp_int32 *p_last, kmp_int32 *p_lb,
3540 kmp_int32 *p_ub, kmp_int32 *p_st);
3541extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
3542 kmp_int32 *p_last, kmp_uint32 *p_lb,
3543 kmp_uint32 *p_ub, kmp_int32 *p_st);
3544extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
3545 kmp_int32 *p_last, kmp_int64 *p_lb,
3546 kmp_int64 *p_ub, kmp_int64 *p_st);
3547extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
3548 kmp_int32 *p_last, kmp_uint64 *p_lb,
3549 kmp_uint64 *p_ub, kmp_int64 *p_st);
3550
3551extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
3552extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
3553extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
3554extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
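
// Sketch of how a compiler might lower "#pragma omp for schedule(dynamic, 4)"
// onto the dispatch entry points above; loc, gtid, n, and loop_body are
// assumed to be in scope and are illustrative:
//
//   __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked,
//                          /*lb=*/0, /*ub=*/n - 1, /*st=*/1, /*chunk=*/4);
//   kmp_int32 last, lb, ub, st;
//   while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
//     for (kmp_int32 i = lb; i <= ub; i += st)
//       loop_body(i);
//   }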
3555
3556#ifdef KMP_GOMP_COMPAT
3557
3558extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3559 enum sched_type schedule, kmp_int32 lb,
3560 kmp_int32 ub, kmp_int32 st,
3561 kmp_int32 chunk, int push_ws);
3562extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3563 enum sched_type schedule, kmp_uint32 lb,
3564 kmp_uint32 ub, kmp_int32 st,
3565 kmp_int32 chunk, int push_ws);
3566extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3567 enum sched_type schedule, kmp_int64 lb,
3568 kmp_int64 ub, kmp_int64 st,
3569 kmp_int64 chunk, int push_ws);
3570extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3571 enum sched_type schedule, kmp_uint64 lb,
3572 kmp_uint64 ub, kmp_int64 st,
3573 kmp_int64 chunk, int push_ws);
3574extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
3575extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
3576extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
3577extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
3578
3579#endif /* KMP_GOMP_COMPAT */
3580
3581extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
3582extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
3583extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
3584extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
3585extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
3586extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
3587 kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
3588 void *obj);
3589extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
3590 kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
3591
3592extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
3593 int final_spin
3594#if USE_ITT_BUILD
3595 ,
3596 void *itt_sync_obj
3597#endif
3598);
3599extern void __kmp_release_64(kmp_flag_64<> *flag);
3600
3601extern void __kmp_infinite_loop(void);
3602
3603extern void __kmp_cleanup(void);
3604
3605#if KMP_HANDLE_SIGNALS
3606extern int __kmp_handle_signals;
3607extern void __kmp_install_signals(int parallel_init);
3608extern void __kmp_remove_signals(void);
3609#endif
3610
3611extern void __kmp_clear_system_time(void);
3612extern void __kmp_read_system_time(double *delta);
3613
3614extern void __kmp_check_stack_overlap(kmp_info_t *thr);
3615
3616extern void __kmp_expand_host_name(char *buffer, size_t size);
3617extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
3618
3619#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && KMP_ARCH_AARCH64)
3620extern void
3621__kmp_initialize_system_tick(void); /* Initialize timer tick value */
3622#endif
3623
3624extern void
3625__kmp_runtime_initialize(void); /* machine specific initialization */
3626extern void __kmp_runtime_destroy(void);
3627
3628#if KMP_AFFINITY_SUPPORTED
3629extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
3630 kmp_affin_mask_t *mask);
3631extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
3632 kmp_affin_mask_t *mask);
3633extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
3634extern void __kmp_affinity_uninitialize(void);
3635extern void __kmp_affinity_set_init_mask(
3636 int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
3637extern void __kmp_affinity_set_place(int gtid);
3638extern void __kmp_affinity_determine_capable(const char *env_var);
3639extern int __kmp_aux_set_affinity(void **mask);
3640extern int __kmp_aux_get_affinity(void **mask);
3641extern int __kmp_aux_get_affinity_max_proc();
3642extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
3643extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
3644extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
3645extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
3646#if KMP_OS_LINUX || KMP_OS_FREEBSD
3647extern int kmp_set_thread_affinity_mask_initial(void);
3648#endif
3649static inline void __kmp_assign_root_init_mask() {
3650 int gtid = __kmp_entry_gtid();
3651 kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
3652 if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
3653 __kmp_affinity_set_init_mask(gtid, TRUE);
3654 r->r.r_affinity_assigned = TRUE;
3655 }
3656}
3657static inline void __kmp_reset_root_init_mask(int gtid) {
3658 if (!KMP_AFFINITY_CAPABLE())
3659 return;
3660 kmp_info_t *th = __kmp_threads[gtid];
3661 kmp_root_t *r = th->th.th_root;
3662 if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) {
3663 __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
3664 KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask);
3665 r->r.r_affinity_assigned = FALSE;
3666 }
3667}
3668#else /* KMP_AFFINITY_SUPPORTED */
3669#define __kmp_assign_root_init_mask() /* Nothing */
3670static inline void __kmp_reset_root_init_mask(int gtid) {}
3671#endif /* KMP_AFFINITY_SUPPORTED */
3672// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
3673// format string is for affinity, so platforms that do not support
3674// affinity can still use the other fields, e.g., %n for num_threads
3675extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
3676 kmp_str_buf_t *buffer);
3677extern void __kmp_aux_display_affinity(int gtid, const char *format);
3678
3679extern void __kmp_cleanup_hierarchy();
3680extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
3681
3682#if KMP_USE_FUTEX
3683
3684extern int __kmp_futex_determine_capable(void);
3685
3686#endif // KMP_USE_FUTEX
3687
3688extern void __kmp_gtid_set_specific(int gtid);
3689extern int __kmp_gtid_get_specific(void);
3690
3691extern double __kmp_read_cpu_time(void);
3692
3693extern int __kmp_read_system_info(struct kmp_sys_info *info);
3694
3695#if KMP_USE_MONITOR
3696extern void __kmp_create_monitor(kmp_info_t *th);
3697#endif
3698
3699extern void *__kmp_launch_thread(kmp_info_t *thr);
3700
3701extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
3702
3703#if KMP_OS_WINDOWS
3704extern int __kmp_still_running(kmp_info_t *th);
3705extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
3706extern void __kmp_free_handle(kmp_thread_t tHandle);
3707#endif
3708
3709#if KMP_USE_MONITOR
3710extern void __kmp_reap_monitor(kmp_info_t *th);
3711#endif
3712extern void __kmp_reap_worker(kmp_info_t *th);
3713extern void __kmp_terminate_thread(int gtid);
3714
3715extern int __kmp_try_suspend_mx(kmp_info_t *th);
3716extern void __kmp_lock_suspend_mx(kmp_info_t *th);
3717extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
3718
3719extern void __kmp_elapsed(double *);
3720extern void __kmp_elapsed_tick(double *);
3721
3722extern void __kmp_enable(int old_state);
3723extern void __kmp_disable(int *old_state);
3724
3725extern void __kmp_thread_sleep(int millis);
3726
3727extern void __kmp_common_initialize(void);
3728extern void __kmp_common_destroy(void);
3729extern void __kmp_common_destroy_gtid(int gtid);
3730
3731#if KMP_OS_UNIX
3732extern void __kmp_register_atfork(void);
3733#endif
3734extern void __kmp_suspend_initialize(void);
3735extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
3736extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
3737
3738extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
3739 int tid);
3740extern kmp_team_t *
3741__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
3742#if OMPT_SUPPORT
3743 ompt_data_t ompt_parallel_data,
3744#endif
3745 kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
3746 int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
3747extern void __kmp_free_thread(kmp_info_t *);
3748extern void __kmp_free_team(kmp_root_t *,
3749 kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
3750extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
3751
3752/* ------------------------------------------------------------------------ */
3753
3754extern void __kmp_initialize_bget(kmp_info_t *th);
3755extern void __kmp_finalize_bget(kmp_info_t *th);
3756
3757KMP_EXPORT void *kmpc_malloc(size_t size);
3758KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment);
3759KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize);
3760KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size);
3761KMP_EXPORT void kmpc_free(void *ptr);
3762
3763/* declarations for internal use */
3764
3765extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
3766 size_t reduce_size, void *reduce_data,
3767 void (*reduce)(void *, void *));
3768extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
3769extern int __kmp_barrier_gomp_cancel(int gtid);
3770
3771/*!
3772 * Tell the fork call which compiler generated the fork call, and therefore
3773 * how to deal with the call.
3774 */
3775enum fork_context_e {
3776  fork_context_gnu, /**< Called from GNU generated code, so must not invoke
3777                       the microtask internally. */
3778  fork_context_intel, /**< Called from Intel generated code. */
3779  fork_context_last
3780};
3781extern int __kmp_fork_call(ident_t *loc, int gtid,
3782 enum fork_context_e fork_context, kmp_int32 argc,
3783 microtask_t microtask, launch_t invoker,
3784 kmp_va_list ap);
3785
3786extern void __kmp_join_call(ident_t *loc, int gtid
3787#if OMPT_SUPPORT
3788 ,
3789 enum fork_context_e fork_context
3790#endif
3791 ,
3792 int exit_teams = 0);
3793
3794extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3795extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
3796extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
3797extern int __kmp_invoke_task_func(int gtid);
3798extern void __kmp_run_before_invoked_task(int gtid, int tid,
3799 kmp_info_t *this_thr,
3800 kmp_team_t *team);
3801extern void __kmp_run_after_invoked_task(int gtid, int tid,
3802 kmp_info_t *this_thr,
3803 kmp_team_t *team);
3804
3805// should never have been exported
3806KMP_EXPORT int __kmpc_invoke_task_func(int gtid);
3807extern int __kmp_invoke_teams_master(int gtid);
3808extern void __kmp_teams_master(int gtid);
3809extern int __kmp_aux_get_team_num();
3810extern int __kmp_aux_get_num_teams();
3811extern void __kmp_save_internal_controls(kmp_info_t *thread);
3812extern void __kmp_user_set_library(enum library_type arg);
3813extern void __kmp_aux_set_library(enum library_type arg);
3814extern void __kmp_aux_set_stacksize(size_t arg);
3815extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
3816extern void __kmp_aux_set_defaults(char const *str, size_t len);
3817
3818/* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
3819void kmpc_set_blocktime(int arg);
3820void ompc_set_nested(int flag);
3821void ompc_set_dynamic(int flag);
3822void ompc_set_num_threads(int arg);
3823
3824extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
3825 kmp_team_t *team, int tid);
3826extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
3827extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3828 kmp_tasking_flags_t *flags,
3829 size_t sizeof_kmp_task_t,
3830 size_t sizeof_shareds,
3831 kmp_routine_entry_t task_entry);
3832extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
3833 kmp_team_t *team, int tid,
3834 int set_curr_task);
3835extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
3836extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
3837
3838extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3839 int gtid,
3840 kmp_task_t *task);
3841extern void __kmp_fulfill_event(kmp_event_t *event);
3842
3843extern void __kmp_free_task_team(kmp_info_t *thread,
3844 kmp_task_team_t *task_team);
3845extern void __kmp_reap_task_teams(void);
3846extern void __kmp_wait_to_unref_task_teams(void);
3847extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
3848 int always);
3849extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
3850extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
3851#if USE_ITT_BUILD
3852 ,
3853 void *itt_sync_obj
3854#endif /* USE_ITT_BUILD */
3855 ,
3856 int wait = 1);
3857extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
3858 int gtid);
3859
3860extern int __kmp_is_address_mapped(void *addr);
3861extern kmp_uint64 __kmp_hardware_timestamp(void);
3862
3863#if KMP_OS_UNIX
3864extern int __kmp_read_from_file(char const *path, char const *format, ...);
3865#endif
3866
3867/* ------------------------------------------------------------------------ */
3868//
3869// Assembly routines that have no compiler intrinsic replacement
3870//
3871
3872extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
3873 void *argv[]
3874#if OMPT_SUPPORT
3875 ,
3876 void **exit_frame_ptr
3877#endif
3878);
3879
3880/* ------------------------------------------------------------------------ */
3881
3882KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags);
3883KMP_EXPORT void __kmpc_end(ident_t *);
3884
3885KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data,
3886 kmpc_ctor_vec ctor,
3887 kmpc_cctor_vec cctor,
3888 kmpc_dtor_vec dtor,
3889 size_t vector_length);
3890KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data,
3891 kmpc_ctor ctor, kmpc_cctor cctor,
3892 kmpc_dtor dtor);
3893KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
3894 void *data, size_t size);
3895
3896KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *);
3897KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *);
3898KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *);
3899KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *);
3900
3901KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *);
3902KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
3903 kmpc_micro microtask, ...);
3904
3905KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
3906KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
3907
3908KMP_EXPORT void __kmpc_flush(ident_t *);
3909KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
3910KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
3911KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
3912KMP_EXPORT kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid,
3913 kmp_int32 filter);
3914KMP_EXPORT void __kmpc_end_masked(ident_t *, kmp_int32 global_tid);
3915KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
3916KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
3917KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
3918 kmp_critical_name *);
3919KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
3920 kmp_critical_name *);
3921KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
3922 kmp_critical_name *, uint32_t hint);
3923
3924KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
3925KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
3926
3927KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
3928 kmp_int32 global_tid);
3929
3930KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
3931KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
3932
3933KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
3934KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
3935 kmp_int32 numberOfSections);
3936KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
3937
3938KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
3939 kmp_int32 schedtype, kmp_int32 *plastiter,
3940 kmp_int *plower, kmp_int *pupper,
3941 kmp_int *pstride, kmp_int incr,
3942 kmp_int chunk);
3943
3944KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
3945
3946KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
3947 size_t cpy_size, void *cpy_data,
3948 void (*cpy_func)(void *, void *),
3949 kmp_int32 didit);
3950
3951KMP_EXPORT void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
3952 void *cpy_data);
3953
3954extern void KMPC_SET_NUM_THREADS(int arg);
3955extern void KMPC_SET_DYNAMIC(int flag);
3956extern void KMPC_SET_NESTED(int flag);
3957
3958/* OMP 3.0 tasking interface routines */
3959KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
3960 kmp_task_t *new_task);
3961KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3962 kmp_int32 flags,
3963 size_t sizeof_kmp_task_t,
3964 size_t sizeof_shareds,
3965 kmp_routine_entry_t task_entry);
3966KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(
3967 ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
3968 size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id);
3969KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
3970 kmp_task_t *task);
3971KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
3972 kmp_task_t *task);
3973KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
3974 kmp_task_t *new_task);
3975KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
3976
3977KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
3978 int end_part);
3979
3980#if TASK_UNUSED
3981void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
3982void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
3983 kmp_task_t *task);
3984#endif // TASK_UNUSED
3985
3986/* ------------------------------------------------------------------------ */
3987
3988KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid);
3989KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid);
3990
3991KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
3992 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
3993 kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
3994 kmp_depend_info_t *noalias_dep_list);
3995KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
3996 kmp_int32 ndeps,
3997 kmp_depend_info_t *dep_list,
3998 kmp_int32 ndeps_noalias,
3999 kmp_depend_info_t *noalias_dep_list);
4000extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
4001 bool serialize_immediate);
4002
4003KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
4004 kmp_int32 cncl_kind);
4005KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
4006 kmp_int32 cncl_kind);
4007KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
4008KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
4009
4010KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
4011KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
4012KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
4013 kmp_int32 if_val, kmp_uint64 *lb,
4014 kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
4015 kmp_int32 sched, kmp_uint64 grainsize,
4016 void *task_dup);
4017KMP_EXPORT void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,
4018 kmp_task_t *task, kmp_int32 if_val,
4019 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4020 kmp_int32 nogroup, kmp_int32 sched,
4021 kmp_uint64 grainsize, kmp_int32 modifier,
4022 void *task_dup);
4023KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
4024KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
4025KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
4026KMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
4027 int is_ws, int num,
4028 void *data);
4029KMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
4030 int num, void *data);
4031KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
4032 int is_ws);
4033KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
4034 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
4035 kmp_task_affinity_info_t *affin_list);
4036KMP_EXPORT void __kmp_set_num_teams(int num_teams);
4037KMP_EXPORT int __kmp_get_max_teams(void);
4038KMP_EXPORT void __kmp_set_teams_thread_limit(int limit);
4039KMP_EXPORT int __kmp_get_teams_thread_limit(void);
4040
4041/* Lock interface routines (fast versions with gtid passed in) */
4042KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
4043 void **user_lock);
4044KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
4045 void **user_lock);
4046KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
4047 void **user_lock);
4048KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
4049 void **user_lock);
4050KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
4051KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
4052 void **user_lock);
4053KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
4054 void **user_lock);
4055KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
4056 void **user_lock);
4057KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
4058KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
4059 void **user_lock);
4060
4061KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
4062 void **user_lock, uintptr_t hint);
4063KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
4064 void **user_lock,
4065 uintptr_t hint);
4066
4067/* Interface to fast scalable reduce methods routines */
4068
4069KMP_EXPORT kmp_int32 __kmpc_reduce_nowait(
4070 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4071 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4072 kmp_critical_name *lck);
4073KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
4074 kmp_critical_name *lck);
4075KMP_EXPORT kmp_int32 __kmpc_reduce(
4076 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4077 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4078 kmp_critical_name *lck);
4079KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
4080 kmp_critical_name *lck);
4081
4082/* Internal fast reduction routines */
4083
4084extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
4085 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4086 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4087 kmp_critical_name *lck);
4088
4089// this function is for testing set/get/determine reduce method
4090KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void);
4091
4092KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
4093KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
4094
4095// C++ port
4096// missing 'extern "C"' declarations
4097
4098KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
4099KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
4100KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
4101 kmp_int32 num_threads);
4102
4103KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
4104 int proc_bind);
4105KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
4106 kmp_int32 num_teams,
4107 kmp_int32 num_threads);
4108/* Function for OpenMP 5.1 num_teams clause */
4109KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
4110 kmp_int32 num_teams_lb,
4111 kmp_int32 num_teams_ub,
4112 kmp_int32 num_threads);
4113KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
4114 kmpc_micro microtask, ...);
4115struct kmp_dim { // loop bounds info casted to kmp_int64
4116 kmp_int64 lo; // lower
4117 kmp_int64 up; // upper
4118 kmp_int64 st; // stride
4119};
4120KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
4121 kmp_int32 num_dims,
4122 const struct kmp_dim *dims);
4123KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
4124 const kmp_int64 *vec);
4125KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
4126 const kmp_int64 *vec);
4127KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
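
// Sketch of the doacross sequence a compiler emits for
// "#pragma omp for ordered(1)" with sink/source dependences; identifiers
// other than the __kmpc_* entry points are illustrative:
//
//   struct kmp_dim dims = {/*lo=*/1, /*up=*/n, /*st=*/1};
//   __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dims);
//   for (kmp_int64 i = 1; i <= n; ++i) {
//     kmp_int64 sink = i - 1;
//     __kmpc_doacross_wait(loc, gtid, &sink); // ordered depend(sink: i-1)
//     /* ... loop body ... */
//     __kmpc_doacross_post(loc, gtid, &i);    // ordered depend(source)
//   }
//   __kmpc_doacross_fini(loc, gtid);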
4128
4129KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
4130 void *data, size_t size,
4131 void ***cache);
4132
4133// Symbols for MS mutual detection: link-time check that exactly one OpenMP library is linked.
4134extern int _You_must_link_with_exactly_one_OpenMP_library;
4135extern int _You_must_link_with_Intel_OpenMP_library;
4136#if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4)
4137extern int _You_must_link_with_Microsoft_OpenMP_library;
4138#endif
4139
4140// The routines below are not exported.
4141// Consider making them 'static' in corresponding source files.
4142void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
4143 void *data_addr, size_t pc_size);
4144struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
4145 void *data_addr,
4146 size_t pc_size);
4147void __kmp_threadprivate_resize_cache(int newCapacity);
4148void __kmp_cleanup_threadprivate_caches();
4149
4150// ompc_, kmpc_ entries moved from omp.h.
4151#if KMP_OS_WINDOWS
4152#define KMPC_CONVENTION __cdecl
4153#else
4154#define KMPC_CONVENTION
4155#endif
4156
4157#ifndef __OMP_H
4158typedef enum omp_sched_t {
4159 omp_sched_static = 1,
4160 omp_sched_dynamic = 2,
4161 omp_sched_guided = 3,
4162 omp_sched_auto = 4
4163} omp_sched_t;
4164typedef void *kmp_affinity_mask_t;
4165#endif
4166
4167KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
4168KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
4169KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
4170KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
4171KMP_EXPORT int KMPC_CONVENTION
4172kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
4173KMP_EXPORT int KMPC_CONVENTION
4174kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
4175KMP_EXPORT int KMPC_CONVENTION
4176kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
4177
4178KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
4179KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
4180KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
4181KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
4182KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
4183void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format);
4184size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size);
4185void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format);
4186size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
4187 char const *format);
4188
4189enum kmp_target_offload_kind {
4190 tgt_disabled = 0,
4191 tgt_default = 1,
4192 tgt_mandatory = 2
4193};
4194typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
4195// Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise
4196extern kmp_target_offload_kind_t __kmp_target_offload;
4197extern int __kmpc_get_target_offload();
4198
4199// Constants used in libomptarget
4200#define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.
4201#define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".
4202
4203// OMP Pause Resource
4204
4205// The following enum is used both to set the status in __kmp_pause_status, and
4206// as the internal equivalent of the externally-visible omp_pause_resource_t.
4207typedef enum kmp_pause_status_t {
4208 kmp_not_paused = 0, // status is not paused, or, requesting resume
4209 kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause
4210 kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause
4211} kmp_pause_status_t;
4212
4213// This stores the pause state of the runtime
4214extern kmp_pause_status_t __kmp_pause_status;
4215extern int __kmpc_pause_resource(kmp_pause_status_t level);
4216extern int __kmp_pause_resource(kmp_pause_status_t level);
4217// Soft resume sets __kmp_pause_status, and wakes up all threads.
4218extern void __kmp_resume_if_soft_paused();
4219// Hard resume simply resets the status to not paused. Library will appear to
4220// be uninitialized after hard pause. Let OMP constructs trigger required
4221// initializations.
4222static inline void __kmp_resume_if_hard_paused() {
4223 if (__kmp_pause_status == kmp_hard_paused) {
4224 __kmp_pause_status = kmp_not_paused;
4225 }
4226}
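
// Sketch of driving the pause machinery; the return-value convention
// (0 on success) is an assumption here, not documented by this header:
//
//   if (__kmpc_pause_resource(kmp_soft_paused) != 0) {
//     /* request rejected, e.g. runtime not initialized */
//   }
//   __kmp_resume_if_soft_paused(); // wake threads back up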
4227
4228extern void __kmp_omp_display_env(int verbose);
4229
4230// 1: it is initializing hidden helper team
4231extern volatile int __kmp_init_hidden_helper;
4232// 1: the hidden helper team is done
4233extern volatile int __kmp_hidden_helper_team_done;
4234// 1: enable hidden helper task
4235extern kmp_int32 __kmp_enable_hidden_helper;
4236// Main thread of hidden helper team
4237extern kmp_info_t *__kmp_hidden_helper_main_thread;
4238// Descriptors for the hidden helper threads
4239extern kmp_info_t **__kmp_hidden_helper_threads;
4240// Number of hidden helper threads
4241extern kmp_int32 __kmp_hidden_helper_threads_num;
4242// Number of hidden helper tasks that have not been executed yet
4243extern std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
4244
4245extern void __kmp_hidden_helper_initialize();
4246extern void __kmp_hidden_helper_threads_initz_routine();
4247extern void __kmp_do_initialize_hidden_helper_threads();
4248extern void __kmp_hidden_helper_threads_initz_wait();
4249extern void __kmp_hidden_helper_initz_release();
4250extern void __kmp_hidden_helper_threads_deinitz_wait();
4251extern void __kmp_hidden_helper_threads_deinitz_release();
4252extern void __kmp_hidden_helper_main_thread_wait();
4253extern void __kmp_hidden_helper_worker_thread_wait();
4254extern void __kmp_hidden_helper_worker_thread_signal();
4255extern void __kmp_hidden_helper_main_thread_release();
4256
4257// Check whether a given thread is a hidden helper thread
4258#define KMP_HIDDEN_HELPER_THREAD(gtid) \
4259 ((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4260
4261#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
4262 ((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4263
4264#define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid) \
4265 ((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4266
4267#define KMP_HIDDEN_HELPER_TEAM(team) \
4268 (team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
4269
4270// Map a gtid to a hidden helper thread. The first hidden helper thread,
4271// a.k.a. the main thread, is skipped.
4272#define KMP_GTID_TO_SHADOW_GTID(gtid) \
4273 ((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)
4274
4275// Return the adjusted gtid value by subtracting from gtid the number
4276// of hidden helper threads. This adjusted value is the gtid the thread would
4277// have received if there were no hidden helper threads.
4278static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
4279 int adjusted_gtid = gtid;
4280 if (__kmp_hidden_helper_threads_num > 0 && gtid > 0 &&
4281 gtid - __kmp_hidden_helper_threads_num >= 0) {
4282 adjusted_gtid -= __kmp_hidden_helper_threads_num;
4283 }
4284 return adjusted_gtid;
4285}
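
// Worked numbers for the helpers above: with
// __kmp_hidden_helper_threads_num == 8, gtids 1..8 belong to the hidden
// helper team, so a user thread registered as gtid 10 gets the adjusted
// gtid 10 - 8 == 2, the id it would have had with no helpers; likewise
// KMP_GTID_TO_SHADOW_GTID(9) yields 9 % 7 + 2 == 4.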
4286
4287// Support for error directive
4288typedef enum kmp_severity_t {
4289 severity_warning = 1,
4290 severity_fatal = 2
4291} kmp_severity_t;
4292extern void __kmpc_error(ident_t *loc, int severity, const char *message);
4293
4294// Support for scope directive
4295KMP_EXPORT void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4296KMP_EXPORT void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4297
4298#ifdef __cplusplus
4299}
4300#endif
4301
4302template <bool C, bool S>
4303extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
4304template <bool C, bool S>
4305extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
4306template <bool C, bool S>
4307extern void __kmp_atomic_suspend_64(int th_gtid,
4308 kmp_atomic_flag_64<C, S> *flag);
4309extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
4310#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
4311template <bool C, bool S>
4312extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
4313template <bool C, bool S>
4314extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
4315template <bool C, bool S>
4316extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
4317extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
4318#endif
4319template <bool C, bool S>
4320extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
4321template <bool C, bool S>
4322extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
4323template <bool C, bool S>
4324extern void __kmp_atomic_resume_64(int target_gtid,
4325 kmp_atomic_flag_64<C, S> *flag);
4326extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
4327
4328template <bool C, bool S>
4329int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
4330 kmp_flag_32<C, S> *flag, int final_spin,
4331 int *thread_finished,
4332#if USE_ITT_BUILD
4333 void *itt_sync_obj,
4334#endif /* USE_ITT_BUILD */
4335 kmp_int32 is_constrained);
4336template <bool C, bool S>
4337int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4338 kmp_flag_64<C, S> *flag, int final_spin,
4339 int *thread_finished,
4340#if USE_ITT_BUILD
4341 void *itt_sync_obj,
4342#endif /* USE_ITT_BUILD */
4343 kmp_int32 is_constrained);
4344template <bool C, bool S>
4345int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4346 kmp_atomic_flag_64<C, S> *flag,
4347 int final_spin, int *thread_finished,
4348#if USE_ITT_BUILD
4349 void *itt_sync_obj,
4350#endif /* USE_ITT_BUILD */
4351 kmp_int32 is_constrained);
4352int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
4353 kmp_flag_oncore *flag, int final_spin,
4354 int *thread_finished,
4355#if USE_ITT_BUILD
4356 void *itt_sync_obj,
4357#endif /* USE_ITT_BUILD */
4358 kmp_int32 is_constrained);
4359
4360extern int __kmp_nesting_mode;
4361extern int __kmp_nesting_mode_nlevels;
4362extern int *__kmp_nesting_nth_level;
4363extern void __kmp_init_nesting_mode();
4364extern void __kmp_set_nesting_mode_threads();
4365
4366/// This class safely opens and closes C-style FILE* object using RAII
4367/// semantics. There are also methods which allow using stdout or stderr as
4368/// the file instead of regular file. If the file cannot be opened, open()
4369/// issues a fatal error while try_open() merely returns the errno value;
4370/// set_stdout() and set_stderr() attach the wrapper to the standard
4371/// streams, which close() deliberately leaves open.
4372class kmp_safe_raii_file_t {
4373  FILE *f;
4374
4375 void close() {
4376 if (f && f != stdout && f != stderr) {
4377 fclose(f);
4378 f = nullptr;
4379 }
4380 }
4381
4382public:
4383 kmp_safe_raii_file_t() : f(nullptr) {}
4384 kmp_safe_raii_file_t(const char *filename, const char *mode,
4385 const char *env_var = nullptr)
4386 : f(nullptr) {
4387 open(filename, mode, env_var);
4388 }
4389 ~kmp_safe_raii_file_t() { close(); }
4390
4391  /// Open filename using mode. This is automatically closed in the
4392  /// destructor. The env var is used to print an error message for the
4393  /// user if the file could not be opened.
4394 void open(const char *filename, const char *mode,
4395 const char *env_var = nullptr) {
4396 KMP_ASSERT(!f);
4397 f = fopen(filename, mode);
4398 if (!f) {
4399 int code = errno;
4400 if (env_var) {
4401 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4402 KMP_HNT(CheckEnvVar, env_var, filename), __kmp_msg_null);
4403 } else {
4404 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4405 __kmp_msg_null);
4406 }
4407 }
4408 }
4409  /// Instead of erroring out, return non-zero when
4410  /// unsuccessful fopen() for any reason
4411  int try_open(const char *filename, const char *mode) {
4412 KMP_ASSERT(!f);
4413 f = fopen(filename, mode);
4414 if (!f)
4415 return errno;
4416 return 0;
4417 }
4418  /// Set the FILE* object to stdout and output there
4419  /// No open call should happen before this call.
4420  void set_stdout() {
4421 KMP_ASSERT(!f);
4422 f = stdout;
4423 }
4424  /// Set the FILE* object to stderr and output there
4425  /// No open call should happen before this call.
4426  void set_stderr() {
4427 KMP_ASSERT(!f);
4428 f = stderr;
4429 }
4430 operator bool() { return bool(f); }
4431 operator FILE *() { return f; }
4432};
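
// Usage sketch of the RAII wrapper above; the filename is hypothetical:
//
//   kmp_safe_raii_file_t file;
//   if (file.try_open("/tmp/example.txt", "r") == 0) {
//     char buf[64];
//     (void)fgets(buf, sizeof(buf), file); // implicit operator FILE *()
//   }
//   // the destructor fclose()s the file when `file` goes out of scope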
4433
4434template <typename SourceType, typename TargetType,
4435 bool isSourceSmaller = (sizeof(SourceType) < sizeof(TargetType)),
4436 bool isSourceEqual = (sizeof(SourceType) == sizeof(TargetType)),
4437 bool isSourceSigned = std::is_signed<SourceType>::value,
4438 bool isTargetSigned = std::is_signed<TargetType>::value>
4439struct kmp_convert {};
4440
4441// Both types are signed; Source smaller
4442template <typename SourceType, typename TargetType>
4443struct kmp_convert<SourceType, TargetType, true, false, true, true> {
4444 static TargetType to(SourceType src) { return (TargetType)src; }
4445};
4446// Source equal
4447template <typename SourceType, typename TargetType>
4448struct kmp_convert<SourceType, TargetType, false, true, true, true> {
4449 static TargetType to(SourceType src) { return src; }
4450};
4451// Source bigger
4452template <typename SourceType, typename TargetType>
4453struct kmp_convert<SourceType, TargetType, false, false, true, true> {
4454 static TargetType to(SourceType src) {
4455 KMP_ASSERT(src <= static_cast<SourceType>(
4456 (std::numeric_limits<TargetType>::max)()));
4457 KMP_ASSERT(src >= static_cast<SourceType>(
4458 (std::numeric_limits<TargetType>::min)()));
4459 return (TargetType)src;
4460 }
4461};
4462
4463// Source signed, Target unsigned
4464// Source smaller
4465template <typename SourceType, typename TargetType>
4466struct kmp_convert<SourceType, TargetType, true, false, true, false> {
4467 static TargetType to(SourceType src) {
4468 KMP_ASSERT(src >= 0);
4469 return (TargetType)src;
4470 }
4471};
4472// Source equal
4473template <typename SourceType, typename TargetType>
4474struct kmp_convert<SourceType, TargetType, false, true, true, false> {
4475 static TargetType to(SourceType src) {
4476 KMP_ASSERT(src >= 0);
4477 return (TargetType)src;
4478 }
4479};
4480// Source bigger
4481template <typename SourceType, typename TargetType>
4482struct kmp_convert<SourceType, TargetType, false, false, true, false> {
4483 static TargetType to(SourceType src) {
4484 KMP_ASSERT(src >= 0);
4485 KMP_ASSERT(src <= static_cast<SourceType>(
4486 (std::numeric_limits<TargetType>::max)()));
4487 return (TargetType)src;
4488 }
4489};
4490
4491// Source unsigned, Target signed
4492// Source smaller
4493template <typename SourceType, typename TargetType>
4494struct kmp_convert<SourceType, TargetType, true, false, false, true> {
4495 static TargetType to(SourceType src) { return (TargetType)src; }
4496};
4497// Source equal
4498template <typename SourceType, typename TargetType>
4499struct kmp_convert<SourceType, TargetType, false, true, false, true> {
4500 static TargetType to(SourceType src) {
4501 KMP_ASSERT(src <= static_cast<SourceType>(
4502 (std::numeric_limits<TargetType>::max)()));
4503 return (TargetType)src;
4504 }
4505};
4506// Source bigger
4507template <typename SourceType, typename TargetType>
4508struct kmp_convert<SourceType, TargetType, false, false, false, true> {
4509 static TargetType to(SourceType src) {
4510 KMP_ASSERT(src <= static_cast<SourceType>(
4511 (std::numeric_limits<TargetType>::max)()));
4512 return (TargetType)src;
4513 }
4514};
4515
4516// Source unsigned, Target unsigned
4517// Source smaller
4518template <typename SourceType, typename TargetType>
4519struct kmp_convert<SourceType, TargetType, true, false, false, false> {
4520 static TargetType to(SourceType src) { return (TargetType)src; }
4521};
4522// Source equal
4523template <typename SourceType, typename TargetType>
4524struct kmp_convert<SourceType, TargetType, false, true, false, false> {
4525 static TargetType to(SourceType src) { return src; }
4526};
4527// Source bigger
4528template <typename SourceType, typename TargetType>
4529struct kmp_convert<SourceType, TargetType, false, false, false, false> {
4530 static TargetType to(SourceType src) {
4531 KMP_ASSERT(src <= static_cast<SourceType>(
4532 (std::numeric_limits<TargetType>::max)()));
4533 return (TargetType)src;
4534 }
4535};
4536
4537template <typename T1, typename T2>
4538static inline void __kmp_type_convert(T1 src, T2 *dest) {
4539 *dest = kmp_convert<T1, T2>::to(src);
4540}
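
// Sketch of the checked conversion helper above; in debug builds the
// KMP_ASSERTs in the chosen specialization fire when the value does not fit
// in the target type:
//
//   kmp_int64 big = 123;
//   kmp_int32 small;
//   __kmp_type_convert(big, &small); // signed, source-bigger specialization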
4541
4542#endif /* KMP_H */
void(* kmpc_dtor_vec)(void *, size_t)
Definition: kmp.h:1673
KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data, kmpc_ctor_vec ctor, kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length)
KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *)
KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *)
KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc)
KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *)
KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *)
KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid)
KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid, kmp_critical_name *)
KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid)
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st)
sched_type
Definition: kmp.h:357
KMP_EXPORT void __kmpc_end_masked(ident_t *, kmp_int32 global_tid)
void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid)
KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid)
KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid, kmp_critical_name *, uint32_t hint)
KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid)
KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid, kmp_int32 numberOfSections)
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, const struct kmp_dim *dims)
int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st)
KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid)
KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid)
KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid)
int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st)
void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid)
KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid)
int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st)
void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid)
KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid)
KMP_EXPORT kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid, kmp_int32 filter)
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk)
void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk)
void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk)
void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid)
void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk)
KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid, kmp_critical_name *)
@ kmp_nm_guided_chunked
Definition: kmp.h:408
@ kmp_sch_runtime_simd
Definition: kmp.h:379
@ kmp_nm_ord_auto
Definition: kmp.h:427
@ kmp_sch_auto
Definition: kmp.h:364
@ kmp_nm_auto
Definition: kmp.h:410
@ kmp_distribute_static_chunked
Definition: kmp.h:395
@ kmp_sch_static
Definition: kmp.h:360
@ kmp_sch_guided_simd
Definition: kmp.h:378
@ kmp_sch_modifier_monotonic
Definition: kmp.h:445
@ kmp_sch_default
Definition: kmp.h:465
@ kmp_sch_modifier_nonmonotonic
Definition: kmp.h:447
@ kmp_nm_ord_static
Definition: kmp.h:423
@ kmp_distribute_static
Definition: kmp.h:396
@ kmp_sch_guided_chunked
Definition: kmp.h:362
@ kmp_nm_static
Definition: kmp.h:406
@ kmp_sch_lower
Definition: kmp.h:358
@ kmp_nm_upper
Definition: kmp.h:429
@ kmp_ord_lower
Definition: kmp.h:384
@ kmp_ord_static
Definition: kmp.h:386
@ kmp_sch_upper
Definition: kmp.h:382
@ kmp_ord_upper
Definition: kmp.h:392
@ kmp_nm_lower
Definition: kmp.h:402
@ kmp_ord_auto
Definition: kmp.h:390
Definition: kmp.h:234
kmp_int32 reserved_1
Definition: kmp.h:235
char const * psource
Definition: kmp.h:244
kmp_int32 reserved_2
Definition: kmp.h:238
kmp_int32 reserved_3
Definition: kmp.h:243
kmp_int32 flags
Definition: kmp.h:236