LLVM OpenMP* Runtime Library
kmp_affinity.h
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"
#include <limits>

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, unsigned long is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
        }
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
        }
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
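
// Illustrative sketch (not part of the runtime itself) of how the
// KMPAffinity/Mask interface implemented above is typically driven; only the
// member functions declared in this header are assumed, and the local
// variable names are hypothetical:
//
//   KMPAffinity *api = new KMPHwlocAffinity(); // or a KMPNativeAffinity
//   api->determine_capable("KMP_AFFINITY");    // may enable KMP_AFFINITY_CAPABLE()
//   KMPAffinity::Mask *m = api->allocate_mask();
//   m->zero();
//   m->set(3); // allow only logical CPU 3
//   m->set_system_affinity(/*abort_on_error=*/true);
//   api->deallocate_mask(m);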

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OSes that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change:
   they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
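
// For reference, a minimal sketch (an assumption for illustration, not
// runtime code) of how these syscall numbers are exercised directly,
// mirroring what the Mask class below does; the buffer size is an arbitrary
// illustrative choice:
//
//   #include <sys/syscall.h>
//   #include <unistd.h>
//   unsigned long buf[16] = {0}; // room for 1024 logical CPUs
//   long r = syscall(__NR_sched_getaffinity, 0 /* calling thread */,
//                    sizeof(buf), buf);
//   // r < 0: inspect errno, exactly as get_system_affinity() does below.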
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
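    // Worked example of the indexing above (assuming 64-bit mask_t): for
    // logical CPU i = 130, the word index is 130 / 64 = 2 and the bit index
    // is 130 % 64 = 2, so set(130) ORs (ONE << 2) into mask[2].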
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
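    // Worked example for the group handling above (assuming two processor
    // groups of 64 logical processors each): a mask with only bit 70 set lies
    // entirely in mask[1], so get_proc_group() returns 1 and
    // set_system_affinity() builds GROUP_AFFINITY{Group = 1, Mask = 1 << 6};
    // a mask spanning both groups makes get_proc_group() return -1, which is
    // treated as an invalid mask.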
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

// Describe an attribute for a level in the machine topology
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};
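
// Illustrative sketch of the contains() semantics above (hypothetical values;
// the KMP_HW_CORE_TYPE_* constants come from kmp.h):
//
//   kmp_hw_attr_t core, want;
//   core.set_core_type(KMP_HW_CORE_TYPE_CORE); // attribute found on a core
//   core.set_core_eff(1);
//   want.set_core_type(KMP_HW_CORE_TYPE_CORE); // KMP_HW_SUBSET-style filter
//   // core.contains(want) == true: every field 'want' specifies matches.
//   want.set_core_eff(0);
//   // core.contains(want) == false: core efficiencies differ (1 vs 0).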

class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    attrs.clear();
  }
};
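
// Reading the fields above: ids[] holds one id per topology layer (indexed by
// that layer's position in kmp_topology_t::types, e.g. package / core /
// thread), sub_ids[] holds the per-layer indices assigned while enumerating
// the topology, and os_id is the OS logical-processor number. A hypothetical
// example for the second hardware thread of core 3 in package 0 on a 2-way
// SMT machine: ids = {0, 3, 1}, with os_id as reported by the OS.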

class kmp_topology_t {

  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // The following arrays are all 'depth' long, but each is allocated to hold
  // up to KMP_HW_LAST objects so layers can be added without reallocation.

  // Ordered array of the types in the topology
  kmp_hw_t *types;

  // Quick topology ratios: for non-uniform topologies, ratio[i] holds the
  // maximum number of level-i items per item of the level above,
  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
  int *ratio;

  // Storage containing the absolute number of objects in each topology layer
  int *count;

  // The number of core efficiencies. This is only useful for hybrid
  // topologies. Core efficiencies will range from 0 to num efficiencies - 1
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array
  // hw_threads is num_hw_threads long
  // Each hw_thread's ids and sub_ids are depth deep
  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  // Equivalence hash where the key is the hardware topology item
  // and the value is the equivalent hardware topology type in the
  // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
  // known equivalence for the topology type
  kmp_hw_t equivalent[KMP_HW_LAST];

  // Flags describing the topology
  flags_t flags;

  // Compact value used during sort_compact()
  int compact;

  // Insert a new topology layer after allocation
  void _insert_layer(kmp_hw_t type, const int *ids);

#if KMP_GROUP_AFFINITY
  // Insert topology information about Windows Processor groups
  void _insert_windows_proc_groups();
#endif

  // Count each item & get the num x's per y
  // e.g., get the number of cores and the number of threads per core
  // for each (x, y) in (KMP_HW_* , KMP_HW_*)
  void _gather_enumeration_information();

  // Remove layers that don't add information to the topology.
  // This is done by having the layer take on the id = UNKNOWN_ID (-1)
  void _remove_radix1_layers();

  // Find out if the topology is uniform
  void _discover_uniformity();

  // Set all the sub_ids for each hardware thread
  void _set_sub_ids();

  // Set global affinity variables describing the number of threads per
  // core, the number of packages, the number of cores per package, and
  // the number of cores.
  void _set_globals();

  // Set the last level cache equivalent type
  void _set_last_level_cache();

  // Return the number of cores with a particular attribute, 'attr'.
  // If 'find_all' is true, then find all cores on the machine, otherwise find
  // all cores per the layer 'above'
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;

  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  // Check if the hardware ids are unique. Returns true if they are,
  // false otherwise.
  bool check_ids() const;

  // Function to call after the create_map() routine
  void canonicalize();
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

// Functions used after canonicalize() is called

#if KMP_AFFINITY_SUPPORTED
  // Set the granularity for affinity settings
  void set_granularity(kmp_affinity_t &stgs) const;
#endif
  bool filter_hw_subset();
  bool is_close(int hwt1, int hwt2, int level) const;
  bool is_uniform() const { return flags.uniform; }
  // Return the topology type equivalent to 'type'; returns KMP_HW_UNKNOWN
  // when the type has no equivalent in this topology
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
  // Set type1 = type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // This loop is required since any of the types may have been set to
    // be equivalent to type1. They all must be checked and reset to type2.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
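  // Illustrative walk-through of the equivalence map (using KMP_HW_TILE and
  // KMP_HW_CORE from the kmp_hw_t enum as example types): after
  // set_equivalent_type(KMP_HW_TILE, KMP_HW_CORE), equivalent[KMP_HW_TILE]
  // resolves to KMP_HW_CORE, so get_equivalent_type(KMP_HW_TILE) and
  // get_level(KMP_HW_TILE) both refer to the core layer, and any type that
  // previously mapped to KMP_HW_TILE is redirected to KMP_HW_CORE as well.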
  // Calculate number of types corresponding to level1
  // per types corresponding to level2 (e.g., number of threads per core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; };
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
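  // Worked example for the accessors above on the topology sketched earlier,
  // [ 4 packages | 6 cores / package | 2 threads / core ]: depth == 3,
  // ratio == {4, 6, 2}, count == {4, 24, 48}, and
  // calculate_ratio(thread_level == 2, package_level == 0) == 2 * 6 == 12,
  // i.e. twelve hardware threads per package.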
  // Return the total number of cores with attribute 'attr'
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute
  // 'attr' per topology level 'above'
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }

#if KMP_AFFINITY_SUPPORTED
  friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
  void sort_compact(kmp_affinity_t &affinity) {
    compact = affinity.compact;
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
  void dump() const;
};
extern kmp_topology_t *__kmp_topology;

class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
  // Put parentheses around max to avoid accidental use of the Windows max macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sorting the KMP_HW_SUBSET items to follow topology order
  // All unknown topology types will be at the beginning of the subset
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    for (int i = 0; i < depth; ++i) {
      // Found an existing item for this layer type
      // Add the num, offset, and attr to this item
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }
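  // Illustrative sketch of how the KMP_HW_SUBSET parser (implemented elsewhere
  // in the runtime) would drive push_back(); for KMP_HW_SUBSET=2s,6c,2t it
  // issues roughly:
  //
  //   subset->push_back(2, KMP_HW_SOCKET, 0, kmp_hw_attr_t{});
  //   subset->push_back(6, KMP_HW_CORE, 0, kmp_hw_attr_t{});
  //   subset->push_back(2, KMP_HW_THREAD, 0, kmp_hw_attr_t{});
  //
  // leaving depth == 3 and the socket/core/thread bits set in 'set'.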
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  void sort() {
    KMP_DEBUG_ASSERT(__kmp_topology);
    qsort(items, depth, sizeof(item_t), hw_subset_compare);
  }
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf(" num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};
extern kmp_hw_subset_t *__kmp_hw_subset;

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual machine
   hierarchy, or to our best guess at what the hierarchy might be, for the
   purpose of performing an efficient barrier. In the worst case, when there is
   no machine hierarchy information, it produces a tree suitable for a barrier,
   similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Maximum number of levels the numPerLevel/skipPerLevel arrays can hold
  kmp_uint32 maxLevels;

  // Number of levels currently populated in the hierarchy
  kmp_uint32 depth;
  kmp_uint32 base_num_threads; // number of threads the hierarchy was built for
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // numPerLevel[i] is the fan-out between level i and level i+1 (level 0 is
  // the leaves); skipPerLevel[i] is the number of leaves spanned by one node
  // at level i (the running product of the fan-outs below it)
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Explicitly initialize the data fields here to prevent use of dirty
       values observed when the static library is re-initialized multiple times
       (e.g. when a non-OpenMP thread repeatedly launches/joins a thread that
       uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
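  // Worked example of init() (illustrative numbers): for num_addrs == 48 on
  // the [ 4 packages | 6 cores / package | 2 threads / core ] machine used
  // earlier, deriveLevels() yields numPerLevel = {2, 6, 4, 1, ...}; the
  // width-optimization loop then splits any level whose fan-out exceeds the
  // branching factor (4 here), giving numPerLevel = {2, 3, 4, 2, ...}, and
  // skipPerLevel becomes the running product {1, 2, 6, 24, 48, ...}, i.e. the
  // number of hardware threads spanned by one node at each level.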

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H