| 1 | /* -*- mode: c; tab-width: 4; fill-column: 78 -*- */ |
| 2 | /* vi: set ts=4 tw=78: */ |
| 3 | |
| 4 | /* |
| 5 | thread_util.c, Copyright (c) 2014 Dave Odell <dmo2118@gmail.com> |
| 6 | |
| 7 | Permission to use, copy, modify, distribute, and sell this software and its |
| 8 | documentation for any purpose is hereby granted without fee, provided that |
| 9 | the above copyright notice appear in all copies and that both that |
| 10 | copyright notice and this permission notice appear in supporting |
| 11 | documentation. No representations are made about the suitability of this |
| 12 | software for any purpose. It is provided "as is" without express or |
| 13 | implied warranty. |
| 14 | */ |
| 15 | |
| 16 | #if HAVE_CONFIG_H |
| 17 | # include "config.h" |
| 18 | #endif |
| 19 | |
| 20 | #include <assert.h> |
| 21 | #include <errno.h> |
| 22 | #include <limits.h> |
| 23 | #include <stdlib.h> |
| 24 | #include <stdio.h> /* Only used by thread_memory_alignment(). */ |
| 25 | #include <string.h> |
| 26 | |
| 27 | #if HAVE_ALLOCA_H |
| 28 | # include <alloca.h> |
| 29 | #endif |
| 30 | |
| 31 | #if HAVE_UNISTD_H |
| 32 | # include <unistd.h> |
| 33 | #endif |
| 34 | |
| 35 | #if defined __MACH__ && defined __APPLE__ /* OS X, iOS */ |
| 36 | # include <sys/sysctl.h> |
| 37 | # include <inttypes.h> |
| 38 | #endif |
| 39 | |
| 40 | #include "thread_util.h" |
| 41 | |
| 42 | #include "aligned_malloc.h" |
| 43 | #include "resources.h" |
| 44 | |
/* Nonzero when x is a positive integer with exactly one bit set. The argument
   is expanded several times, so it must be free of side effects. */
#define IS_POWER_OF_2(x) (((x) > 0) && (((x) & ((x) - 1)) == 0))
| 46 | |
/*
arraysize(a) yields the number of elements in the array a. The same macro is
known elsewhere as countof(x), XtNumber(x), NELEMS(x), LEN(x), NUMOF(x),
ARRAY_SIZE(x), etc., since the fine folks behind C never got around to
including this incredibly useful macro in the standard library, which is
where it belongs.

arrayend(a) yields a pointer to one past the last element of the array a.

Both macros need a real array as their argument; a pointer gives a wrong
answer.

Much of the code here assumes that multiple processors in a system all use
the same cache line size...which might be wrong on occasion.
*/

#define arraysize(a) (sizeof(a) / sizeof((a)[0]))
#define arrayend(a) (&(a)[arraysize(a)])
| 59 | |
| 60 | /* |
| 61 | These numbers are from: |
| 62 | - Linux: arch/(arch name)/include/asm/cache.h, note |
| 63 | L1_CACHE_BYTES/L1_CACHE_SHIFT/SMP_CACHE_BYTES. |
| 64 | - FreeBSD: sys/(sys name)/include/param.h, note |
| 65 | CACHE_LINE_SHIFT/CACHE_LINE_SIZE. |
| 66 | |
| 67 | Preprocessor symbols come from: |
| 68 | - TARGET_CPU_CPP_BUILTINS() in the GNU C preprocessor |
| 69 | <http://code.ohloh.net/?s=%22TARGET_CPU_CPP_BUILTINS%22&fp=304413> |
| 70 | - http://predef.sourceforge.net/ |
| 71 | */ |
| 72 | |
| 73 | /* |
| 74 | Several architectures need preprocessor symbols. |
| 75 | |
| 76 | Qualcomm Hexagon: 1 << 5 |
| 77 | Imagination Technologies META: 1 << 6 |
| 78 | OpenRISC: 16 (Linux has the cache line size as a todo.) |
| 79 | Unicore: 1 << 5 |
| 80 | */ |
| 81 | |
| 82 | #if HAVE_PTHREAD |
| 83 | |
| 84 | # if !HAVE_UNISTD_H |
| 85 | # error unistd.h must be present whenever pthread.h is. |
| 86 | # endif |
| 87 | |
#   if defined __MACH__ && defined __APPLE__ /* OS X, iOS */
#       include <TargetConditionals.h> /* For TARGET_OS_IPHONE. */
        /*
        TargetConditionals.h defines TARGET_OS_IPHONE on every Apple platform, as
        either 0 or 1, so it has to be tested by value. Testing it with #ifdef
        would select the iOS value on OS X as well.
        */
#       if TARGET_OS_IPHONE
#           define _CACHE_LINE_SIZE 64 /* iOS devices. */
#       endif
#   endif
| 94 | |
/*
Compile-time guess at the cache line size, in bytes. _get_cache_line_size()
returns this when the operating system can't supply a value at run time.

Each step below is skipped if _CACHE_LINE_SIZE is already defined by the time
it is reached:
- FreeBSD: CACHE_LINE_SIZE from <machine/param.h>.
- A per-architecture table keyed on compiler-predefined macros.
- NetBSD: COHERENCY_UNIT from <sys/param.h>.
- 256 bytes, as a last resort.
*/
# if defined __FreeBSD__ && !defined _CACHE_LINE_SIZE
# include <machine/param.h>
# ifdef CACHE_LINE_SIZE
# define _CACHE_LINE_SIZE CACHE_LINE_SIZE
# endif
# endif

/* Per-architecture table. Where models of one architecture differ, the entry
   comments note which model the chosen value comes from. */
# if !defined _CACHE_LINE_SIZE
# if defined __alpha || defined __alpha__
/* DEC Alpha */
# define _CACHE_LINE_SIZE 64 /* EV6 and above. EV4 and EV5 use 32 bytes. */
# elif defined __arm__
/* ARM architecture */
# define _CACHE_LINE_SIZE (1 << 6)
# elif defined __AVR || defined __AVR__
/* Atmel AVR32 */
/* NOTE(review): __AVR/__AVR__ look like the 8-bit AVR macros; AVR32 toolchains
   may define __AVR32__/__avr32__ instead -- confirm against the compiler. */
# define _CACHE_LINE_SIZE (1 << 5)
# elif defined __bfin || defined __BFIN__
/* Analog Devices Blackfin */
# define _CACHE_LINE_SIZE (1 << 5)
# elif defined _TMS320C6X || defined __TMS320C6X__
/* Texas Instruments TMS320C6x */
# define _CACHE_LINE_SIZE (1 << 7) /* From L2. L1 data cache line is 1 << 6. */
# elif defined __cris
/* Axis Communications ETRAX CRIS */
# define _CACHE_LINE_SIZE 32
# elif defined __ia64__ || defined _IA64
/* Intel Itanium */
# define _CACHE_LINE_SIZE (1 << 7)
# elif defined __M32R__ || defined __m32r__
/* Mitsubishi/Renesas M32R */
# define _CACHE_LINE_SIZE (1 << 4)
# elif defined __m68k__ || defined M68000 || defined __MC68K__
/* Motorola 68000 */
# define _CACHE_LINE_SIZE (1 << 4)
# elif defined __MICROBLAZE__ || defined __microblaze__
/* Xilinx MicroBlaze */
# define _CACHE_LINE_SIZE (1 << 5)
# elif defined __mips__ || defined __mips || defined __MIPS__
/* MIPS */
# define _CACHE_LINE_SIZE (1 << 6)
# elif defined __mn10300__ || defined __MN10300__
/* Matsushita/Panasonic MN103 */
# define _CACHE_LINE_SIZE 32 /* MN103E010 has 16 bytes. */
# elif defined __hppa || defined __hppa__
/* Hewlett-Packard PA-RISC */
# define _CACHE_LINE_SIZE 64 /* PA-RISC 2.0 uses 64 bytes, PA-RISC 1.1 uses 32. */
# elif defined __powerpc || defined _ARCH_PPC
/* Power Architecture (a.k.a. PowerPC) */
# define _CACHE_LINE_SIZE (1 << 7) /* Linux has a list of PPC models with associated L1_CACHE_SHIFT values. */
# elif defined __s390__ || defined __370__ || defined __zarch__ || defined __SYSC_ZARCH__
/* IBM System/390 */
# define _CACHE_LINE_SIZE 256
# elif defined SUNPLUS || defined __SCORE__ || defined __score__
/* Sunplus S+core */
# define _CACHE_LINE_SIZE (1 << 4)
# elif defined __sh__
/* Hitachi SuperH */
# define _CACHE_LINE_SIZE (1 << 5) /* SH3 and earlier used 1 << 4. */
# elif defined __sparc__ || defined __sparc
/* SPARC */
# define _CACHE_LINE_SIZE (1 << 7) /* Linux and FreeBSD disagree as to what this should be. */
# elif defined __tile__
/* Tilera TILE series */
# define _CACHE_LINE_SIZE (1 << 6) /* TILEPro uses different sizes for L1 and L2. */
# elif defined __i386 || defined __x86_64
/* x86(-64) */
# define _CACHE_LINE_SIZE (1 << 7)
# elif defined __xtensa__ || defined __XTENSA__
/* Cadence Design Systems/Tensilica Xtensa */
# define _CACHE_LINE_SIZE (1 << 5) /* 1 << 4 on some models. */
# endif
# endif /* !defined _CACHE_LINE_SIZE */

# if defined __NetBSD__ && !defined _CACHE_LINE_SIZE
/*
NetBSD defines COHERENCY_UNIT to be 32 on MIPS, and 64 for all other platforms -- which is wrong. Still, this is what the kernel
uses; if this value didn't work, the system wouldn't run.
*/
# include <sys/param.h>
# ifdef COHERENCY_UNIT
# define _CACHE_LINE_SIZE COHERENCY_UNIT
# endif
# endif

/* Nothing above matched: use a value at least as large as every entry in the
   table above. */
# ifndef _CACHE_LINE_SIZE
# define _CACHE_LINE_SIZE 256 /* Fallback cache line size. */
# endif
| 183 | |
| 184 | static unsigned _get_cache_line_size(void) |
| 185 | { |
| 186 | /* |
| 187 | The general idea: |
| 188 | - Try to get the actual cache line size from the operating system. |
| 189 | - In the interest of keeping things simple, this only checks with |
| 190 | glibc and OS X. |
| 191 | - A few other methods that could be added: |
| 192 | - Query x86 CPUs directly with the CPUID instruction. |
| 193 | - Query various ELF systems through the auxillary vector. |
| 194 | (Power, Alpha, SuperH) |
| 195 | - Query Linux through |
| 196 | /sys/devices/system/cpu/cpu?/cache/index?/coherency_line_size |
| 197 | (x86 only, AFAIK) |
| 198 | - Query Linux through cache_alignment in /proc/cpuinfo |
| 199 | - Query Solaris through PICL. |
| 200 | - If that fails, return a value appropriate for the current CPU |
| 201 | architecture. |
| 202 | - Otherwise, return a sufficiently large number. |
| 203 | */ |
| 204 | |
| 205 | /* |
| 206 | sysconf(3) is not a syscall, it's a glibc call that, for cache line sizes, |
| 207 | uses CPUID on x86 and returns 0 on other platforms. If it were to work on |
| 208 | most other platforms, it would have to get cache information from the |
| 209 | kernel, since that information is usually made available by the processor |
| 210 | only in privileged mode. |
| 211 | https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/i386/sysconf.c;hb=HEAD |
| 212 | */ |
| 213 | |
| 214 | /* uClibc, newlib, dietlibc, musl, Bionic do not have this. */ |
| 215 | |
| 216 | # if HAVE_UNISTD_H && ( \ |
| 217 | defined _SC_LEVEL1_DCACHE_LINESIZE || \ |
| 218 | defined _SC_LEVEL2_CACHE_LINESIZE || \ |
| 219 | defined _SC_LEVEL3_CACHE_LINESIZE || \ |
| 220 | defined _SC_LEVEL4_CACHE_LINESIZE) |
| 221 | { |
| 222 | static const int names[] = |
| 223 | { |
| 224 | # ifdef _SC_LEVEL1_DCACHE_LINESIZE |
| 225 | _SC_LEVEL1_DCACHE_LINESIZE, |
| 226 | # endif |
| 227 | # ifdef _SC_LEVEL2_CACHE_LINESIZE |
| 228 | _SC_LEVEL2_CACHE_LINESIZE, |
| 229 | # endif |
| 230 | # ifdef _SC_LEVEL3_CACHE_LINESIZE |
| 231 | _SC_LEVEL3_CACHE_LINESIZE, |
| 232 | # endif |
| 233 | # ifdef _SC_LEVEL4_CACHE_LINESIZE |
| 234 | _SC_LEVEL4_CACHE_LINESIZE |
| 235 | # endif |
| 236 | }; |
| 237 | |
| 238 | const int *name; |
| 239 | long result = 0; |
| 240 | |
| 241 | for(name = names; name != arrayend(names); ++name) |
| 242 | { |
| 243 | long sysconf_result = sysconf(*name); /* Can return -1 or 0 on |
| 244 | failure. */ |
| 245 | |
| 246 | if(sysconf_result > result) |
| 247 | result = sysconf_result; |
| 248 | } |
| 249 | |
| 250 | if(result) |
| 251 | return result; |
| 252 | |
| 253 | /* Currently, this fails for every platform that isn't x86. Perhaps |
| 254 | future versions will support other processors? */ |
| 255 | } |
| 256 | # endif |
| 257 | |
| 258 | # if defined __MACH__ && defined __APPLE__ |
| 259 | { |
| 260 | uint32_t result; /* sysctl.h says that hw.cachelinesize is a |
| 261 | CTLTYPE_INT. */ |
| 262 | size_t size = sizeof(result); |
| 263 | static const int name[] = {CTL_HW, HW_CACHELINE}; |
| 264 | |
| 265 | if(!sysctl((int *)name, 2, &result, &size, NULL, 0)) /* (int *) is for OS X. */ |
| 266 | { |
| 267 | assert(size == sizeof(result)); |
| 268 | return result; |
| 269 | }; |
| 270 | } |
| 271 | # endif |
| 272 | |
| 273 | /* Guess based on the CPU type. */ |
| 274 | return _CACHE_LINE_SIZE; |
| 275 | } |
| 276 | |
/* Initial value for a mutex, for use with plain assignment (see
   threadpool_create()). When _GNU_SOURCE is defined and NDEBUG is not, the
   error-checking initializer is used instead of the default one. */
const pthread_mutex_t mutex_initializer =
#   if defined _GNU_SOURCE && !defined NDEBUG
    PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
#   else
    PTHREAD_MUTEX_INITIALIZER
#   endif
    ;

/* Initial value for a condition variable, used the same way. */
const pthread_cond_t cond_initializer = PTHREAD_COND_INITIALIZER;

/* 0: not determined yet. Negative: threads unavailable or turned off.
   Positive: threads may be used. Set by threads_available(). */
static int _has_pthread = 0; /* Initialize when needed. */

/* Alignment reported by thread_memory_alignment(). Stays at pointer size
   until threads_available() finds that threads are usable. */
static int _cache_line_size = sizeof(void *);
| 289 | |
| 290 | /* This is actually the init function for various things in here. */ |
| 291 | int threads_available(Display *dpy) |
| 292 | { |
| 293 | /* This is maybe not thread-safe, but: this should -- and generally will -- |
| 294 | be called before the program launches its second thread. */ |
| 295 | |
| 296 | if(!_has_pthread) |
| 297 | { |
| 298 | # if _POSIX_THREADS |
| 299 | _has_pthread = _POSIX_THREADS; |
| 300 | # else |
| 301 | _has_pthread = sysconf(_SC_THREADS); |
| 302 | # endif |
| 303 | |
| 304 | if(_has_pthread >= 0) |
| 305 | { |
| 306 | if(get_boolean_resource(dpy, "useThreads", "Boolean")) |
| 307 | { |
| 308 | _cache_line_size = _get_cache_line_size(); |
| 309 | assert(_cache_line_size >= sizeof(void *)); |
| 310 | assert(IS_POWER_OF_2(_cache_line_size)); |
| 311 | } |
| 312 | else |
| 313 | { |
| 314 | _has_pthread = -1; |
| 315 | } |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | return _has_pthread; |
| 320 | } |
| 321 | |
| 322 | #endif /* HAVE_PTHREAD */ |
| 323 | |
| 324 | /* |
| 325 | hardware_concurrency() - |
| 326 | |
| 327 | Various platforms offer various statistics that look like they should be |
| 328 | useful: sysconf(_SC_NPROCESSORS_ONLN) (i.e. the number of 'online' |
| 329 | processors) in particular is available on many Unixes, and is frequently |
| 330 | used for functions like hardware_concurrency(). But 'online' is somewhat |
| 331 | ambiguous; it can mean: |
| 332 | |
| 333 | 1. The number of CPU cores that are not (temporarily) asleep. (e.g. Android |
| 334 | can sometimes put cores to sleep if they aren't being used, and this is |
| 335 | reflected in _SC_NPROCESSORS_ONLN.) |
| 336 | |
| 337 | 2. The maximum number of CPU cores that can be provided to this application, |
| 338 | as currently set by the system administrator. (2) is the one that |
| 339 | hardware_concurrency() ultimately needs. |
| 340 | */ |
| 341 | |
| 342 | /* |
Shamelessly plagiarized from Boost.Thread and Stack Overflow
| 344 | <http://stackoverflow.com/q/150355>. GNU libstdc++ has some of this too, |
| 345 | see thread::hardware_concurrency() in thread.cc. |
| 346 | http://gcc.gnu.org/viewcvs/gcc/trunk/libstdc%2B%2B-v3/src/c%2B%2B11/thread.cc?view=markup |
| 347 | |
| 348 | This might not work right on less common systems for various reasons. |
| 349 | */ |
| 350 | |
| 351 | #if HAVE_PTHREAD |
| 352 | # if defined __APPLE__ && defined __MACH__ || \ |
| 353 | defined __FreeBSD__ || \ |
| 354 | defined __OpenBSD__ || \ |
| 355 | defined __NetBSD__ || \ |
| 356 | defined __DragonFly__ || \ |
| 357 | defined __minix |
| 358 | |
| 359 | /* |
| 360 | BSD Unixes use sysctl(3) for this. |
| 361 | Some BSDs also support sysconf(3) for this, but in each case this was added |
| 362 | after sysctl(3). |
| 363 | Linux: sysctl is present, but strongly deprecated. |
| 364 | Minix uses the NetBSD userspace, so it has both this and sysconf(3). |
| 365 | QNX: sysctl is present for kern.* and net.*, but it doesn't say anything |
| 366 | about hw.* |
| 367 | */ |
| 368 | |
| 369 | /* __APPLE__ without __MACH__ is OS 9 or earlier. __APPLE__ with __MACH__ is OS X. */ |
| 370 | |
| 371 | /* |
| 372 | The usual thing to do here is for sysctl(3) to call __sysctl(2). |
| 373 | http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/gen/sysctl.c?only_with_tag=HEAD |
| 374 | http://svnweb.freebsd.org/base/head/lib/libc/gen/sysctl.c?view=markup |
| 375 | */ |
| 376 | |
| 377 | /* |
| 378 | OS X: Xcode Instruments (as of Xcode 4; Apple likes to move things like |
| 379 | this around) can disable CPUs as a debugging tool. |
| 380 | Instruments -> Preferences... (Command-,) -> General -> Active Processor Cores |
| 381 | FreeBSD, OpenBSD: It doesn't look like CPUs can be disabled. |
| 382 | NetBSD: CPUs can be disabled manually through cpuctl(8). |
| 383 | */ |
| 384 | |
| 385 | # include <stddef.h> |
| 386 | |
| 387 | /* FreeBSD: sys/sysctl.h needs sys/types.h, but the one doesn't bring the |
| 388 | other in automatically. */ |
| 389 | # include <sys/types.h> |
| 390 | # include <sys/sysctl.h> |
| 391 | |
| 392 | static unsigned _hardware_concurrency(void) |
| 393 | { |
| 394 | int count; |
| 395 | size_t size = sizeof(count); |
| 396 | |
| 397 | # if defined __APPLE__ && defined __MACH__ |
| 398 | /* Apple sez: sysctl("hw.logicalcpu") is affected by the "current power |
| 399 | management mode", so use hw.logicalcpu_max. */ |
| 400 | /* https://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/sysctl.3.html */ |
| 401 | if(!sysctlbyname("hw.logicalcpu_max", &count, &size, NULL, 0)) /* Preferred on more recent Darwin. */ |
| 402 | { |
| 403 | assert(size == sizeof(count)); |
| 404 | return count; |
| 405 | } |
| 406 | # endif |
| 407 | |
| 408 | # if defined HW_NCPUONLINE |
| 409 | /* NetBSD has this. */ |
| 410 | { |
| 411 | static const int name[] = {CTL_HW, HW_NCPUONLINE}; |
| 412 | if(!sysctl(name, 2, &count, &size, NULL, 0)) |
| 413 | { |
| 414 | assert(size == sizeof(count)); |
| 415 | return count; |
| 416 | } |
| 417 | } |
| 418 | # endif |
| 419 | |
| 420 | { |
| 421 | static const int name[] = {CTL_HW, HW_NCPU}; |
| 422 | if(!sysctl((int *)name, 2, &count, &size, NULL, 0)) /* (int *) is for OS X. */ |
| 423 | { |
| 424 | assert(size == sizeof(count)); |
| 425 | return count; |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | return 1; |
| 430 | } |
| 431 | |
| 432 | # elif HAVE_UNISTD_H && defined _SC_NPROCESSORS_ONLN |
| 433 | |
| 434 | /* |
| 435 | Supported by: |
| 436 | Linux 2.0 was the first version to provide SMP support via clone(2). |
| 437 | (e)glibc on Linux provides this, which in turn uses get_nprocs(). |
| 438 | get_nprocs in turn uses /sys/devices/system/cpu/online, /proc/stat, or /proc/cpuinfo, whichever's available. |
| 439 | https://sourceware.org/git/?p=glibc.git;a=blob;f=posix/sysconf.c;hb=HEAD |
| 440 | https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/getsysstats.c;hb=HEAD |
| 441 | Linux usually isn't configured to auto-enable/disable cores. |
| 442 | SunOS (Solaris), sometime between 4.1.3 and 5.5.1. |
| 443 | This includes all open source derivatives of 5.10. (Illumos, OpenIndiana) |
sysconf(_SC_NPROCESSORS_ONLN) calls _sysconfig(2).
| 445 | Not sure if CPU power management (enabled by default, see cpupm and |
| 446 | cpu_deep_idle in power.conf(4)) affects this. |
| 447 | psradm(1M) can bring up/down CPU cores, which affects |
| 448 | sysconf(_SC_NPROCESSORS_ONLN). |
| 449 | http://src.illumos.org/source/xref/illumos-gate/usr/src/lib/libc/port/gen/sysconf.c |
| 450 | Minix 3.2, at the latest. (This is the first version to support SMP.) |
| 451 | AIX 7.1, probably earlier. |
| 452 | |
| 453 | Also: |
| 454 | Mac OS X apparently has this on 10.5+. |
| 455 | FreeBSD 5.0, NetBSD 5.0 also have this. They both call sysctl(3). |
| 456 | http://svnweb.freebsd.org/base/head/lib/libc/gen/sysconf.c?view=markup |
| 457 | http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/gen/sysconf.c?only_with_tag=HEAD |
| 458 | |
| 459 | QNX has sysconf(3), but it doesn't have _SC_NPROCESSORS_*. |
| 460 | */ |
| 461 | |
/* sysconf(3) flavor: the number of processors currently online, or 1 when
   sysconf() fails or reports a non-positive value. */
static unsigned _hardware_concurrency(void)
{
    const long online = sysconf(_SC_NPROCESSORS_ONLN);

    if(online <= 0)
        return 1;

    return online;
}
| 467 | |
| 468 | # else |
| 469 | |
/* Fallback for unknown systems: there is no way to ask, so report a single
   processor. */
static unsigned _hardware_concurrency(void)
{
    const unsigned single_processor = 1;
    return single_processor;
}
| 474 | |
| 475 | # endif |
| 476 | #endif |
| 477 | |
| 478 | unsigned hardware_concurrency(Display *dpy) |
| 479 | { |
| 480 | #if HAVE_PTHREAD |
| 481 | if(threads_available(dpy) >= 0) |
| 482 | return _hardware_concurrency(); |
| 483 | #endif |
| 484 | return 1; |
| 485 | } |
| 486 | |
| 487 | /* thread_memory_alignment() - */ |
| 488 | |
| 489 | unsigned thread_memory_alignment(Display *dpy) |
| 490 | { |
| 491 | (void)threads_available(dpy); |
| 492 | #if HAVE_PTHREAD |
| 493 | return _cache_line_size; |
| 494 | #else |
| 495 | return sizeof(void *); |
| 496 | #endif |
| 497 | } |
| 498 | |
| 499 | /* Thread pool - */ |
| 500 | |
| 501 | static unsigned _threadpool_count_serial(struct threadpool *self) |
| 502 | { |
| 503 | #if HAVE_PTHREAD |
| 504 | assert(_has_pthread); |
| 505 | if(_has_pthread >= 0) |
| 506 | return self->count ? 1 : 0; |
| 507 | #endif |
| 508 | return self->count; |
| 509 | } |
| 510 | |
| 511 | static void _serial_destroy(struct threadpool *self) |
| 512 | { |
| 513 | void *thread = self->serial_threads; |
| 514 | unsigned i, count = _threadpool_count_serial(self); |
| 515 | |
| 516 | for(i = 0; i != count; ++i) |
| 517 | { |
| 518 | self->thread_destroy(thread); |
| 519 | thread = (char *)thread + self->thread_size; |
| 520 | } |
| 521 | |
| 522 | free(self->serial_threads); |
| 523 | } |
| 524 | |
| 525 | #if HAVE_PTHREAD |
| 526 | |
/*
Gives up on starting the remaining parallel threads. self->count is reduced
to the number of parallel threads counted in parallel_unfinished plus one, so
that _threadpool_count_parallel() reflects only those threads, and everyone
waiting on self->cond is woken. The call sites in this file hold self->mutex.
*/
static void _parallel_abort(struct threadpool *self)
{
    assert(self->count > 1);
    self->count = self->parallel_unfinished + 1 /* The '+ 1' should technically be _threadpool_count_serial(self). */;
    PTHREAD_VERIFY(pthread_cond_broadcast(&self->cond));
}
| 533 | |
/* State shared between threadpool_create() and the threads it launches, for
   the duration of startup only. threadpool_create() keeps it on its stack. */
struct _parallel_startup_type
{
    struct threadpool *parent; /* The pool being populated. */
    int (*thread_create)(void *self, struct threadpool *pool, unsigned id); /* Constructor for one thread object. */
    int last_errno; /* 0 while startup is going well, else the first error seen. */
};
| 540 | |
/* Number of thread objects that get a pthread of their own: every one except
   the single object that runs on the calling thread. Asserts that the pool
   holds at least one thread object. */
static unsigned _threadpool_count_parallel(struct threadpool *self)
{
    assert(_has_pthread);
    assert(self->count >= 1);
    return self->count - 1 /* The '- 1' should technically be _threadpool_count_serial(self). */;
}
| 547 | |
| 548 | static void *_start_routine(void *startup_raw); |
| 549 | |
/*
Launches the next parallel thread, or, if parallel_unfinished already equals
the number of parallel threads, broadcasts on the pool's condition variable
instead.

Tricky lock sequence: both call sites hold the pool mutex while calling this.
This function never unlocks it. If pthread_create() fails, the error is left
in self->last_errno and _parallel_abort() is called; releasing the mutex is
then up to the caller's error path.
*/
static void _add_next_thread(struct _parallel_startup_type *self)
{
    assert(!self->last_errno);

    if(self->parent->parallel_unfinished == _threadpool_count_parallel(self->parent))
    {
        /* Every parallel thread has been started. */
        PTHREAD_VERIFY(pthread_cond_broadcast(&self->parent->cond));
    }
    else
    {
        /* parallel_unfinished doubles as the index of the next free slot. */
        pthread_t *thread = self->parent->parallel_threads + self->parent->parallel_unfinished;
        self->last_errno = pthread_create(thread, NULL, _start_routine, self);
        if(self->last_errno)
            _parallel_abort(self->parent);
    }
}
| 567 | |
/* Exit path for a worker whose thread object was never created, or has
   already been destroyed: unlocks the pool mutex and, when the object did not
   come from alloca(), frees its storage. Always returns NULL, so that
   _start_routine() can return the result directly. */
static void *_thread_free_and_unlock(struct threadpool *self, void *thread)
{
    PTHREAD_VERIFY(pthread_mutex_unlock(&self->mutex));
#   if !HAVE_ALLOCA
    thread_free(thread);
#   endif
    return NULL;
}
| 576 | |
/* Exit path for a worker with a fully created thread object: runs the pool's
   thread_destroy callback on it, then finishes through
   _thread_free_and_unlock(). Always returns NULL. */
static void *_thread_destroy_and_unlock(struct threadpool *self, void *thread)
{
    self->thread_destroy(thread);
    return _thread_free_and_unlock(self, thread);
}
| 582 | |
| 583 | /* At one point, one of the threads refused to destroy itself at the end. Why?! And why won't it happen again? */ |
| 584 | |
/*
Entry point of every parallel thread. startup_raw points to the
struct _parallel_startup_type owned by threadpool_create().

Startup: take the pool mutex, count this thread in parallel_unfinished, make
the thread object, then launch the next thread in the chain. Any failure is
recorded in startup->last_errno and ends this thread.

Afterwards: loop forever, waiting for threadpool_run() to set
parallel_pending, running parent->thread_run on the thread object outside the
lock, then reporting completion through parallel_unfinished. The thread exits
when it finds no run pending and parent->parallel_threads set to NULL.

Always returns NULL.
*/
static void *_start_routine(void *startup_raw)
{
    struct _parallel_startup_type *startup = (struct _parallel_startup_type *)startup_raw;

    struct threadpool *parent = startup->parent;

    void *thread;

    PTHREAD_VERIFY(pthread_mutex_lock(&parent->mutex));
    /* During startup this counts the threads started so far. The new value is
       also this thread's id, passed to thread_create below. */
    ++parent->parallel_unfinished;

#   if HAVE_ALLOCA
    /* Ideally, the thread object goes on the thread's stack. This guarantees no false sharing with other threads, and in a NUMA
       configuration, ensures that the thread object is using memory from the right node. */
    thread = alloca(parent->thread_size);
#   else
    startup->last_errno = thread_malloc(&thread, NULL, parent->thread_size);
    if(startup->last_errno)
    {
        _parallel_abort(parent);
        PTHREAD_VERIFY(pthread_mutex_unlock(&parent->mutex));
        return NULL;
    }
#   endif

    /* Setting thread affinity for threads running in lock-step can cause delays
       and jumpiness. Ideally, there would be some way to recommend (but not
       require) that a thread run on a certain core/set of cores. */

    /* Neither Linux nor libnuma seem to support the concept of a preferred/ideal
       CPU for a thread/process. */

    /* Untested. */
    /* {
        cpu_set_t cpu_set;
        CPU_ZERO(&cpu_set);
        CPU_SET(&cpu_set, &parent._threads_unfinished);
        pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set);
    } */

    startup->last_errno = startup->thread_create(thread, parent, parent->parallel_unfinished);
    if(startup->last_errno)
    {
        /* The object was never constructed, so it is freed without being
           destroyed. */
        _parallel_abort(parent);
        return _thread_free_and_unlock(parent, thread); /* Tail calls make everything better. */
    }

    assert(!startup->last_errno);
    _add_next_thread(startup); /* Calls _parallel_abort() on failure. */
    if(startup->last_errno)
        return _thread_destroy_and_unlock(parent, thread);

    /* Startup is over. From here on the mutex is held except while
       thread_run is executing and inside pthread_cond_wait(). */
    for(;;)
    {
        for(;;)
        {
            /*
            This must come before the '.threads' check, otherwise if
            threadpool_destroy is called immediately after a run starts, then
            it's possible that not all threads would be launched for the final
            run. This can cause deadlock in conjunction with things like
            barriers.
            */
            if(parent->parallel_pending)
                break; /* Start a run. */

            if(!parent->parallel_threads)
                return _thread_destroy_and_unlock(parent, thread); /* Threads are shutting down. */

            PTHREAD_VERIFY(pthread_cond_wait(&parent->cond, &parent->mutex));
        }

        /* Claim one slot of the pending run. */
        --parent->parallel_pending;
        if(!parent->parallel_pending)
            PTHREAD_VERIFY(pthread_cond_broadcast(&parent->cond));
            /* All threads have started processing, other threads can finish. */

        PTHREAD_VERIFY(pthread_mutex_unlock(&parent->mutex));

        parent->thread_run(thread);

        PTHREAD_VERIFY(pthread_mutex_lock(&parent->mutex));
#   if 0
        if(!parent->parallel_threads) /* I don't think this is necessary anymore. */
            break;
#   endif
        /* Don't loop around until all other threads have begun processing. */

        /* I suspect it doesn't matter whether this comes before or after the threads_unfinished check. */
        while(parent->parallel_pending)
            PTHREAD_VERIFY(pthread_cond_wait(&parent->cond, &parent->mutex));

        /* Report this thread's share of the run as finished. */
        --parent->parallel_unfinished;
        if(!parent->parallel_unfinished)
            PTHREAD_VERIFY(pthread_cond_broadcast(&parent->cond)); /* All threads done for now. */
    }

    /* return _thread_destroy_and_unlock(parent, thread); */
}
| 684 | |
/*
Tears a pool down. Must be called with self->mutex held; the mutex is
released here.

Clearing parallel_threads is what tells the worker threads to exit (see
_start_routine()). If there were any workers, they are woken, joined, and the
thread array, condition variable and mutex are released. The serial thread
objects are destroyed last.
*/
static void _unlock_and_destroy(struct threadpool *self)
{
    pthread_t *threads;

    /* Take the array away from the pool before unlocking. */
    threads = self->parallel_threads;
    self->parallel_threads = NULL;

    if(threads)
        PTHREAD_VERIFY(pthread_cond_broadcast(&self->cond));

    PTHREAD_VERIFY(pthread_mutex_unlock(&self->mutex));

    if(threads)
    {
        unsigned i, count = _threadpool_count_parallel(self);
        for(i = 0; i != count; ++i)
            PTHREAD_VERIFY(pthread_join(threads[i], NULL));

        free(threads);
        PTHREAD_VERIFY(pthread_cond_destroy(&self->cond));
        PTHREAD_VERIFY(pthread_mutex_destroy(&self->mutex));
    }

    _serial_destroy(self);
}
| 710 | |
| 711 | #endif /* HAVE_PTHREAD */ |
| 712 | |
| 713 | int threadpool_create(struct threadpool *self, const struct threadpool_class *cls, Display *dpy, unsigned count) |
| 714 | { |
| 715 | (void)threads_available(dpy); |
| 716 | |
| 717 | self->count = count; |
| 718 | |
| 719 | /* If threads are not present, run each "thread" in sequence on the calling |
| 720 | thread. Otherwise, only run the first thread on the main thread. */ |
| 721 | |
| 722 | assert(cls); |
| 723 | |
| 724 | self->thread_size = cls->size; |
| 725 | self->thread_destroy = cls->destroy; |
| 726 | |
| 727 | { |
| 728 | void *thread; |
| 729 | unsigned i, count_serial = _threadpool_count_serial(self); |
| 730 | |
| 731 | if(count_serial) |
| 732 | { |
| 733 | thread = malloc(cls->size * count_serial); |
| 734 | if(!thread) |
| 735 | return ENOMEM; |
| 736 | } |
| 737 | else |
| 738 | { |
| 739 | /* Might as well skip the malloc. */ |
| 740 | thread = NULL; |
| 741 | } |
| 742 | |
| 743 | self->serial_threads = thread; |
| 744 | |
| 745 | for(i = 0; i != count_serial; ++i) |
| 746 | { |
| 747 | int error = cls->create(thread, self, i); |
| 748 | if(error) |
| 749 | { |
| 750 | self->count = i; |
| 751 | _serial_destroy(self); |
| 752 | return error; |
| 753 | } |
| 754 | |
| 755 | thread = (char *)thread + self->thread_size; |
| 756 | } |
| 757 | } |
| 758 | |
| 759 | #if HAVE_PTHREAD |
| 760 | assert(_has_pthread); /* _has_pthread should be either -1 or >0. */ |
| 761 | if(_has_pthread >= 0) |
| 762 | { |
| 763 | unsigned count_parallel = _threadpool_count_parallel(self); |
| 764 | self->mutex = mutex_initializer; |
| 765 | self->cond = cond_initializer; |
| 766 | self->parallel_pending = 0; |
| 767 | self->parallel_unfinished = 0; |
| 768 | if(!count_parallel) |
| 769 | { |
| 770 | self->parallel_threads = NULL; |
| 771 | return 0; |
| 772 | } |
| 773 | |
| 774 | self->parallel_threads = malloc(sizeof(pthread_t) * count_parallel); |
| 775 | if(!self->parallel_threads) |
| 776 | return ENOMEM; |
| 777 | |
| 778 | { |
| 779 | struct _parallel_startup_type startup; |
| 780 | startup.parent = self; |
| 781 | startup.thread_create = cls->create; |
| 782 | startup.last_errno = 0; |
| 783 | |
| 784 | PTHREAD_VERIFY(pthread_mutex_lock(&self->mutex)); |
| 785 | _add_next_thread(&startup); |
| 786 | |
| 787 | if(!startup.last_errno) |
| 788 | { |
| 789 | while(self->parallel_unfinished != count_parallel && self->parallel_threads) |
| 790 | PTHREAD_VERIFY(pthread_cond_wait(&self->cond, &self->mutex)); |
| 791 | } |
| 792 | |
| 793 | /* This must come after the if(!startup.last_errno). */ |
| 794 | if(startup.last_errno) |
| 795 | { |
| 796 | _unlock_and_destroy(self); |
| 797 | } |
| 798 | else |
| 799 | { |
| 800 | self->parallel_unfinished = 0; |
| 801 | PTHREAD_VERIFY(pthread_mutex_unlock(&self->mutex)); |
| 802 | } |
| 803 | |
| 804 | return startup.last_errno; |
| 805 | } |
| 806 | } |
| 807 | #endif |
| 808 | |
| 809 | return 0; |
| 810 | } |
| 811 | |
/* Destroys a pool made by threadpool_create(). With threads in use, the
   worker threads are shut down and joined first; either way the serial
   thread objects are destroyed and their storage freed. */
void threadpool_destroy(struct threadpool *self)
{
#if HAVE_PTHREAD
    if(_has_pthread < 0)
    {
        /* Threads are off: only serial objects exist. */
        _serial_destroy(self);
    }
    else
    {
        /* _unlock_and_destroy() expects the mutex to be held on entry. */
        PTHREAD_VERIFY(pthread_mutex_lock(&self->mutex));
        _unlock_and_destroy(self);
    }
#else
    _serial_destroy(self);
#endif
}
| 825 | |
| 826 | void threadpool_run(struct threadpool *self, void (*func)(void *)) |
| 827 | { |
| 828 | #if HAVE_PTHREAD |
| 829 | if(_has_pthread >= 0) |
| 830 | { |
| 831 | unsigned count = _threadpool_count_parallel(self); |
| 832 | PTHREAD_VERIFY(pthread_mutex_lock(&self->mutex)); |
| 833 | |
| 834 | /* Do not call threadpool_run() twice without a threadpool_wait() in the middle. */ |
| 835 | assert(!self->parallel_pending); |
| 836 | assert(!self->parallel_unfinished); |
| 837 | |
| 838 | self->parallel_pending = count; |
| 839 | self->parallel_unfinished = count; |
| 840 | self->thread_run = func; |
| 841 | PTHREAD_VERIFY(pthread_cond_broadcast(&self->cond)); |
| 842 | PTHREAD_VERIFY(pthread_mutex_unlock(&self->mutex)); |
| 843 | } |
| 844 | #endif |
| 845 | |
| 846 | /* It's perfectly valid to move this to the beginning of threadpool_wait(). */ |
| 847 | { |
| 848 | void *thread = self->serial_threads; |
| 849 | unsigned i, count = _threadpool_count_serial(self); |
| 850 | for(i = 0; i != count; ++i) |
| 851 | { |
| 852 | func(thread); |
| 853 | thread = (char *)thread + self->thread_size; |
| 854 | } |
| 855 | } |
| 856 | } |
| 857 | |
/* Blocks until every parallel thread has finished the run most recently
   started by threadpool_run(). Returns immediately when threads are not in
   use. */
void threadpool_wait(struct threadpool *self)
{
#if HAVE_PTHREAD
    if(_has_pthread < 0)
        return;

    PTHREAD_VERIFY(pthread_mutex_lock(&self->mutex));

    while(self->parallel_unfinished != 0)
    {
        PTHREAD_VERIFY(pthread_cond_wait(&self->cond, &self->mutex));
    }

    PTHREAD_VERIFY(pthread_mutex_unlock(&self->mutex));
#endif
}
| 870 | |
| 871 | /* io_thread - */ |
| 872 | |
| 873 | #if HAVE_PTHREAD |
/* Without threads at compile time, there are only stubs in thread_util.h. */
| 875 | |
/*
   Nonzero when the version cc_major.cc_minor is at least
   req_major.req_minor. When the major numbers differ they alone decide;
   otherwise the minor numbers are compared. Safe to use in #if expressions.
*/
# define VERSION_CHECK(cc_major, cc_minor, req_major, req_minor) \
	((cc_major) != (req_major) ? \
		(cc_major) > (req_major) : \
		(cc_minor) >= (req_minor))
| 879 | |
| 880 | # if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 7) || \ |
| 881 | defined(__clang__) && \ |
| 882 | (!defined(__apple_build_version__) && VERSION_CHECK(__clang_major__, __clang_minor__, 3, 1) || \ |
| 883 | defined(__apple_build_version__) && VERSION_CHECK(__clang_major__, __clang_minor__, 3, 1)) || \ |
| 884 | defined(__ICC) && __ICC >= 1400 |
| 885 | |
| 886 | /* |
| 887 | Clang 3.0 has a partial implementation of GNU atomics; 3.1 rounds it out. |
| 888 | http://llvm.org/viewvc/llvm-project/cfe/tags/RELEASE_30/final/include/clang/Basic/Builtins.def?view=markup |
| 889 | http://llvm.org/viewvc/llvm-project/cfe/tags/RELEASE_31/final/include/clang/Basic/Builtins.def?view=markup |
| 890 | |
| 891 | Apple changes the Clang version to track Xcode versions; use |
| 892 | __apple_build_version__ to distinguish between the two. |
| 893 | |
| 894 | Xcode 4.3 uses Apple LLVM 3.1, which corresponds to Clang 3.1. |
| 895 | https://en.wikipedia.org/wiki/Xcode |
| 896 | |
| 897 | Earlier versions of Intel C++ may also support these intrinsics. |
| 898 | */ |
| 899 | |
/* Atomic accessors for an io_thread's status, built on the GNU __atomic
   builtins with sequentially consistent ordering.
   _status_load(status) reads *status.
   _status_exchange(obj, desired) stores desired into *obj and yields the
   value that was there before. */
#define _status_load(status) (__atomic_load_n((status), __ATOMIC_SEQ_CST))
#define _status_exchange(obj, desired) (__atomic_exchange_n((obj), (desired), __ATOMIC_SEQ_CST))
| 902 | |
| 903 | /* C11 atomics are around the corner, but they're not here yet for many |
| 904 | systems. (Including mine.) */ |
| 905 | /* |
| 906 | #elif __STDC_VERSION__ >= 201112l && !defined __STDC_NO_ATOMICS__ |
| 907 | |
| 908 | #include <stdatomic.h> |
| 909 | |
| 910 | #define _status_load(status) (atomic_load((status))) |
| 911 | #define _status_exchange(obj, desired) (atomic_exchange((obj), (desired))) |
| 912 | */ |
| 913 | |
/* Solaris provides atomic ops on at least Solaris 10. See atomic_swap(3C) and
   membar_ops(3C). This would probably also need a snippet in configure.in.
   http://graegert.com/programming/using-atomic-operations-in-c-on-solaris-10
*/
| 918 | |
| 919 | # else |
| 920 | |
| 921 | /* No atomic variables, so here's some ugly mutex-based code instead. */ |
| 922 | |
/* One process-wide mutex that serializes every status access made through the
   fallback _status_load() / _status_exchange() below. It is statically
   initialized and nothing ever destroys it.

   Internal linkage: this is an implementation detail of this file, so it
   must not be exported as a global symbol. */
static pthread_mutex_t _global_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Acquire / release _global_mutex. */
#define _lock() PTHREAD_VERIFY(pthread_mutex_lock(&_global_mutex))
#define _unlock() PTHREAD_VERIFY(pthread_mutex_unlock(&_global_mutex))
| 928 | |
| 929 | static enum _io_thread_status _status_load(enum _io_thread_status *status) |
| 930 | { |
| 931 | enum _io_thread_status result; |
| 932 | _lock(); |
| 933 | result = *status; |
| 934 | _unlock(); |
| 935 | return result; |
| 936 | } |
| 937 | |
| 938 | static enum _io_thread_status _status_exchange(enum _io_thread_status *obj, enum _io_thread_status desired) |
| 939 | { |
| 940 | enum _io_thread_status result; |
| 941 | _lock(); |
| 942 | result = *obj; |
| 943 | *obj = desired; |
| 944 | _unlock(); |
| 945 | return result; |
| 946 | } |
| 947 | |
| 948 | # endif |
| 949 | |
| 950 | void *io_thread_create(struct io_thread *self, void *parent, void *(*start_routine)(void *), Display *dpy, unsigned stacksize) |
| 951 | { |
| 952 | if(threads_available(dpy) >= 0) |
| 953 | { |
| 954 | int error; |
| 955 | pthread_attr_t attr; |
| 956 | pthread_attr_t *attr_ptr = NULL; |
| 957 | |
| 958 | if(stacksize) |
| 959 | { |
| 960 | attr_ptr = &attr; |
| 961 | if(pthread_attr_init(&attr)) |
| 962 | return NULL; |
| 963 | # if (defined _POSIX_SOURCE || defined _POSIX_C_SOURCE || defined _XOPEN_SOURCE) && !defined __GNU__ |
| 964 | /* PTHREAD_STACK_MIN needs the above test. */ |
| 965 | assert(stacksize >= PTHREAD_STACK_MIN); |
| 966 | # endif |
| 967 | PTHREAD_VERIFY(pthread_attr_setstacksize(&attr, stacksize)); |
| 968 | } |
| 969 | |
| 970 | /* This doesn't need to be an atomic store, since pthread_create(3) |
| 971 | "synchronizes memory with respect to other threads". |
| 972 | http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_11 */ |
| 973 | self->status = _io_thread_working; |
| 974 | |
| 975 | error = pthread_create(&self->thread, attr_ptr, start_routine, parent); |
| 976 | assert(!error || error == EAGAIN); |
| 977 | if(error) |
| 978 | parent = NULL; |
| 979 | |
| 980 | if(attr_ptr) |
| 981 | PTHREAD_VERIFY(pthread_attr_destroy(attr_ptr)); |
| 982 | |
| 983 | return parent; |
| 984 | } |
| 985 | |
| 986 | return NULL; |
| 987 | } |
| 988 | |
| 989 | int io_thread_return(struct io_thread *self) |
| 990 | { |
| 991 | if(_has_pthread >= 0) |
| 992 | { |
| 993 | enum _io_thread_status old_status = _status_exchange(&self->status, _io_thread_done); |
| 994 | assert(old_status == _io_thread_working || |
| 995 | old_status == _io_thread_cancelled); |
| 996 | return old_status != _io_thread_working; |
| 997 | } |
| 998 | |
| 999 | return 0; |
| 1000 | } |
| 1001 | |
| 1002 | int io_thread_is_done(struct io_thread *self) |
| 1003 | { |
| 1004 | if(_has_pthread >= 0) |
| 1005 | { |
| 1006 | int result = _status_load(&self->status); |
| 1007 | assert(result != _io_thread_cancelled); |
| 1008 | return result; |
| 1009 | } |
| 1010 | return 1; |
| 1011 | } |
| 1012 | |
| 1013 | int io_thread_cancel(struct io_thread *self) |
| 1014 | { |
| 1015 | if(_has_pthread >= 0) |
| 1016 | { |
| 1017 | enum _io_thread_status old_status = |
| 1018 | _status_exchange(&self->status, _io_thread_cancelled); |
| 1019 | assert(old_status == _io_thread_working || |
| 1020 | old_status == _io_thread_done); |
| 1021 | |
| 1022 | PTHREAD_VERIFY(pthread_detach(self->thread)); |
| 1023 | return old_status != _io_thread_working; |
| 1024 | } |
| 1025 | |
| 1026 | return 0; |
| 1027 | } |
| 1028 | |
| 1029 | void io_thread_finish(struct io_thread *self) |
| 1030 | { |
| 1031 | if(_has_pthread >= 0) |
| 1032 | { |
| 1033 | # ifndef NDEBUG |
| 1034 | enum _io_thread_status status = _status_load(&self->status); |
| 1035 | assert(status == _io_thread_working || |
| 1036 | status == _io_thread_done); |
| 1037 | # endif |
| 1038 | PTHREAD_VERIFY(pthread_join(self->thread, NULL)); |
| 1039 | assert(_status_load(&self->status) == _io_thread_done); |
| 1040 | } |
| 1041 | } |
| 1042 | |
| 1043 | #endif /* HAVE_PTHREAD */ |