GNU Radio's TEST Package
atomicops.h
// ©2013-2015 Cameron Desrochers.
// Distributed under the simplified BSD license (see the license file that
// should have come with this header).
// Uses Jeff Preshing's semaphore implementation (under the terms of its
// separate zlib license, embedded below).

#pragma once

// Provides portable (VC++2010+, Intel ICC 13, GCC 4.7+, and anything C++11 compliant) implementation
// of low-level memory barriers, plus a few semi-portable utility macros (for inlining and alignment).
// Also has a basic atomic type (limited to hardware-supported atomics with no memory ordering guarantees).
// Uses the AE_* prefix for macros (historical reasons), and the "moodycamel" namespace for symbols.
13
#include <cassert>
#include <type_traits>


// Platform detection
#if defined(__INTEL_COMPILER)
#define AE_ICC
#elif defined(_MSC_VER)
#define AE_VCPP
#elif defined(__GNUC__)
#define AE_GCC
#endif

#if defined(_M_IA64) || defined(__ia64__)
#define AE_ARCH_IA64
#elif defined(_WIN64) || defined(__amd64__) || defined(_M_X64) || defined(__x86_64__)
#define AE_ARCH_X64
#elif defined(_M_IX86) || defined(__i386__)
#define AE_ARCH_X86
#elif defined(_M_PPC) || defined(__powerpc__)
#define AE_ARCH_PPC
#else
#define AE_ARCH_UNKNOWN
#endif


// AE_UNUSED
#define AE_UNUSED(x) ((void)x)


// AE_FORCEINLINE
#if defined(AE_VCPP) || defined(AE_ICC)
#define AE_FORCEINLINE __forceinline
#elif defined(AE_GCC)
//#define AE_FORCEINLINE __attribute__((always_inline))
#define AE_FORCEINLINE inline
#else
#define AE_FORCEINLINE inline
#endif


// AE_ALIGN
#if defined(AE_VCPP) || defined(AE_ICC)
#define AE_ALIGN(x) __declspec(align(x))
#elif defined(AE_GCC)
#define AE_ALIGN(x) __attribute__((aligned(x)))
#else
// Assume GCC compliant syntax...
#define AE_ALIGN(x) __attribute__((aligned(x)))
#endif
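
// Example (illustrative only, not part of the original header): using the
// utility macros above.
//
//     AE_ALIGN(64) static int counter;            // cache-line-aligned storage
//     static AE_FORCEINLINE int twice(int x) { return x + x; }
//     static void f(int arg) { AE_UNUSED(arg); }  // silence unused-parameter warnings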


// Portable atomic fences implemented below:

namespace moodycamel {

enum memory_order {
    memory_order_relaxed,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst,

    // memory_order_sync: Forces a full sync:
    // #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad
    memory_order_sync = memory_order_seq_cst
};

} // end namespace moodycamel

#if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || defined(AE_ICC)
// VS2010 and ICC13 don't support std::atomic_*_fence, implement our own fences

#include <intrin.h>

#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
#define AeFullSync _mm_mfence
#define AeLiteSync _mm_mfence
#elif defined(AE_ARCH_IA64)
#define AeFullSync __mf
#define AeLiteSync __mf
#elif defined(AE_ARCH_PPC)
#include <ppcintrinsics.h>
#define AeFullSync __sync
#define AeLiteSync __lwsync
#endif


#ifdef AE_VCPP
#pragma warning(push)
#pragma warning(disable: 4365) // Disable erroneous 'conversion from long to unsigned int, signed/unsigned mismatch' error when using `assert`
#ifdef __cplusplus_cli
#pragma managed(push, off)
#endif
#endif

namespace moodycamel {

AE_FORCEINLINE void compiler_fence(memory_order order)
{
    switch (order) {
        case memory_order_relaxed: break;
        case memory_order_acquire: _ReadBarrier(); break;
        case memory_order_release: _WriteBarrier(); break;
        case memory_order_acq_rel: _ReadWriteBarrier(); break;
        case memory_order_seq_cst: _ReadWriteBarrier(); break;
        default: assert(false);
    }
}

// x86/x64 have a strong memory model -- all loads and stores have
// acquire and release semantics automatically (so only need compiler
// barriers for those).
#if defined(AE_ARCH_X86) || defined(AE_ARCH_X64)
AE_FORCEINLINE void fence(memory_order order)
{
    switch (order) {
        case memory_order_relaxed: break;
        case memory_order_acquire: _ReadBarrier(); break;
        case memory_order_release: _WriteBarrier(); break;
        case memory_order_acq_rel: _ReadWriteBarrier(); break;
        case memory_order_seq_cst:
            _ReadWriteBarrier();
            AeFullSync();
            _ReadWriteBarrier();
            break;
        default: assert(false);
    }
}
#else
AE_FORCEINLINE void fence(memory_order order)
{
    // Non-specialized arch, use heavier memory barriers everywhere just in case :-(
    switch (order) {
        case memory_order_relaxed:
            break;
        case memory_order_acquire:
            _ReadBarrier();
            AeLiteSync();
            _ReadBarrier();
            break;
        case memory_order_release:
            _WriteBarrier();
            AeLiteSync();
            _WriteBarrier();
            break;
        case memory_order_acq_rel:
            _ReadWriteBarrier();
            AeLiteSync();
            _ReadWriteBarrier();
            break;
        case memory_order_seq_cst:
            _ReadWriteBarrier();
            AeFullSync();
            _ReadWriteBarrier();
            break;
        default: assert(false);
    }
}
#endif
} // end namespace moodycamel
#else
// Use standard library of atomics
#include <atomic>

namespace moodycamel {

AE_FORCEINLINE void compiler_fence(memory_order order)
{
    switch (order) {
        case memory_order_relaxed: break;
        case memory_order_acquire: std::atomic_signal_fence(std::memory_order_acquire); break;
        case memory_order_release: std::atomic_signal_fence(std::memory_order_release); break;
        case memory_order_acq_rel: std::atomic_signal_fence(std::memory_order_acq_rel); break;
        case memory_order_seq_cst: std::atomic_signal_fence(std::memory_order_seq_cst); break;
        default: assert(false);
    }
}

AE_FORCEINLINE void fence(memory_order order)
{
    switch (order) {
        case memory_order_relaxed: break;
        case memory_order_acquire: std::atomic_thread_fence(std::memory_order_acquire); break;
        case memory_order_release: std::atomic_thread_fence(std::memory_order_release); break;
        case memory_order_acq_rel: std::atomic_thread_fence(std::memory_order_acq_rel); break;
        case memory_order_seq_cst: std::atomic_thread_fence(std::memory_order_seq_cst); break;
        default: assert(false);
    }
}

} // end namespace moodycamel

#endif
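
// Example (an illustrative sketch, not part of the original header): the
// canonical release/acquire pairing these fences are meant for. `data` and
// `ready` are hypothetical variables.
//
//     static int data;
//     static moodycamel::weak_atomic<int> ready;   // weak_atomic is defined below
//
//     // Producer:
//     data = 42;
//     moodycamel::fence(moodycamel::memory_order_release); // order `data` before `ready`
//     ready = 1;
//
//     // Consumer:
//     while (ready.load() == 0) { /* spin */ }
//     moodycamel::fence(moodycamel::memory_order_acquire); // order `ready` before `data`
//     assert(data == 42);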


#if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli))
#define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
#endif

#ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
#include <atomic>
#endif
#include <utility>

// WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
// Provides basic support for atomic variables -- no memory ordering guarantees are provided.
// The guarantee of atomicity is only made for types that already have atomic load and store guarantees
// at the hardware level -- on most platforms this generally means aligned pointers and integers (only).
namespace moodycamel {
template<typename T>
class weak_atomic
{
public:
    weak_atomic() { }
#ifdef AE_VCPP
#pragma warning(disable: 4100) // Get rid of (erroneous) 'unreferenced formal parameter' warning
#endif
    template<typename U> weak_atomic(U&& x) : value(std::forward<U>(x)) { }
#ifdef __cplusplus_cli
    // Work around bug with universal reference/nullptr combination that only appears when /clr is on
    weak_atomic(nullptr_t) : value(nullptr) { }
#endif
    weak_atomic(weak_atomic const& other) : value(other.value) { }
    weak_atomic(weak_atomic&& other) : value(std::move(other.value)) { }
#ifdef AE_VCPP
#pragma warning(default: 4100)
#endif

    AE_FORCEINLINE operator T() const { return load(); }


#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
    template<typename U> AE_FORCEINLINE weak_atomic const& operator=(U&& x) { value = std::forward<U>(x); return *this; }
    AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) { value = other.value; return *this; }

    AE_FORCEINLINE T load() const { return value; }

    AE_FORCEINLINE T fetch_add_acquire(T increment)
    {
#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
        if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
#if defined(_M_AMD64)
        else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
#endif
#else
#error Unsupported platform
#endif
        assert(false && "T must be either a 32 or 64 bit type");
        return value;
    }

    AE_FORCEINLINE T fetch_add_release(T increment)
    {
#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
        if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
#if defined(_M_AMD64)
        else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
#endif
#else
#error Unsupported platform
#endif
        assert(false && "T must be either a 32 or 64 bit type");
        return value;
    }
#else
    template<typename U>
    AE_FORCEINLINE weak_atomic const& operator=(U&& x)
    {
        value.store(std::forward<U>(x), std::memory_order_relaxed);
        return *this;
    }

    AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other)
    {
        value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }

    AE_FORCEINLINE T load() const { return value.load(std::memory_order_relaxed); }

    AE_FORCEINLINE T fetch_add_acquire(T increment)
    {
        return value.fetch_add(increment, std::memory_order_acquire);
    }

    AE_FORCEINLINE T fetch_add_release(T increment)
    {
        return value.fetch_add(increment, std::memory_order_release);
    }
#endif


private:
#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
    // No std::atomic support, but still need to circumvent compiler optimizations.
    // `volatile` will make memory access slow, but is guaranteed to be reliable.
    volatile T value;
#else
    std::atomic<T> value;
#endif
};

} // end namespace moodycamel
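
// Example (illustrative only): weak_atomic guarantees tear-free loads and
// stores but imposes no ordering of its own; combine it with fence() above
// when ordering matters. `tail` is a hypothetical name.
//
//     moodycamel::weak_atomic<size_t> tail(0);
//     tail.fetch_add_release(1);    // RMW with release ordering
//     size_t t = tail.load();       // relaxed load; may be stale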



// Portable single-producer, single-consumer semaphore below:

#if defined(_WIN32)
// Avoid including windows.h in a header; we only need a handful of
// items, so we'll redeclare them here (this is relatively safe since
// the API generally has to remain stable between Windows versions).
// I know this is an ugly hack but it still beats polluting the global
// namespace with thousands of generic names or adding a .cpp for nothing.
extern "C" {
    struct _SECURITY_ATTRIBUTES;
    __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
    __declspec(dllimport) int __stdcall CloseHandle(void* hObject);
    __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
    __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
}
#elif defined(__MACH__)
#include <mach/mach.h>
#elif defined(__unix__)
#include <cerrno>      // for errno/EINTR in Semaphore::wait() below
#include <semaphore.h>
#endif

namespace moodycamel
{
    // Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
    // portable + lightweight semaphore implementations, originally from
    // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
    // LICENSE:
    // Copyright (c) 2015 Jeff Preshing
    //
    // This software is provided 'as-is', without any express or implied
    // warranty. In no event will the authors be held liable for any damages
    // arising from the use of this software.
    //
    // Permission is granted to anyone to use this software for any purpose,
    // including commercial applications, and to alter it and redistribute it
    // freely, subject to the following restrictions:
    //
    // 1. The origin of this software must not be misrepresented; you must not
    //    claim that you wrote the original software. If you use this software
    //    in a product, an acknowledgement in the product documentation would be
    //    appreciated but is not required.
    // 2. Altered source versions must be plainly marked as such, and must not be
    //    misrepresented as being the original software.
    // 3. This notice may not be removed or altered from any source distribution.
    namespace spsc_sema
    {
#if defined(_WIN32)
        class Semaphore
        {
        private:
            void* m_hSema;

            Semaphore(const Semaphore& other);
            Semaphore& operator=(const Semaphore& other);

        public:
            Semaphore(int initialCount = 0)
            {
                assert(initialCount >= 0);
                const long maxLong = 0x7fffffff;
                m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
            }

            ~Semaphore()
            {
                CloseHandle(m_hSema);
            }

            void wait()
            {
                const unsigned long infinite = 0xffffffff;
                WaitForSingleObject(m_hSema, infinite);
            }

            void signal(int count = 1)
            {
                ReleaseSemaphore(m_hSema, count, nullptr);
            }
        };
#elif defined(__MACH__)
        //---------------------------------------------------------
        // Semaphore (Apple iOS and OSX)
        // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
        //---------------------------------------------------------
        class Semaphore
        {
        private:
            semaphore_t m_sema;

            Semaphore(const Semaphore& other);
            Semaphore& operator=(const Semaphore& other);

        public:
            Semaphore(int initialCount = 0)
            {
                assert(initialCount >= 0);
                semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
            }

            ~Semaphore()
            {
                semaphore_destroy(mach_task_self(), m_sema);
            }

            void wait()
            {
                semaphore_wait(m_sema);
            }

            void signal()
            {
                semaphore_signal(m_sema);
            }

            void signal(int count)
            {
                while (count-- > 0)
                {
                    semaphore_signal(m_sema);
                }
            }
        };
#elif defined(__unix__)
        //---------------------------------------------------------
        // Semaphore (POSIX, Linux)
        //---------------------------------------------------------
        class Semaphore
        {
        private:
            sem_t m_sema;

            Semaphore(const Semaphore& other);
            Semaphore& operator=(const Semaphore& other);

        public:
            Semaphore(int initialCount = 0)
            {
                assert(initialCount >= 0);
                sem_init(&m_sema, 0, initialCount);
            }

            ~Semaphore()
            {
                sem_destroy(&m_sema);
            }

            void wait()
            {
                // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
                int rc;
                do
                {
                    rc = sem_wait(&m_sema);
                }
                while (rc == -1 && errno == EINTR);
            }

            void signal()
            {
                sem_post(&m_sema);
            }

            void signal(int count)
            {
                while (count-- > 0)
                {
                    sem_post(&m_sema);
                }
            }
        };
#else
#error Unsupported platform! (No semaphore wrapper available)
#endif

        //---------------------------------------------------------
        // LightweightSemaphore
        //---------------------------------------------------------
        class LightweightSemaphore
        {
        public:
            typedef std::make_signed<std::size_t>::type ssize_t;

        private:
            weak_atomic<ssize_t> m_count;
            Semaphore m_sema;

            void waitWithPartialSpinning()
            {
                ssize_t oldCount;
                // Is there a better way to set the initial spin count?
                // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
                // as threads start hitting the kernel semaphore.
                int spin = 10000;
                while (--spin >= 0)
                {
                    if (m_count.load() > 0)
                    {
                        m_count.fetch_add_acquire(-1);
                        return;
                    }
                    compiler_fence(memory_order_acquire); // Prevent the compiler from collapsing the loop.
                }
                oldCount = m_count.fetch_add_acquire(-1);
                if (oldCount <= 0)
                {
                    m_sema.wait();
                }
            }

        public:
            LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
            {
                assert(initialCount >= 0);
            }

            bool tryWait()
            {
                if (m_count.load() > 0)
                {
                    m_count.fetch_add_acquire(-1);
                    return true;
                }
                return false;
            }

            void wait()
            {
                if (!tryWait())
                    waitWithPartialSpinning();
            }

            void signal(ssize_t count = 1)
            {
                assert(count >= 0);
                ssize_t oldCount = m_count.fetch_add_release(count);
                assert(oldCount >= -1);
                if (oldCount < 0)
                {
                    m_sema.signal(1);
                }
            }

            ssize_t availableApprox() const
            {
                ssize_t count = m_count.load();
                return count > 0 ? count : 0;
            }
        };
    } // end namespace spsc_sema
} // end namespace moodycamel
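
// Example (an illustrative sketch; `queue` is a hypothetical SPSC queue, not
// part of this header): LightweightSemaphore counts available items and only
// touches the kernel semaphore when the consumer actually has to block.
//
//     static moodycamel::spsc_sema::LightweightSemaphore items;
//
//     // Producer thread:
//     //     queue.push(x);
//     //     items.signal();   // m_count goes negative only if the consumer is parked
//
//     // Consumer thread:
//     //     items.wait();     // spins briefly, then falls back to the kernel semaphore
//     //     queue.pop();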

#if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
#pragma warning(pop)
#ifdef __cplusplus_cli
#pragma managed(pop)
#endif
#endif