20 #ifndef RAJA_policy_atomic_builtin_HPP
21 #define RAJA_policy_atomic_builtin_HPP
23 #include "RAJA/config.hpp"
27 #if defined(RAJA_COMPILER_MSVC) || \
28 ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
36 #if defined(RAJA_ENABLE_HIP)
37 #define RAJA_DEVICE_HIP RAJA_HOST_DEVICE
39 #define RAJA_DEVICE_HIP
54 #if defined(RAJA_COMPILER_MSVC) || \
55 ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
63 struct builtin_useIntrinsic
65 static constexpr
bool value =
66 std::is_same<T, char>::value || std::is_same<T, short>::value ||
67 std::is_same<T, long>::value || std::is_same<T, long long>::value;
75 struct builtin_useReinterpret
77 static constexpr
bool value =
79 (
sizeof(T) == 1 ||
sizeof(T) == 2 ||
sizeof(T) == 4 ||
sizeof(T) == 8);
81 using type = std::conditional_t<
84 std::conditional_t<
sizeof(T) == 2,
86 std::conditional_t<
sizeof(T) == 4,
long,
long long>>>;
96 static constexpr
bool value =
98 (
sizeof(T) == 1 ||
sizeof(T) == 2 ||
sizeof(T) == 4 ||
sizeof(T) == 8);
111 return _InterlockedOr8(acc, value);
116 char return_val = _InterlockedOr8((
char*)acc, *((
char*)&value));
117 return *(
reinterpret_cast<uint8_t*
>(&return_val));
122 return _InterlockedOr16(acc, value);
127 short return_val = _InterlockedOr16((
short*)acc, *((
short*)&value));
128 return *(
reinterpret_cast<uint16_t*
>(&return_val));
133 return _InterlockedOr(acc, value);
138 long return_val = _InterlockedOr((
long*)acc, *((
long*)&value));
139 return *(
reinterpret_cast<uint32_t*
>(&return_val));
146 return _InterlockedOr64(acc, value);
151 long long return_val =
152 _InterlockedOr64((
long long*)acc, *((
long long*)&value));
153 return *(
reinterpret_cast<uint64_t*
>(&return_val));
162 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
173 return _InterlockedExchange8(acc, value);
178 char return_val = _InterlockedExchange8((
char*)acc, *((
char*)&value));
179 return *(
reinterpret_cast<uint8_t*
>(&return_val));
184 return _InterlockedExchange16(acc, value);
189 short return_val = _InterlockedExchange16((
short*)acc, *((
short*)&value));
190 return *(
reinterpret_cast<uint16_t*
>(&return_val));
195 return _InterlockedExchange(acc, value);
200 long return_val = _InterlockedExchange((
long*)acc, *((
long*)&value));
201 return *(
reinterpret_cast<uint32_t*
>(&return_val));
208 return _InterlockedExchange64(acc, value);
213 long long return_val =
214 _InterlockedExchange64((
long long*)acc, *((
long long*)&value));
215 return *(
reinterpret_cast<uint64_t*
>(&return_val));
225 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
236 return _InterlockedCompareExchange8(acc, value, compare);
243 char return_val = _InterlockedCompareExchange8((
char*)acc, *((
char*)&value),
245 return *(
reinterpret_cast<uint8_t*
>(&return_val));
250 return _InterlockedCompareExchange16(acc, value, compare);
257 short return_val = _InterlockedCompareExchange16(
258 (
short*)acc, *((
short*)&value), *((
short*)&compare));
259 return *(
reinterpret_cast<uint16_t*
>(&return_val));
264 return _InterlockedCompareExchange(acc, value, compare);
271 long return_val = _InterlockedCompareExchange((
long*)acc, *((
long*)&value),
273 return *(
reinterpret_cast<uint32_t*
>(&return_val));
282 return _InterlockedCompareExchange64(acc, value, compare);
289 long long return_val = _InterlockedCompareExchange64(
290 (
long long*)acc, *((
long long*)&value), *((
long long*)&compare));
291 return *(
reinterpret_cast<uint64_t*
>(&return_val));
302 return _InterlockedExchangeAdd8(acc, value);
307 char return_val = _InterlockedExchangeAdd8((
char*)acc, *((
char*)&value));
308 return *(
reinterpret_cast<uint8_t*
>(&return_val));
313 return _InterlockedExchangeAdd16(acc, value);
318 short return_val = _InterlockedExchangeAdd16((
short*)acc, *((
short*)&value));
319 return *(
reinterpret_cast<uint16_t*
>(&return_val));
324 return _InterlockedExchangeAdd(acc, value);
329 long return_val = _InterlockedExchangeAdd((
long*)acc, *((
long*)&value));
330 return *(
reinterpret_cast<uint32_t*
>(&return_val));
337 return _InterlockedExchangeAdd64(acc, value);
342 long long return_val =
343 _InterlockedExchangeAdd64((
long long*)acc, *((
long long*)&value));
344 return *(
reinterpret_cast<uint64_t*
>(&return_val));
355 return _InterlockedExchangeAdd8(acc, -value);
360 char neg_value = -(*((
char*)&(value)));
361 char return_val = _InterlockedExchangeAdd8((
char*)acc, neg_value);
362 return *(
reinterpret_cast<uint8_t*
>(&return_val));
367 return _InterlockedExchangeAdd16(acc, -value);
372 short neg_value = -(*((
short*)&(value)));
373 short return_val = _InterlockedExchangeAdd16((
short*)acc, neg_value);
374 return *(
reinterpret_cast<uint16_t*
>(&return_val));
379 return _InterlockedExchangeAdd(acc, -value);
384 long neg_value = -(*((
long*)&(value)));
385 long return_val = _InterlockedExchangeAdd((
long*)acc, neg_value);
386 return *(
reinterpret_cast<uint32_t*
>(&return_val));
393 return _InterlockedExchangeAdd64(acc, -value);
398 long long neg_value = -(*((
long long*)&(value)));
399 long long return_val = _InterlockedExchangeAdd64((
long long*)acc, neg_value);
400 return *(
reinterpret_cast<uint64_t*
>(&return_val));
411 return _InterlockedAnd8(acc, value);
416 char return_val = _InterlockedAnd8((
char*)acc, *((
char*)&value));
417 return *(
reinterpret_cast<uint8_t*
>(&return_val));
422 return _InterlockedAnd16(acc, value);
427 short return_val = _InterlockedAnd16((
short*)acc, *((
short*)&value));
428 return *(
reinterpret_cast<uint16_t*
>(&return_val));
433 return _InterlockedAnd(acc, value);
438 long return_val = _InterlockedAnd((
long*)acc, *((
long*)&value));
439 return *(
reinterpret_cast<uint32_t*
>(&return_val));
446 return _InterlockedAnd64(acc, value);
451 long long return_val =
452 _InterlockedAnd64((
long long*)acc, *((
long long*)&value));
453 return *(
reinterpret_cast<uint64_t*
>(&return_val));
464 return _InterlockedXor8(acc, value);
469 char return_val = _InterlockedXor8((
char*)acc, *((
char*)&value));
470 return *(
reinterpret_cast<uint8_t*
>(&return_val));
475 return _InterlockedXor16(acc, value);
480 short return_val = _InterlockedXor16((
short*)acc, *((
short*)&value));
481 return *(
reinterpret_cast<uint16_t*
>(&return_val));
486 return _InterlockedXor(acc, value);
491 long return_val = _InterlockedXor((
long*)acc, *((
long*)&value));
492 return *(
reinterpret_cast<uint32_t*
>(&return_val));
499 return _InterlockedXor64(acc, value);
504 long long return_val =
505 _InterlockedXor64((
long long*)acc, *((
long long*)&value));
506 return *(
reinterpret_cast<uint64_t*
>(&return_val));
524 (std::is_integral<T>::value || std::is_enum<T>::value) &&
525 (
sizeof(T) == 1 ||
sizeof(T) == 2 ||
sizeof(T) == 4 ||
sizeof(T) == 8);
535 static constexpr
bool value = !std::is_integral<T>::value &&
536 !std::is_enum<T>::value &&
538 #
if !defined(UINT8_MAX)
539 &&
sizeof(
unsigned char) == 1
543 #
if !defined(UINT16_MAX)
544 &&
sizeof(
unsigned short) == 2
548 #
if !defined(UINT32_MAX)
549 &&
sizeof(
unsigned int) == 4
553 #
if !defined(UINT64_MAX)
554 &&
sizeof(
unsigned long long) == 8
559 std::conditional_t<
sizeof(T) == 1,
560 #
if defined(UINT8_MAX)
565 std::conditional_t<
sizeof(T) == 2,
566 #
if defined(UINT16_MAX)
571 std::conditional_t<
sizeof(T) == 4,
572 #
if defined(UINT32_MAX)
577 #if defined(UINT64_MAX)
580 unsigned long long>>>;
592 !std::is_integral<T>::value && !std::is_enum<T>::value &&
593 (
sizeof(T) == 1 ||
sizeof(T) == 2 ||
sizeof(T) == 4 ||
sizeof(T) == 8);
605 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
608 return __atomic_load_n(acc, __ATOMIC_RELAXED);
615 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
618 __atomic_store_n(acc, value, __ATOMIC_RELAXED);
625 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
628 return __atomic_exchange_n(acc, value, __ATOMIC_RELAXED);
635 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
638 __atomic_compare_exchange_n(acc, &compare, value,
false, __ATOMIC_RELAXED,
647 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
650 return __atomic_fetch_add(acc, value, __ATOMIC_RELAXED);
657 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
660 return __atomic_fetch_sub(acc, value, __ATOMIC_RELAXED);
667 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
670 return __atomic_fetch_and(acc, value, __ATOMIC_RELAXED);
677 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
680 return __atomic_fetch_or(acc, value, __ATOMIC_RELAXED);
687 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
690 return __atomic_fetch_xor(acc, value, __ATOMIC_RELAXED);
712 std::enable_if_t<builtin_useReinterpret<T>::value,
bool> =
true>
717 return RAJA::util::reinterp_A_as_B<R, T>(
725 std::enable_if_t<builtin_useReinterpret<T>::value,
bool> =
true>
728 using R = builtin_useReinterpret_t<T>;
731 RAJA::util::reinterp_A_as_B<T, R>(value));
738 std::enable_if_t<builtin_useReinterpret<T>::value,
bool> =
true>
741 using R = builtin_useReinterpret_t<T>;
744 reinterpret_cast<R*
>(acc), RAJA::util::reinterp_A_as_B<T, R>(value)));
751 std::enable_if_t<builtin_useReinterpret<T>::value,
bool> =
true>
754 using R = builtin_useReinterpret_t<T>;
757 reinterpret_cast<R*
>(acc), RAJA::util::reinterp_A_as_B<T, R>(compare),
758 RAJA::util::reinterp_A_as_B<T, R>(value)));
771 std::enable_if_t<builtin_useIntrinsic<T>::value,
bool> =
true>
783 std::enable_if_t<builtin_useReinterpret<T>::value,
bool> =
true>
786 using R = builtin_useReinterpret_t<T>;
789 RAJA::util::reinterp_A_as_B<T, R>(b));
797 template<
typename T,
typename Oper>
818 template<
typename T,
typename Oper,
typename ShortCircuit>
849 template<typename T, std::enable_if_t<builtin_useCAS<T>::value,
bool> =
true>
860 template<typename T, std::enable_if_t<builtin_useCAS<T>::value,
bool> =
true>
871 template<typename T, std::enable_if_t<builtin_useCAS<T>::value,
bool> =
true>
882 template<typename T, std::enable_if_t<builtin_useCAS<T>::value,
bool> =
true>
893 template<typename T, std::enable_if_t<builtin_useCAS<T>::value,
bool> =
true>
934 return value < old ? value : old;
937 return current <= value;
947 return old < value ? value : old;
950 return value <= current;
964 return value <= old ? static_cast<T>(0) : old +
static_cast<T
>(1);
978 return old ==
static_cast<T
>(0) || value < old ? value
979 : old -
static_cast<T
>(1);
1001 template<
typename T>
1007 template<
typename T>
Header file for reinterpreting type conversions.
#define RAJA_DEVICE_HIP
Definition: atomic_builtin.hpp:39
Header file for common RAJA internal macro definitions.
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T *acc, T compare, T value)
Definition: atomic_builtin.hpp:636
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T *acc, T value)
Definition: atomic_builtin.hpp:648
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T *acc, T value)
Definition: atomic_builtin.hpp:658
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T *acc)
Definition: atomic_builtin.hpp:606
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T *acc, Oper &&oper)
Definition: atomic_builtin.hpp:798
RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T *acc, T value)
Definition: atomic_builtin.hpp:616
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr(T *acc, T value)
Definition: atomic_builtin.hpp:678
RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T &a, const T &b)
Definition: atomic_builtin.hpp:772
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T *acc, T value)
Definition: atomic_builtin.hpp:668
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T *acc, T value)
Definition: atomic_builtin.hpp:626
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T *acc, T value)
Definition: atomic_builtin.hpp:688
typename builtin_useReinterpret< T >::type builtin_useReinterpret_t
Definition: atomic_builtin.hpp:706
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T *acc, T value)
Atomic bitwise AND, equivalent to (*acc) = (*acc) & value. This only works with integral data types.
Definition: atomic.hpp:224
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T *acc, T value)
Atomic bitwise OR, equivalent to (*acc) = (*acc) | value. This only works with integral data types.
Definition: atomic.hpp:240
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(T *acc)
Atomic load.
Definition: atomic.hpp:92
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc)
Atomic decrement.
Definition: atomic.hpp:195
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T *acc, T value)
Atomic add.
Definition: atomic.hpp:117
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T *acc, T value)
Atomic bitwise XOR, equivalent to (*acc) = (*acc) ^ value. This only works with integral data types.
Definition: atomic.hpp:256
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T *acc, T value)
Atomic store.
Definition: atomic.hpp:104
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(T *acc, T value)
Atomic maximum, equivalent to (*acc) = std::max(*acc, value).
Definition: atomic.hpp:156
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T *acc, T compare, T value)
Atomic compare and swap.
Definition: atomic.hpp:286
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc)
Atomic increment.
Definition: atomic.hpp:168
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T *acc, T value)
Atomic minimum, equivalent to (*acc) = std::min(*acc, value).
Definition: atomic.hpp:143
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T *acc, T value)
Atomic subtract.
Definition: atomic.hpp:130
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T *acc, T value)
Atomic value exchange.
Definition: atomic.hpp:271
Atomic policy that uses the compiler's builtin __atomic_XXX routines.
Definition: atomic_builtin.hpp:48
Definition: atomic_builtin.hpp:590
static constexpr bool value
Definition: atomic_builtin.hpp:591
Definition: atomic_builtin.hpp:522
static constexpr bool value
Definition: atomic_builtin.hpp:523
Definition: atomic_builtin.hpp:534
std::conditional_t< sizeof(T)==1, unsigned char, std::conditional_t< sizeof(T)==2, unsigned short, std::conditional_t< sizeof(T)==4, unsigned int, unsigned long long > >> type
Definition: atomic_builtin.hpp:580
static constexpr bool value
Definition: atomic_builtin.hpp:535