RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
atomic.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_policy_cuda_atomic_HPP
21 #define RAJA_policy_cuda_atomic_HPP
22 
23 #include "RAJA/config.hpp"
24 
25 #if defined(RAJA_ENABLE_CUDA)
26 
27 #include <stdexcept>
28 #include <type_traits>
29 
30 #if __CUDA__ARCH__ >= 600 && __CUDACC_VER_MAJOR__ >= 11 && \
31  __CUDACC_VER_MINOR__ >= 6
32 #define RAJA_ENABLE_CUDA_ATOMIC_REF
33 #endif
34 
35 #if defined(RAJA_ENABLE_CUDA_ATOMIC_REF)
36 #include <cuda/atomic>
37 #endif
38 
39 #include "camp/list.hpp"
40 
43 
44 #if defined(RAJA_OPENMP_ACTIVE)
46 #endif
47 
48 #include "RAJA/util/EnableIf.hpp"
49 #include "RAJA/util/Operators.hpp"
51 #include "RAJA/util/macros.hpp"
52 
53 // TODO: When we can use if constexpr in C++17, this file can be cleaned up
54 
55 
56 namespace RAJA
57 {
58 
59 
60 namespace detail
61 {
62 
63 
70 template<typename T>
71 struct cuda_useBuiltinCommon
72 {
73  static constexpr bool value = std::is_same<T, int>::value ||
74  std::is_same<T, unsigned int>::value ||
75  std::is_same<T, unsigned long long>::value;
76 };
77 
85 template<typename T>
86 struct cuda_useReinterpretCommon
87 {
88  static constexpr bool value = !cuda_useBuiltinCommon<T>::value &&
89  (sizeof(T) == sizeof(unsigned int) ||
90  sizeof(T) == sizeof(unsigned long long));
91 
92  using type = std::conditional_t<sizeof(T) == sizeof(unsigned int),
93  unsigned int,
94  unsigned long long>;
95 };
96 
100 template<typename T>
101 using cuda_useReinterpretCommon_t = typename cuda_useReinterpretCommon<T>::type;
102 
110 template<typename T,
111  std::enable_if_t<cuda_useBuiltinCommon<T>::value, bool> = true>
112 RAJA_INLINE __device__ T cuda_atomicOr(T* acc, T value)
113 {
114  return ::atomicOr(acc, value);
115 }
116 
125 template<typename T>
126 struct cuda_useBuiltinExchange
127 {
128  static constexpr bool value = std::is_same<T, int>::value ||
129  std::is_same<T, unsigned int>::value ||
130  std::is_same<T, unsigned long long>::value ||
131  std::is_same<T, float>::value;
132 };
133 
138 template<typename T>
139 struct cuda_useReinterpretExchange
140 {
141  static constexpr bool value = !cuda_useBuiltinExchange<T>::value &&
142  (sizeof(T) == sizeof(unsigned int) ||
143  sizeof(T) == sizeof(unsigned long long));
144 
145  using type = std::conditional_t<sizeof(T) == sizeof(unsigned int),
146  unsigned int,
147  unsigned long long>;
148 };
149 
153 template<typename T>
154 using cuda_useReinterpretExchange_t =
155  typename cuda_useReinterpretExchange<T>::type;
156 
161 template<typename T,
162  std::enable_if_t<cuda_useBuiltinExchange<T>::value, bool> = true>
163 RAJA_INLINE __device__ T cuda_atomicExchange(T* acc, T value)
164 {
165  return ::atomicExch(acc, value);
166 }
167 
172 template<typename T,
173  std::enable_if_t<cuda_useReinterpretExchange<T>::value, bool> = true>
174 RAJA_INLINE __device__ T cuda_atomicExchange(T* acc, T value)
175 {
176  using R = cuda_useReinterpretExchange_t<T>;
177 
178  return RAJA::util::reinterp_A_as_B<R, T>(cuda_atomicExchange(
179  reinterpret_cast<R*>(acc), RAJA::util::reinterp_A_as_B<T, R>(value)));
180 }
181 
185 #if defined(RAJA_ENABLE_CUDA_ATOMIC_REF)
186 
187 template<typename T>
188 RAJA_INLINE __device__ T cuda_atomicLoad(T* acc)
189 {
190  return cuda::atomic_ref<T, cuda::thread_scope_device>(*acc).load(
191  cuda::memory_order_relaxed {});
192 }
193 
194 template<typename T>
195 RAJA_INLINE __device__ void cuda_atomicStore(T* acc, T value)
196 {
197  cuda::atomic_ref<T, cuda::thread_scope_device>(*acc).store(
198  value, cuda::memory_order_relaxed {});
199 }
200 
201 #else
202 
203 template<typename T,
204  std::enable_if_t<cuda_useBuiltinCommon<T>::value, bool> = true>
205 RAJA_INLINE __device__ T cuda_atomicLoad(T* acc)
206 {
207  return cuda_atomicOr(acc, static_cast<T>(0));
208 }
209 
210 template<typename T,
211  std::enable_if_t<cuda_useReinterpretCommon<T>::value, bool> = true>
212 RAJA_INLINE __device__ T cuda_atomicLoad(T* acc)
213 {
214  using R = cuda_useReinterpretCommon_t<T>;
215 
216  return RAJA::util::reinterp_A_as_B<R, T>(
217  cuda_atomicLoad(reinterpret_cast<R*>(acc)));
218 }
219 
220 template<typename T>
221 RAJA_INLINE __device__ void cuda_atomicStore(T* acc, T value)
222 {
223  cuda_atomicExchange(acc, value);
224 }
225 
226 #endif
227 
228 
237 template<typename T>
238 struct cuda_useBuiltinCAS
239 {
240  static constexpr bool value =
241 #if __CUDA_ARCH__ >= 700
242  std::is_same<T, unsigned short int>::value ||
243 #endif
244  std::is_same<T, int>::value || std::is_same<T, unsigned int>::value ||
245  std::is_same<T, unsigned long long>::value;
246 };
247 
253 template<typename T>
254 struct cuda_useReinterpretCAS
255 {
256  static constexpr bool value = !cuda_useBuiltinCAS<T>::value &&
257  (
258 #if __CUDA_ARCH__ >= 700
259  sizeof(T) == sizeof(unsigned short) ||
260 #endif
261  sizeof(T) == sizeof(unsigned int) ||
262  sizeof(T) == sizeof(unsigned long long));
263 
264  using type =
265 #if __CUDA_ARCH__ >= 700
266  std::conditional_t<sizeof(T) == sizeof(unsigned short),
267  unsigned short,
268 #endif
269  std::conditional_t<sizeof(T) == sizeof(unsigned int),
270  unsigned int,
271  unsigned long long>
272 #if __CUDA_ARCH__ >= 700
273  >
274 #endif
275  ;
276 };
277 
281 template<typename T>
282 using cuda_useReinterpretCAS_t = typename cuda_useReinterpretCAS<T>::type;
283 
284 template<typename T,
285  std::enable_if_t<cuda_useBuiltinCAS<T>::value, bool> = true>
286 RAJA_INLINE __device__ T cuda_atomicCAS(T* acc, T compare, T value)
287 {
288  return ::atomicCAS(acc, compare, value);
289 }
290 
291 template<typename T,
292  std::enable_if_t<cuda_useReinterpretCAS<T>::value, bool> = true>
293 RAJA_INLINE __device__ T cuda_atomicCAS(T* acc, T compare, T value)
294 {
295  using R = cuda_useReinterpretCAS_t<T>;
296 
297  return RAJA::util::reinterp_A_as_B<R, T>(cuda_atomicCAS(
298  reinterpret_cast<R*>(acc), RAJA::util::reinterp_A_as_B<T, R>(compare),
299  RAJA::util::reinterp_A_as_B<T, R>(value)));
300 }
301 
307 template<typename T,
308  std::enable_if_t<cuda_useBuiltinCommon<T>::value, bool> = true>
309 RAJA_INLINE __device__ bool cuda_atomicCAS_equal(const T& a, const T& b)
310 {
311  return a == b;
312 }
313 
314 template<typename T,
315  std::enable_if_t<cuda_useReinterpretCommon<T>::value, bool> = true>
316 RAJA_INLINE __device__ bool cuda_atomicCAS_equal(const T& a, const T& b)
317 {
318  using R = cuda_useReinterpretCommon_t<T>;
319 
320  return cuda_atomicCAS_equal(RAJA::util::reinterp_A_as_B<T, R>(a),
321  RAJA::util::reinterp_A_as_B<T, R>(b));
322 }
323 
330 template<typename T, typename Oper>
331 RAJA_INLINE __device__ T cuda_atomicCAS_loop(T* acc, Oper&& oper)
332 {
333  T old = cuda_atomicLoad(acc);
334  T expected;
335 
336  do
337  {
338  expected = old;
339  old = cuda_atomicCAS(acc, expected, oper(expected));
340  } while (!cuda_atomicCAS_equal(old, expected));
341 
342  return old;
343 }
344 
351 template<typename T, typename Oper, typename ShortCircuit>
352 RAJA_INLINE __device__ T cuda_atomicCAS_loop(T* acc,
353  Oper&& oper,
354  ShortCircuit&& sc)
355 {
356  T old = cuda_atomicLoad(acc);
357 
358  if (sc(old))
359  {
360  return old;
361  }
362 
363  T expected;
364 
365  do
366  {
367  expected = old;
368  old = cuda_atomicCAS(acc, expected, oper(expected));
369  } while (!cuda_atomicCAS_equal(old, expected) && !sc(old));
370 
371  return old;
372 }
373 
377 using cuda_atomicAdd_builtin_types = ::camp::list<int,
378  unsigned int,
379  unsigned long long int,
380  float
381 #if __CUDA_ARCH__ >= 600
382  ,
383  double
384 #endif
385  >;
386 
387 template<typename T,
389  nullptr>
390 RAJA_INLINE __device__ T cuda_atomicAdd(T* acc, T value)
391 {
392  return cuda_atomicCAS_loop(acc, [value](T old) {
393  return old + value;
394  });
395 }
396 
397 template<
398  typename T,
400 RAJA_INLINE __device__ T cuda_atomicAdd(T* acc, T value)
401 {
402  return ::atomicAdd(acc, value);
403 }
404 
408 using cuda_atomicSub_builtin_types = cuda_atomicAdd_builtin_types;
409 
410 using cuda_atomicSub_via_Sub_builtin_types = ::camp::list<int, unsigned int>;
411 
412 using cuda_atomicSub_via_Add_builtin_types =
413  ::camp::list<unsigned long long int,
414  float
415 #if __CUDA_ARCH__ >= 600
416  ,
417  double
418 #endif
419  >;
420 
421 template<typename T,
423  nullptr>
424 RAJA_INLINE __device__ T cuda_atomicSub(T* acc, T value)
425 {
426  return cuda_atomicCAS_loop(acc, [value](T old) {
427  return old - value;
428  });
429 }
430 
431 template<
432  typename T,
434  nullptr>
435 RAJA_INLINE __device__ T cuda_atomicSub(T* acc, T value)
436 {
437  return ::atomicSub(acc, value);
438 }
439 
440 template<
441  typename T,
443  nullptr>
444 RAJA_INLINE __device__ T cuda_atomicSub(T* acc, T value)
445 {
446  return ::atomicAdd(acc, -value);
447 }
448 
452 using cuda_atomicMinMax_builtin_types = ::camp::list<int,
453  unsigned int
454 #if __CUDA_ARCH__ >= 500
455  ,
456  long long int,
457  unsigned long long int
458 #endif
459  >;
460 
461 
465 template<typename T,
467  nullptr>
468 RAJA_INLINE __device__ T cuda_atomicMin(T* acc, T value)
469 {
470  return cuda_atomicCAS_loop(
471  acc,
472  [value](T old) {
473  return value < old ? value : old;
474  },
475  [value](T current) {
476  return current <= value;
477  });
478 }
479 
480 template<typename T,
482  nullptr>
483 RAJA_INLINE __device__ T cuda_atomicMin(T* acc, T value)
484 {
485  return ::atomicMin(acc, value);
486 }
487 
491 template<typename T,
493  nullptr>
494 RAJA_INLINE __device__ T cuda_atomicMax(T* acc, T value)
495 {
496  return cuda_atomicCAS_loop(
497  acc,
498  [value](T old) {
499  return old < value ? value : old;
500  },
501  [value](T current) {
502  return value <= current;
503  });
504 }
505 
506 template<typename T,
508  nullptr>
509 RAJA_INLINE __device__ T cuda_atomicMax(T* acc, T value)
510 {
511  return ::atomicMax(acc, value);
512 }
513 
517 using cuda_atomicIncDecReset_builtin_types = ::camp::list<unsigned int>;
518 
522 template<
523  typename T,
525  nullptr>
526 RAJA_INLINE __device__ T cuda_atomicInc(T* acc, T value)
527 {
528  // See:
529  // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicinc
530  return cuda_atomicCAS_loop(acc, [value](T old) {
531  return value <= old ? static_cast<T>(0) : old + static_cast<T>(1);
532  });
533 }
534 
535 template<
536  typename T,
538  nullptr>
539 RAJA_INLINE __device__ T cuda_atomicInc(T* acc, T value)
540 {
541  return ::atomicInc(acc, value);
542 }
543 
547 template<typename T>
548 RAJA_INLINE __device__ T cuda_atomicInc(T* acc)
549 {
550  return cuda_atomicAdd(acc, static_cast<T>(1));
551 }
552 
556 template<
557  typename T,
559  nullptr>
560 RAJA_INLINE __device__ T cuda_atomicDec(T* acc, T value)
561 {
562  // See:
563  // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicdec
564  return cuda_atomicCAS_loop(acc, [value](T old) {
565  return old == static_cast<T>(0) || value < old ? value
566  : old - static_cast<T>(1);
567  });
568 }
569 
570 template<
571  typename T,
573  nullptr>
574 RAJA_INLINE __device__ T cuda_atomicDec(T* acc, T value)
575 {
576  return ::atomicDec(acc, value);
577 }
578 
582 template<typename T>
583 RAJA_INLINE __device__ T cuda_atomicDec(T* acc)
584 {
585  return cuda_atomicSub(acc, static_cast<T>(1));
586 }
587 
591 using cuda_atomicBit_builtin_types =
592  ::camp::list<int, unsigned int, unsigned long long int>;
593 
597 template<typename T,
599  nullptr>
600 RAJA_INLINE __device__ T cuda_atomicAnd(T* acc, T value)
601 {
602  return cuda_atomicCAS_loop(acc, [value](T old) {
603  return old & value;
604  });
605 }
606 
607 template<
608  typename T,
610 RAJA_INLINE __device__ T cuda_atomicAnd(T* acc, T value)
611 {
612  return ::atomicAnd(acc, value);
613 }
614 
618 template<typename T,
620  nullptr>
621 RAJA_INLINE __device__ T cuda_atomicOr(T* acc, T value)
622 {
623  return cuda_atomicCAS_loop(acc, [value](T old) {
624  return old | value;
625  });
626 }
627 
637 template<typename T,
639  nullptr>
640 RAJA_INLINE __device__ T cuda_atomicXor(T* acc, T value)
641 {
642  return cuda_atomicCAS_loop(acc, [value](T old) {
643  return old ^ value;
644  });
645 }
646 
647 template<
648  typename T,
650 RAJA_INLINE __device__ T cuda_atomicXor(T* acc, T value)
651 {
652  return ::atomicXor(acc, value);
653 }
654 
655 
656 } // namespace detail
657 
667 template<typename T, typename host_policy>
668 RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(cuda_atomic_explicit<host_policy>,
669  T* acc)
670 {
671 #ifdef __CUDA_ARCH__
672  return detail::cuda_atomicLoad(acc);
673 #else
674  return RAJA::atomicLoad(host_policy {}, acc);
675 #endif
676 }
677 
679 template<typename T, typename host_policy>
680 RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(cuda_atomic_explicit<host_policy>,
681  T* acc,
682  T value)
683 {
684 #ifdef __CUDA_ARCH__
685  detail::cuda_atomicStore(acc, value);
686 #else
687  RAJA::atomicStore(host_policy {}, acc, value);
688 #endif
689 }
690 
692 template<typename T, typename host_policy>
693 RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(cuda_atomic_explicit<host_policy>,
694  T* acc,
695  T value)
696 {
697 #ifdef __CUDA_ARCH__
698  return detail::cuda_atomicAdd(acc, value);
699 #else
700  return RAJA::atomicAdd(host_policy {}, acc, value);
701 #endif
702 }
703 
705 template<typename T, typename host_policy>
706 RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(cuda_atomic_explicit<host_policy>,
707  T* acc,
708  T value)
709 {
710 #ifdef __CUDA_ARCH__
711  return detail::cuda_atomicSub(acc, value);
712 #else
713  return RAJA::atomicSub(host_policy {}, acc, value);
714 #endif
715 }
716 
718 template<typename T, typename host_policy>
719 RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(cuda_atomic_explicit<host_policy>,
720  T* acc,
721  T value)
722 {
723 #ifdef __CUDA_ARCH__
724  return detail::cuda_atomicMin(acc, value);
725 #else
726  return RAJA::atomicMin(host_policy {}, acc, value);
727 #endif
728 }
729 
731 template<typename T, typename host_policy>
732 RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(cuda_atomic_explicit<host_policy>,
733  T* acc,
734  T value)
735 {
736 #ifdef __CUDA_ARCH__
737  return detail::cuda_atomicMax(acc, value);
738 #else
739  return RAJA::atomicMax(host_policy {}, acc, value);
740 #endif
741 }
742 
744 template<typename T, typename host_policy>
745 RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(cuda_atomic_explicit<host_policy>,
746  T* acc,
747  T value)
748 {
749 #ifdef __CUDA_ARCH__
750  // See:
751  // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicinc
752  return detail::cuda_atomicInc(acc, value);
753 #else
754  return RAJA::atomicInc(host_policy {}, acc, value);
755 #endif
756 }
757 
759 template<typename T, typename host_policy>
760 RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(cuda_atomic_explicit<host_policy>,
761  T* acc)
762 {
763 #ifdef __CUDA_ARCH__
764  return detail::cuda_atomicInc(acc);
765 #else
766  return RAJA::atomicInc(host_policy {}, acc);
767 #endif
768 }
769 
771 template<typename T, typename host_policy>
772 RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(cuda_atomic_explicit<host_policy>,
773  T* acc,
774  T value)
775 {
776 #ifdef __CUDA_ARCH__
777  // See:
778  // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicdec
779  return detail::cuda_atomicDec(acc, value);
780 #else
781  return RAJA::atomicDec(host_policy {}, acc, value);
782 #endif
783 }
784 
786 template<typename T, typename host_policy>
787 RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(cuda_atomic_explicit<host_policy>,
788  T* acc)
789 {
790 #ifdef __CUDA_ARCH__
791  return detail::cuda_atomicDec(acc);
792 #else
793  return RAJA::atomicDec(host_policy {}, acc);
794 #endif
795 }
796 
798 template<typename T, typename host_policy>
799 RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(cuda_atomic_explicit<host_policy>,
800  T* acc,
801  T value)
802 {
803 #ifdef __CUDA_ARCH__
804  return detail::cuda_atomicAnd(acc, value);
805 #else
806  return RAJA::atomicAnd(host_policy {}, acc, value);
807 #endif
808 }
809 
811 template<typename T, typename host_policy>
812 RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(cuda_atomic_explicit<host_policy>,
813  T* acc,
814  T value)
815 {
816 #ifdef __CUDA_ARCH__
817  return detail::cuda_atomicOr(acc, value);
818 #else
819  return RAJA::atomicOr(host_policy {}, acc, value);
820 #endif
821 }
822 
824 template<typename T, typename host_policy>
825 RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(cuda_atomic_explicit<host_policy>,
826  T* acc,
827  T value)
828 {
829 #ifdef __CUDA_ARCH__
830  return detail::cuda_atomicXor(acc, value);
831 #else
832  return RAJA::atomicXor(host_policy {}, acc, value);
833 #endif
834 }
835 
837 template<typename T, typename host_policy>
838 RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(cuda_atomic_explicit<host_policy>,
839  T* acc,
840  T value)
841 {
842 #ifdef __CUDA_ARCH__
843  return detail::cuda_atomicExchange(acc, value);
844 #else
845  return RAJA::atomicExchange(host_policy {}, acc, value);
846 #endif
847 }
848 
850 template<typename T, typename host_policy>
851 RAJA_INLINE RAJA_HOST_DEVICE T
852 atomicCAS(cuda_atomic_explicit<host_policy>, T* acc, T compare, T value)
853 {
854 #ifdef __CUDA_ARCH__
855  return detail::cuda_atomicCAS(acc, compare, value);
856 #else
857  return RAJA::atomicCAS(host_policy {}, acc, compare, value);
858 #endif
859 }
860 
861 } // namespace RAJA
862 
863 
864 #endif // RAJA_ENABLE_CUDA
865 #endif // guard
Header file for enable_if helpers.
Header file for RAJA operator definitions.
Header file for reinterpreting type conversions.
RAJA header file defining automatic and builtin atomic operations.
Header file for common RAJA internal macro definitions.
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
#define RAJA_SUPPRESS_HD_WARN
Definition: macros.hpp:68
std::enable_if_t< is_any_of< T, TypeList >::value, T > enable_if_is_any_of
Definition: EnableIf.hpp:49
std::enable_if_t<::RAJA::concepts::negate< is_any_of< T, TypeList > >::value, T > enable_if_is_none_of
Definition: EnableIf.hpp:54
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T *acc, T value)
Atomic bitwise AND equivalent to (*acc) = (*acc) & value This only works with integral data types.
Definition: atomic.hpp:224
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicCAS(seq_atomic, T *acc, T compare, T value)
Definition: atomic.hpp:154
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T *acc, T value)
Atomic bitwise OR equivalent to (*acc) = (*acc) | value This only works with integral data types.
Definition: atomic.hpp:240
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicMax(seq_atomic, T *acc, T value)
Definition: atomic.hpp:73
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(T *acc)
Atomic load.
Definition: atomic.hpp:92
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc)
Atomic decrement.
Definition: atomic.hpp:195
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T *acc, T value)
Atomic add.
Definition: atomic.hpp:117
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicSub(seq_atomic, T *acc, T value)
Definition: atomic.hpp:55
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T *acc, T value)
Atomic bitwise XOR equivalent to (*acc) = (*acc) ^ value This only works with integral data types.
Definition: atomic.hpp:256
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicXor(seq_atomic, T *acc, T value)
Definition: atomic.hpp:136
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T *acc, T value)
Atomic store.
Definition: atomic.hpp:104
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicAnd(seq_atomic, T *acc, T value)
Definition: atomic.hpp:118
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(T *acc, T value)
Atomic maximum equivalent to (*acc) = std::max(*acc, value)
Definition: atomic.hpp:156
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicDec(seq_atomic, T *acc, T val)
Definition: atomic.hpp:109
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T *acc, T compare, T value)
Atomic compare and swap.
Definition: atomic.hpp:286
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc)
Atomic increment.
Definition: atomic.hpp:168
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicAdd(seq_atomic, T *acc, T value)
Definition: atomic.hpp:46
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T *acc, T value)
Atomic minimum equivalent to (*acc) = std::min(*acc, value)
Definition: atomic.hpp:143
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicMin(seq_atomic, T *acc, T value)
Definition: atomic.hpp:64
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T *acc, T value)
Atomic subtract.
Definition: atomic.hpp:130
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicOr(seq_atomic, T *acc, T value)
Definition: atomic.hpp:127
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T *acc, T value)
Atomic value exchange.
Definition: atomic.hpp:271
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE T atomicInc(seq_atomic, T *acc, T val)
Definition: atomic.hpp:91
RAJA header file defining OpenMP atomic operations.
RAJA header file defining sequential atomic operations.