RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
atomic_builtin.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_policy_atomic_builtin_HPP
21 #define RAJA_policy_atomic_builtin_HPP
22 
23 #include "RAJA/config.hpp"
24 
25 #include <cstdint>
26 
27 #if defined(RAJA_COMPILER_MSVC) || \
28  ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
29 #include <intrin.h>
30 #endif
31 
33 #include "RAJA/util/macros.hpp"
34 
35 
// RAJA_DEVICE_HIP expands to RAJA_HOST_DEVICE when RAJA_ENABLE_HIP is defined
// and to nothing otherwise.
#if !defined(RAJA_ENABLE_HIP)
#define RAJA_DEVICE_HIP
#else
#define RAJA_DEVICE_HIP RAJA_HOST_DEVICE
#endif
41 
42 namespace RAJA
43 {
44 
45 
/*!
 * Atomic policy that uses the compiler's builtin atomic routines.
 *
 * Empty tag type: the atomicXXX overloads in this file take it as their first
 * (unnamed) parameter so that overload resolution selects them.
 */
struct builtin_atomic
{};
49 
50 namespace detail
51 {
52 
53 
54 #if defined(RAJA_COMPILER_MSVC) || \
55  ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
56 
57 
/*!
 * Type trait that is true when T is exactly one of char, short, long, or
 * long long, i.e. the types taken directly by the signed Interlocked-based
 * overloads in this branch.
 */
template<typename T>
struct builtin_useIntrinsic
{
  static constexpr bool value =
      std::is_same<T, long long>::value || std::is_same<T, long>::value ||
      std::is_same<T, short>::value || std::is_same<T, char>::value;
};
69 
74 template<typename T>
75 struct builtin_useReinterpret
76 {
77  static constexpr bool value =
79  (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
80 
81  using type = std::conditional_t<
82  sizeof(T) == 1,
83  char,
84  std::conditional_t<sizeof(T) == 2,
85  short,
86  std::conditional_t<sizeof(T) == 4, long, long long>>>;
87 };
88 
93 template<typename T>
94 struct builtin_useCAS
95 {
96  static constexpr bool value =
98  (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
99 };
100 
109 RAJA_INLINE char builtin_atomicOr(char* acc, char value)
110 {
111  return _InterlockedOr8(acc, value);
112 }
113 
114 RAJA_INLINE uint8_t builtin_atomicOr(uint8_t* acc, uint8_t value)
115 {
116  char return_val = _InterlockedOr8((char*)acc, *((char*)&value));
117  return *(reinterpret_cast<uint8_t*>(&return_val));
118 }
119 
120 RAJA_INLINE short builtin_atomicOr(short* acc, short value)
121 {
122  return _InterlockedOr16(acc, value);
123 }
124 
125 RAJA_INLINE uint16_t builtin_atomicOr(uint16_t* acc, uint16_t value)
126 {
127  short return_val = _InterlockedOr16((short*)acc, *((short*)&value));
128  return *(reinterpret_cast<uint16_t*>(&return_val));
129 }
130 
131 RAJA_INLINE long builtin_atomicOr(long* acc, long value)
132 {
133  return _InterlockedOr(acc, value);
134 }
135 
136 RAJA_INLINE uint32_t builtin_atomicOr(uint32_t* acc, uint32_t value)
137 {
138  long return_val = _InterlockedOr((long*)acc, *((long*)&value));
139  return *(reinterpret_cast<uint32_t*>(&return_val));
140 }
141 
142 #if defined(_WIN64)
143 
144 RAJA_INLINE long long builtin_atomicOr(long long* acc, long long value)
145 {
146  return _InterlockedOr64(acc, value);
147 }
148 
149 RAJA_INLINE uint64_t builtin_atomicOr(uint64_t* acc, uint64_t value)
150 {
151  long long return_val =
152  _InterlockedOr64((long long*)acc, *((long long*)&value));
153  return *(reinterpret_cast<uint64_t*>(&return_val));
154 }
155 
156 #endif
157 
161 template<typename T,
162  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
163 RAJA_INLINE T builtin_atomicLoad(T* acc)
164 {
165  return builtin_atomicOr(acc, static_cast<T>(0));
166 }
167 
171 RAJA_INLINE char builtin_atomicExchange(char* acc, char value)
172 {
173  return _InterlockedExchange8(acc, value);
174 }
175 
176 RAJA_INLINE uint8_t builtin_atomicExchange(uint8_t* acc, uint8_t value)
177 {
178  char return_val = _InterlockedExchange8((char*)acc, *((char*)&value));
179  return *(reinterpret_cast<uint8_t*>(&return_val));
180 }
181 
182 RAJA_INLINE short builtin_atomicExchange(short* acc, short value)
183 {
184  return _InterlockedExchange16(acc, value);
185 }
186 
187 RAJA_INLINE uint16_t builtin_atomicExchange(uint16_t* acc, uint16_t value)
188 {
189  short return_val = _InterlockedExchange16((short*)acc, *((short*)&value));
190  return *(reinterpret_cast<uint16_t*>(&return_val));
191 }
192 
193 RAJA_INLINE long builtin_atomicExchange(long* acc, long value)
194 {
195  return _InterlockedExchange(acc, value);
196 }
197 
198 RAJA_INLINE uint32_t builtin_atomicExchange(uint32_t* acc, uint32_t value)
199 {
200  long return_val = _InterlockedExchange((long*)acc, *((long*)&value));
201  return *(reinterpret_cast<uint32_t*>(&return_val));
202 }
203 
204 #if defined(_WIN64)
205 
206 RAJA_INLINE long long builtin_atomicExchange(long long* acc, long long value)
207 {
208  return _InterlockedExchange64(acc, value);
209 }
210 
211 RAJA_INLINE uint64_t builtin_atomicExchange(uint64_t* acc, uint64_t value)
212 {
213  long long return_val =
214  _InterlockedExchange64((long long*)acc, *((long long*)&value));
215  return *(reinterpret_cast<uint64_t*>(&return_val));
216 }
217 
218 #endif
219 
220 
224 template<typename T,
225  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
226 RAJA_INLINE void builtin_atomicStore(T* acc, T value)
227 {
228  builtin_atomicExchange(acc, value);
229 }
230 
234 RAJA_INLINE char builtin_atomicCAS(char* acc, char compare, char value)
235 {
236  return _InterlockedCompareExchange8(acc, value, compare);
237 }
238 
239 RAJA_INLINE uint8_t builtin_atomicCAS(uint8_t* acc,
240  uint8_t compare,
241  uint8_t value)
242 {
243  char return_val = _InterlockedCompareExchange8((char*)acc, *((char*)&value),
244  *((char*)&compare));
245  return *(reinterpret_cast<uint8_t*>(&return_val));
246 }
247 
248 RAJA_INLINE short builtin_atomicCAS(short* acc, short compare, short value)
249 {
250  return _InterlockedCompareExchange16(acc, value, compare);
251 }
252 
253 RAJA_INLINE uint16_t builtin_atomicCAS(uint16_t* acc,
254  uint16_t compare,
255  uint16_t value)
256 {
257  short return_val = _InterlockedCompareExchange16(
258  (short*)acc, *((short*)&value), *((short*)&compare));
259  return *(reinterpret_cast<uint16_t*>(&return_val));
260 }
261 
262 RAJA_INLINE long builtin_atomicCAS(long* acc, long compare, long value)
263 {
264  return _InterlockedCompareExchange(acc, value, compare);
265 }
266 
267 RAJA_INLINE uint32_t builtin_atomicCAS(uint32_t* acc,
268  uint32_t compare,
269  uint32_t value)
270 {
271  long return_val = _InterlockedCompareExchange((long*)acc, *((long*)&value),
272  *((long*)&compare));
273  return *(reinterpret_cast<uint32_t*>(&return_val));
274 }
275 
276 #if defined(_WIN64)
277 
278 RAJA_INLINE long long builtin_atomicCAS(long long* acc,
279  long long compare,
280  long long value)
281 {
282  return _InterlockedCompareExchange64(acc, value, compare);
283 }
284 
285 RAJA_INLINE uint64_t builtin_atomicCAS(uint64_t* acc,
286  uint64_t compare,
287  uint64_t value)
288 {
289  long long return_val = _InterlockedCompareExchange64(
290  (long long*)acc, *((long long*)&value), *((long long*)&compare));
291  return *(reinterpret_cast<uint64_t*>(&return_val));
292 }
293 
294 #endif
295 
296 
300 RAJA_INLINE char builtin_atomicAdd(char* acc, char value)
301 {
302  return _InterlockedExchangeAdd8(acc, value);
303 }
304 
305 RAJA_INLINE uint8_t builtin_atomicAdd(uint8_t* acc, uint8_t value)
306 {
307  char return_val = _InterlockedExchangeAdd8((char*)acc, *((char*)&value));
308  return *(reinterpret_cast<uint8_t*>(&return_val));
309 }
310 
311 RAJA_INLINE short builtin_atomicAdd(short* acc, short value)
312 {
313  return _InterlockedExchangeAdd16(acc, value);
314 }
315 
316 RAJA_INLINE uint16_t builtin_atomicAdd(uint16_t* acc, uint16_t value)
317 {
318  short return_val = _InterlockedExchangeAdd16((short*)acc, *((short*)&value));
319  return *(reinterpret_cast<uint16_t*>(&return_val));
320 }
321 
322 RAJA_INLINE long builtin_atomicAdd(long* acc, long value)
323 {
324  return _InterlockedExchangeAdd(acc, value);
325 }
326 
327 RAJA_INLINE uint32_t builtin_atomicAdd(uint32_t* acc, uint32_t value)
328 {
329  long return_val = _InterlockedExchangeAdd((long*)acc, *((long*)&value));
330  return *(reinterpret_cast<uint32_t*>(&return_val));
331 }
332 
333 #if defined(_WIN64)
334 
335 RAJA_INLINE long long builtin_atomicAdd(long long* acc, long long value)
336 {
337  return _InterlockedExchangeAdd64(acc, value);
338 }
339 
340 RAJA_INLINE uint64_t builtin_atomicAdd(uint64_t* acc, uint64_t value)
341 {
342  long long return_val =
343  _InterlockedExchangeAdd64((long long*)acc, *((long long*)&value));
344  return *(reinterpret_cast<uint64_t*>(&return_val));
345 }
346 
347 #endif
348 
349 
353 RAJA_INLINE char builtin_atomicSub(char* acc, char value)
354 {
355  return _InterlockedExchangeAdd8(acc, -value);
356 }
357 
358 RAJA_INLINE uint8_t builtin_atomicSub(uint8_t* acc, uint8_t value)
359 {
360  char neg_value = -(*((char*)&(value)));
361  char return_val = _InterlockedExchangeAdd8((char*)acc, neg_value);
362  return *(reinterpret_cast<uint8_t*>(&return_val));
363 }
364 
365 RAJA_INLINE short builtin_atomicSub(short* acc, short value)
366 {
367  return _InterlockedExchangeAdd16(acc, -value);
368 }
369 
370 RAJA_INLINE uint16_t builtin_atomicSub(uint16_t* acc, uint16_t value)
371 {
372  short neg_value = -(*((short*)&(value)));
373  short return_val = _InterlockedExchangeAdd16((short*)acc, neg_value);
374  return *(reinterpret_cast<uint16_t*>(&return_val));
375 }
376 
377 RAJA_INLINE long builtin_atomicSub(long* acc, long value)
378 {
379  return _InterlockedExchangeAdd(acc, -value);
380 }
381 
382 RAJA_INLINE uint32_t builtin_atomicSub(uint32_t* acc, uint32_t value)
383 {
384  long neg_value = -(*((long*)&(value)));
385  long return_val = _InterlockedExchangeAdd((long*)acc, neg_value);
386  return *(reinterpret_cast<uint32_t*>(&return_val));
387 }
388 
389 #if defined(_WIN64)
390 
391 RAJA_INLINE long long builtin_atomicSub(long long* acc, long long value)
392 {
393  return _InterlockedExchangeAdd64(acc, -value);
394 }
395 
396 RAJA_INLINE uint64_t builtin_atomicSub(uint64_t* acc, uint64_t value)
397 {
398  long long neg_value = -(*((long long*)&(value)));
399  long long return_val = _InterlockedExchangeAdd64((long long*)acc, neg_value);
400  return *(reinterpret_cast<uint64_t*>(&return_val));
401 }
402 
403 #endif
404 
405 
409 RAJA_INLINE char builtin_atomicAnd(char* acc, char value)
410 {
411  return _InterlockedAnd8(acc, value);
412 }
413 
414 RAJA_INLINE uint8_t builtin_atomicAnd(uint8_t* acc, uint8_t value)
415 {
416  char return_val = _InterlockedAnd8((char*)acc, *((char*)&value));
417  return *(reinterpret_cast<uint8_t*>(&return_val));
418 }
419 
420 RAJA_INLINE short builtin_atomicAnd(short* acc, short value)
421 {
422  return _InterlockedAnd16(acc, value);
423 }
424 
425 RAJA_INLINE uint16_t builtin_atomicAnd(uint16_t* acc, uint16_t value)
426 {
427  short return_val = _InterlockedAnd16((short*)acc, *((short*)&value));
428  return *(reinterpret_cast<uint16_t*>(&return_val));
429 }
430 
431 RAJA_INLINE long builtin_atomicAnd(long* acc, long value)
432 {
433  return _InterlockedAnd(acc, value);
434 }
435 
436 RAJA_INLINE uint32_t builtin_atomicAnd(uint32_t* acc, uint32_t value)
437 {
438  long return_val = _InterlockedAnd((long*)acc, *((long*)&value));
439  return *(reinterpret_cast<uint32_t*>(&return_val));
440 }
441 
442 #if defined(_WIN64)
443 
444 RAJA_INLINE long long builtin_atomicAnd(long long* acc, long long value)
445 {
446  return _InterlockedAnd64(acc, value);
447 }
448 
449 RAJA_INLINE uint64_t builtin_atomicAnd(uint64_t* acc, uint64_t value)
450 {
451  long long return_val =
452  _InterlockedAnd64((long long*)acc, *((long long*)&value));
453  return *(reinterpret_cast<uint64_t*>(&return_val));
454 }
455 
456 #endif
457 
458 
462 RAJA_INLINE char builtin_atomicXor(char* acc, char value)
463 {
464  return _InterlockedXor8(acc, value);
465 }
466 
467 RAJA_INLINE uint8_t builtin_atomicXor(uint8_t* acc, uint8_t value)
468 {
469  char return_val = _InterlockedXor8((char*)acc, *((char*)&value));
470  return *(reinterpret_cast<uint8_t*>(&return_val));
471 }
472 
473 RAJA_INLINE short builtin_atomicXor(short* acc, short value)
474 {
475  return _InterlockedXor16(acc, value);
476 }
477 
478 RAJA_INLINE uint16_t builtin_atomicXor(uint16_t* acc, uint16_t value)
479 {
480  short return_val = _InterlockedXor16((short*)acc, *((short*)&value));
481  return *(reinterpret_cast<uint16_t*>(&return_val));
482 }
483 
484 RAJA_INLINE long builtin_atomicXor(long* acc, long value)
485 {
486  return _InterlockedXor(acc, value);
487 }
488 
489 RAJA_INLINE uint32_t builtin_atomicXor(uint32_t* acc, uint32_t value)
490 {
491  long return_val = _InterlockedXor((long*)acc, *((long*)&value));
492  return *(reinterpret_cast<uint32_t*>(&return_val));
493 }
494 
495 #if defined(_WIN64)
496 
497 RAJA_INLINE long long builtin_atomicXor(long long* acc, long long value)
498 {
499  return _InterlockedXor64(acc, value);
500 }
501 
502 RAJA_INLINE uint64_t builtin_atomicXor(uint64_t* acc, uint64_t value)
503 {
504  long long return_val =
505  _InterlockedXor64((long long*)acc, *((long long*)&value));
506  return *(reinterpret_cast<uint64_t*>(&return_val));
507 }
508 
509 #endif
510 
511 
512 #else // else clause from if defined(RAJA_COMPILER_MSVC) ||
513  // ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
514 
515 
/*!
 * Type trait that is true when T can be passed directly to the __atomic_*
 * builtin wrappers in this branch: an integral or enum type of size 1, 2, 4,
 * or 8 bytes.
 */
template<typename T>
struct builtin_useIntrinsic
{
  static constexpr bool value =
      (std::is_integral<T>::value || std::is_enum<T>::value) &&
      (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
};
527 
/*!
 * Type trait for types that are handled by reinterpreting them as an unsigned
 * integer of the same size.
 *
 * value is true for non-integral, non-enum types of size 1, 2, 4, or 8 bytes.
 * When a fixed-width UINTn_MAX macro is not defined, the corresponding size is
 * only accepted if the fallback builtin unsigned type has exactly that size.
 *
 * type is the unsigned integer type selected by sizeof(T): uintN_t when
 * UINTn_MAX is defined, otherwise unsigned char / short / int / long long.
 * Any size other than 1, 2, or 4 selects the 64-bit type.
 */
template<typename T>
struct builtin_useReinterpret
{
  static constexpr bool value = !std::is_integral<T>::value &&
                                !std::is_enum<T>::value &&
                                ((sizeof(T) == 1
#if !defined(UINT8_MAX)
                                  && sizeof(unsigned char) == 1
#endif
                                  ) ||
                                 (sizeof(T) == 2
#if !defined(UINT16_MAX)
                                  && sizeof(unsigned short) == 2
#endif
                                  ) ||
                                 (sizeof(T) == 4
#if !defined(UINT32_MAX)
                                  && sizeof(unsigned int) == 4
#endif
                                  ) ||
                                 (sizeof(T) == 8
#if !defined(UINT64_MAX)
                                  && sizeof(unsigned long long) == 8
#endif
                                  ));

  using type =
      std::conditional_t<sizeof(T) == 1,
#if defined(UINT8_MAX)
                         uint8_t,
#else
                         unsigned char,
#endif
                         std::conditional_t<sizeof(T) == 2,
#if defined(UINT16_MAX)
                                            uint16_t,
#else
                                            unsigned short,
#endif
                                            std::conditional_t<sizeof(T) == 4,
#if defined(UINT32_MAX)
                                                               uint32_t,
#else
                                                               unsigned int,
#endif
#if defined(UINT64_MAX)
                                                               uint64_t>>>;
#else
                                                               unsigned long long>>>;
#endif
};
583 
/*!
 * Type trait for types whose arithmetic and bitwise atomics are implemented
 * with a compare-and-swap loop: non-integral, non-enum types of size 1, 2, 4,
 * or 8 bytes.
 */
template<typename T>
struct builtin_useCAS
{
  static constexpr bool value =
      !std::is_integral<T>::value && !std::is_enum<T>::value &&
      (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
};
595 
604 template<typename T,
605  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
606 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T* acc)
607 {
608  return __atomic_load_n(acc, __ATOMIC_RELAXED);
609 }
610 
614 template<typename T,
615  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
616 RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T* acc, T value)
617 {
618  __atomic_store_n(acc, value, __ATOMIC_RELAXED);
619 }
620 
624 template<typename T,
625  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
626 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T* acc, T value)
627 {
628  return __atomic_exchange_n(acc, value, __ATOMIC_RELAXED);
629 }
630 
634 template<typename T,
635  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
636 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T* acc, T compare, T value)
637 {
638  __atomic_compare_exchange_n(acc, &compare, value, false, __ATOMIC_RELAXED,
639  __ATOMIC_RELAXED);
640  return compare;
641 }
642 
646 template<typename T,
647  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
648 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T* acc, T value)
649 {
650  return __atomic_fetch_add(acc, value, __ATOMIC_RELAXED);
651 }
652 
656 template<typename T,
657  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
658 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T* acc, T value)
659 {
660  return __atomic_fetch_sub(acc, value, __ATOMIC_RELAXED);
661 }
662 
666 template<typename T,
667  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
668 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T* acc, T value)
669 {
670  return __atomic_fetch_and(acc, value, __ATOMIC_RELAXED);
671 }
672 
676 template<typename T,
677  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
678 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr(T* acc, T value)
679 {
680  return __atomic_fetch_or(acc, value, __ATOMIC_RELAXED);
681 }
682 
686 template<typename T,
687  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
688 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T* acc, T value)
689 {
690  return __atomic_fetch_xor(acc, value, __ATOMIC_RELAXED);
691 }
692 
693 
694 #endif // RAJA_COMPILER_MSVC
695 
696 
705 template<typename T>
707 
711 template<typename T,
712  std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
713 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T* acc)
714 {
715  using R = builtin_useReinterpret_t<T>;
716 
717  return RAJA::util::reinterp_A_as_B<R, T>(
718  builtin_atomicLoad(reinterpret_cast<R*>(acc)));
719 }
720 
724 template<typename T,
725  std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
726 RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T* acc, T value)
727 {
728  using R = builtin_useReinterpret_t<T>;
729 
730  builtin_atomicStore(reinterpret_cast<R*>(acc),
731  RAJA::util::reinterp_A_as_B<T, R>(value));
732 }
733 
737 template<typename T,
738  std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
739 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T* acc, T value)
740 {
741  using R = builtin_useReinterpret_t<T>;
742 
743  return RAJA::util::reinterp_A_as_B<R, T>(builtin_atomicExchange(
744  reinterpret_cast<R*>(acc), RAJA::util::reinterp_A_as_B<T, R>(value)));
745 }
746 
750 template<typename T,
751  std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
752 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T* acc, T compare, T value)
753 {
754  using R = builtin_useReinterpret_t<T>;
755 
756  return RAJA::util::reinterp_A_as_B<R, T>(builtin_atomicCAS(
757  reinterpret_cast<R*>(acc), RAJA::util::reinterp_A_as_B<T, R>(compare),
758  RAJA::util::reinterp_A_as_B<T, R>(value)));
759 }
760 
770 template<typename T,
771  std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
772 RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T& a, const T& b)
773 {
774  return a == b;
775 }
776 
782 template<typename T,
783  std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
784 RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T& a, const T& b)
785 {
786  using R = builtin_useReinterpret_t<T>;
787 
788  return builtin_atomicCAS_equal(RAJA::util::reinterp_A_as_B<T, R>(a),
789  RAJA::util::reinterp_A_as_B<T, R>(b));
790 }
791 
797 template<typename T, typename Oper>
798 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T* acc, Oper&& oper)
799 {
800  T old = builtin_atomicLoad(acc);
801  T expected;
802 
803  do
804  {
805  expected = old;
806  old = builtin_atomicCAS(acc, expected, oper(expected));
807  } while (!builtin_atomicCAS_equal(old, expected));
808 
809  return old;
810 }
811 
818 template<typename T, typename Oper, typename ShortCircuit>
820  Oper&& oper,
821  ShortCircuit&& sc)
822 {
823  T old = builtin_atomicLoad(acc);
824 
825  if (sc(old))
826  {
827  return old;
828  }
829 
830  T expected;
831 
832  do
833  {
834  expected = old;
835  old = builtin_atomicCAS(acc, expected, oper(expected));
836  } while (!builtin_atomicCAS_equal(old, expected) && !sc(old));
837 
838  return old;
839 }
840 
849 template<typename T, std::enable_if_t<builtin_useCAS<T>::value, bool> = true>
850 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T* acc, T value)
851 {
852  return builtin_atomicCAS_loop(acc, [value](T old) {
853  return old + value;
854  });
855 }
856 
860 template<typename T, std::enable_if_t<builtin_useCAS<T>::value, bool> = true>
861 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T* acc, T value)
862 {
863  return builtin_atomicCAS_loop(acc, [value](T old) {
864  return old - value;
865  });
866 }
867 
871 template<typename T, std::enable_if_t<builtin_useCAS<T>::value, bool> = true>
872 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T* acc, T value)
873 {
874  return builtin_atomicCAS_loop(acc, [value](T old) {
875  return old & value;
876  });
877 }
878 
882 template<typename T, std::enable_if_t<builtin_useCAS<T>::value, bool> = true>
883 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr(T* acc, T value)
884 {
885  return builtin_atomicCAS_loop(acc, [value](T old) {
886  return old | value;
887  });
888 }
889 
893 template<typename T, std::enable_if_t<builtin_useCAS<T>::value, bool> = true>
894 RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T* acc, T value)
895 {
896  return builtin_atomicCAS_loop(acc, [value](T old) {
897  return old ^ value;
898  });
899 }
900 
901 
902 } // namespace detail
903 
904 template<typename T>
906 {
907  return detail::builtin_atomicLoad(acc);
908 }
909 
910 template<typename T>
911 RAJA_DEVICE_HIP RAJA_INLINE void atomicStore(builtin_atomic, T* acc, T value)
912 {
913  detail::builtin_atomicStore(acc, value);
914 }
915 
916 template<typename T>
917 RAJA_DEVICE_HIP RAJA_INLINE T atomicAdd(builtin_atomic, T* acc, T value)
918 {
919  return detail::builtin_atomicAdd(acc, value);
920 }
921 
922 template<typename T>
923 RAJA_DEVICE_HIP RAJA_INLINE T atomicSub(builtin_atomic, T* acc, T value)
924 {
925  return detail::builtin_atomicSub(acc, value);
926 }
927 
928 template<typename T>
929 RAJA_DEVICE_HIP RAJA_INLINE T atomicMin(builtin_atomic, T* acc, T value)
930 {
932  acc,
933  [value](T old) {
934  return value < old ? value : old;
935  },
936  [value](T current) {
937  return current <= value;
938  });
939 }
940 
941 template<typename T>
942 RAJA_DEVICE_HIP RAJA_INLINE T atomicMax(builtin_atomic, T* acc, T value)
943 {
945  acc,
946  [value](T old) {
947  return old < value ? value : old;
948  },
949  [value](T current) {
950  return value <= current;
951  });
952 }
953 
954 template<typename T>
956 {
957  return detail::builtin_atomicAdd(acc, static_cast<T>(1));
958 }
959 
960 template<typename T>
961 RAJA_DEVICE_HIP RAJA_INLINE T atomicInc(builtin_atomic, T* acc, T value)
962 {
963  return detail::builtin_atomicCAS_loop(acc, [value](T old) {
964  return value <= old ? static_cast<T>(0) : old + static_cast<T>(1);
965  });
966 }
967 
968 template<typename T>
970 {
971  return detail::builtin_atomicSub(acc, static_cast<T>(1));
972 }
973 
974 template<typename T>
975 RAJA_DEVICE_HIP RAJA_INLINE T atomicDec(builtin_atomic, T* acc, T value)
976 {
977  return detail::builtin_atomicCAS_loop(acc, [value](T old) {
978  return old == static_cast<T>(0) || value < old ? value
979  : old - static_cast<T>(1);
980  });
981 }
982 
983 template<typename T>
984 RAJA_DEVICE_HIP RAJA_INLINE T atomicAnd(builtin_atomic, T* acc, T value)
985 {
986  return detail::builtin_atomicAnd(acc, value);
987 }
988 
989 template<typename T>
990 RAJA_DEVICE_HIP RAJA_INLINE T atomicOr(builtin_atomic, T* acc, T value)
991 {
992  return detail::builtin_atomicOr(acc, value);
993 }
994 
995 template<typename T>
996 RAJA_DEVICE_HIP RAJA_INLINE T atomicXor(builtin_atomic, T* acc, T value)
997 {
998  return detail::builtin_atomicXor(acc, value);
999 }
1000 
1001 template<typename T>
1002 RAJA_DEVICE_HIP RAJA_INLINE T atomicExchange(builtin_atomic, T* acc, T value)
1003 {
1004  return detail::builtin_atomicExchange(acc, value);
1005 }
1006 
1007 template<typename T>
1008 RAJA_DEVICE_HIP RAJA_INLINE T
1009 atomicCAS(builtin_atomic, T* acc, T compare, T value)
1010 {
1011  return detail::builtin_atomicCAS(acc, compare, value);
1012 }
1013 
1014 
1015 } // namespace RAJA
1016 
1017 
1018 #endif
Header file for reinterpreting type conversions.
#define RAJA_DEVICE_HIP
Definition: atomic_builtin.hpp:39
Header file for common RAJA internal macro definitions.
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T *acc, T compare, T value)
Definition: atomic_builtin.hpp:636
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T *acc, T value)
Definition: atomic_builtin.hpp:648
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T *acc, T value)
Definition: atomic_builtin.hpp:658
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T *acc)
Definition: atomic_builtin.hpp:606
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T *acc, Oper &&oper)
Definition: atomic_builtin.hpp:798
RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T *acc, T value)
Definition: atomic_builtin.hpp:616
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr(T *acc, T value)
Definition: atomic_builtin.hpp:678
RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T &a, const T &b)
Definition: atomic_builtin.hpp:772
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T *acc, T value)
Definition: atomic_builtin.hpp:668
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T *acc, T value)
Definition: atomic_builtin.hpp:626
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T *acc, T value)
Definition: atomic_builtin.hpp:688
typename builtin_useReinterpret< T >::type builtin_useReinterpret_t
Definition: atomic_builtin.hpp:706
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T *acc, T value)
Atomic bitwise AND, equivalent to (*acc) = (*acc) & value. This only works with integral data types.
Definition: atomic.hpp:224
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T *acc, T value)
Atomic bitwise OR, equivalent to (*acc) = (*acc) | value. This only works with integral data types.
Definition: atomic.hpp:240
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(T *acc)
Atomic load.
Definition: atomic.hpp:92
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc)
Atomic decrement.
Definition: atomic.hpp:195
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T *acc, T value)
Atomic add.
Definition: atomic.hpp:117
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T *acc, T value)
Atomic bitwise XOR, equivalent to (*acc) = (*acc) ^ value. This only works with integral data types.
Definition: atomic.hpp:256
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T *acc, T value)
Atomic store.
Definition: atomic.hpp:104
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(T *acc, T value)
Atomic maximum equivalent to (*acc) = std::max(*acc, value)
Definition: atomic.hpp:156
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T *acc, T compare, T value)
Atomic compare and swap.
Definition: atomic.hpp:286
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc)
Atomic increment.
Definition: atomic.hpp:168
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T *acc, T value)
Atomic minimum equivalent to (*acc) = std::min(*acc, value)
Definition: atomic.hpp:143
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T *acc, T value)
Atomic subtract.
Definition: atomic.hpp:130
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T *acc, T value)
Atomic value exchange.
Definition: atomic.hpp:271
Atomic policy that uses the compiler's builtin __atomic_XXX routines.
Definition: atomic_builtin.hpp:48
Definition: atomic_builtin.hpp:590
static constexpr bool value
Definition: atomic_builtin.hpp:591
Definition: atomic_builtin.hpp:522
static constexpr bool value
Definition: atomic_builtin.hpp:523
Definition: atomic_builtin.hpp:534
std::conditional_t< sizeof(T)==1, unsigned char, std::conditional_t< sizeof(T)==2, unsigned short, std::conditional_t< sizeof(T)==4, unsigned int, unsigned long long > >> type
Definition: atomic_builtin.hpp:580
static constexpr bool value
Definition: atomic_builtin.hpp:535