RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
Namespaces | Classes | Typedefs | Functions | Variables
RAJA::detail Namespace Reference

Namespaces

 cuda
 
 hip
 
 omp_target
 

Classes

struct  common_type
 
struct  common_type< T >
 
struct  foldl_impl
 
struct  foldl_impl< Op, Arg1 >
 
struct  foldl_impl< Op, Arg1, Arg2 >
 
struct  foldl_impl< Op, Arg1, Arg2, Arg3, Rest... >
 
struct  max_platform
 
struct  get_platform
 
struct  get_platform_from_list
 
struct  get_platform_from_list<>
 
struct  get_platform< T, typename std::enable_if< std::is_base_of< RAJA::PolicyBase, T >::value &&!RAJA::type_traits::is_indexset_policy< T >::value >::type >
 
struct  get_platform< RAJA::ExecPolicy< SEG, EXEC > >
 
struct  get_statement_platform
 
struct  get_platform< RAJA::internal::StatementList< Stmts... > >
 
struct  get_platform< RAJA::internal::StatementList<> >
 
struct  get_platform< RAJA::policy::multi::MultiPolicy< SELECTOR, POLICIES... > >
 
struct  icount_adapter
 Adapter to replace specific implementations for the icount variants. More...
 
struct  CallForall
 
struct  CallForallIcount
 
struct  first_argument
 
struct  first_argument< R(Arg0, Args...)>
 
struct  first_argument< R(C::*)(Arg0, Args...)>
 
struct  first_argument< R(C::*)(Arg0, Args...) const >
 
struct  first_argument< R(C::*)(Arg0, Args...) noexcept >
 
struct  first_argument< R(C::*)(Arg0, Args...) const noexcept >
 
struct  callable_signature
 
struct  callable_signature< T, std::void_t< decltype(&camp::decay< T >::operator())> >
 
struct  launch_context_type
 
struct  launch_context_type< T, std::void_t< typename first_argument< camp::decay< typename callable_signature< T >::type > >::type > >
 
struct  Name
 
struct  DispatcherVoidPtrWrapper
 
struct  DispatcherVoidConstPtrWrapper
 
struct  dispatcher_transform_types
 
struct  Dispatcher
 
struct  dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, holder_type >
 
struct  Dispatcher< platform, ::RAJA::indirect_function_call_dispatch, DispatcherID, CallArgs... >
 
struct  dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, holder_type >
 
struct  Dispatcher< platform, ::RAJA::indirect_virtual_function_dispatch, DispatcherID, CallArgs... >
 
struct  dispatcher_transform_types<::RAJA::direct_dispatch< Ts... >, holder_type >
 
struct  Dispatcher< platform, ::RAJA::direct_dispatch<>, DispatcherID, CallArgs... >
 
struct  Dispatcher< platform, ::RAJA::direct_dispatch< T >, DispatcherID, CallArgs... >
 
struct  Dispatcher< platform, ::RAJA::direct_dispatch< T0, T1, TNs... >, DispatcherID, CallArgs... >
 
struct  HoldBodyArgs_base
 
struct  HoldBodyArgs_host
 
struct  HoldBodyArgs_device
 
struct  HoldForall
 
struct  WorkRunner
 
struct  WorkRunnerForallOrdered_base
 
struct  WorkRunnerForallOrdered
 
struct  WorkRunnerForallReverse
 
struct  random_access_iterator
 
class  WorkStorage
 
class  WorkStorage< RAJA::array_of_pointers, ALLOCATOR_T, Dispatcher_T >
 
class  WorkStorage< RAJA::ragged_array_of_objects, ALLOCATOR_T, Dispatcher_T >
 
class  WorkStorage< RAJA::constant_stride_array_of_objects, ALLOCATOR_T, Dispatcher_T >
 
struct  WorkStruct
 
struct  WorkStruct< size, Dispatcher< platform, dispatch_policy, DispatcherID, CallArgs... > >
 
struct  is_xargs
 
struct  is_xargs< xargs< Args... > >
 
struct  builtin_useIntrinsic
 
struct  builtin_useReinterpret
 
struct  builtin_useCAS
 
struct  WorkRunner< RAJA::cuda_work_explicit< BLOCK_SIZE, BLOCKS_PER_SM, Async >, RAJA::ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  WorkRunner< RAJA::cuda_work_explicit< BLOCK_SIZE, BLOCKS_PER_SM, Async >, RAJA::reverse_ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  HoldCudaDeviceXThreadblockLoop
 
struct  WorkRunner< RAJA::cuda_work_explicit< BLOCK_SIZE, BLOCKS_PER_SM, Async >, RAJA::policy::cuda::unordered_cuda_loop_y_block_iter_x_threadblock_average, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  WorkRunner< RAJA::hip_work< BLOCK_SIZE, Async >, RAJA::ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  WorkRunner< RAJA::hip_work< BLOCK_SIZE, Async >, RAJA::reverse_ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  HoldHipDeviceXThreadblockLoop
 
struct  WorkRunner< RAJA::hip_work< BLOCK_SIZE, Async >, RAJA::policy::hip::unordered_hip_loop_y_block_iter_x_threadblock_average, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  policy_invoker
 
struct  policy_invoker< 0, size, Policy, rest... >
 
struct  WorkRunner< RAJA::omp_work, RAJA::ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  WorkRunner< RAJA::omp_work, RAJA::reverse_ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  WorkRunner< RAJA::omp_target_work, RAJA::ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  WorkRunner< RAJA::omp_target_work, RAJA::reverse_ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  MultiReduceDataSeq
 Seq multi-reduce data class template. More...
 
struct  MultiReduceDataSeq< T, t_MultiReduceOp, RAJA::sequential::MultiReduceTuning< RAJA::sequential::multi_reduce_algorithm::left_fold > >
 Seq multi-reduce data class template using left_fold reductions. More...
 
class  ReduceSeq
 
struct  WorkRunner< RAJA::seq_work, RAJA::ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  WorkRunner< RAJA::seq_work, RAJA::reverse_ordered, DISPATCH_POLICY_T, ALLOCATOR_T, INDEX_T, Args... >
 
struct  LayoutBase_impl
 
struct  stride_calculator
 
struct  stride_calculator< n_dims, n_dims, IdxLin >
 
struct  LayoutBase_impl< camp::idx_seq< RangeInts... >, IdxLin, StrideOneDim >
 
class  SoAArray
 Array class specialized for Struct of Array data layout. More...
 
class  SoAArray<::RAJA::reduce::detail::ValueLoc< T, IndexType, doing_min >, size >
 Specialization for RAJA::reduce::detail::ValueLoc. More...
 
class  SoAPtr
 Pointer class specialized for Struct of Array data layout allocated via RAJA basic_mempools. More...
 
class  SoAPtr< RAJA::reduce::detail::ValueLoc< T, IndexType, doing_min >, mempool, accessor >
 Specialization for RAJA::reduce::detail::ValueLoc. More...
 
class  SoAPtr< RAJA::expt::ValLoc< T, IndexType >, mempool, accessor >
 Specialization for RAJA::expt::ValLoc. More...
 
struct  intro_sort_device_max_depth
 max recursion depth for intro sort when compiling device code. More...
 
struct  intro_sort_insertion_sort_cutoff
 cutoff for intro sort to use insertion sort on small ranges. More...
 
struct  StaticLayoutBase_impl
 
struct  StaticLayoutBase_impl< IdxLin, camp::int_seq< IdxLin, RangeInts... >, camp::int_seq< IdxLin, Sizes... >, camp::int_seq< IdxLin, Strides... >, void >
 
struct  StrideCalculatorIdx
 
struct  StrideCalculatorIdx< IdxLin, N, N, Sizes... >
 
struct  StrideCalculator
 
struct  StrideCalculator< IdxLin, camp::int_seq< IdxLin, Range... >, camp::idx_seq< Perm... >, camp::int_seq< IdxLin, Sizes... > >
 
struct  StaticLayoutBase_impl< IdxLin, camp::int_seq< IdxLin, RangeInts... >, camp::int_seq< IdxLin, Sizes... >, camp::int_seq< IdxLin, Strides... >, camp::list< DimTypes... > >
 
struct  StaticLayoutMaker
 
struct  DefaultAccessor
 Abstracts access to memory using normal memory accesses. More...
 
struct  AsIntegerArray
 Abstracts T into an equal or greater size array of integers whose size is between min_integer_type_size and max_integer_type_size inclusive. More...
 
struct  ScopedAssignment
 Assign a new value to an object and restore the object's previous value at the end of the current scope. More...
 
struct  PermutedViewHelper
 
struct  PermutedViewHelper< std::index_sequence< stride_order_idx... > >
 
struct  PermutedViewHelper< layout_right >
 
struct  PermutedViewHelper< layout_left >
 
struct  PassThrough
 
struct  Move
 
struct  PreInc
 
struct  PreDec
 
struct  PlusEq
 
struct  MinusEq
 
struct  DeRef
 
struct  Swap
 
struct  IterSwap
 

Typedefs

template<typename... Ts>
using common_type_t = typename common_type< Ts... >::type
 
template<typename Iter >
using IterVal = typename ::std::iterator_traits< Iter >::value_type
 
template<typename Iter >
using IterRef = typename ::std::iterator_traits< Iter >::reference
 
template<typename Iter >
using IterDiff = typename ::std::iterator_traits< Iter >::difference_type
 
template<typename Container >
using ContainerIter = camp::iterator_from< Container >
 
template<typename Container >
using ContainerVal = camp::decay< decltype(*camp::val< camp::iterator_from< Container > >())>
 
template<typename Container >
using ContainerRef = decltype(*camp::val< camp::iterator_from< Container > >())
 
template<typename Container >
using ContainerDiff = camp::decay< decltype(camp::val< camp::iterator_from< Container > >() - camp::val< camp::iterator_from< Container > >())>
 
template<typename dispatch_policy , typename holder_type >
using dispatcher_transform_types_t = typename dispatcher_transform_types< dispatch_policy, holder_type >::type
 
template<typename Dispatcher_T >
using GenericWorkStruct = WorkStruct< RAJA_MAX_ALIGN, Dispatcher_T >
 
template<typename T >
using builtin_useReinterpret_t = typename builtin_useReinterpret< T >::type
 
using active_auto_thread = RAJA::seq_thread
 

Functions

template<typename DiffType , typename CountType >
RAJA_INLINE DiffType firstIndex (DiffType n, CountType num_threads, CountType thread_id)
 
constexpr bool dispatcher_use_host_invoke (Platform platform)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad (T *acc)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore (T *acc, T value)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange (T *acc, T value)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS (T *acc, T compare, T value)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd (T *acc, T value)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub (T *acc, T value)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd (T *acc, T value)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr (T *acc, T value)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor (T *acc, T value)
 
template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal (const T &a, const T &b)
 
template<typename T , typename Oper >
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop (T *acc, Oper &&oper)
 
template<typename T , typename Oper , typename ShortCircuit >
RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop (T *acc, Oper &&oper, ShortCircuit &&sc)
 
template<typename T , typename Dispatcher_T , size_t BLOCK_SIZE, size_t BLOCKS_PER_SM, bool Async>
const Dispatcher_T * get_Dispatcher (cuda_work_explicit< BLOCK_SIZE, BLOCKS_PER_SM, Async > const &)
 
template<size_t BLOCK_SIZE, size_t BLOCKS_PER_SM, typename StorageIter , typename value_type , typename index_type , typename... Args>
 __launch_bounds__ (BLOCK_SIZE, BLOCKS_PER_SM) __global__ void cuda_unordered_y_block_global(const RAJA_CUDA_GRID_CONSTANT StorageIter iter
 
template<typename T , typename Dispatcher_T , size_t BLOCK_SIZE, bool Async>
const Dispatcher_T * get_Dispatcher (hip_work< BLOCK_SIZE, Async > const &)
 
template<size_t BLOCK_SIZE, typename StorageIter , typename value_type , typename index_type , typename... Args>
 __launch_bounds__ (BLOCK_SIZE, 1) __global__ void hip_unordered_y_block_global(const StorageIter iter
 
template<camp::idx_t... Indices, typename... Policies, typename Selector >
auto make_multi_policy (camp::idx_seq< Indices... >, Selector s, std::tuple< Policies... > policies) -> MultiPolicy< Selector, Policies... >
 
template<typename T , typename Dispatcher_T >
const Dispatcher_T * get_Dispatcher (omp_work const &)
 
template<typename T , typename Dispatcher_T >
const Dispatcher_T * get_Dispatcher (omp_target_work const &)
 
template<typename T , typename Dispatcher_T >
const Dispatcher_T * get_Dispatcher (seq_work const &)
 
template<typename Iter , typename UnaryFunc >
constexpr RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each (Iter begin, Iter end, UnaryFunc func)
 
template<typename UnaryFunc , typename... Ts>
constexpr RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_type (camp::list< Ts... > const &, UnaryFunc func)
 
template<typename Tuple , typename UnaryFunc , camp::idx_t... Is>
constexpr RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_tuple (Tuple &&t, UnaryFunc func, camp::idx_seq< Is... >)
 
template<typename Iter , typename Predicate >
RAJA_HOST_DEVICE RAJA_INLINE Iter partition (Iter begin, Iter end, Predicate pred)
 unstable partition given range inplace using predicate function and using O(N) predicate evaluations and O(1) memory More...
 
template<typename Iter , typename Compare >
RAJA_HOST_DEVICE RAJA_INLINE void insertion_sort (Iter begin, Iter end, Compare comp)
 stable insertion sort given range inplace using comparison function and using O(N^2) comparisons and O(1) memory More...
 
RAJA_HOST_DEVICE constexpr RAJA_INLINE size_t num_shell_strides ()
 get number of strides for shell sort More...
 
RAJA_HOST_DEVICE constexpr RAJA_INLINE long long unsigned get_shell_stride (int i)
 get strides for shell sort More...
 
template<typename Iter , typename Compare >
RAJA_HOST_DEVICE RAJA_INLINE void shell_sort (Iter begin, Iter end, Compare comp)
 unstable shell sort given range inplace using comparison function and using O(N^?) comparisons and O(1) memory More...
 
template<typename Iter , typename Compare >
RAJA_HOST_DEVICE RAJA_INLINE void heapify (Iter begin, Iter root, Iter end, Compare comp)
 insert the given element into the heaps below it using comparison function and using O(lg(N)) comparisons and O(1) memory More...
 
template<typename Iter , typename Compare >
RAJA_HOST_DEVICE void heap_sort (Iter begin, Iter end, Compare comp)
 unstable heap sort given range inplace using comparison function and using O(N*lg(N)) comparisons and O(1) memory More...
 
template<typename Iter , typename Compare >
RAJA_HOST_DEVICE void intro_sort_depth (Iter begin, Iter end, Compare comp, unsigned depth)
 unstable intro sort given range inplace using comparison function and using O(N*lg(N)) comparisons and O(lg(N)) memory, with limited depth. More...
 
template<typename Iter , typename Compare >
RAJA_HOST_DEVICE void intro_sort (Iter begin, Iter end, Compare comp)
 unstable intro sort given range inplace using comparison function and using O(N*lg(N)) comparisons and O(lg(N)) memory More...
 
template<typename Iter , typename Compare >
void RAJA_INLINE inplace_merge (Iter first, Iter middle, Iter last, Compare comp)
 merge a range with midpoint using comparison function with local range/2 copy More...
 
template<typename Iter1 , typename Iter2 , typename OutIter , typename Compare >
void RAJA_INLINE merge_like_std (Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2, OutIter d_first, Compare comp)
 merge given two ranges using comparison function while copies are outside, somewhat follows STL API More...
 
template<typename Iter , typename Compare >
RAJA_INLINE void merge_sort (Iter begin, Iter end, Compare comp)
 stable merge sort given range inplace using comparison function and using O(N*lg(N)) comparisons and O(N) memory More...
 
template<typename T >
RAJA_HOST_DEVICE constexpr RAJA_INLINE auto get_last_index (T last)
 
template<typename T0 , typename T1 , typename... Args>
RAJA_HOST_DEVICE constexpr RAJA_INLINE auto get_last_index (T0, T1 t1, Args... args)
 
template<std::size_t... idx>
RAJA_HOST_DEVICE constexpr RAJA_INLINE auto make_reverse_array (std::index_sequence< idx... >)
 
template<typename Tuple , typename F , camp::idx_t... Is>
RAJA_HOST_DEVICE constexpr RAJA_INLINE void zip_for_each_impl (Tuple &&t, F &&f, camp::idx_seq< Is... >)
 Call f on each member of t (f(t)...). More...
 
template<typename Tuple0 , typename Tuple1 , typename F , camp::idx_t... Is>
RAJA_HOST_DEVICE constexpr RAJA_INLINE void zip_for_each_impl (Tuple0 &&t0, Tuple1 &&t1, F &&f, camp::idx_seq< Is... >)
 Call f on each member of t0 and t1 (f(t0, t1)...). More...
 
template<typename Tuple , typename F >
RAJA_HOST_DEVICE constexpr RAJA_INLINE void zip_for_each (Tuple &&t, F &&f)
 Call f on each member of t (f(t)...). More...
 
template<typename Tuple0 , typename Tuple1 , typename F >
RAJA_HOST_DEVICE constexpr RAJA_INLINE void zip_for_each (Tuple0 &&t0, Tuple1 &&t1, F &&f)
 Call f on each member of t0 and t1 (f(t0, t1)...). More...
 

Variables

Args args
 
value_type::device_call &[i_loop] iter
 

Typedef Documentation

◆ common_type_t

template<typename... Ts>
using RAJA::detail::common_type_t = typedef typename common_type<Ts...>::type

◆ IterVal

template<typename Iter >
using RAJA::detail::IterVal = typedef typename ::std::iterator_traits<Iter>::value_type

◆ IterRef

template<typename Iter >
using RAJA::detail::IterRef = typedef typename ::std::iterator_traits<Iter>::reference

◆ IterDiff

template<typename Iter >
using RAJA::detail::IterDiff = typedef typename ::std::iterator_traits<Iter>::difference_type

◆ ContainerIter

template<typename Container >
using RAJA::detail::ContainerIter = typedef camp::iterator_from<Container>

◆ ContainerVal

template<typename Container >
using RAJA::detail::ContainerVal = typedef camp::decay<decltype(*camp::val<camp::iterator_from<Container> >())>

◆ ContainerRef

template<typename Container >
using RAJA::detail::ContainerRef = typedef decltype(*camp::val<camp::iterator_from<Container> >())

◆ ContainerDiff

template<typename Container >
using RAJA::detail::ContainerDiff = typedef camp::decay<decltype(camp::val<camp::iterator_from<Container> >() - camp::val<camp::iterator_from<Container> >())>

◆ dispatcher_transform_types_t

template<typename dispatch_policy , typename holder_type >
using RAJA::detail::dispatcher_transform_types_t = typedef typename dispatcher_transform_types<dispatch_policy, holder_type>::type

◆ GenericWorkStruct

template<typename Dispatcher_T >
using RAJA::detail::GenericWorkStruct = typedef WorkStruct<RAJA_MAX_ALIGN, Dispatcher_T>

Generic struct used to lay out memory for structs of unknown size. Assumptions for any size (checked in construct): (1) offsetof(GenericWorkStruct<>, obj) == offsetof(WorkStruct<size>, obj); (2) sizeof(GenericWorkStruct) <= sizeof(WorkStruct<size>).

◆ builtin_useReinterpret_t

template<typename T >
using RAJA::detail::builtin_useReinterpret_t = typedef typename builtin_useReinterpret<T>::type

Atomics implemented using reinterpret cast

Alias for determining the integral type of the same size as the given type

◆ active_auto_thread

Provides priority between thread policies that should do the "right thing"

If OpenMP is active we always use the omp_thread.

Fall back to seq_thread, which performs non-threaded operations and assumes there are no thread-safety issues.

Function Documentation

◆ firstIndex()

template<typename DiffType , typename CountType >
RAJA_INLINE DiffType RAJA::detail::firstIndex ( DiffType  n,
CountType  num_threads,
CountType  thread_id 
)

◆ dispatcher_use_host_invoke()

constexpr bool RAJA::detail::dispatcher_use_host_invoke ( Platform  platform)
constexpr

◆ builtin_atomicLoad()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicLoad ( T *  acc)

Atomics implemented using intrinsics

Atomic load using intrinsic

Atomic load using reinterpret cast

◆ builtin_atomicStore()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE void RAJA::detail::builtin_atomicStore ( T *  acc,
T  value 
)

Atomic store using intrinsic

Atomic store using reinterpret cast

◆ builtin_atomicExchange()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicExchange ( T *  acc,
T  value 
)

Atomic exchange using intrinsic

Atomic exchange using reinterpret cast

◆ builtin_atomicCAS()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicCAS ( T *  acc,
T  compare,
T  value 
)

Atomic compare and swap using intrinsic

Atomic compare and swap using reinterpret cast

◆ builtin_atomicAdd()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicAdd ( T *  acc,
T  value 
)

Atomic addition using intrinsic

Atomics implemented using compare and swap loop

Atomic addition using compare and swap loop

◆ builtin_atomicSub()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicSub ( T *  acc,
T  value 
)

Atomic subtraction using intrinsic

Atomic subtraction using compare and swap loop

◆ builtin_atomicAnd()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicAnd ( T *  acc,
T  value 
)

Atomic and using intrinsic

Atomic and using compare and swap loop

◆ builtin_atomicOr()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicOr ( T *  acc,
T  value 
)

Atomic or using intrinsic

Atomic or using compare and swap loop

◆ builtin_atomicXor()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicXor ( T *  acc,
T  value 
)

Atomic xor using intrinsic

Atomic xor using compare and swap loop

◆ builtin_atomicCAS_equal()

template<typename T , std::enable_if_t< builtin_useIntrinsic< T >::value, bool > = true>
RAJA_DEVICE_HIP RAJA_INLINE bool RAJA::detail::builtin_atomicCAS_equal ( const T &  a,
const T &  b 
)

Implementation of compare and swap loop

Equality comparison for compare and swap loop using types supported by intrinsics.

Equality comparison for compare and swap loop using reinterpret cast. Converts to the underlying integral type to avoid cases where the values will never compare equal (most notably, NaNs).

◆ builtin_atomicCAS_loop() [1/2]

template<typename T , typename Oper >
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicCAS_loop ( T *  acc,
Oper &&  oper 
)

Generic implementation of any atomic 8, 16, 32, or 64 bit operator that can be implemented using a builtin compare and swap primitive. Returns the OLD value that was replaced by the result of this operation.

◆ builtin_atomicCAS_loop() [2/2]

template<typename T , typename Oper , typename ShortCircuit >
RAJA_DEVICE_HIP RAJA_INLINE T RAJA::detail::builtin_atomicCAS_loop ( T *  acc,
Oper &&  oper,
ShortCircuit &&  sc 
)

Generic implementation of any atomic 8, 16, 32, or 64 bit operator that can be implemented using a builtin compare and swap primitive. Uses short-circuiting for improved efficiency. Returns the OLD value that was replaced by the result of this operation.

◆ get_Dispatcher() [1/5]

template<typename T , typename Dispatcher_T , size_t BLOCK_SIZE, size_t BLOCKS_PER_SM, bool Async>
const Dispatcher_T* RAJA::detail::get_Dispatcher ( cuda_work_explicit< BLOCK_SIZE, BLOCKS_PER_SM, Async > const &  )
inline

Populate and return a Dispatcher object that can be used in device code

◆ __launch_bounds__() [1/2]

template<size_t BLOCK_SIZE, size_t BLOCKS_PER_SM, typename StorageIter , typename value_type , typename index_type , typename... Args>
RAJA::detail::__launch_bounds__ ( BLOCK_SIZE  ,
BLOCKS_PER_SM   
) const

◆ get_Dispatcher() [2/5]

template<typename T , typename Dispatcher_T , size_t BLOCK_SIZE, bool Async>
const Dispatcher_T* RAJA::detail::get_Dispatcher ( hip_work< BLOCK_SIZE, Async > const &  )
inline

Populate and return a Dispatcher object that can be used in device code

◆ __launch_bounds__() [2/2]

template<size_t BLOCK_SIZE, typename StorageIter , typename value_type , typename index_type , typename... Args>
RAJA::detail::__launch_bounds__ ( BLOCK_SIZE  ,
 
) const

◆ make_multi_policy()

template<camp::idx_t... Indices, typename... Policies, typename Selector >
auto RAJA::detail::make_multi_policy ( camp::idx_seq< Indices... >  ,
Selector  s,
std::tuple< Policies... >  policies 
) -> MultiPolicy<Selector, Policies...>

◆ get_Dispatcher() [3/5]

template<typename T , typename Dispatcher_T >
const Dispatcher_T* RAJA::detail::get_Dispatcher ( omp_work const &  )
inline

Populate and return a Dispatcher object

◆ get_Dispatcher() [4/5]

template<typename T , typename Dispatcher_T >
const Dispatcher_T* RAJA::detail::get_Dispatcher ( omp_target_work const &  )
inline

Populate and return a Dispatcher object that can be used in omp target regions

◆ get_Dispatcher() [5/5]

template<typename T , typename Dispatcher_T >
const Dispatcher_T* RAJA::detail::get_Dispatcher ( seq_work const &  )
inline

Populate and return a Dispatcher object

◆ for_each()

template<typename Iter , typename UnaryFunc >
constexpr RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc RAJA::detail::for_each ( Iter  begin,
Iter  end,
UnaryFunc  func 
)
constexpr

◆ for_each_type()

template<typename UnaryFunc , typename... Ts>
constexpr RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc RAJA::detail::for_each_type ( camp::list< Ts... > const &  ,
UnaryFunc  func 
)
constexpr

◆ for_each_tuple()

template<typename Tuple , typename UnaryFunc , camp::idx_t... Is>
constexpr RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc RAJA::detail::for_each_tuple ( Tuple &&  t,
UnaryFunc  func,
camp::idx_seq< Is... >   
)
constexpr

◆ partition()

template<typename Iter , typename Predicate >
RAJA_HOST_DEVICE RAJA_INLINE Iter RAJA::detail::partition ( Iter  begin,
Iter  end,
Predicate  pred 
)

unstable partition given range inplace using predicate function and using O(N) predicate evaluations and O(1) memory

◆ insertion_sort()

template<typename Iter , typename Compare >
RAJA_HOST_DEVICE RAJA_INLINE void RAJA::detail::insertion_sort ( Iter  begin,
Iter  end,
Compare  comp 
)

stable insertion sort given range inplace using comparison function and using O(N^2) comparisons and O(1) memory

◆ num_shell_strides()

RAJA_HOST_DEVICE constexpr RAJA_INLINE size_t RAJA::detail::num_shell_strides ( )
constexpr

get number of strides for shell sort

◆ get_shell_stride()

RAJA_HOST_DEVICE constexpr RAJA_INLINE long long unsigned RAJA::detail::get_shell_stride ( int  i)
constexpr

get strides for shell sort

◆ shell_sort()

template<typename Iter , typename Compare >
RAJA_HOST_DEVICE RAJA_INLINE void RAJA::detail::shell_sort ( Iter  begin,
Iter  end,
Compare  comp 
)

unstable shell sort given range inplace using comparison function and using O(N^?) comparisons and O(1) memory

◆ heapify()

template<typename Iter , typename Compare >
RAJA_HOST_DEVICE RAJA_INLINE void RAJA::detail::heapify ( Iter  begin,
Iter  root,
Iter  end,
Compare  comp 
)

insert the given element into the heaps below it using comparison function and using O(lg(N)) comparisons and O(1) memory

◆ heap_sort()

template<typename Iter , typename Compare >
RAJA_HOST_DEVICE void RAJA::detail::heap_sort ( Iter  begin,
Iter  end,
Compare  comp 
)
inline

unstable heap sort given range inplace using comparison function and using O(N*lg(N)) comparisons and O(1) memory

◆ intro_sort_depth()

template<typename Iter , typename Compare >
RAJA_HOST_DEVICE void RAJA::detail::intro_sort_depth ( Iter  begin,
Iter  end,
Compare  comp,
unsigned  depth 
)
inline

unstable intro sort given range inplace using comparison function and using O(N*lg(N)) comparisons and O(lg(N)) memory, with limited depth.

◆ intro_sort()

template<typename Iter , typename Compare >
RAJA_HOST_DEVICE void RAJA::detail::intro_sort ( Iter  begin,
Iter  end,
Compare  comp 
)
inline

unstable intro sort given range inplace using comparison function and using O(N*lg(N)) comparisons and O(lg(N)) memory

◆ inplace_merge()

template<typename Iter , typename Compare >
void RAJA_INLINE RAJA::detail::inplace_merge ( Iter  first,
Iter  middle,
Iter  last,
Compare  comp 
)

merge a range with midpoint using comparison function with local range/2 copy

◆ merge_like_std()

template<typename Iter1 , typename Iter2 , typename OutIter , typename Compare >
void RAJA_INLINE RAJA::detail::merge_like_std ( Iter1  first1,
Iter1  last1,
Iter2  first2,
Iter2  last2,
OutIter  d_first,
Compare  comp 
)

merge given two ranges using comparison function while copies are outside, somewhat follows STL API

◆ merge_sort()

template<typename Iter , typename Compare >
RAJA_INLINE void RAJA::detail::merge_sort ( Iter  begin,
Iter  end,
Compare  comp 
)

stable merge sort given range inplace using comparison function and using O(N*lg(N)) comparisons and O(N) memory

◆ get_last_index() [1/2]

template<typename T >
RAJA_HOST_DEVICE constexpr RAJA_INLINE auto RAJA::detail::get_last_index ( T  last)
constexpr

◆ get_last_index() [2/2]

template<typename T0 , typename T1 , typename... Args>
RAJA_HOST_DEVICE constexpr RAJA_INLINE auto RAJA::detail::get_last_index ( T0  ,
T1  t1,
Args...  args 
)
constexpr

◆ make_reverse_array()

template<std::size_t... idx>
RAJA_HOST_DEVICE constexpr RAJA_INLINE auto RAJA::detail::make_reverse_array ( std::index_sequence< idx... >  )
constexpr

◆ zip_for_each_impl() [1/2]

template<typename Tuple , typename F , camp::idx_t... Is>
RAJA_HOST_DEVICE constexpr RAJA_INLINE void RAJA::detail::zip_for_each_impl ( Tuple &&  t,
F &&  f,
camp::idx_seq< Is... >   
)
constexpr

Call f on each member of t (f(t)...).

◆ zip_for_each_impl() [2/2]

template<typename Tuple0 , typename Tuple1 , typename F , camp::idx_t... Is>
RAJA_HOST_DEVICE constexpr RAJA_INLINE void RAJA::detail::zip_for_each_impl ( Tuple0 &&  t0,
Tuple1 &&  t1,
F &&  f,
camp::idx_seq< Is... >   
)
constexpr

Call f on each member of t0 and t1 (f(t0, t1)...).

◆ zip_for_each() [1/2]

template<typename Tuple , typename F >
RAJA_HOST_DEVICE constexpr RAJA_INLINE void RAJA::detail::zip_for_each ( Tuple &&  t,
F &&  f 
)
constexpr

Call f on each member of t (f(t)...).

◆ zip_for_each() [2/2]

template<typename Tuple0 , typename Tuple1 , typename F >
RAJA_HOST_DEVICE constexpr RAJA_INLINE void RAJA::detail::zip_for_each ( Tuple0 &&  t0,
Tuple1 &&  t1,
F &&  f 
)
constexpr

Call f on each member of t0 and t1 (f(t0, t1)...).

Variable Documentation

◆ args

Args RAJA::detail::args
Initial value:
{
const index_type i_loop = blockIdx.y

◆ iter

value_type::device_call&[i_loop] RAJA::detail::iter