21 #ifndef RAJA_policy_cuda_kernel_Reduce_HPP
22 #define RAJA_policy_cuda_kernel_Reduce_HPP
24 #include "RAJA/config.hpp"
38 template<
typename Data,
39 template<
typename...>
class ReduceOperator,
41 typename... EnclosedStmts,
43 struct CudaStatementExecutor<Data,
44 statement::
Reduce<RAJA::cuda_block_reduce,
58 auto value = data.template get_param<ParamId>();
59 using value_t = decltype(value);
60 value_t ident = value_t();
73 RAJA::cuda::impl::block_reduce<combiner_t>(value, ident);
77 thread_active = threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0;
81 data.template assign_param<ParamId>(new_value);
83 enclosed_stmts_t::exec(data, thread_active);
89 LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
97 template<
typename Data,
98 template<
typename...>
class ReduceOperator,
100 typename... EnclosedStmts,
102 struct CudaStatementExecutor<Data,
103 statement::
Reduce<RAJA::cuda_warp_reduce,
117 auto value = data.template get_param<ParamId>();
118 using value_t = decltype(value);
119 value_t ident = value_t();
130 value_t new_value = RAJA::cuda::impl::warp_reduce<combiner_t>(value, ident);
131 data.template assign_param<ParamId>(new_value);
134 thread_active = threadIdx.x == 0;
138 data.template assign_param<ParamId>(new_value);
140 enclosed_stmts_t::exec(data, thread_active);
146 LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
147 return enclosed_dims;
#define RAJA_DEVICE
Definition: macros.hpp:66
constexpr auto Reduce(T *target)
Definition: reducer.hpp:231
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA header file containing constructs used to run kernel traversals on GPU with CUDA.
StatementList< EnclosedStmts... > stmt_list_t
Definition: Reduce.hpp:110
static LaunchDims calculateDimensions(Data const &data)
Definition: Reduce.hpp:143
CudaStatementListExecutor< Data, stmt_list_t, Types > enclosed_stmts_t
Definition: Reduce.hpp:112
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: Reduce.hpp:114
StatementList< EnclosedStmts... > stmt_list_t
Definition: Reduce.hpp:51
CudaStatementListExecutor< Data, stmt_list_t, Types > enclosed_stmts_t
Definition: Reduce.hpp:53
static LaunchDims calculateDimensions(Data const &data)
Definition: Reduce.hpp:86
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: Reduce.hpp:55
Definition: reduce.hpp:70