20 #ifndef RAJA_scan_cuda_HPP
21 #define RAJA_scan_cuda_HPP
23 #include "RAJA/config.hpp"
25 #if defined(RAJA_ENABLE_CUDA)
28 #include <type_traits>
30 #include "cub/device/device_scan.cuh"
31 #include "cub/util_allocator.cuh"
// In-place inclusive scan on the device: the visible calls pass `begin` as
// both the input and the output iterator of ::cub::DeviceScan::InclusiveScan,
// so the results overwrite the scanned range [begin, end).
// NOTE(review): this listing has gaps (see the jumps in the embedded line
// numbers). The function name, the remainder of the signature and the braces
// are not shown, so the comments here describe only the visible statements.
47 template<
typename IterationMapping,
48 typename IterationGetter,
// CUDA resource; the stream used below is obtained from it.
55 resources::Cuda cuda_res,
// Execution policy tag, parameterized on the template arguments above.
// Presumably used only for overload selection - TODO confirm.
56 ::RAJA::policy::cuda::cuda_exec_explicit<IterationMapping,
// Stream on which both CUB calls below are issued.
65 cudaStream_t stream = cuda_res.get_stream();
// Number of elements in [begin, end).
// NOTE(review): std::distance returns the iterator's difference_type; storing
// it in `int` would truncate for ranges longer than INT_MAX - confirm callers
// never pass such ranges.
67 int len = std::distance(begin, end);
// Size-query pass: d_temp_storage is null here. Per CUB's documented
// convention, such a call only writes the required scratch size to
// temp_storage_bytes and performs no scan work.
69 void* d_temp_storage =
nullptr;
70 size_t temp_storage_bytes = 0;
71 CAMP_CUDA_API_INVOKE_AND_CHECK(::cub::DeviceScan::InclusiveScan,
72 d_temp_storage, temp_storage_bytes, begin,
73 begin, binary_op, len, stream);
// Scratch buffer request from the device memory pool. The assignment target
// and the size argument fall on lines missing from this listing; presumably
// d_temp_storage and temp_storage_bytes - TODO confirm.
76 cuda::device_mempool_type::getInstance().malloc<
unsigned char>(
// Second call with the same arguments. Assuming d_temp_storage now points at
// the scratch buffer, this is the call that performs the scan on `stream`.
79 CAMP_CUDA_API_INVOKE_AND_CHECK(::cub::DeviceScan::InclusiveScan,
80 d_temp_storage, temp_storage_bytes, begin,
81 begin, binary_op, len, stream);
// Hand the scratch buffer back to the pool.
83 cuda::device_mempool_type::getInstance().free(d_temp_storage);
// Return an event proxy constructed from the resource.
87 return resources::EventProxy<resources::Cuda>(cuda_res);
// In-place exclusive scan on the device: the visible calls pass `begin` as
// both the input and the output iterator of ::cub::DeviceScan::ExclusiveScan,
// with `init` as the initial value, so the results overwrite [begin, end).
// NOTE(review): this listing has gaps (see the jumps in the embedded line
// numbers). The function name, the remainder of the signature and the braces
// are not shown, so the comments here describe only the visible statements.
94 template<
typename IterationMapping,
95 typename IterationGetter,
// CUDA resource; the stream used below is obtained from it.
103 resources::Cuda cuda_res,
// Execution policy tag, parameterized on the template arguments above.
// Presumably used only for overload selection - TODO confirm.
104 ::RAJA::policy::cuda::cuda_exec_explicit<IterationMapping,
// Stream on which both CUB calls below are issued.
114 cudaStream_t stream = cuda_res.get_stream();
// Number of elements in [begin, end).
// NOTE(review): std::distance returns the iterator's difference_type; storing
// it in `int` would truncate for ranges longer than INT_MAX - confirm callers
// never pass such ranges.
116 int len = std::distance(begin, end);
// Size-query pass: d_temp_storage is null here. Per CUB's documented
// convention, such a call only writes the required scratch size to
// temp_storage_bytes and performs no scan work.
118 void* d_temp_storage =
nullptr;
119 size_t temp_storage_bytes = 0;
120 CAMP_CUDA_API_INVOKE_AND_CHECK(::cub::DeviceScan::ExclusiveScan,
121 d_temp_storage, temp_storage_bytes, begin,
122 begin, binary_op, init, len, stream);
// Scratch buffer request from the device memory pool. The assignment target
// and the size argument fall on lines missing from this listing; presumably
// d_temp_storage and temp_storage_bytes - TODO confirm.
125 cuda::device_mempool_type::getInstance().malloc<
unsigned char>(
// Second call with the same arguments. Assuming d_temp_storage now points at
// the scratch buffer, this is the call that performs the scan on `stream`.
128 CAMP_CUDA_API_INVOKE_AND_CHECK(::cub::DeviceScan::ExclusiveScan,
129 d_temp_storage, temp_storage_bytes, begin,
130 begin, binary_op, init, len, stream);
// Hand the scratch buffer back to the pool.
132 cuda::device_mempool_type::getInstance().free(d_temp_storage);
// Return an event proxy constructed from the resource.
136 return resources::EventProxy<resources::Cuda>(cuda_res);
// Out-of-place inclusive scan on the device: reads from `begin` and writes
// through `out` via ::cub::DeviceScan::InclusiveScan, combining elements with
// `binary_op`.
// NOTE(review): this listing has gaps (see the jumps in the embedded line
// numbers). Part of the signature and the braces are not shown, so the
// comments here describe only the visible statements.
143 template<
typename IterationMapping,
144 typename IterationGetter,
145 typename Concretizer,
146 size_t BLOCKS_PER_SM,
// Returns an event proxy for the CUDA resource that was passed in.
151 RAJA_INLINE resources::EventProxy<resources::Cuda>
inclusive(
// CUDA resource; the stream used below is obtained from it.
152 resources::Cuda cuda_res,
// Execution policy tag, parameterized on the template arguments above.
// Presumably used only for overload selection - TODO confirm.
153 ::RAJA::policy::cuda::cuda_exec_explicit<IterationMapping,
// Stream on which both CUB calls below are issued.
163 cudaStream_t stream = cuda_res.get_stream();
// Number of elements in [begin, end).
// NOTE(review): std::distance returns the iterator's difference_type; storing
// it in `int` would truncate for ranges longer than INT_MAX - confirm callers
// never pass such ranges.
165 int len = std::distance(begin, end);
// Size-query pass: d_temp_storage is null here. Per CUB's documented
// convention, such a call only writes the required scratch size to
// temp_storage_bytes and performs no scan work.
167 void* d_temp_storage =
nullptr;
168 size_t temp_storage_bytes = 0;
169 CAMP_CUDA_API_INVOKE_AND_CHECK(::cub::DeviceScan::InclusiveScan,
170 d_temp_storage, temp_storage_bytes, begin, out,
171 binary_op, len, stream);
// Scratch buffer request from the device memory pool. The assignment target
// and the size argument fall on lines missing from this listing; presumably
// d_temp_storage and temp_storage_bytes - TODO confirm.
174 cuda::device_mempool_type::getInstance().malloc<
unsigned char>(
// Second call with the same arguments. Assuming d_temp_storage now points at
// the scratch buffer, this is the call that performs the scan on `stream`.
177 CAMP_CUDA_API_INVOKE_AND_CHECK(::cub::DeviceScan::InclusiveScan,
178 d_temp_storage, temp_storage_bytes, begin, out,
179 binary_op, len, stream);
// Hand the scratch buffer back to the pool.
181 cuda::device_mempool_type::getInstance().free(d_temp_storage);
// Return an event proxy constructed from the resource.
185 return resources::EventProxy<resources::Cuda>(cuda_res);
// Out-of-place exclusive scan on the device: reads from `begin` and writes
// through `out` via ::cub::DeviceScan::ExclusiveScan, combining elements with
// `binary_op` and passing `init` as the initial value.
// NOTE(review): this listing has gaps (see the jumps in the embedded line
// numbers). Part of the signature and the braces are not shown, so the
// comments here describe only the visible statements.
192 template<
typename IterationMapping,
193 typename IterationGetter,
194 typename Concretizer,
195 size_t BLOCKS_PER_SM,
// Returns an event proxy for the CUDA resource that was passed in.
201 RAJA_INLINE resources::EventProxy<resources::Cuda>
exclusive(
// CUDA resource; the stream used below is obtained from it.
202 resources::Cuda cuda_res,
// Execution policy tag, parameterized on the template arguments above.
// Presumably used only for overload selection - TODO confirm.
203 ::RAJA::policy::cuda::cuda_exec_explicit<IterationMapping,
// Stream on which both CUB calls below are issued.
214 cudaStream_t stream = cuda_res.get_stream();
// Number of elements in [begin, end).
// NOTE(review): std::distance returns the iterator's difference_type; storing
// it in `int` would truncate for ranges longer than INT_MAX - confirm callers
// never pass such ranges.
216 int len = std::distance(begin, end);
// Size-query pass: d_temp_storage is null here. Per CUB's documented
// convention, such a call only writes the required scratch size to
// temp_storage_bytes and performs no scan work.
218 void* d_temp_storage =
nullptr;
219 size_t temp_storage_bytes = 0;
220 CAMP_CUDA_API_INVOKE_AND_CHECK(::cub::DeviceScan::ExclusiveScan,
221 d_temp_storage, temp_storage_bytes, begin, out,
222 binary_op, init, len, stream);
// Scratch buffer request from the device memory pool. The assignment target
// and the size argument fall on lines missing from this listing; presumably
// d_temp_storage and temp_storage_bytes - TODO confirm.
225 cuda::device_mempool_type::getInstance().malloc<
unsigned char>(
// Second call with the same arguments. Assuming d_temp_storage now points at
// the scratch buffer, this is the call that performs the scan on `stream`.
228 CAMP_CUDA_API_INVOKE_AND_CHECK(::cub::DeviceScan::ExclusiveScan,
229 d_temp_storage, temp_storage_bytes, begin, out,
230 binary_op, init, len, stream);
// Hand the scratch buffer back to the pool.
232 cuda::device_mempool_type::getInstance().free(d_temp_storage);
// Return an event proxy constructed from the resource.
236 return resources::EventProxy<resources::Cuda>(cuda_res);
Header file defining prototypes for routines used to manage memory for CUDA reductions and other operations.
Header file containing RAJA CUDA policy definitions.
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< Policy > > inclusive(resources::Host host_res, const Policy &exec, Iter begin, Iter end, OutIter out, BinFn f)
Definition: scan.hpp:144
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< Policy > > exclusive(resources::Host host_res, const Policy &exec, Iter begin, Iter end, OutIter out, BinFn f, ValueT v)
Definition: scan.hpp:167
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< Policy > > inclusive_inplace(resources::Host host_res, const Policy &, Iter begin, Iter end, BinFn f)
Definition: scan.hpp:51
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< Policy > > exclusive_inplace(resources::Host host_res, const Policy &, Iter begin, Iter end, BinFn f, ValueT v)
Definition: scan.hpp:96
Definition: AlignedRangeIndexSetBuilders.cpp:35
void launch(LaunchParams const &launch_params, ReduceParams &&... rest_of_launch_args)
Definition: launch_core.hpp:268