24 #ifndef RAJA_forall_hip_HPP
25 #define RAJA_forall_hip_HPP
27 #include "RAJA/config.hpp"
29 #if defined(RAJA_ENABLE_HIP)
32 #include "hip/hip_runtime.h"
74 template<
typename IterationMapping,
75 typename IterationGetter,
77 typename UniqueMarker>
78 struct ForallDimensionCalculator;
88 typename UniqueMarker>
89 struct ForallDimensionCalculator<
91 ::RAJA::hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>,
97 "block size must be > 0 or named_usage::unspecified with forall");
100 "grid size must be > 0 or named_usage::unspecified with forall");
102 using IndexGetter = ::RAJA::hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
104 template<
typename IdxT>
105 static void set_dimensions(internal::HipDims& dims,
110 const IdxT block_size =
static_cast<IdxT
>(IndexGetter::block_size);
111 const IdxT grid_size =
static_cast<IdxT
>(IndexGetter::grid_size);
113 if (len > (block_size * grid_size))
116 "len exceeds the size of the directly mapped index space");
119 internal::set_hip_dim<dim>(dims.threads,
120 static_cast<IdxT
>(IndexGetter::block_size));
121 internal::set_hip_dim<dim>(dims.blocks,
122 static_cast<IdxT
>(IndexGetter::grid_size));
128 typename Concretizer,
129 typename UniqueMarker>
130 struct ForallDimensionCalculator<
132 ::RAJA::hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>,
138 "grid size must be > 0 or named_usage::unspecified with forall");
141 ::RAJA::hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
143 template<
typename IdxT>
144 static void set_dimensions(internal::HipDims& dims,
147 size_t dynamic_shmem_size)
149 ::RAJA::hip::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
150 func, dynamic_shmem_size, len};
152 const IdxT grid_size =
static_cast<IdxT
>(IndexGetter::grid_size);
153 const IdxT block_size = concretizer.get_block_size_to_fit_len(grid_size);
155 if (block_size == IdxT(0))
158 "len exceeds the size of the directly mapped index space");
161 internal::set_hip_dim<dim>(dims.threads, block_size);
162 internal::set_hip_dim<dim>(dims.blocks, grid_size);
168 typename Concretizer,
169 typename UniqueMarker>
170 struct ForallDimensionCalculator<
172 ::RAJA::hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>,
178 "block size must be > 0 or named_usage::unspecified with forall");
181 ::RAJA::hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
183 template<
typename IdxT>
184 static void set_dimensions(internal::HipDims& dims,
187 size_t dynamic_shmem_size)
189 ::RAJA::hip::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
190 func, dynamic_shmem_size, len};
192 const IdxT block_size =
static_cast<IdxT
>(IndexGetter::block_size);
193 const IdxT grid_size = concretizer.get_grid_size_to_fit_len(block_size);
195 internal::set_hip_dim<dim>(dims.threads, block_size);
196 internal::set_hip_dim<dim>(dims.blocks, grid_size);
200 template<named_dim dim,
typename Concretizer,
typename UniqueMarker>
201 struct ForallDimensionCalculator<
204 IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>,
208 using IndexGetter = ::RAJA::hip::
209 IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
211 template<
typename IdxT>
212 static void set_dimensions(internal::HipDims& dims,
215 size_t dynamic_shmem_size)
217 ::RAJA::hip::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
218 func, dynamic_shmem_size, len};
220 const auto sizes = concretizer.get_block_and_grid_size_to_fit_len();
222 internal::set_hip_dim<dim>(dims.threads, sizes.first);
223 internal::set_hip_dim<dim>(dims.blocks, sizes.second);
230 typename Concretizer,
231 typename UniqueMarker>
232 struct ForallDimensionCalculator<
234 ::RAJA::hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>,
240 "block size must be > 0 or named_usage::unspecified with forall");
243 "grid size must be > 0 or named_usage::unspecified with forall");
245 using IndexGetter = ::RAJA::hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
247 template<
typename IdxT>
248 static void set_dimensions(internal::HipDims& dims,
253 const IdxT block_size =
static_cast<IdxT
>(IndexGetter::block_size);
254 const IdxT grid_size =
static_cast<IdxT
>(IndexGetter::grid_size);
256 internal::set_hip_dim<dim>(dims.threads, block_size);
257 internal::set_hip_dim<dim>(dims.blocks, grid_size);
263 typename Concretizer,
264 typename UniqueMarker>
265 struct ForallDimensionCalculator<
267 ::RAJA::hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>,
273 "grid size must be > 0 or named_usage::unspecified with forall");
276 ::RAJA::hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
278 template<
typename IdxT>
279 static void set_dimensions(internal::HipDims& dims,
282 size_t dynamic_shmem_size)
284 ::RAJA::hip::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
285 func, dynamic_shmem_size, len};
287 const IdxT grid_size =
static_cast<IdxT
>(IndexGetter::grid_size);
288 const IdxT block_size = concretizer.get_block_size_to_fit_device(grid_size);
290 internal::set_hip_dim<dim>(dims.threads, block_size);
291 internal::set_hip_dim<dim>(dims.blocks, grid_size);
297 typename Concretizer,
298 typename UniqueMarker>
299 struct ForallDimensionCalculator<
301 ::RAJA::hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>,
307 "block size must be > 0 or named_usage::unspecified with forall");
310 ::RAJA::hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
312 template<
typename IdxT>
313 static void set_dimensions(internal::HipDims& dims,
316 size_t dynamic_shmem_size)
318 ::RAJA::hip::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
319 func, dynamic_shmem_size, len};
321 const IdxT block_size =
static_cast<IdxT
>(IndexGetter::block_size);
322 const IdxT grid_size = concretizer.get_grid_size_to_fit_device(block_size);
324 internal::set_hip_dim<dim>(dims.threads, block_size);
325 internal::set_hip_dim<dim>(dims.blocks, grid_size);
329 template<named_dim dim,
typename Concretizer,
typename UniqueMarker>
330 struct ForallDimensionCalculator<
333 IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>,
337 using IndexGetter = ::RAJA::hip::
338 IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
340 template<
typename IdxT>
341 static void set_dimensions(internal::HipDims& dims,
344 size_t dynamic_shmem_size)
346 ::RAJA::hip::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
347 func, dynamic_shmem_size, len};
349 const auto sizes = concretizer.get_block_and_grid_size_to_fit_device();
351 internal::set_hip_dim<dim>(dims.threads, sizes.first);
352 internal::set_hip_dim<dim>(dims.blocks, sizes.second);
365 template<
typename EXEC_POL,
369 typename ForallParam,
370 typename IterationMapping =
typename EXEC_POL::IterationMapping,
371 typename IterationGetter =
typename EXEC_POL::IterationGetter,
372 std::enable_if_t<std::is_base_of<iteration_mapping::DirectBase,
373 IterationMapping>::value &&
374 (IterationGetter::block_size > 0),
375 size_t> BlockSize = IterationGetter::block_size>
377 void forallp_hip_kernel(const LOOP_BODY loop_body,
379 const IndexType length,
380 ForallParam f_params)
385 auto ii = IterationGetter::template index<IndexType>();
395 template<
typename EXEC_POL,
399 typename ForallParam,
400 typename IterationMapping =
typename EXEC_POL::IterationMapping,
401 typename IterationGetter =
typename EXEC_POL::IterationGetter,
402 std::enable_if_t<std::is_base_of<iteration_mapping::DirectBase,
403 IterationMapping>::value &&
404 (IterationGetter::block_size <= 0),
406 __global__
void forallp_hip_kernel(
const LOOP_BODY loop_body,
408 const IndexType length,
409 ForallParam f_params)
414 auto ii = IterationGetter::template index<IndexType>();
428 typename ForallParam,
429 typename IterationMapping =
typename EXEC_POL::IterationMapping,
430 typename IterationGetter =
typename EXEC_POL::IterationGetter,
431 std::enable_if_t<std::is_base_of<iteration_mapping::StridedLoopBase,
432 IterationMapping>::value &&
433 std::is_base_of<iteration_mapping::UnsizedLoopBase,
434 IterationMapping>::value &&
435 (IterationGetter::block_size > 0),
436 size_t> BlockSize = IterationGetter::block_size>
438 void forallp_hip_kernel(const LOOP_BODY loop_body,
440 const IndexType length,
441 ForallParam f_params)
447 for (
auto ii = IterationGetter::template index<IndexType>(); ii < length;
448 ii += IterationGetter::template size<IndexType>())
460 typename ForallParam,
461 typename IterationMapping =
typename EXEC_POL::IterationMapping,
462 typename IterationGetter =
typename EXEC_POL::IterationGetter,
463 std::enable_if_t<std::is_base_of<iteration_mapping::StridedLoopBase,
464 IterationMapping>::value &&
465 std::is_base_of<iteration_mapping::UnsizedLoopBase,
466 IterationMapping>::value &&
467 (IterationGetter::block_size <= 0),
469 __global__
void forallp_hip_kernel(
const LOOP_BODY loop_body,
471 const IndexType length,
472 ForallParam f_params)
478 for (
auto ii = IterationGetter::template index<IndexType>(); ii < length;
479 ii += IterationGetter::template size<IndexType>())
499 template<
typename Iterable,
501 typename IterationMapping,
502 typename IterationGetter,
503 typename Concretizer,
505 typename ForallParam>
506 RAJA_INLINE concepts::enable_if_t<
507 resources::EventProxy<resources::Hip>,
510 ::RAJA::policy::hip::hip_exec<IterationMapping,
515 LoopBody&& loop_body,
516 ForallParam f_params)
518 using Iterator = camp::decay<decltype(std::begin(
iter))>;
519 using LOOP_BODY = camp::decay<LoopBody>;
521 camp::decay<decltype(std::distance(std::begin(
iter), std::end(
iter)))>;
522 using EXEC_POL = camp::decay<decltype(pol)>;
523 using UniqueMarker = ::camp::list<IterationMapping, IterationGetter,
524 LOOP_BODY, Iterator, ForallParam>;
525 using DimensionCalculator =
526 impl::ForallDimensionCalculator<IterationMapping, IterationGetter,
527 Concretizer, UniqueMarker>;
532 Iterator begin = std::begin(
iter);
533 Iterator end = std::end(
iter);
534 IndexType len = std::distance(begin, end);
540 auto func =
reinterpret_cast<const void*
>(
541 &impl::forallp_hip_kernel<EXEC_POL, Iterator, LOOP_BODY, IndexType,
542 camp::decay<ForallParam>>);
552 internal::HipDims dims(1);
553 DimensionCalculator::set_dimensions(dims, len, func, shmem);
556 RAJA::hip::detail::hipInfo launch_info;
557 launch_info.gridDim = dims.blocks;
558 launch_info.blockDim = dims.threads;
559 launch_info.res = hip_res;
567 LOOP_BODY
body = RAJA::hip::make_launch_body(
568 func, dims.blocks, dims.threads, shmem, hip_res,
569 std::forward<LoopBody>(loop_body));
574 void*
args[] = {(
void*)&
body, (
void*)&begin, (
void*)&len,
584 return resources::EventProxy<resources::Hip>(hip_res);
605 template<
typename LoopBody,
606 typename IterationMapping,
607 typename IterationGetter,
608 typename Concretizer,
610 typename... SegmentTypes>
611 RAJA_INLINE resources::EventProxy<resources::Hip>
forall_impl(
615 ::RAJA::policy::hip::
616 hip_exec<IterationMapping, IterationGetter, Concretizer, Async>>,
617 const TypedIndexSet<SegmentTypes...>& iset,
618 LoopBody&& loop_body)
620 int num_seg = iset.getNumSegments();
621 for (
int isi = 0; isi < num_seg; ++isi)
624 r, isi, detail::CallForall(),
625 ::RAJA::policy::hip::hip_exec<IterationMapping, IterationGetter,
626 Concretizer,
true>(),
631 return resources::EventProxy<resources::Hip>(r);
RAJA header file defining index set classes.
Header file defining prototypes for routines used to manage memory for HIP reductions and other opera...
Header file containing RAJA HIP policy definitions.
Header file for common RAJA internal macro definitions.
RAJA_HOST_DEVICE void RAJA_ABORT_OR_THROW(const char *str)
Definition: macros.hpp:143
#define RAJA_UNUSED_ARG(x)
Definition: macros.hpp:97
Args args
Definition: WorkRunner.hpp:212
value_type::device_call &[i_loop] iter
Definition: WorkRunner.hpp:216
constexpr RAJA_HOST_DEVICE auto invoke_body(Params &¶ms, Fn &&f, Ts &&... extra)
Definition: forall.hpp:598
RAJA_HOST_DEVICE auto thread_privatize(const T &item) -> Privatizer< T >
Create a private copy of the argument to be stored on the current thread's stack in a class of the Pr...
Definition: privatizer.hpp:88
seq_exec seq_segit
Definition: policy.hpp:83
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, expt::type_traits::is_ForallParamPack< ForallParam >, expt::type_traits::is_ForallParamPack_empty< ForallParam > > forall_impl(resources::Host host_res, const simd_exec &, Iterable &&iter, Func &&body, ForallParam)
Definition: forall.hpp:81
Definition: AlignedRangeIndexSetBuilders.cpp:35
named_dim
Definition: types.hpp:53
__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__ void launch_new_reduce_global_fcn_fixed(const RAJA_CUDA_GRID_CONSTANT BODY body_in
void launch(LaunchParams const &launch_params, ReduceParams &&... rest_of_launch_args)
Definition: launch_core.hpp:268
auto & body
Definition: launch.hpp:177
auto privatizer
Definition: launch.hpp:176
void synchronize()
Synchronize all current RAJA executions for the specified policy.
Definition: synchronize.hpp:44
Header file containing RAJA index set and segment iteration template methods that take an execution p...
Header file containing utility methods used in HIP operations.
Header file for RAJA resource definitions.
static constexpr void parampack_resolve(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:304
static constexpr void parampack_init(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:269
static RAJA_HOST_DEVICE constexpr void parampack_combine(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:286
Definition: TypeTraits.hpp:59
Definition: types.hpp:143
Definition: types.hpp:209
Header file for RAJA type definitions.