22 #ifndef RAJA_policy_hip_kernel_internal_HPP
23 #define RAJA_policy_hip_kernel_internal_HPP
25 #include "RAJA/config.hpp"
27 #if defined(RAJA_ENABLE_HIP)
32 #include "camp/camp.hpp"
// Special member functions of LaunchDims: default construction, copy
// construction/assignment and move construction/assignment are all
// compiler-generated (= default).
55 LaunchDims() =
default;
56 LaunchDims(LaunchDims
const&) =
default;
57 LaunchDims(LaunchDims&&) =
default;
58 LaunchDims& operator=(LaunchDims
const&) =
default;
59 LaunchDims& operator=(LaunchDims&&) =
default;
// Constructor taking three HipDims values (_active, _dims, _min_dims).
// NOTE(review): presumably these initialize the active, dims and min_dims
// members read by the methods below -- the initializer list is not shown
// here; confirm.
62 LaunchDims(HipDims _active, HipDims _dims, HipDims _min_dims)
// Component-wise maximum of this object and c.
//
// For each of the three groups (active, dims, min_dims), and for both the
// blocks and the threads member of each group, the x, y and z entries of
// `result` are set to std::max of the corresponding entries of c and *this.
// Neither operand is modified (const method, const reference parameter).
//
// NOTE(review): the declaration and the return of `result` are not shown
// here; presumably it is a LaunchDims returned by value -- confirm.
69 LaunchDims
max(LaunchDims
const& c)
const
73 result.active.blocks.x =
std::max(c.active.blocks.x, active.blocks.x);
74 result.active.blocks.y =
std::max(c.active.blocks.y, active.blocks.y);
75 result.active.blocks.z =
std::max(c.active.blocks.z, active.blocks.z);
77 result.dims.blocks.x =
std::max(c.dims.blocks.x, dims.blocks.x);
78 result.dims.blocks.y =
std::max(c.dims.blocks.y, dims.blocks.y);
79 result.dims.blocks.z =
std::max(c.dims.blocks.z, dims.blocks.z);
81 result.min_dims.blocks.x =
std::max(c.min_dims.blocks.x, min_dims.blocks.x);
82 result.min_dims.blocks.y =
std::max(c.min_dims.blocks.y, min_dims.blocks.y);
83 result.min_dims.blocks.z =
std::max(c.min_dims.blocks.z, min_dims.blocks.z);
85 result.active.threads.x =
std::max(c.active.threads.x, active.threads.x);
86 result.active.threads.y =
std::max(c.active.threads.y, active.threads.y);
87 result.active.threads.z =
std::max(c.active.threads.z, active.threads.z);
89 result.dims.threads.x =
std::max(c.dims.threads.x, dims.threads.x);
90 result.dims.threads.y =
std::max(c.dims.threads.y, dims.threads.y);
91 result.dims.threads.z =
std::max(c.dims.threads.z, dims.threads.z);
93 result.min_dims.threads.x =
94 std::max(c.min_dims.threads.x, min_dims.threads.x);
95 result.min_dims.threads.y =
96 std::max(c.min_dims.threads.y, min_dims.threads.y);
97 result.min_dims.threads.z =
98 std::max(c.min_dims.threads.z, min_dims.threads.z);
// Returns nonzero when at least one of active.blocks.x, active.blocks.y or
// active.blocks.z is nonzero, and 0 when all three are zero.
104 int blocks_are_active()
const
106 return active.blocks.x || active.blocks.y || active.blocks.z;
// Returns nonzero when at least one of active.threads.x, active.threads.y or
// active.threads.z is nonzero, and 0 when all three are zero.
110 int threads_are_active()
const
112 return active.threads.x || active.threads.y || active.threads.z;
// When blocks_are_active() is true, returns the product of dims.blocks over
// the x/y/z dimensions whose active.blocks entry is nonzero; a dimension
// whose active entry is zero contributes a factor of 1.
//
// NOTE(review): the value returned when no block dimension is active is not
// shown here -- confirm against the full source.
116 int num_blocks()
const
118 if (blocks_are_active())
120 return (active.blocks.x ? dims.blocks.x : 1) *
121 (active.blocks.y ? dims.blocks.y : 1) *
122 (active.blocks.z ? dims.blocks.z : 1);
// When threads_are_active() is true, returns the product of dims.threads
// over the x/y/z dimensions whose active.threads entry is nonzero; a
// dimension whose active entry is zero contributes a factor of 1.
//
// NOTE(review): the value returned when no thread dimension is active is
// not shown here -- confirm against the full source.
131 int num_threads()
const
133 if (threads_are_active())
135 return (active.threads.x ? dims.threads.x : 1) *
136 (active.threads.y ? dims.threads.y : 1) *
137 (active.threads.z ? dims.threads.z : 1);
// Raises each of dims.blocks.x/y/z to at least the corresponding
// min_dims.blocks value. Entries already at or above the minimum are left
// unchanged. Modifies dims.blocks in place.
146 void clamp_to_min_blocks()
148 dims.blocks.x =
std::max(min_dims.blocks.x, dims.blocks.x);
149 dims.blocks.y =
std::max(min_dims.blocks.y, dims.blocks.y);
150 dims.blocks.z =
std::max(min_dims.blocks.z, dims.blocks.z);
// Raises each of dims.threads.x/y/z to at least the corresponding
// min_dims.threads value. Entries already at or above the minimum are left
// unchanged. Modifies dims.threads in place.
154 void clamp_to_min_threads()
156 dims.threads.x =
std::max(min_dims.threads.x, dims.threads.x);
157 dims.threads.y =
std::max(min_dims.threads.y, dims.threads.y);
158 dims.threads.z =
std::max(min_dims.threads.z, dims.threads.z);
// Merges two LaunchDims values into one and returns the result by value.
// HipStatementListExecutorHelper::calculateDimensions uses it to fold the
// dimensions of one statement with those of the remaining statements.
//
// NOTE(review): the body is not shown here; presumably it is the
// component-wise maximum, i.e. lhs.max(rhs) -- confirm.
163 LaunchDims combine(LaunchDims
const& lhs, LaunchDims
const& rhs)
// Compile-time recursion over a list of statement executors.
//
// Template parameters:
//   cur_stmt  - index of the statement handled by this instantiation
//   num_stmts - total number of statements in StmtList
//   StmtList  - type list of statement executors, indexed with camp::at_v
//
// next_helper_t is the same helper instantiated for index cur_stmt + 1;
// cur_stmt_t is the executor type found at index cur_stmt.
//
// exec(data, thread_active): device function that runs the current
// statement's exec and then the next helper's exec, passing the same data
// and thread_active to both.
//
// calculateDimensions(data): obtains the LaunchDims of the current statement
// and of the remaining statements, then merges them with combine().
168 template<camp::
idx_t cur_stmt, camp::
idx_t num_stmts,
typename StmtList>
169 struct HipStatementListExecutorHelper
172 using next_helper_t =
173 HipStatementListExecutorHelper<cur_stmt + 1, num_stmts, StmtList>;
175 using cur_stmt_t = camp::at_v<StmtList, cur_stmt>;
177 template<
typename Data>
178 inline static RAJA_DEVICE void exec(Data& data,
bool thread_active)
181 cur_stmt_t::exec(data, thread_active);
184 next_helper_t::exec(data, thread_active);
187 template<
typename Data>
188 inline static LaunchDims calculateDimensions(Data& data)
190 LaunchDims statement_dims = cur_stmt_t::calculateDimensions(data);
192 LaunchDims next_dims = next_helper_t::calculateDimensions(data);
194 return combine(statement_dims, next_dims);
// Partial specialization selected when the current index equals num_stmts
// (both leading template arguments are num_stmts), i.e. one past the last
// statement. It ends the recursion of the primary template.
//
// NOTE(review): the member bodies are not shown here. Presumably the first
// member template is an exec() that does nothing, and calculateDimensions()
// (which ignores its unnamed argument) returns a default LaunchDims --
// confirm against the full source.
198 template<camp::
idx_t num_stmts,
typename StmtList>
199 struct HipStatementListExecutorHelper<num_stmts, num_stmts, StmtList>
202 template<
typename Data>
208 template<
typename Data>
209 inline static LaunchDims calculateDimensions(Data&)
// Forward declaration: executor for a single statement policy `Policy`,
// parameterized on the kernel data type and a Types parameter. Declared
// only; no definition appears at this point.
216 template<
typename Data,
typename Policy,
typename Types>
217 struct HipStatementExecutor;
// Forward declaration: executor for a whole statement list. The partial
// specialization for StatementList<Stmts...> supplies the implementation.
219 template<
typename Data,
typename StmtList,
typename Types>
220 struct HipStatementListExecutor;
// Executor for a StatementList<Stmts...>.
//
// enclosed_stmts_t is a camp::list holding one
// HipStatementExecutor<Data, Stmt, Types> per statement in Stmts, in order.
// num_stmts is the number of statements (sizeof...(Stmts)).
//
// exec(data, thread_active): device function; forwards to
// HipStatementListExecutorHelper starting at index 0.
//
// calculateDimensions(data): forwards to the helper's calculateDimensions
// starting at index 0 and returns its LaunchDims.
222 template<
typename Data,
typename... Stmts,
typename Types>
223 struct HipStatementListExecutor<Data,
StatementList<Stmts...>, Types>
226 using enclosed_stmts_t =
227 camp::list<HipStatementExecutor<Data, Stmts, Types>...>;
229 static constexpr
size_t num_stmts =
sizeof...(Stmts);
231 static inline RAJA_DEVICE void exec(Data& data,
bool thread_active)
234 HipStatementListExecutorHelper<0, num_stmts, enclosed_stmts_t>::exec(
235 data, thread_active);
238 static inline LaunchDims calculateDimensions(Data
const& data)
241 return HipStatementListExecutorHelper<
242 0, num_stmts, enclosed_stmts_t>::calculateDimensions(data);
// Convenience alias for HipStatementListExecutor. Note the parameter order:
// the alias takes (StmtList, Data, Types) while the underlying template
// takes (Data, StmtList, Types).
246 template<
typename StmtList,
typename Data,
typename Types>
247 using hip_statement_list_executor_t =
248 HipStatementListExecutor<Data, StmtList, Types>;
// Primary template, declared but not defined here. Partial specializations
// on RAJA::policy::hip::hip_indexer<...> each provide a static
// get_dimensions(len) returning a LaunchDims.
251 template<
typename kernel_indexer>
252 struct KernelDimensionCalculator;
// DirectUnchecked mapping; IndexGlobal with block size ignored and grid size
// ignored.
//
// get_dimensions(len) returns a default-constructed LaunchDims, i.e. it sets
// no thread or block entry. A len different from 1 enters the guarded
// branch, whose message ends with "mapped index space".
// NOTE(review): the call in that branch is not shown here; presumably
// RAJA_ABORT_OR_THROW -- confirm.
255 template<named_dim dim, kernel_sync_requirement sync>
256 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
257 iteration_mapping::DirectUnchecked,
259 hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>>>
262 hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>;
264 template<
typename IdxT>
265 static LaunchDims get_dimensions(IdxT len)
267 if (len !=
static_cast<IdxT
>(1))
270 "mapped index space");
273 return LaunchDims {};
// DirectUnchecked mapping; IndexGlobal with block size unspecified and grid
// size ignored.
//
// get_dimensions(len) sets, for dimension `dim` only:
//   active.threads   = true
//   dims.threads     = len
//   min_dims.threads = len
// No blocks entry is set by the visible code.
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
278 template<named_dim dim, kernel_sync_requirement sync>
279 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
280 iteration_mapping::DirectUnchecked,
282 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>>>
285 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>;
287 template<
typename IdxT>
288 static LaunchDims get_dimensions(IdxT len)
294 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
295 set_hip_dim<dim>(dims.dims.threads,
static_cast<hip_dim_member_t
>(len));
296 set_hip_dim<dim>(dims.min_dims.threads,
static_cast<hip_dim_member_t
>(len));
// DirectUnchecked mapping; IndexGlobal with compile-time BLOCK_SIZE and grid
// size ignored. BLOCK_SIZE must be > 0 (static_assert).
//
// get_dimensions(len): a len different from IndexMapper::block_size enters
// the guarded branch (message ends with "mapped index space"). For dimension
// `dim` it then sets:
//   active.threads   = true
//   dims.threads     = IndexMapper::block_size
//   min_dims.threads = IndexMapper::block_size
// NOTE(review): the call in the guarded branch and the declaration/return of
// `dims` are not shown; presumably RAJA_ABORT_OR_THROW and a LaunchDims
// returned by value -- confirm.
303 template<named_dim dim,
int BLOCK_SIZE, kernel_sync_requirement sync>
304 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
305 iteration_mapping::DirectUnchecked,
307 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>>>
309 static_assert(BLOCK_SIZE > 0,
310 "block size must be > 0, named_usage::unspecified, or "
311 "named_usage::ignored with kernel");
313 using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>;
315 template<
typename IdxT>
316 static LaunchDims get_dimensions(IdxT len)
318 if (len !=
static_cast<IdxT
>(IndexMapper::block_size))
321 "mapped index space");
326 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
327 set_hip_dim<dim>(dims.dims.threads,
328 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
329 set_hip_dim<dim>(dims.min_dims.threads,
330 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
// DirectUnchecked mapping; IndexGlobal with block size ignored and grid size
// unspecified.
//
// get_dimensions(len) sets, for dimension `dim` only:
//   active.blocks   = true
//   dims.blocks     = len
//   min_dims.blocks = len
// No threads entry is set by the visible code.
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
337 template<named_dim dim, kernel_sync_requirement sync>
338 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
339 iteration_mapping::DirectUnchecked,
341 hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>>>
344 hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>;
346 template<
typename IdxT>
347 static LaunchDims get_dimensions(IdxT len)
351 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
352 set_hip_dim<dim>(dims.dims.blocks,
static_cast<hip_dim_member_t
>(len));
353 set_hip_dim<dim>(dims.min_dims.blocks,
static_cast<hip_dim_member_t
>(len));
// DirectUnchecked mapping; IndexGlobal with block size ignored and
// compile-time GRID_SIZE. GRID_SIZE must be > 0 (static_assert).
//
// get_dimensions(len): a len different from IndexMapper::grid_size enters
// the guarded branch (message ends with "mapped index space"). For dimension
// `dim` it then sets:
//   active.blocks   = true
//   dims.blocks     = IndexMapper::grid_size
//   min_dims.blocks = IndexMapper::grid_size
// NOTE(review): the call in the guarded branch and the declaration/return of
// `dims` are not shown; presumably RAJA_ABORT_OR_THROW and a LaunchDims
// returned by value -- confirm.
360 template<named_dim dim,
int GRID_SIZE, kernel_sync_requirement sync>
361 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
362 iteration_mapping::DirectUnchecked,
364 hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>>>
366 static_assert(GRID_SIZE > 0,
367 "grid size must be > 0, named_usage::unspecified, or "
368 "named_usage::ignored with kernel");
370 using IndexMapper = hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>;
372 template<
typename IdxT>
373 static LaunchDims get_dimensions(IdxT len)
375 if (len !=
static_cast<IdxT
>(IndexMapper::grid_size))
378 "mapped index space");
383 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
384 set_hip_dim<dim>(dims.dims.blocks,
385 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
386 set_hip_dim<dim>(dims.min_dims.blocks,
387 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
// DirectUnchecked mapping; IndexGlobal with both block size and grid size
// unspecified.
//
// get_dimensions(len): the only visible logic is a check for len != 0.
// NOTE(review): the guarded statement and the return are not shown here.
// Presumably a nonzero len is rejected because neither size is known, and a
// default LaunchDims is returned otherwise -- confirm against the full
// source.
394 template<named_dim dim, kernel_sync_requirement sync>
395 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
396 iteration_mapping::DirectUnchecked,
398 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>>>
401 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
403 template<
typename IdxT>
404 static LaunchDims get_dimensions(IdxT len)
406 if (len !=
static_cast<IdxT
>(0))
// DirectUnchecked mapping; IndexGlobal with block size unspecified and
// compile-time GRID_SIZE. GRID_SIZE must be > 0 (static_assert).
//
// get_dimensions(len): computes a run-time block_size, then requires
// len == block_size * IndexMapper::grid_size; a mismatch enters the guarded
// branch (message ends with "mapped index space"). For dimension `dim` it
// then sets:
//   active.threads = true, dims.threads = min_dims.threads = block_size
//   active.blocks  = true, dims.blocks  = min_dims.blocks  = grid_size
// NOTE(review): the initializer of block_size is not shown; presumably
// RAJA_DIVIDE_CEILING_INT(len, grid_size). The call in the guarded branch
// and the declaration/return of `dims` are also not shown -- confirm.
416 template<named_dim dim,
int GRID_SIZE, kernel_sync_requirement sync>
417 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
418 iteration_mapping::DirectUnchecked,
420 hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>>>
422 static_assert(GRID_SIZE > 0,
423 "grid size must be > 0, named_usage::unspecified, or "
424 "named_usage::ignored with kernel");
427 hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
429 template<
typename IdxT>
430 static LaunchDims get_dimensions(IdxT len)
434 const IdxT block_size =
436 if (len != (block_size *
static_cast<IdxT
>(IndexMapper::grid_size)))
439 "mapped index space");
444 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
445 set_hip_dim<dim>(dims.dims.threads,
446 static_cast<hip_dim_member_t
>(block_size));
447 set_hip_dim<dim>(dims.min_dims.threads,
448 static_cast<hip_dim_member_t
>(block_size));
450 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
451 set_hip_dim<dim>(dims.dims.blocks,
452 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
453 set_hip_dim<dim>(dims.min_dims.blocks,
454 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
// DirectUnchecked mapping; IndexGlobal with compile-time BLOCK_SIZE and grid
// size unspecified. BLOCK_SIZE must be > 0 (static_assert).
//
// get_dimensions(len): computes a run-time grid_size from len and
// IndexMapper::block_size, then requires
// len == IndexMapper::block_size * grid_size; a mismatch enters the guarded
// branch (message ends with "mapped index space"). For dimension `dim` it
// then sets:
//   active.threads = true, dims.threads = min_dims.threads = block_size
//   active.blocks  = true, dims.blocks  = min_dims.blocks  = grid_size
// NOTE(review): only the argument list of the grid_size initializer is
// shown; presumably RAJA_DIVIDE_CEILING_INT(len, block_size). The call in
// the guarded branch and the declaration/return of `dims` are also not
// shown -- confirm.
461 template<named_dim dim,
int BLOCK_SIZE, kernel_sync_requirement sync>
462 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
463 iteration_mapping::DirectUnchecked,
465 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>>>
467 static_assert(BLOCK_SIZE > 0,
468 "block size must be > 0, named_usage::unspecified, or "
469 "named_usage::ignored with kernel");
472 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
474 template<
typename IdxT>
475 static LaunchDims get_dimensions(IdxT len)
478 len,
static_cast<IdxT
>(IndexMapper::block_size));
479 if (len != (
static_cast<IdxT
>(IndexMapper::block_size) * grid_size))
482 "mapped index space");
487 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
488 set_hip_dim<dim>(dims.dims.threads,
489 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
490 set_hip_dim<dim>(dims.min_dims.threads,
491 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
493 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
494 set_hip_dim<dim>(dims.dims.blocks,
495 static_cast<hip_dim_member_t
>(grid_size));
496 set_hip_dim<dim>(dims.min_dims.blocks,
497 static_cast<hip_dim_member_t
>(grid_size));
// DirectUnchecked mapping; IndexGlobal with compile-time BLOCK_SIZE and
// compile-time GRID_SIZE. Both must be > 0 (static_assert).
//
// get_dimensions(len): requires
// len == IndexMapper::block_size * IndexMapper::grid_size; a mismatch enters
// the guarded branch (message ends with "mapped index space"). For dimension
// `dim` it then sets:
//   active.threads = true, dims.threads = min_dims.threads = block_size
//   active.blocks  = true, dims.blocks  = min_dims.blocks  = grid_size
// NOTE(review): the template header of this specialization, the call in the
// guarded branch and the declaration/return of `dims` are not shown here --
// confirm against the full source.
508 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
509 iteration_mapping::DirectUnchecked,
511 hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>>>
513 static_assert(BLOCK_SIZE > 0,
514 "block size must be > 0, named_usage::unspecified, or "
515 "named_usage::ignored with kernel");
516 static_assert(GRID_SIZE > 0,
517 "grid size must be > 0, named_usage::unspecified, or "
518 "named_usage::ignored with kernel");
520 using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
522 template<
typename IdxT>
523 static LaunchDims get_dimensions(IdxT len)
525 if (len != (
static_cast<IdxT
>(IndexMapper::block_size) *
526 static_cast<IdxT
>(IndexMapper::grid_size)))
529 "mapped index space");
534 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
535 set_hip_dim<dim>(dims.dims.threads,
536 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
537 set_hip_dim<dim>(dims.min_dims.threads,
538 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
540 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
541 set_hip_dim<dim>(dims.dims.blocks,
542 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
543 set_hip_dim<dim>(dims.min_dims.blocks,
544 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
// Direct mapping; IndexGlobal with block size ignored and grid size ignored.
//
// get_dimensions(len) returns a default-constructed LaunchDims, i.e. it sets
// no thread or block entry. A len greater than 1 enters the guarded branch
// with the message "len exceeds the size of the directly mapped index
// space".
// NOTE(review): the call in that branch is not shown here; presumably
// RAJA_ABORT_OR_THROW -- confirm.
551 template<named_dim dim, kernel_sync_requirement sync>
552 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
553 iteration_mapping::Direct,
555 hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>>>
558 hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>;
560 template<
typename IdxT>
561 static LaunchDims get_dimensions(IdxT len)
563 if (len >
static_cast<IdxT
>(1))
566 "len exceeds the size of the directly mapped index space");
569 return LaunchDims {};
// Direct mapping; IndexGlobal with block size unspecified and grid size
// ignored.
//
// get_dimensions(len) sets, for dimension `dim` only:
//   active.threads   = true
//   dims.threads     = len
//   min_dims.threads = len
// No blocks entry is set by the visible code.
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
574 template<named_dim dim, kernel_sync_requirement sync>
575 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
576 iteration_mapping::Direct,
578 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>>>
581 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>;
583 template<
typename IdxT>
584 static LaunchDims get_dimensions(IdxT len)
590 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
591 set_hip_dim<dim>(dims.dims.threads,
static_cast<hip_dim_member_t
>(len));
592 set_hip_dim<dim>(dims.min_dims.threads,
static_cast<hip_dim_member_t
>(len));
// Direct mapping; IndexGlobal with compile-time BLOCK_SIZE and grid size
// ignored. BLOCK_SIZE must be > 0 (static_assert).
//
// get_dimensions(len): a len greater than IndexMapper::block_size enters the
// guarded branch ("len exceeds the size of the directly mapped index
// space"). For dimension `dim` it then sets:
//   active.threads   = true
//   dims.threads     = block_size when len > 0, otherwise 0
//   min_dims.threads = block_size (regardless of len)
// NOTE(review): the call in the guarded branch and the declaration/return of
// `dims` are not shown; presumably RAJA_ABORT_OR_THROW and a LaunchDims
// returned by value -- confirm.
599 template<named_dim dim,
int BLOCK_SIZE, kernel_sync_requirement sync>
600 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
601 iteration_mapping::Direct,
603 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>>>
605 static_assert(BLOCK_SIZE > 0,
606 "block size must be > 0, named_usage::unspecified, or "
607 "named_usage::ignored with kernel");
609 using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>;
611 template<
typename IdxT>
612 static LaunchDims get_dimensions(IdxT len)
614 constexpr
auto zero =
static_cast<IdxT
>(0);
616 if (len >
static_cast<IdxT
>(IndexMapper::block_size))
619 "len exceeds the size of the directly mapped index space");
624 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
625 set_hip_dim<dim>(dims.dims.threads,
626 static_cast<hip_dim_member_t
>(
627 (len > zero) ? IndexMapper::block_size : 0));
628 set_hip_dim<dim>(dims.min_dims.threads,
629 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
// Direct mapping; IndexGlobal with block size ignored and grid size
// unspecified.
//
// get_dimensions(len) sets, for dimension `dim` only:
//   active.blocks   = true
//   dims.blocks     = len
//   min_dims.blocks = len
// No threads entry is set by the visible code.
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
636 template<named_dim dim, kernel_sync_requirement sync>
637 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
638 iteration_mapping::Direct,
640 hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>>>
643 hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>;
645 template<
typename IdxT>
646 static LaunchDims get_dimensions(IdxT len)
650 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
651 set_hip_dim<dim>(dims.dims.blocks,
static_cast<hip_dim_member_t
>(len));
652 set_hip_dim<dim>(dims.min_dims.blocks,
static_cast<hip_dim_member_t
>(len));
// Direct mapping; IndexGlobal with block size ignored and compile-time
// GRID_SIZE. GRID_SIZE must be > 0 (static_assert).
//
// get_dimensions(len): a len greater than IndexMapper::grid_size enters the
// guarded branch ("len exceeds the size of the directly mapped index
// space"). For dimension `dim` it then sets:
//   active.blocks   = true
//   dims.blocks     = grid_size when len > 0, otherwise 0
//   min_dims.blocks = grid_size (regardless of len)
// NOTE(review): the call in the guarded branch and the declaration/return of
// `dims` are not shown; presumably RAJA_ABORT_OR_THROW and a LaunchDims
// returned by value -- confirm.
659 template<named_dim dim,
int GRID_SIZE, kernel_sync_requirement sync>
660 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
661 iteration_mapping::Direct,
663 hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>>>
665 static_assert(GRID_SIZE > 0,
666 "grid size must be > 0, named_usage::unspecified, or "
667 "named_usage::ignored with kernel");
669 using IndexMapper = hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>;
671 template<
typename IdxT>
672 static LaunchDims get_dimensions(IdxT len)
674 constexpr
auto zero =
static_cast<IdxT
>(0);
676 if (len >
static_cast<IdxT
>(IndexMapper::grid_size))
679 "len exceeds the size of the directly mapped index space");
684 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
685 set_hip_dim<dim>(dims.dims.blocks,
686 static_cast<hip_dim_member_t
>(
687 (len > zero) ? IndexMapper::grid_size : 0));
688 set_hip_dim<dim>(dims.min_dims.blocks,
689 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
// Direct mapping; IndexGlobal with both block size and grid size
// unspecified.
//
// get_dimensions(len) returns a default-constructed LaunchDims. A len
// greater than 0 enters the guarded branch.
// NOTE(review): the statement in that branch is not shown here; presumably
// it rejects the request via RAJA_ABORT_OR_THROW because neither size is
// known -- confirm.
696 template<named_dim dim, kernel_sync_requirement sync>
697 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
698 iteration_mapping::Direct,
700 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>>>
703 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
705 template<
typename IdxT>
706 static LaunchDims get_dimensions(IdxT len)
708 if (len >
static_cast<IdxT
>(0))
713 return LaunchDims {};
// Direct mapping; IndexGlobal with block size unspecified and compile-time
// GRID_SIZE. GRID_SIZE must be > 0 (static_assert).
//
// get_dimensions(len): computes a run-time block_size, then for dimension
// `dim` sets:
//   active.threads = true, dims.threads = min_dims.threads = block_size
//   active.blocks  = true
//   dims.blocks     = grid_size when len > 0, otherwise 0
//   min_dims.blocks = grid_size (regardless of len)
// NOTE(review): the initializer of block_size is not shown; presumably
// RAJA_DIVIDE_CEILING_INT(len, grid_size). Declaration and return of `dims`
// are also not shown -- confirm.
718 template<named_dim dim,
int GRID_SIZE, kernel_sync_requirement sync>
719 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
720 iteration_mapping::Direct,
722 hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>>>
724 static_assert(GRID_SIZE > 0,
725 "grid size must be > 0, named_usage::unspecified, or "
726 "named_usage::ignored with kernel");
729 hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
731 template<
typename IdxT>
732 static LaunchDims get_dimensions(IdxT len)
734 constexpr
auto zero =
static_cast<IdxT
>(0);
738 const IdxT block_size =
743 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
744 set_hip_dim<dim>(dims.dims.threads,
745 static_cast<hip_dim_member_t
>(block_size));
746 set_hip_dim<dim>(dims.min_dims.threads,
747 static_cast<hip_dim_member_t
>(block_size));
749 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
750 set_hip_dim<dim>(dims.dims.blocks,
751 static_cast<hip_dim_member_t
>(
752 (len > zero) ? IndexMapper::grid_size : 0));
753 set_hip_dim<dim>(dims.min_dims.blocks,
754 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
// Direct mapping; IndexGlobal with compile-time BLOCK_SIZE and grid size
// unspecified. BLOCK_SIZE must be > 0 (static_assert).
//
// get_dimensions(len): computes a run-time grid_size from len and
// IndexMapper::block_size, then for dimension `dim` sets:
//   active.threads   = true
//   dims.threads     = block_size when len > 0, otherwise 0
//   min_dims.threads = block_size (regardless of len)
//   active.blocks = true, dims.blocks = min_dims.blocks = grid_size
// NOTE(review): only the argument list of the grid_size initializer is
// shown; presumably RAJA_DIVIDE_CEILING_INT(len, block_size). Declaration
// and return of `dims` are also not shown -- confirm.
761 template<named_dim dim,
int BLOCK_SIZE, kernel_sync_requirement sync>
762 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
763 iteration_mapping::Direct,
765 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>>>
767 static_assert(BLOCK_SIZE > 0,
768 "block size must be > 0, named_usage::unspecified, or "
769 "named_usage::ignored with kernel");
772 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
774 template<
typename IdxT>
775 static LaunchDims get_dimensions(IdxT len)
777 constexpr
auto zero =
static_cast<IdxT
>(0);
780 len,
static_cast<IdxT
>(IndexMapper::block_size));
784 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
785 set_hip_dim<dim>(dims.dims.threads,
786 static_cast<hip_dim_member_t
>(
787 (len > zero) ? IndexMapper::block_size : 0));
788 set_hip_dim<dim>(dims.min_dims.threads,
789 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
791 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
792 set_hip_dim<dim>(dims.dims.blocks,
793 static_cast<hip_dim_member_t
>(grid_size));
794 set_hip_dim<dim>(dims.min_dims.blocks,
795 static_cast<hip_dim_member_t
>(grid_size));
// Direct mapping; IndexGlobal with compile-time BLOCK_SIZE and compile-time
// GRID_SIZE. Both must be > 0 (static_assert).
//
// get_dimensions(len): a len greater than
// IndexMapper::block_size * IndexMapper::grid_size enters the guarded branch
// ("len exceeds the size of the directly mapped index space"). For dimension
// `dim` it then sets:
//   active.threads   = true
//   dims.threads     = block_size when len > 0, otherwise 0
//   min_dims.threads = block_size (regardless of len)
//   active.blocks    = true
//   dims.blocks      = grid_size when len > 0, otherwise 0
//   min_dims.blocks  = grid_size (regardless of len)
// NOTE(review): the template header of this specialization, the call in the
// guarded branch and the declaration/return of `dims` are not shown here --
// confirm against the full source.
806 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
807 iteration_mapping::Direct,
809 hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>>>
811 static_assert(BLOCK_SIZE > 0,
812 "block size must be > 0, named_usage::unspecified, or "
813 "named_usage::ignored with kernel");
814 static_assert(GRID_SIZE > 0,
815 "grid size must be > 0, named_usage::unspecified, or "
816 "named_usage::ignored with kernel");
818 using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
820 template<
typename IdxT>
821 static LaunchDims get_dimensions(IdxT len)
823 constexpr
auto zero =
static_cast<IdxT
>(0);
825 if (len > (
static_cast<IdxT
>(IndexMapper::block_size) *
826 static_cast<IdxT
>(IndexMapper::grid_size)))
829 "len exceeds the size of the directly mapped index space");
834 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
835 set_hip_dim<dim>(dims.dims.threads,
836 static_cast<hip_dim_member_t
>(
837 (len > zero) ? IndexMapper::block_size : 0));
838 set_hip_dim<dim>(dims.min_dims.threads,
839 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
841 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
842 set_hip_dim<dim>(dims.dims.blocks,
843 static_cast<hip_dim_member_t
>(
844 (len > zero) ? IndexMapper::grid_size : 0));
845 set_hip_dim<dim>(dims.min_dims.blocks,
846 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with block size
// ignored and grid size ignored.
//
// The visible body returns a default-constructed LaunchDims, i.e. it sets no
// thread or block entry.
// NOTE(review): the signature line of the member template is not shown
// here; presumably `static LaunchDims get_dimensions(IdxT)` like the other
// specializations -- confirm.
853 template<named_dim dim, kernel_sync_requirement sync>
854 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
855 iteration_mapping::StridedLoop<named_usage::unspecified>,
857 hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>>>
860 hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>;
862 template<
typename IdxT>
865 return LaunchDims {};
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with block size
// unspecified and grid size ignored.
//
// get_dimensions(len) sets, for dimension `dim` only:
//   active.threads   = true
//   dims.threads     = len
//   min_dims.threads = 1
// Unlike the Direct variants, the minimum is 1 rather than len.
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
870 template<named_dim dim, kernel_sync_requirement sync>
871 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
872 iteration_mapping::StridedLoop<named_usage::unspecified>,
874 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>>>
877 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>;
879 template<
typename IdxT>
880 static LaunchDims get_dimensions(IdxT len)
886 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
887 set_hip_dim<dim>(dims.dims.threads,
static_cast<hip_dim_member_t
>(len));
888 set_hip_dim<dim>(dims.min_dims.threads,
static_cast<hip_dim_member_t
>(1));
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with
// compile-time BLOCK_SIZE and grid size ignored. BLOCK_SIZE must be > 0
// (static_assert).
//
// get_dimensions(len) sets, for dimension `dim` only:
//   active.threads   = true
//   dims.threads     = block_size when len > 0, otherwise 0
//   min_dims.threads = block_size (regardless of len)
// No upper bound on len is checked in the visible code.
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
895 template<named_dim dim,
int BLOCK_SIZE, kernel_sync_requirement sync>
896 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
897 iteration_mapping::StridedLoop<named_usage::unspecified>,
899 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>>>
901 static_assert(BLOCK_SIZE > 0,
902 "block size must be > 0, named_usage::unspecified, or "
903 "named_usage::ignored with kernel");
905 using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>;
907 template<
typename IdxT>
908 static LaunchDims get_dimensions(IdxT len)
910 constexpr
auto zero =
static_cast<IdxT
>(0);
914 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
915 set_hip_dim<dim>(dims.dims.threads,
916 static_cast<hip_dim_member_t
>(
917 (len > zero) ? IndexMapper::block_size : 0));
918 set_hip_dim<dim>(dims.min_dims.threads,
919 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with block size
// ignored and grid size unspecified.
//
// get_dimensions(len) sets, for dimension `dim` only:
//   active.blocks   = true
//   dims.blocks     = len
//   min_dims.blocks = 1
// Unlike the Direct variants, the minimum is 1 rather than len.
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
926 template<named_dim dim, kernel_sync_requirement sync>
927 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
928 iteration_mapping::StridedLoop<named_usage::unspecified>,
930 hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>>>
933 hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>;
935 template<
typename IdxT>
936 static LaunchDims get_dimensions(IdxT len)
940 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
941 set_hip_dim<dim>(dims.dims.blocks,
static_cast<hip_dim_member_t
>(len));
942 set_hip_dim<dim>(dims.min_dims.blocks,
static_cast<hip_dim_member_t
>(1));
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with block size
// ignored and compile-time GRID_SIZE. GRID_SIZE must be > 0 (static_assert).
//
// get_dimensions(len) sets, for dimension `dim` only:
//   active.blocks   = true
//   dims.blocks     = grid_size when len > 0, otherwise 0
//   min_dims.blocks = grid_size (regardless of len)
// No upper bound on len is checked in the visible code.
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
949 template<named_dim dim,
int GRID_SIZE, kernel_sync_requirement sync>
950 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
951 iteration_mapping::StridedLoop<named_usage::unspecified>,
953 hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>>>
955 static_assert(GRID_SIZE > 0,
956 "grid size must be > 0, named_usage::unspecified, or "
957 "named_usage::ignored with kernel");
959 using IndexMapper = hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>;
961 template<
typename IdxT>
962 static LaunchDims get_dimensions(IdxT len)
964 constexpr
auto zero =
static_cast<IdxT
>(0);
968 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
969 set_hip_dim<dim>(dims.dims.blocks,
970 static_cast<hip_dim_member_t
>(
971 (len > zero) ? IndexMapper::grid_size : 0));
972 set_hip_dim<dim>(dims.min_dims.blocks,
973 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with both block
// size and grid size unspecified.
//
// get_dimensions(len) sets, for dimension `dim`:
//   active.threads   = true
//   dims.threads     = 1 when len > 0, otherwise 0
//   min_dims.threads = 1
//   active.blocks    = true
//   dims.blocks      = 1 when len > 0, otherwise 0
//   min_dims.blocks  = 1
// NOTE(review): declaration and return of the local `dims` are not shown;
// presumably a LaunchDims returned by value -- confirm.
980 template<named_dim dim, kernel_sync_requirement sync>
981 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
982 iteration_mapping::StridedLoop<named_usage::unspecified>,
984 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>>>
987 hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
989 template<
typename IdxT>
990 static LaunchDims get_dimensions(IdxT len)
992 constexpr
auto zero =
static_cast<IdxT
>(0);
996 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
997 set_hip_dim<dim>(dims.dims.threads,
998 static_cast<hip_dim_member_t
>((len > zero) ? 1 : 0));
999 set_hip_dim<dim>(dims.min_dims.threads,
static_cast<hip_dim_member_t
>(1));
1001 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
1002 set_hip_dim<dim>(dims.dims.blocks,
1003 static_cast<hip_dim_member_t
>((len > zero) ? 1 : 0));
1004 set_hip_dim<dim>(dims.min_dims.blocks,
static_cast<hip_dim_member_t
>(1));
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with block size
// unspecified and compile-time GRID_SIZE. GRID_SIZE must be > 0
// (static_assert).
//
// get_dimensions(len): computes a run-time block_size, then for dimension
// `dim` sets:
//   active.threads   = true
//   dims.threads     = block_size
//   min_dims.threads = 1
//   active.blocks    = true
//   dims.blocks      = grid_size when len > 0, otherwise 0
//   min_dims.blocks  = grid_size (regardless of len)
// NOTE(review): the initializer of block_size is not shown; presumably
// RAJA_DIVIDE_CEILING_INT(len, grid_size). Declaration and return of `dims`
// are also not shown -- confirm.
1011 template<named_dim dim,
int GRID_SIZE, kernel_sync_requirement sync>
1012 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
1013 iteration_mapping::StridedLoop<named_usage::unspecified>,
1015 hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>>>
1017 static_assert(GRID_SIZE > 0,
1018 "grid size must be > 0, named_usage::unspecified, or "
1019 "named_usage::ignored with kernel");
1022 hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
1024 template<
typename IdxT>
1025 static LaunchDims get_dimensions(IdxT len)
1027 constexpr
auto zero =
static_cast<IdxT
>(0);
1031 const IdxT block_size =
1036 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
1037 set_hip_dim<dim>(dims.dims.threads,
1038 static_cast<hip_dim_member_t
>(block_size));
1039 set_hip_dim<dim>(dims.min_dims.threads,
static_cast<hip_dim_member_t
>(1));
1041 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
1042 set_hip_dim<dim>(dims.dims.blocks,
1043 static_cast<hip_dim_member_t
>(
1044 (len > zero) ? IndexMapper::grid_size : 0));
1045 set_hip_dim<dim>(dims.min_dims.blocks,
1046 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with
// compile-time BLOCK_SIZE and grid size unspecified. BLOCK_SIZE must be > 0
// (static_assert).
//
// get_dimensions(len): computes a run-time grid_size from len and
// IndexMapper::block_size, then for dimension `dim` sets:
//   active.threads   = true
//   dims.threads     = block_size when len > 0, otherwise 0
//   min_dims.threads = block_size (regardless of len)
//   active.blocks    = true
//   dims.blocks      = grid_size
//   min_dims.blocks  = 1
// NOTE(review): only the argument list of the grid_size initializer is
// shown; presumably RAJA_DIVIDE_CEILING_INT(len, block_size). Declaration
// and return of `dims` are also not shown -- confirm.
1053 template<named_dim dim,
int BLOCK_SIZE, kernel_sync_requirement sync>
1054 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
1055 iteration_mapping::StridedLoop<named_usage::unspecified>,
1057 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>>>
1059 static_assert(BLOCK_SIZE > 0,
1060 "block size must be > 0, named_usage::unspecified, or "
1061 "named_usage::ignored with kernel");
1064 hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
1066 template<
typename IdxT>
1067 static LaunchDims get_dimensions(IdxT len)
1069 constexpr
auto zero =
static_cast<IdxT
>(0);
1072 len,
static_cast<IdxT
>(IndexMapper::block_size));
1076 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
1077 set_hip_dim<dim>(dims.dims.threads,
1078 static_cast<hip_dim_member_t
>(
1079 (len > zero) ? IndexMapper::block_size : 0));
1080 set_hip_dim<dim>(dims.min_dims.threads,
1081 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
1083 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
1084 set_hip_dim<dim>(dims.dims.blocks,
1085 static_cast<hip_dim_member_t
>(grid_size));
1086 set_hip_dim<dim>(dims.min_dims.blocks,
static_cast<hip_dim_member_t
>(1));
// StridedLoop<named_usage::unspecified> mapping; IndexGlobal with
// compile-time BLOCK_SIZE and compile-time GRID_SIZE. Both must be > 0
// (static_assert).
//
// get_dimensions(len) sets, for dimension `dim`:
//   active.threads   = true
//   dims.threads     = block_size when len > 0, otherwise 0
//   min_dims.threads = block_size (regardless of len)
//   active.blocks    = true
//   dims.blocks      = grid_size when len > 0, otherwise 0
//   min_dims.blocks  = grid_size (regardless of len)
// No upper bound on len is checked in the visible code.
// NOTE(review): the template header of this specialization and the
// declaration/return of `dims` are not shown here -- confirm against the
// full source.
1097 struct KernelDimensionCalculator<
RAJA::policy::hip::hip_indexer<
1098 iteration_mapping::StridedLoop<named_usage::unspecified>,
1100 hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>>>
1102 static_assert(BLOCK_SIZE > 0,
1103 "block size must be > 0, named_usage::unspecified, or "
1104 "named_usage::ignored with kernel");
1105 static_assert(GRID_SIZE > 0,
1106 "grid size must be > 0, named_usage::unspecified, or "
1107 "named_usage::ignored with kernel");
1109 using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
1111 template<
typename IdxT>
1112 static LaunchDims get_dimensions(IdxT len)
1114 constexpr
auto zero =
static_cast<IdxT
>(0);
1118 set_hip_dim<dim>(dims.active.threads,
static_cast<hip_dim_member_t
>(
true));
1119 set_hip_dim<dim>(dims.dims.threads,
1120 static_cast<hip_dim_member_t
>(
1121 (len > zero) ? IndexMapper::block_size : 0));
1122 set_hip_dim<dim>(dims.min_dims.threads,
1123 static_cast<hip_dim_member_t
>(IndexMapper::block_size));
1125 set_hip_dim<dim>(dims.active.blocks,
static_cast<hip_dim_member_t
>(
true));
1126 set_hip_dim<dim>(dims.dims.blocks,
1127 static_cast<hip_dim_member_t
>(
1128 (len > zero) ? IndexMapper::grid_size : 0));
1129 set_hip_dim<dim>(dims.min_dims.blocks,
1130 static_cast<hip_dim_member_t
>(IndexMapper::grid_size));
Header file defining prototypes for routines used to manage memory for HIP reductions and other operations.
Header file containing RAJA HIP policy definitions.
Header file for common RAJA internal macro definitions.
RAJA_HOST_DEVICE void RAJA_ABORT_OR_THROW(const char *str)
Definition: macros.hpp:143
#define RAJA_UNUSED_ARG(x)
Definition: macros.hpp:97
#define RAJA_DIVIDE_CEILING_INT(dividend, divisor)
Definition: macros.hpp:122
#define RAJA_DEVICE
Definition: macros.hpp:66
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
Definition: AlignedRangeIndexSetBuilders.cpp:35
named_dim
Definition: types.hpp:53
kernel_sync_requirement
Definition: types.hpp:63
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result max(Args... args)
Definition: foldl.hpp:155
RAJA header file containing user interface for RAJA::kernel.
Header file for RAJA type definitions.