20 #ifndef RAJA_pattern_launch_cuda_HPP
21 #define RAJA_pattern_launch_cuda_HPP
38 template<
typename IndicesAndDimsT>
56 return indices_and_dims;
60 template<
typename BODY,
typename ReduceParams>
86 RAJA::policy::cuda::cuda_launch_explicit_t<async,
87 named_usage::unspecified,
88 named_usage::unspecified>>
91 template<
typename BODY_IN,
typename ReduceParams>
92 static concepts::enable_if_t<
93 resources::EventProxy<resources::Resource>,
95 exec(RAJA::resources::Resource res,
98 ReduceParams& launch_reducers)
100 using BODY = camp::decay<BODY_IN>;
101 using EXEC_POL = RAJA::policy::cuda::cuda_launch_explicit_t<
105 auto func =
reinterpret_cast<const void*
>(
106 &launch_new_reduce_global_fcn<BODY, camp::decay<ReduceParams>>);
108 resources::Cuda cuda_res = res.get<RAJA::resources::Cuda>();
114 cuda_dim_t gridSize {
115 static_cast<cuda_dim_member_t
>(launch_params.
teams.
value[0]),
116 static_cast<cuda_dim_member_t
>(launch_params.
teams.
value[1]),
117 static_cast<cuda_dim_member_t
>(launch_params.
teams.
value[2])};
119 cuda_dim_t blockSize {
120 static_cast<cuda_dim_member_t
>(launch_params.
threads.
value[0]),
121 static_cast<cuda_dim_member_t
>(launch_params.
threads.
value[1]),
122 static_cast<cuda_dim_member_t
>(launch_params.
threads.
value[2])};
125 constexpr cuda_dim_member_t zero = 0;
126 if (gridSize.x > zero && gridSize.y > zero && gridSize.z > zero &&
127 blockSize.x > zero && blockSize.y > zero && blockSize.z > zero)
132 RAJA::cuda::detail::cudaInfo launch_info;
133 launch_info.gridDim = gridSize;
134 launch_info.blockDim = blockSize;
135 launch_info.dynamic_smem = &shared_mem_size;
136 launch_info.res = cuda_res;
145 BODY
body = RAJA::cuda::make_launch_body(
146 func, gridSize, blockSize, shared_mem_size, cuda_res,
147 std::forward<BODY_IN>(body_in));
152 void*
args[] = {(
void*)&
body, (
void*)&launch_reducers};
161 return resources::EventProxy<resources::Resource>(res);
165 template<
typename BODY,
167 size_t BLOCKS_PER_SM,
168 typename ReduceParams>
170 void launch_new_reduce_global_fcn_fixed(
const RAJA_CUDA_GRID_CONSTANT BODY
194 template<
bool async,
int nthreads,
size_t BLOCKS_PER_SM>
196 RAJA::policy::cuda::cuda_launch_explicit_t<async, nthreads, BLOCKS_PER_SM>>
199 template<
typename BODY_IN,
typename ReduceParams>
200 static concepts::enable_if_t<
201 resources::EventProxy<resources::Resource>,
203 exec(RAJA::resources::Resource res,
206 ReduceParams& launch_reducers)
208 using BODY = camp::decay<BODY_IN>;
211 using EXEC_POL = RAJA::policy::cuda::cuda_launch_explicit_t<
215 auto func =
reinterpret_cast<const void*
>(
216 &launch_new_reduce_global_fcn_fixed<BODY, nthreads, BLOCKS_PER_SM,
217 camp::decay<ReduceParams>>);
219 resources::Cuda cuda_res = res.get<RAJA::resources::Cuda>();
225 cuda_dim_t gridSize {
226 static_cast<cuda_dim_member_t
>(launch_params.
teams.
value[0]),
227 static_cast<cuda_dim_member_t
>(launch_params.
teams.
value[1]),
228 static_cast<cuda_dim_member_t
>(launch_params.
teams.
value[2])};
230 cuda_dim_t blockSize {
231 static_cast<cuda_dim_member_t
>(launch_params.
threads.
value[0]),
232 static_cast<cuda_dim_member_t
>(launch_params.
threads.
value[1]),
233 static_cast<cuda_dim_member_t
>(launch_params.
threads.
value[2])};
236 constexpr cuda_dim_member_t zero = 0;
237 if (gridSize.x > zero && gridSize.y > zero && gridSize.z > zero &&
238 blockSize.x > zero && blockSize.y > zero && blockSize.z > zero)
243 RAJA::cuda::detail::cudaInfo launch_info;
244 launch_info.gridDim = gridSize;
245 launch_info.blockDim = blockSize;
246 launch_info.dynamic_smem = &shared_mem_size;
247 launch_info.res = cuda_res;
256 BODY
body = RAJA::cuda::make_launch_body(
257 func, gridSize, blockSize, shared_mem_size, cuda_res,
258 std::forward<BODY_IN>(body_in));
263 void*
args[] = {(
void*)&
body, (
void*)&launch_reducers};
272 return resources::EventProxy<resources::Resource>(res);
279 template<
typename SEGMENT,
typename IndexMapper>
281 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
282 kernel_sync_requirement::none,
287 using diff_t =
typename std::iterator_traits<
288 typename SEGMENT::iterator>::difference_type;
290 template<
typename LaunchContextPolicy,
typename BODY>
293 SEGMENT
const& segment,
297 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
299 body(*(segment.begin() + i));
303 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
305 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
306 kernel_sync_requirement::none,
312 using diff_t =
typename std::iterator_traits<
313 typename SEGMENT::iterator>::difference_type;
315 template<
typename LaunchContextPolicy,
typename BODY>
318 SEGMENT
const& segment0,
319 SEGMENT
const& segment1,
323 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
325 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
327 body(*(segment0.begin() + i0), *(segment1.begin() + i1));
331 template<
typename SEGMENT,
332 typename IndexMapper0,
333 typename IndexMapper1,
334 typename IndexMapper2>
336 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
337 kernel_sync_requirement::none,
344 using diff_t =
typename std::iterator_traits<
345 typename SEGMENT::iterator>::difference_type;
347 template<
typename LaunchContextPolicy,
typename BODY>
350 SEGMENT
const& segment0,
351 SEGMENT
const& segment1,
352 SEGMENT
const& segment2,
356 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
358 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
360 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
362 body(*(segment0.begin() + i0), *(segment1.begin() + i1),
363 *(segment2.begin() + i2));
367 template<
typename SEGMENT,
typename IndexMapper>
369 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
370 kernel_sync_requirement::none,
375 using diff_t =
typename std::iterator_traits<
376 typename SEGMENT::iterator>::difference_type;
378 template<
typename LaunchContextPolicy,
typename BODY>
381 SEGMENT
const& segment,
384 const diff_t len = segment.end() - segment.begin();
386 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
390 body(*(segment.begin() + i));
395 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
397 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
398 kernel_sync_requirement::none,
404 using diff_t =
typename std::iterator_traits<
405 typename SEGMENT::iterator>::difference_type;
407 template<
typename LaunchContextPolicy,
typename BODY>
410 SEGMENT
const& segment0,
411 SEGMENT
const& segment1,
414 const int len0 = segment0.end() - segment0.begin();
415 const int len1 = segment1.end() - segment1.begin();
418 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
420 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
422 if (i0 < len0 && i1 < len1)
424 body(*(segment0.begin() + i0), *(segment1.begin() + i1));
429 template<
typename SEGMENT,
430 typename IndexMapper0,
431 typename IndexMapper1,
432 typename IndexMapper2>
434 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
435 kernel_sync_requirement::none,
442 using diff_t =
typename std::iterator_traits<
443 typename SEGMENT::iterator>::difference_type;
445 template<
typename LaunchContextPolicy,
typename BODY>
448 SEGMENT
const& segment0,
449 SEGMENT
const& segment1,
450 SEGMENT
const& segment2,
453 const int len0 = segment0.end() - segment0.begin();
454 const int len1 = segment1.end() - segment1.begin();
455 const int len2 = segment2.end() - segment2.begin();
458 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
460 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
462 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
464 if (i0 < len0 && i1 < len1 && i2 < len2)
466 body(*(segment0.begin() + i0), *(segment1.begin() + i1),
467 *(segment2.begin() + i2));
472 template<
typename SEGMENT,
typename IndexMapper>
474 RAJA::policy::cuda::cuda_indexer<
475 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
476 kernel_sync_requirement::none,
481 using diff_t =
typename std::iterator_traits<
482 typename SEGMENT::iterator>::difference_type;
484 template<
typename BODY,
typename LaunchContextPolicy>
487 SEGMENT
const& segment,
490 const diff_t len = segment.end() - segment.begin();
492 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
494 IndexMapper::template size<diff_t>(
ctx.get_indices_and_dims());
496 for (
diff_t i = i_init; i < len; i += i_stride)
498 body(*(segment.begin() + i));
503 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
505 RAJA::policy::cuda::cuda_indexer<
506 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
507 kernel_sync_requirement::none,
513 using diff_t =
typename std::iterator_traits<
514 typename SEGMENT::iterator>::difference_type;
516 template<
typename LaunchContextPolicy,
typename BODY>
519 SEGMENT
const& segment0,
520 SEGMENT
const& segment1,
523 const int len0 = segment0.end() - segment0.begin();
524 const int len1 = segment1.end() - segment1.begin();
527 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
529 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
532 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
534 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
536 for (
diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
539 for (
diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
542 body(*(segment0.begin() + i0), *(segment1.begin() + i1));
548 template<
typename SEGMENT,
549 typename IndexMapper0,
550 typename IndexMapper1,
551 typename IndexMapper2>
553 RAJA::policy::cuda::cuda_indexer<
554 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
555 kernel_sync_requirement::none,
562 using diff_t =
typename std::iterator_traits<
563 typename SEGMENT::iterator>::difference_type;
565 template<
typename LaunchContextPolicy,
typename BODY>
568 SEGMENT
const& segment0,
569 SEGMENT
const& segment1,
570 SEGMENT
const& segment2,
573 const int len0 = segment0.end() - segment0.begin();
574 const int len1 = segment1.end() - segment1.begin();
575 const int len2 = segment2.end() - segment2.begin();
578 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
580 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
582 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
585 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
587 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
589 IndexMapper2::template size<diff_t>(
ctx.get_indices_and_dims());
591 for (
diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
594 for (
diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
597 for (
diff_t i2 = i2_init; i2 < len2; i2 += i2_stride)
600 body(*(segment0.begin() + i0), *(segment1.begin() + i1),
601 *(segment2.begin() + i2));
611 template<
typename SEGMENT,
typename IndexMapper>
613 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
614 kernel_sync_requirement::none,
619 using diff_t =
typename std::iterator_traits<
620 typename SEGMENT::iterator>::difference_type;
622 template<
typename LaunchContextPolicy,
typename BODY>
625 SEGMENT
const& segment,
629 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
631 body(*(segment.begin() + i), i);
635 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
637 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
638 kernel_sync_requirement::none,
644 using diff_t =
typename std::iterator_traits<
645 typename SEGMENT::iterator>::difference_type;
647 template<
typename LaunchContextPolicy,
typename BODY>
650 SEGMENT
const& segment0,
651 SEGMENT
const& segment1,
655 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
657 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
659 body(*(segment0.begin() + i0), *(segment1.begin() + i1), i0, i1);
663 template<
typename SEGMENT,
664 typename IndexMapper0,
665 typename IndexMapper1,
666 typename IndexMapper2>
668 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
669 kernel_sync_requirement::none,
676 using diff_t =
typename std::iterator_traits<
677 typename SEGMENT::iterator>::difference_type;
679 template<
typename LaunchContextPolicy,
typename BODY>
682 SEGMENT
const& segment0,
683 SEGMENT
const& segment1,
684 SEGMENT
const& segment2,
688 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
690 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
692 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
694 body(*(segment0.begin() + i0), *(segment1.begin() + i1),
695 *(segment2.begin() + i2), i0, i1, i2);
699 template<
typename SEGMENT,
typename IndexMapper>
701 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
702 kernel_sync_requirement::none,
707 using diff_t =
typename std::iterator_traits<
708 typename SEGMENT::iterator>::difference_type;
710 template<
typename LaunchContextPolicy,
typename BODY>
713 SEGMENT
const& segment,
716 const diff_t len = segment.end() - segment.begin();
718 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
722 body(*(segment.begin() + i), i);
727 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
729 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
730 kernel_sync_requirement::none,
736 using diff_t =
typename std::iterator_traits<
737 typename SEGMENT::iterator>::difference_type;
739 template<
typename LaunchContextPolicy,
typename BODY>
742 SEGMENT
const& segment0,
743 SEGMENT
const& segment1,
746 const int len0 = segment0.end() - segment0.begin();
747 const int len1 = segment1.end() - segment1.begin();
750 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
752 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
754 if (i0 < len0 && i1 < len1)
756 body(*(segment0.begin() + i0), *(segment1.begin() + i1), i0, i1);
761 template<
typename SEGMENT,
762 typename IndexMapper0,
763 typename IndexMapper1,
764 typename IndexMapper2>
766 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
767 kernel_sync_requirement::none,
774 using diff_t =
typename std::iterator_traits<
775 typename SEGMENT::iterator>::difference_type;
777 template<
typename LaunchContextPolicy,
typename BODY>
780 SEGMENT
const& segment0,
781 SEGMENT
const& segment1,
782 SEGMENT
const& segment2,
785 const int len0 = segment0.end() - segment0.begin();
786 const int len1 = segment1.end() - segment1.begin();
787 const int len2 = segment2.end() - segment2.begin();
790 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
792 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
794 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
796 if (i0 < len0 && i1 < len1 && i2 < len2)
798 body(*(segment0.begin() + i0), *(segment1.begin() + i1),
799 *(segment2.begin() + i2), i0, i1, i2);
804 template<
typename SEGMENT,
typename IndexMapper>
806 RAJA::policy::cuda::cuda_indexer<
807 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
808 kernel_sync_requirement::none,
813 using diff_t =
typename std::iterator_traits<
814 typename SEGMENT::iterator>::difference_type;
816 template<
typename LaunchContextPolicy,
typename BODY>
819 SEGMENT
const& segment,
822 const diff_t len = segment.end() - segment.begin();
824 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
826 IndexMapper::template size<diff_t>(
ctx.get_indices_and_dims());
828 for (
diff_t i = i_init; i < len; i += i_stride)
830 body(*(segment.begin() + i), i);
835 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
837 RAJA::policy::cuda::cuda_indexer<
838 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
839 kernel_sync_requirement::none,
845 using diff_t =
typename std::iterator_traits<
846 typename SEGMENT::iterator>::difference_type;
848 template<
typename LaunchContextPolicy,
typename BODY>
851 SEGMENT
const& segment0,
852 SEGMENT
const& segment1,
855 const int len0 = segment0.end() - segment0.begin();
856 const int len1 = segment1.end() - segment1.begin();
859 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
861 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
864 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
866 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
868 for (
diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
871 for (
diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
874 body(*(segment0.begin() + i0), *(segment1.begin() + i1), i0, i1);
880 template<
typename SEGMENT,
881 typename IndexMapper0,
882 typename IndexMapper1,
883 typename IndexMapper2>
885 RAJA::policy::cuda::cuda_indexer<
886 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
887 kernel_sync_requirement::none,
894 using diff_t =
typename std::iterator_traits<
895 typename SEGMENT::iterator>::difference_type;
897 template<
typename LaunchContextPolicy,
typename BODY>
900 SEGMENT
const& segment0,
901 SEGMENT
const& segment1,
902 SEGMENT
const& segment2,
905 const int len0 = segment0.end() - segment0.begin();
906 const int len1 = segment1.end() - segment1.begin();
907 const int len2 = segment2.end() - segment2.begin();
910 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
912 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
914 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
917 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
919 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
921 IndexMapper2::template size<diff_t>(
ctx.get_indices_and_dims());
923 for (
diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
926 for (
diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
929 for (
diff_t i2 = i2_init; i2 < len2; i2 += i2_stride)
932 body(*(segment0.begin() + i0), *(segment1.begin() + i1),
933 *(segment2.begin() + i2), i0, i1, i2);
943 template<
typename SEGMENT, kernel_sync_requirement sync,
typename IndexMapper0>
945 RAJA::iteration_mapping::DirectUnchecked,
950 RAJA::iteration_mapping::DirectUnchecked,
956 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
958 RAJA::iteration_mapping::DirectUnchecked,
959 kernel_sync_requirement::none,
964 using diff_t =
typename std::iterator_traits<
965 typename SEGMENT::iterator>::difference_type;
967 template<
typename LaunchContextPolicy,
typename BODY>
970 SEGMENT
const& segment,
974 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
976 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
979 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
981 const int i = i0 + i0_stride * i1;
983 body(*(segment.begin() + i));
987 template<
typename SEGMENT,
988 typename IndexMapper0,
989 typename IndexMapper1,
990 typename IndexMapper2>
992 RAJA::iteration_mapping::DirectUnchecked,
993 kernel_sync_requirement::none,
999 using diff_t =
typename std::iterator_traits<
1000 typename SEGMENT::iterator>::difference_type;
1002 template<
typename LaunchContextPolicy,
typename BODY>
1005 SEGMENT
const& segment,
1009 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1011 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1013 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
1016 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
1018 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
1020 const int i = i0 + i0_stride * (i1 + i1_stride * i2);
1022 body(*(segment.begin() + i));
1026 template<
typename SEGMENT, kernel_sync_requirement sync,
typename IndexMapper0>
1028 RAJA::policy::cuda::cuda_flatten_indexer<RAJA::iteration_mapping::Direct,
1033 RAJA::policy::cuda::
1034 cuda_indexer<RAJA::iteration_mapping::Direct, sync, IndexMapper0>,
1038 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
1040 RAJA::policy::cuda::cuda_flatten_indexer<RAJA::iteration_mapping::Direct,
1041 kernel_sync_requirement::none,
1046 using diff_t =
typename std::iterator_traits<
1047 typename SEGMENT::iterator>::difference_type;
1049 template<
typename LaunchContextPolicy,
typename BODY>
1052 SEGMENT
const& segment,
1055 const int len = segment.end() - segment.begin();
1058 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1060 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1063 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
1065 const int i = i0 + i0_stride * i1;
1069 body(*(segment.begin() + i));
1074 template<
typename SEGMENT,
1075 typename IndexMapper0,
1076 typename IndexMapper1,
1077 typename IndexMapper2>
1079 RAJA::policy::cuda::cuda_flatten_indexer<RAJA::iteration_mapping::Direct,
1080 kernel_sync_requirement::none,
1086 using diff_t =
typename std::iterator_traits<
1087 typename SEGMENT::iterator>::difference_type;
1089 template<
typename LaunchContextPolicy,
typename BODY>
1092 SEGMENT
const& segment,
1095 const int len = segment.end() - segment.begin();
1098 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1100 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1102 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
1105 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
1107 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
1109 const int i = i0 + i0_stride * (i1 + i1_stride * i2);
1113 body(*(segment.begin() + i));
1118 template<
typename SEGMENT, kernel_sync_requirement sync,
typename IndexMapper0>
1120 RAJA::policy::cuda::cuda_flatten_indexer<
1121 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1126 RAJA::policy::cuda::cuda_indexer<
1127 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1133 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
1135 RAJA::policy::cuda::cuda_flatten_indexer<
1136 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1137 kernel_sync_requirement::none,
1142 using diff_t =
typename std::iterator_traits<
1143 typename SEGMENT::iterator>::difference_type;
1145 template<
typename LaunchContextPolicy,
typename BODY>
1148 SEGMENT
const& segment,
1151 const int len = segment.end() - segment.begin();
1154 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1156 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1158 const int i0_stride =
1159 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
1160 const int i1_stride =
1161 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
1163 for (
int i = i0 + i0_stride * i1; i < len; i += i0_stride * i1_stride)
1165 body(*(segment.begin() + i));
1170 template<
typename SEGMENT,
1171 typename IndexMapper0,
1172 typename IndexMapper1,
1173 typename IndexMapper2>
1175 RAJA::policy::cuda::cuda_flatten_indexer<
1176 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1177 kernel_sync_requirement::none,
1183 using diff_t =
typename std::iterator_traits<
1184 typename SEGMENT::iterator>::difference_type;
1186 template<
typename LaunchContextPolicy,
typename BODY>
1189 SEGMENT
const& segment,
1192 const int len = segment.end() - segment.begin();
1195 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1197 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1199 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
1201 const int i0_stride =
1202 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
1203 const int i1_stride =
1204 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
1205 const int i2_stride =
1206 IndexMapper2::template size<diff_t>(
ctx.get_indices_and_dims());
1208 for (
int i = i0 + i0_stride * (i1 + i1_stride * i2); i < len;
1209 i += i0_stride * i1_stride * i2_stride)
1211 body(*(segment.begin() + i));
1219 template<
typename SEGMENT,
typename IndexMapper>
1221 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
1222 kernel_sync_requirement::none,
1227 using diff_t =
typename std::iterator_traits<
1228 typename SEGMENT::iterator>::difference_type;
1230 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1234 SEGMENT
const& segment,
1238 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims()) *
1239 static_cast<diff_t>(tile_size);
1241 body(segment.slice(i,
static_cast<diff_t>(tile_size)));
1245 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
1247 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
1248 kernel_sync_requirement::none,
1254 using diff_t =
typename std::iterator_traits<
1255 typename SEGMENT::iterator>::difference_type;
1257 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1262 SEGMENT
const& segment0,
1263 SEGMENT
const& segment1,
1267 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims()) *
1268 static_cast<diff_t>(tile_size0);
1270 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims()) *
1271 static_cast<diff_t>(tile_size1);
1273 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1274 segment1.slice(i1,
static_cast<diff_t>(tile_size1)));
1278 template<
typename SEGMENT,
1279 typename IndexMapper0,
1280 typename IndexMapper1,
1281 typename IndexMapper2>
1283 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
1284 kernel_sync_requirement::none,
1291 using diff_t =
typename std::iterator_traits<
1292 typename SEGMENT::iterator>::difference_type;
1294 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1300 SEGMENT
const& segment0,
1301 SEGMENT
const& segment1,
1302 SEGMENT
const& segment2,
1306 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims()) *
1307 static_cast<diff_t>(tile_size0);
1309 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims()) *
1310 static_cast<diff_t>(tile_size1);
1312 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims()) *
1313 static_cast<diff_t>(tile_size2);
1315 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1316 segment1.slice(i1,
static_cast<diff_t>(tile_size1)),
1317 segment2.slice(i2,
static_cast<diff_t>(tile_size2)));
1321 template<
typename SEGMENT,
typename IndexMapper>
1323 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
1324 kernel_sync_requirement::none,
1329 using diff_t =
typename std::iterator_traits<
1330 typename SEGMENT::iterator>::difference_type;
1332 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1336 SEGMENT
const& segment,
1339 const diff_t len = segment.end() - segment.begin();
1341 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims()) *
1342 static_cast<diff_t>(tile_size);
1346 body(segment.slice(i,
static_cast<diff_t>(tile_size)));
1351 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
1353 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
1354 kernel_sync_requirement::none,
1360 using diff_t =
typename std::iterator_traits<
1361 typename SEGMENT::iterator>::difference_type;
1363 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1368 SEGMENT
const& segment0,
1369 SEGMENT
const& segment1,
1372 const diff_t len0 = segment0.end() - segment0.begin();
1373 const diff_t len1 = segment1.end() - segment1.begin();
1376 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims()) *
1377 static_cast<diff_t>(tile_size0);
1379 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims()) *
1380 static_cast<diff_t>(tile_size1);
1382 if (i0 < len0 && i1 < len1)
1384 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1385 segment1.slice(i1,
static_cast<diff_t>(tile_size1)));
1390 template<
typename SEGMENT,
1391 typename IndexMapper0,
1392 typename IndexMapper1,
1393 typename IndexMapper2>
1395 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
1396 kernel_sync_requirement::none,
1403 using diff_t =
typename std::iterator_traits<
1404 typename SEGMENT::iterator>::difference_type;
1406 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1412 SEGMENT
const& segment0,
1413 SEGMENT
const& segment1,
1414 SEGMENT
const& segment2,
1417 const diff_t len0 = segment0.end() - segment0.begin();
1418 const diff_t len1 = segment1.end() - segment1.begin();
1419 const diff_t len2 = segment2.end() - segment2.begin();
1422 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims()) *
1423 static_cast<diff_t>(tile_size0);
1425 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims()) *
1426 static_cast<diff_t>(tile_size1);
1428 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims()) *
1429 static_cast<diff_t>(tile_size2);
1431 if (i0 < len0 && i1 < len1 && i2 < len2)
1433 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1434 segment1.slice(i1,
static_cast<diff_t>(tile_size1)),
1435 segment2.slice(i2,
static_cast<diff_t>(tile_size2)));
1440 template<
typename SEGMENT,
typename IndexMapper>
1442 RAJA::policy::cuda::cuda_indexer<
1443 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1444 kernel_sync_requirement::none,
1449 using diff_t =
typename std::iterator_traits<
1450 typename SEGMENT::iterator>::difference_type;
1452 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1456 SEGMENT
const& segment,
1459 const diff_t len = segment.end() - segment.begin();
1461 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims()) *
1462 static_cast<diff_t>(tile_size);
1464 IndexMapper::template size<diff_t>(
ctx.get_indices_and_dims()) *
1465 static_cast<diff_t>(tile_size);
1467 for (
diff_t i = i_init; i < len; i += i_stride)
1469 body(segment.slice(i,
static_cast<diff_t>(tile_size)));
1474 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
1476 RAJA::policy::cuda::cuda_indexer<
1477 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1478 kernel_sync_requirement::none,
1484 using diff_t =
typename std::iterator_traits<
1485 typename SEGMENT::iterator>::difference_type;
1487 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1492 SEGMENT
const& segment0,
1493 SEGMENT
const& segment1,
1496 const diff_t len0 = segment0.end() - segment0.begin();
1497 const diff_t len1 = segment1.end() - segment1.begin();
1500 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims()) *
1501 static_cast<diff_t>(tile_size0);
1503 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims()) *
1504 static_cast<diff_t>(tile_size1);
1507 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims()) *
1508 static_cast<diff_t>(tile_size0);
1510 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims()) *
1511 static_cast<diff_t>(tile_size1);
1513 for (
diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
1515 for (
diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
1517 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1518 segment1.slice(i1,
static_cast<diff_t>(tile_size1)));
1524 template<
typename SEGMENT,
1525 typename IndexMapper0,
1526 typename IndexMapper1,
1527 typename IndexMapper2>
1529 RAJA::policy::cuda::cuda_indexer<
1530 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1531 kernel_sync_requirement::none,
1538 using diff_t =
typename std::iterator_traits<
1539 typename SEGMENT::iterator>::difference_type;
1541 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1547 SEGMENT
const& segment0,
1548 SEGMENT
const& segment1,
1549 SEGMENT
const& segment2,
1552 const diff_t len0 = segment0.end() - segment0.begin();
1553 const diff_t len1 = segment1.end() - segment1.begin();
1554 const diff_t len2 = segment2.end() - segment2.begin();
1557 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims()) *
1558 static_cast<diff_t>(tile_size0);
1560 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims()) *
1561 static_cast<diff_t>(tile_size1);
1563 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims()) *
1564 static_cast<diff_t>(tile_size2);
1567 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims()) *
1568 static_cast<diff_t>(tile_size0);
1570 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims()) *
1571 static_cast<diff_t>(tile_size1);
1573 IndexMapper2::template size<diff_t>(
ctx.get_indices_and_dims()) *
1574 static_cast<diff_t>(tile_size2);
1576 for (
diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
1578 for (
diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
1580 for (
diff_t i2 = i2_init; i2 < len2; i2 += i2_stride)
1582 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1583 segment1.slice(i1,
static_cast<diff_t>(tile_size1)),
1584 segment2.slice(i2,
static_cast<diff_t>(tile_size2)));
1594 template<
typename SEGMENT,
typename IndexMapper>
1596 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
1597 kernel_sync_requirement::none,
1602 using diff_t =
typename std::iterator_traits<
1603 typename SEGMENT::iterator>::difference_type;
1605 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1609 SEGMENT
const& segment,
1613 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
1616 body(segment.slice(i,
static_cast<diff_t>(tile_size)), t);
1620 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
1622 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
1623 kernel_sync_requirement::none,
1629 using diff_t =
typename std::iterator_traits<
1630 typename SEGMENT::iterator>::difference_type;
1632 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1637 SEGMENT
const& segment0,
1638 SEGMENT
const& segment1,
1642 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1644 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1646 const diff_t i0 = t0 *
static_cast<diff_t>(tile_size0);
1647 const diff_t i1 = t1 *
static_cast<diff_t>(tile_size1);
1649 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1650 segment1.slice(i1,
static_cast<diff_t>(tile_size1)), t0, t1);
1654 template<
typename SEGMENT,
1655 typename IndexMapper0,
1656 typename IndexMapper1,
1657 typename IndexMapper2>
1659 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::DirectUnchecked,
1660 kernel_sync_requirement::none,
1667 using diff_t =
typename std::iterator_traits<
1668 typename SEGMENT::iterator>::difference_type;
1670 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1676 SEGMENT
const& segment0,
1677 SEGMENT
const& segment1,
1678 SEGMENT
const& segment2,
1682 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1684 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1686 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
1688 const diff_t i0 = t0 *
static_cast<diff_t>(tile_size0);
1689 const diff_t i1 = t1 *
static_cast<diff_t>(tile_size1);
1690 const diff_t i2 = t2 *
static_cast<diff_t>(tile_size2);
1692 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1693 segment1.slice(i1,
static_cast<diff_t>(tile_size1)),
1694 segment2.slice(i2,
static_cast<diff_t>(tile_size2)), t0, t1, t2);
1698 template<
typename SEGMENT,
typename IndexMapper>
1700 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
1701 kernel_sync_requirement::none,
1706 using diff_t =
typename std::iterator_traits<
1707 typename SEGMENT::iterator>::difference_type;
1709 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1713 SEGMENT
const& segment,
1716 const diff_t len = segment.end() - segment.begin();
1718 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
1723 body(segment.slice(i,
static_cast<diff_t>(tile_size)), t);
1728 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
1730 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
1731 kernel_sync_requirement::none,
1737 using diff_t =
typename std::iterator_traits<
1738 typename SEGMENT::iterator>::difference_type;
1740 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1745 SEGMENT
const& segment0,
1746 SEGMENT
const& segment1,
1749 const diff_t len0 = segment0.end() - segment0.begin();
1750 const diff_t len1 = segment1.end() - segment1.begin();
1753 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1755 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1757 const diff_t i0 = t0 *
static_cast<diff_t>(tile_size0);
1758 const diff_t i1 = t1 *
static_cast<diff_t>(tile_size1);
1760 if (i0 < len0 && i1 < len1)
1762 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1763 segment1.slice(i1,
static_cast<diff_t>(tile_size1)), t0, t1);
1768 template<
typename SEGMENT,
1769 typename IndexMapper0,
1770 typename IndexMapper1,
1771 typename IndexMapper2>
1773 RAJA::policy::cuda::cuda_indexer<RAJA::iteration_mapping::Direct,
1774 kernel_sync_requirement::none,
1781 using diff_t =
typename std::iterator_traits<
1782 typename SEGMENT::iterator>::difference_type;
1784 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1790 SEGMENT
const& segment0,
1791 SEGMENT
const& segment1,
1792 SEGMENT
const& segment2,
1795 const diff_t len0 = segment0.end() - segment0.begin();
1796 const diff_t len1 = segment1.end() - segment1.begin();
1797 const diff_t len2 = segment2.end() - segment2.begin();
1800 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1802 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1804 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
1806 const diff_t i0 = t0 *
static_cast<diff_t>(tile_size0);
1807 const diff_t i1 = t1 *
static_cast<diff_t>(tile_size1);
1808 const diff_t i2 = t2 *
static_cast<diff_t>(tile_size2);
1810 if (i0 < len0 && i1 < len1 && i2 < len2)
1812 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1813 segment1.slice(i1,
static_cast<diff_t>(tile_size1)),
1814 segment2.slice(i2,
static_cast<diff_t>(tile_size2)), t0, t1, t2);
1819 template<
typename SEGMENT,
typename IndexMapper>
1821 RAJA::policy::cuda::cuda_indexer<
1822 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1823 kernel_sync_requirement::none,
1828 using diff_t =
typename std::iterator_traits<
1829 typename SEGMENT::iterator>::difference_type;
1831 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1835 SEGMENT
const& segment,
1838 const diff_t len = segment.end() - segment.begin();
1840 IndexMapper::template index<diff_t>(
ctx.get_indices_and_dims());
1841 const diff_t i_init = t_init *
static_cast<diff_t>(tile_size);
1843 IndexMapper::template size<diff_t>(
ctx.get_indices_and_dims());
1844 const diff_t i_stride = t_stride *
static_cast<diff_t>(tile_size);
1846 for (
diff_t i = i_init, t = t_init; i < len; i += i_stride, t += t_stride)
1848 body(segment.slice(i,
static_cast<diff_t>(tile_size)), t);
1853 template<
typename SEGMENT,
typename IndexMapper0,
typename IndexMapper1>
1855 RAJA::policy::cuda::cuda_indexer<
1856 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1857 kernel_sync_requirement::none,
1863 using diff_t =
typename std::iterator_traits<
1864 typename SEGMENT::iterator>::difference_type;
1866 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1871 SEGMENT
const& segment0,
1872 SEGMENT
const& segment1,
1875 const diff_t len0 = segment0.end() - segment0.begin();
1876 const diff_t len1 = segment1.end() - segment1.begin();
1879 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1881 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1883 const diff_t i0_init = t0_init *
static_cast<diff_t>(tile_size0);
1884 const diff_t i1_init = t1_init *
static_cast<diff_t>(tile_size1);
1887 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
1889 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
1891 const diff_t i0_stride = t0_stride *
static_cast<diff_t>(tile_size0);
1892 const diff_t i1_stride = t1_stride *
static_cast<diff_t>(tile_size1);
1894 for (
diff_t i0 = i0_init, t0 = t0_init; i0 < len0;
1895 i0 += i0_stride, t0 += t0_stride)
1897 for (
diff_t i1 = i1_init, t1 = t1_init; i1 < len1;
1898 i1 += i1_stride, t1 += t1_stride)
1900 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1901 segment1.slice(i1,
static_cast<diff_t>(tile_size1)), t0, t1);
1907 template<
typename SEGMENT,
1908 typename IndexMapper0,
1909 typename IndexMapper1,
1910 typename IndexMapper2>
1912 RAJA::policy::cuda::cuda_indexer<
1913 RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1914 kernel_sync_requirement::none,
1921 using diff_t =
typename std::iterator_traits<
1922 typename SEGMENT::iterator>::difference_type;
1924 template<
typename LaunchContextPolicy,
typename TILE_T,
typename BODY>
1930 SEGMENT
const& segment0,
1931 SEGMENT
const& segment1,
1932 SEGMENT
const& segment2,
1935 const diff_t len0 = segment0.end() - segment0.begin();
1936 const diff_t len1 = segment1.end() - segment1.begin();
1937 const diff_t len2 = segment2.end() - segment2.begin();
1940 IndexMapper0::template index<diff_t>(
ctx.get_indices_and_dims());
1942 IndexMapper1::template index<diff_t>(
ctx.get_indices_and_dims());
1944 IndexMapper2::template index<diff_t>(
ctx.get_indices_and_dims());
1946 const diff_t i0_init = t0_init *
static_cast<diff_t>(tile_size0);
1947 const diff_t i1_init = t1_init *
static_cast<diff_t>(tile_size1);
1948 const diff_t i2_init = t2_init *
static_cast<diff_t>(tile_size2);
1951 IndexMapper0::template size<diff_t>(
ctx.get_indices_and_dims());
1953 IndexMapper1::template size<diff_t>(
ctx.get_indices_and_dims());
1955 IndexMapper2::template size<diff_t>(
ctx.get_indices_and_dims());
1957 const diff_t i0_stride = t0_stride *
static_cast<diff_t>(tile_size0);
1958 const diff_t i1_stride = t1_stride *
static_cast<diff_t>(tile_size1);
1959 const diff_t i2_stride = t2_stride *
static_cast<diff_t>(tile_size2);
1961 for (
diff_t i0 = i0_init, t0 = t0_init; i0 < len0;
1962 i0 += i0_stride, t0 += t0_stride)
1964 for (
diff_t i1 = i1_init, t1 = t1_init; i1 < len1;
1965 i1 += i1_stride, t1 += t1_stride)
1967 for (
diff_t i2 = i2_init, t2 = t2_init; i2 < len2;
1968 i2 += i2_stride, t2 += t2_stride)
1970 body(segment0.slice(i0,
static_cast<diff_t>(tile_size0)),
1971 segment1.slice(i1,
static_cast<diff_t>(tile_size1)),
1972 segment2.slice(i2,
static_cast<diff_t>(tile_size2)), t0, t1, t2);
Header file defining prototypes for routines used to manage memory for CUDA reductions and other oper...
Definition: launch_core.hpp:192
Definition: launch_core.hpp:246
RAJA_HOST_DEVICE RAJA_INLINE LaunchContextT()
Definition: launch.hpp:48
IndicesAndDimsT indices_and_dims_t
Definition: launch.hpp:43
indices_and_dims_t indices_and_dims
Definition: launch.hpp:46
RAJA_HOST_DEVICE RAJA_INLINE indices_and_dims_t const & get_indices_and_dims() const
Definition: launch.hpp:53
Definition: launch_context_policy.hpp:30
Header file containing RAJA CUDA policy definitions.
RAJA header file containing the core components of RAJA::launch.
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
#define RAJA_DEVICE
Definition: macros.hpp:66
Args args
Definition: WorkRunner.hpp:212
constexpr RAJA_HOST_DEVICE auto invoke_body(Params &¶ms, Fn &&f, Ts &&... extra)
Definition: forall.hpp:598
RAJA_HOST_DEVICE auto thread_privatize(const T &item) -> Privatizer< T >
Create a private copy of the argument to be stored on the current thread's stack in a class of the Pr...
Definition: privatizer.hpp:88
Definition: AlignedRangeIndexSetBuilders.cpp:35
LaunchContextType ctx
Definition: launch.hpp:185
__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__ void launch_new_reduce_global_fcn_fixed(const RAJA_CUDA_GRID_CONSTANT BODY body_in
ReduceParams reduce_params
Definition: launch.hpp:173
__global__ void launch_new_reduce_global_fcn(const RAJA_CUDA_GRID_CONSTANT BODY body_in, ReduceParams reduce_params)
Definition: launch.hpp:61
void launch(LaunchParams const &launch_params, ReduceParams &&... rest_of_launch_args)
Definition: launch_core.hpp:268
auto & body
Definition: launch.hpp:177
__shared__ char raja_shmem_ptr[]
Definition: launch.hpp:174
@ unspecified
Definition: types.hpp:46
typename RAJA::detail::launch_context_type< BODY >::type LaunchContextType
Definition: launch.hpp:183
auto privatizer
Definition: launch.hpp:176
Header file containing utility methods used in CUDA operations.
Header file for RAJA resource definitions.
static concepts::enable_if_t< resources::EventProxy< resources::Resource >, RAJA::expt::type_traits::is_ForallParamPack< ReduceParams > > exec(RAJA::resources::Resource res, const LaunchParams &launch_params, BODY_IN &&body_in, ReduceParams &launch_reducers)
Definition: launch.hpp:203
static concepts::enable_if_t< resources::EventProxy< resources::Resource >, RAJA::expt::type_traits::is_ForallParamPack< ReduceParams > > exec(RAJA::resources::Resource res, const LaunchParams &launch_params, BODY_IN &&body_in, ReduceParams &launch_reducers)
Definition: launch.hpp:95
Definition: launch_core.hpp:263
Definition: launch_core.hpp:163
size_t shared_mem_size
Definition: launch_core.hpp:167
Teams teams
Definition: launch_core.hpp:165
Threads threads
Definition: launch_core.hpp:166
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1050
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1047
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1146
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1143
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1184
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1187
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:968
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:965
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1000
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1003
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1090
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1087
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:514
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:517
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:348
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:345
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:446
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:443
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:563
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:566
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:313
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:316
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:379
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:376
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:291
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:288
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:485
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:482
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:408
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:405
Definition: launch_core.hpp:480
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:708
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:711
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:814
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:817
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:677
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:680
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:778
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:775
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:737
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:740
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:620
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:623
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:846
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:849
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:895
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:898
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:648
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:645
Definition: launch_core.hpp:483
int value[3]
Definition: launch_core.hpp:99
int value[3]
Definition: launch_core.hpp:124
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1539
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1542
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1333
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1330
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1364
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1361
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1488
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1485
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1404
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1407
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1295
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1292
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1450
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1453
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1228
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1231
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1255
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1258
Definition: launch_core.hpp:579
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1867
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1864
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1671
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1668
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1829
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1832
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1785
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1782
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1738
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1741
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1707
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1710
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1630
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1633
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1925
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1922
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1606
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1603
Definition: launch_core.hpp:582
static constexpr void parampack_resolve(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:304
static constexpr void parampack_init(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:269
static RAJA_HOST_DEVICE constexpr void parampack_combine(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:286
Definition: TypeTraits.hpp:59