RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
launch.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_pattern_launch_hip_HPP
21 #define RAJA_pattern_launch_hip_HPP
22 
28 #include "RAJA/util/resource.hpp"
29 
30 namespace RAJA
31 {
32 
38 template<typename IndicesAndDimsT>
39 class LaunchContextT<hip::LaunchContextIndicesAndDimsPolicy<IndicesAndDimsT>>
40  : public LaunchContextBase
41 {
42 public:
43  using indices_and_dims_t = IndicesAndDimsT;
44 
46 
49  indices_and_dims()
50  {}
51 
53  const
54  {
55  return indices_and_dims;
56  }
57 };
58 
59 template<typename BODY, typename ReduceParams>
60 __global__ void launch_new_reduce_global_fcn(const BODY body_in,
61  ReduceParams reduce_params)
62 {
63 
65  auto privatizer = thread_privatize(body_in);
66  auto& body = privatizer.get_priv();
67 
68  // Set pointer to shared memory
69  extern __shared__ char raja_shmem_ptr[];
70 
71  using LaunchContextType =
73 
75  ctx.shared_mem_ptr = raja_shmem_ptr;
77 
78  // Using a flatten global policy as we may use all dimensions
80  RAJA::hip_flatten_global_xyz_direct {}, reduce_params);
81 }
82 
83 template<bool async>
85  RAJA::policy::hip::hip_launch_t<async, named_usage::unspecified>>
86 {
87 
88  template<typename BODY_IN, typename ReduceParams>
89  static concepts::enable_if_t<
90  resources::EventProxy<resources::Resource>,
92  exec(RAJA::resources::Resource res,
93  const LaunchParams& launch_params,
94  BODY_IN&& body_in,
95  ReduceParams& launch_reducers)
96  {
97  using BODY = camp::decay<BODY_IN>;
98  using EXEC_POL =
99  RAJA::policy::hip::hip_launch_t<async, named_usage::unspecified>;
100  EXEC_POL pol {};
101 
102  auto func = reinterpret_cast<const void*>(
103  &launch_new_reduce_global_fcn<BODY, camp::decay<ReduceParams>>);
104 
105  resources::Hip hip_res = res.get<RAJA::resources::Hip>();
106 
107  //
108  // Compute the number of blocks and threads
109  //
110 
111  hip_dim_t gridSize {
112  static_cast<hip_dim_member_t>(launch_params.teams.value[0]),
113  static_cast<hip_dim_member_t>(launch_params.teams.value[1]),
114  static_cast<hip_dim_member_t>(launch_params.teams.value[2])};
115 
116  hip_dim_t blockSize {
117  static_cast<hip_dim_member_t>(launch_params.threads.value[0]),
118  static_cast<hip_dim_member_t>(launch_params.threads.value[1]),
119  static_cast<hip_dim_member_t>(launch_params.threads.value[2])};
120 
121  // Only launch kernel if we have something to iterate over
122  constexpr hip_dim_member_t zero = 0;
123  if (gridSize.x > zero && gridSize.y > zero && gridSize.z > zero &&
124  blockSize.x > zero && blockSize.y > zero && blockSize.z > zero)
125  {
126 
127 
128  size_t shared_mem_size = launch_params.shared_mem_size;
129  RAJA::hip::detail::hipInfo launch_info;
130  launch_info.gridDim = gridSize;
131  launch_info.blockDim = blockSize;
132  launch_info.dynamic_smem = &shared_mem_size;
133  launch_info.res = hip_res;
134 
135  {
136 
138  launch_info);
139 
140  //
141  // Privatize the loop_body, using make_launch_body to setup reductions
142  //
143  BODY body = RAJA::hip::make_launch_body(func, gridSize, blockSize,
144  shared_mem_size, hip_res,
145  std::forward<BODY_IN>(body_in));
146 
147  //
148  // Launch the kernel
149  //
150  void* args[] = {(void*)&body, (void*)&launch_reducers};
151  RAJA::hip::launch(func, gridSize, blockSize, args, shared_mem_size,
152  hip_res, async);
153 
155  launch_info);
156  }
157  }
158 
159  return resources::EventProxy<resources::Resource>(res);
160  }
161 };
162 
163 template<typename BODY, int num_threads, typename ReduceParams>
164 __launch_bounds__(num_threads, 1) __global__
165  void launch_new_reduce_global_fcn_fixed(const BODY body_in,
166  ReduceParams reduce_params)
167 {
168 
170  auto privatizer = thread_privatize(body_in);
171  auto& body = privatizer.get_priv();
172 
173  // Set pointer to shared memory
174  extern __shared__ char raja_shmem_ptr[];
175 
176  using LaunchContextType =
178 
180  ctx.shared_mem_ptr = raja_shmem_ptr;
182 
183  // Using a flatten global policy as we may use all dimensions
185  RAJA::hip_flatten_global_xyz_direct {}, reduce_params);
186 }
187 
188 template<bool async, int nthreads>
189 struct LaunchExecute<RAJA::policy::hip::hip_launch_t<async, nthreads>>
190 {
191 
192  template<typename BODY_IN, typename ReduceParams>
193  static concepts::enable_if_t<
194  resources::EventProxy<resources::Resource>,
196  exec(RAJA::resources::Resource res,
197  const LaunchParams& launch_params,
198  BODY_IN&& body_in,
199  ReduceParams& launch_reducers)
200  {
201  using BODY = camp::decay<BODY_IN>;
202  // Use a generic block size policy here to match that used in
203  // parampack_combine
204  using EXEC_POL =
205  RAJA::policy::hip::hip_launch_t<async, named_usage::unspecified>;
206  EXEC_POL pol {};
207 
208  auto func = reinterpret_cast<const void*>(
209  &launch_new_reduce_global_fcn_fixed<BODY, nthreads,
210  camp::decay<ReduceParams>>);
211 
212  resources::Hip hip_res = res.get<RAJA::resources::Hip>();
213 
214  //
215  // Compute the number of blocks and threads
216  //
217 
218  hip_dim_t gridSize {
219  static_cast<hip_dim_member_t>(launch_params.teams.value[0]),
220  static_cast<hip_dim_member_t>(launch_params.teams.value[1]),
221  static_cast<hip_dim_member_t>(launch_params.teams.value[2])};
222 
223  hip_dim_t blockSize {
224  static_cast<hip_dim_member_t>(launch_params.threads.value[0]),
225  static_cast<hip_dim_member_t>(launch_params.threads.value[1]),
226  static_cast<hip_dim_member_t>(launch_params.threads.value[2])};
227 
228  // Only launch kernel if we have something to iterate over
229  constexpr hip_dim_member_t zero = 0;
230  if (gridSize.x > zero && gridSize.y > zero && gridSize.z > zero &&
231  blockSize.x > zero && blockSize.y > zero && blockSize.z > zero)
232  {
233 
234 
235  size_t shared_mem_size = launch_params.shared_mem_size;
236  RAJA::hip::detail::hipInfo launch_info;
237  launch_info.gridDim = gridSize;
238  launch_info.blockDim = blockSize;
239  launch_info.dynamic_smem = &shared_mem_size;
240  launch_info.res = hip_res;
241 
242  {
243 
245  launch_info);
246 
247  //
248  // Privatize the loop_body, using make_launch_body to setup reductions
249  //
250  BODY body = RAJA::hip::make_launch_body(func, gridSize, blockSize,
251  shared_mem_size, hip_res,
252  std::forward<BODY_IN>(body_in));
253 
254  //
255  // Launch the kernel
256  //
257  void* args[] = {(void*)&body, (void*)&launch_reducers};
258  RAJA::hip::launch(func, gridSize, blockSize, args, shared_mem_size,
259  hip_res, async);
260 
262  launch_info);
263  }
264  }
265 
266  return resources::EventProxy<resources::Resource>(res);
267  }
268 };
269 
270 /*
271  HIP generic loop implementations
272 */
273 template<typename SEGMENT, typename IndexMapper>
274 struct LoopExecute<
275  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
276  kernel_sync_requirement::none,
277  IndexMapper>,
278  SEGMENT>
279 {
280 
281  using diff_t = typename std::iterator_traits<
282  typename SEGMENT::iterator>::difference_type;
283 
284  template<typename LaunchContextPolicy, typename BODY>
285  static RAJA_INLINE RAJA_DEVICE void exec(
287  SEGMENT const& segment,
288  BODY const& body)
289  {
290  const diff_t i =
291  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
292 
293  body(*(segment.begin() + i));
294  }
295 };
296 
297 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
298 struct LoopExecute<
299  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
300  kernel_sync_requirement::none,
301  IndexMapper0,
302  IndexMapper1>,
303  SEGMENT>
304 {
305 
306  using diff_t = typename std::iterator_traits<
307  typename SEGMENT::iterator>::difference_type;
308 
309  template<typename LaunchContextPolicy, typename BODY>
310  static RAJA_INLINE RAJA_DEVICE void exec(
312  SEGMENT const& segment0,
313  SEGMENT const& segment1,
314  BODY const& body)
315  {
316  const diff_t i0 =
317  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
318  const diff_t i1 =
319  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
320 
321  body(*(segment0.begin() + i0), *(segment1.begin() + i1));
322  }
323 };
324 
325 template<typename SEGMENT,
326  typename IndexMapper0,
327  typename IndexMapper1,
328  typename IndexMapper2>
329 struct LoopExecute<
330  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
331  kernel_sync_requirement::none,
332  IndexMapper0,
333  IndexMapper1,
334  IndexMapper2>,
335  SEGMENT>
336 {
337 
338  using diff_t = typename std::iterator_traits<
339  typename SEGMENT::iterator>::difference_type;
340 
341  template<typename LaunchContextPolicy, typename BODY>
342  static RAJA_INLINE RAJA_DEVICE void exec(
344  SEGMENT const& segment0,
345  SEGMENT const& segment1,
346  SEGMENT const& segment2,
347  BODY const& body)
348  {
349  const diff_t i0 =
350  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
351  const diff_t i1 =
352  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
353  const diff_t i2 =
354  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
355 
356  body(*(segment0.begin() + i0), *(segment1.begin() + i1),
357  *(segment2.begin() + i2));
358  }
359 };
360 
361 template<typename SEGMENT, typename IndexMapper>
362 struct LoopExecute<
363  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
364  kernel_sync_requirement::none,
365  IndexMapper>,
366  SEGMENT>
367 {
368 
369  using diff_t = typename std::iterator_traits<
370  typename SEGMENT::iterator>::difference_type;
371 
372  template<typename LaunchContextPolicy, typename BODY>
373  static RAJA_INLINE RAJA_DEVICE void exec(
375  SEGMENT const& segment,
376  BODY const& body)
377  {
378  const diff_t len = segment.end() - segment.begin();
379  const diff_t i =
380  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
381 
382  if (i < len)
383  {
384  body(*(segment.begin() + i));
385  }
386  }
387 };
388 
389 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
390 struct LoopExecute<
391  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
392  kernel_sync_requirement::none,
393  IndexMapper0,
394  IndexMapper1>,
395  SEGMENT>
396 {
397 
398  using diff_t = typename std::iterator_traits<
399  typename SEGMENT::iterator>::difference_type;
400 
401  template<typename LaunchContextPolicy, typename BODY>
402  static RAJA_INLINE RAJA_DEVICE void exec(
404  SEGMENT const& segment0,
405  SEGMENT const& segment1,
406  BODY const& body)
407  {
408  const int len0 = segment0.end() - segment0.begin();
409  const int len1 = segment1.end() - segment1.begin();
410 
411  const diff_t i0 =
412  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
413  const diff_t i1 =
414  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
415 
416  if (i0 < len0 && i1 < len1)
417  {
418  body(*(segment0.begin() + i0), *(segment1.begin() + i1));
419  }
420  }
421 };
422 
423 template<typename SEGMENT,
424  typename IndexMapper0,
425  typename IndexMapper1,
426  typename IndexMapper2>
427 struct LoopExecute<
428  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
429  kernel_sync_requirement::none,
430  IndexMapper0,
431  IndexMapper1,
432  IndexMapper2>,
433  SEGMENT>
434 {
435 
436  using diff_t = typename std::iterator_traits<
437  typename SEGMENT::iterator>::difference_type;
438 
439  template<typename LaunchContextPolicy, typename BODY>
440  static RAJA_INLINE RAJA_DEVICE void exec(
442  SEGMENT const& segment0,
443  SEGMENT const& segment1,
444  SEGMENT const& segment2,
445  BODY const& body)
446  {
447  const int len0 = segment0.end() - segment0.begin();
448  const int len1 = segment1.end() - segment1.begin();
449  const int len2 = segment2.end() - segment2.begin();
450 
451  const diff_t i0 =
452  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
453  const diff_t i1 =
454  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
455  const diff_t i2 =
456  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
457 
458  if (i0 < len0 && i1 < len1 && i2 < len2)
459  {
460  body(*(segment0.begin() + i0), *(segment1.begin() + i1),
461  *(segment2.begin() + i2));
462  }
463  }
464 };
465 
466 template<typename SEGMENT, typename IndexMapper>
467 struct LoopExecute<
468  RAJA::policy::hip::hip_indexer<
469  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
470  kernel_sync_requirement::none,
471  IndexMapper>,
472  SEGMENT>
473 {
474 
475  using diff_t = typename std::iterator_traits<
476  typename SEGMENT::iterator>::difference_type;
477 
478  template<typename LaunchContextPolicy, typename BODY>
479  static RAJA_INLINE RAJA_DEVICE void exec(
481  SEGMENT const& segment,
482  BODY const& body)
483  {
484  const diff_t len = segment.end() - segment.begin();
485  const diff_t i_init =
486  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
487  const diff_t i_stride =
488  IndexMapper::template size<diff_t>(ctx.get_indices_and_dims());
489 
490  for (diff_t i = i_init; i < len; i += i_stride)
491  {
492  body(*(segment.begin() + i));
493  }
494  }
495 };
496 
497 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
498 struct LoopExecute<
499  RAJA::policy::hip::hip_indexer<
500  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
501  kernel_sync_requirement::none,
502  IndexMapper0,
503  IndexMapper1>,
504  SEGMENT>
505 {
506 
507  using diff_t = typename std::iterator_traits<
508  typename SEGMENT::iterator>::difference_type;
509 
510  template<typename LaunchContextPolicy, typename BODY>
511  static RAJA_INLINE RAJA_DEVICE void exec(
513  SEGMENT const& segment0,
514  SEGMENT const& segment1,
515  BODY const& body)
516  {
517  const int len0 = segment0.end() - segment0.begin();
518  const int len1 = segment1.end() - segment1.begin();
519 
520  const diff_t i0_init =
521  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
522  const diff_t i1_init =
523  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
524 
525  const diff_t i0_stride =
526  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
527  const diff_t i1_stride =
528  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
529 
530  for (diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
531  {
532 
533  for (diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
534  {
535 
536  body(*(segment0.begin() + i0), *(segment1.begin() + i1));
537  }
538  }
539  }
540 };
541 
542 template<typename SEGMENT,
543  typename IndexMapper0,
544  typename IndexMapper1,
545  typename IndexMapper2>
546 struct LoopExecute<
547  RAJA::policy::hip::hip_indexer<
548  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
549  kernel_sync_requirement::none,
550  IndexMapper0,
551  IndexMapper1,
552  IndexMapper2>,
553  SEGMENT>
554 {
555 
556  using diff_t = typename std::iterator_traits<
557  typename SEGMENT::iterator>::difference_type;
558 
559  template<typename LaunchContextPolicy, typename BODY>
560  static RAJA_INLINE RAJA_DEVICE void exec(
562  SEGMENT const& segment0,
563  SEGMENT const& segment1,
564  SEGMENT const& segment2,
565  BODY const& body)
566  {
567  const int len0 = segment0.end() - segment0.begin();
568  const int len1 = segment1.end() - segment1.begin();
569  const int len2 = segment2.end() - segment2.begin();
570 
571  const diff_t i0_init =
572  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
573  const diff_t i1_init =
574  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
575  const diff_t i2_init =
576  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
577 
578  const diff_t i0_stride =
579  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
580  const diff_t i1_stride =
581  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
582  const diff_t i2_stride =
583  IndexMapper2::template size<diff_t>(ctx.get_indices_and_dims());
584 
585  for (diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
586  {
587 
588  for (diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
589  {
590 
591  for (diff_t i2 = i2_init; i2 < len2; i2 += i2_stride)
592  {
593 
594  body(*(segment0.begin() + i0), *(segment1.begin() + i1),
595  *(segment2.begin() + i2));
596  }
597  }
598  }
599  }
600 };
601 
602 /*
603  HIP generic loop_icount implementations
604 */
605 template<typename SEGMENT, typename IndexMapper>
607  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
608  kernel_sync_requirement::none,
609  IndexMapper>,
610  SEGMENT>
611 {
612 
613  using diff_t = typename std::iterator_traits<
614  typename SEGMENT::iterator>::difference_type;
615 
616  template<typename LaunchContextPolicy, typename BODY>
617  static RAJA_INLINE RAJA_DEVICE void exec(
619  SEGMENT const& segment,
620  BODY const& body)
621  {
622  const diff_t i =
623  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
624 
625  body(*(segment.begin() + i), i);
626  }
627 };
628 
629 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
631  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
632  kernel_sync_requirement::none,
633  IndexMapper0,
634  IndexMapper1>,
635  SEGMENT>
636 {
637 
638  using diff_t = typename std::iterator_traits<
639  typename SEGMENT::iterator>::difference_type;
640 
641  template<typename LaunchContextPolicy, typename BODY>
642  static RAJA_INLINE RAJA_DEVICE void exec(
644  SEGMENT const& segment0,
645  SEGMENT const& segment1,
646  BODY const& body)
647  {
648  const diff_t i0 =
649  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
650  const diff_t i1 =
651  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
652 
653  body(*(segment0.begin() + i0), *(segment1.begin() + i1), i0, i1);
654  }
655 };
656 
657 template<typename SEGMENT,
658  typename IndexMapper0,
659  typename IndexMapper1,
660  typename IndexMapper2>
662  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
663  kernel_sync_requirement::none,
664  IndexMapper0,
665  IndexMapper1,
666  IndexMapper2>,
667  SEGMENT>
668 {
669 
670  using diff_t = typename std::iterator_traits<
671  typename SEGMENT::iterator>::difference_type;
672 
673  template<typename LaunchContextPolicy, typename BODY>
674  static RAJA_INLINE RAJA_DEVICE void exec(
676  SEGMENT const& segment0,
677  SEGMENT const& segment1,
678  SEGMENT const& segment2,
679  BODY const& body)
680  {
681  const diff_t i0 =
682  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
683  const diff_t i1 =
684  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
685  const diff_t i2 =
686  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
687 
688  body(*(segment0.begin() + i0), *(segment1.begin() + i1),
689  *(segment2.begin() + i2), i0, i1, i2);
690  }
691 };
692 
693 template<typename SEGMENT, typename IndexMapper>
695  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
696  kernel_sync_requirement::none,
697  IndexMapper>,
698  SEGMENT>
699 {
700 
701  using diff_t = typename std::iterator_traits<
702  typename SEGMENT::iterator>::difference_type;
703 
704  template<typename LaunchContextPolicy, typename BODY>
705  static RAJA_INLINE RAJA_DEVICE void exec(
707  SEGMENT const& segment,
708  BODY const& body)
709  {
710  const diff_t len = segment.end() - segment.begin();
711  const diff_t i =
712  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
713 
714  if (i < len)
715  {
716  body(*(segment.begin() + i), i);
717  }
718  }
719 };
720 
721 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
723  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
724  kernel_sync_requirement::none,
725  IndexMapper0,
726  IndexMapper1>,
727  SEGMENT>
728 {
729 
730  using diff_t = typename std::iterator_traits<
731  typename SEGMENT::iterator>::difference_type;
732 
733  template<typename LaunchContextPolicy, typename BODY>
734  static RAJA_INLINE RAJA_DEVICE void exec(
736  SEGMENT const& segment0,
737  SEGMENT const& segment1,
738  BODY const& body)
739  {
740  const int len0 = segment0.end() - segment0.begin();
741  const int len1 = segment1.end() - segment1.begin();
742 
743  const diff_t i0 =
744  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
745  const diff_t i1 =
746  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
747 
748  if (i0 < len0 && i1 < len1)
749  {
750  body(*(segment0.begin() + i0), *(segment1.begin() + i1), i0, i1);
751  }
752  }
753 };
754 
755 template<typename SEGMENT,
756  typename IndexMapper0,
757  typename IndexMapper1,
758  typename IndexMapper2>
760  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
761  kernel_sync_requirement::none,
762  IndexMapper0,
763  IndexMapper1,
764  IndexMapper2>,
765  SEGMENT>
766 {
767 
768  using diff_t = typename std::iterator_traits<
769  typename SEGMENT::iterator>::difference_type;
770 
771  template<typename LaunchContextPolicy, typename BODY>
772  static RAJA_INLINE RAJA_DEVICE void exec(
774  SEGMENT const& segment0,
775  SEGMENT const& segment1,
776  SEGMENT const& segment2,
777  BODY const& body)
778  {
779  const int len0 = segment0.end() - segment0.begin();
780  const int len1 = segment1.end() - segment1.begin();
781  const int len2 = segment2.end() - segment2.begin();
782 
783  const diff_t i0 =
784  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
785  const diff_t i1 =
786  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
787  const diff_t i2 =
788  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
789 
790  if (i0 < len0 && i1 < len1 && i2 < len2)
791  {
792  body(*(segment0.begin() + i0), *(segment1.begin() + i1),
793  *(segment2.begin() + i2), i0, i1, i2);
794  }
795  }
796 };
797 
798 template<typename SEGMENT, typename IndexMapper>
800  RAJA::policy::hip::hip_indexer<
801  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
802  kernel_sync_requirement::none,
803  IndexMapper>,
804  SEGMENT>
805 {
806 
807  using diff_t = typename std::iterator_traits<
808  typename SEGMENT::iterator>::difference_type;
809 
810  template<typename LaunchContextPolicy, typename BODY>
811  static RAJA_INLINE RAJA_DEVICE void exec(
813  SEGMENT const& segment,
814  BODY const& body)
815  {
816  const diff_t len = segment.end() - segment.begin();
817  const diff_t i_init =
818  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
819  const diff_t i_stride =
820  IndexMapper::template size<diff_t>(ctx.get_indices_and_dims());
821 
822  for (diff_t i = i_init; i < len; i += i_stride)
823  {
824  body(*(segment.begin() + i), i);
825  }
826  }
827 };
828 
829 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
831  RAJA::policy::hip::hip_indexer<
832  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
833  kernel_sync_requirement::none,
834  IndexMapper0,
835  IndexMapper1>,
836  SEGMENT>
837 {
838 
839  using diff_t = typename std::iterator_traits<
840  typename SEGMENT::iterator>::difference_type;
841 
842  template<typename LaunchContextPolicy, typename BODY>
843  static RAJA_INLINE RAJA_DEVICE void exec(
845  SEGMENT const& segment0,
846  SEGMENT const& segment1,
847  BODY const& body)
848  {
849  const int len0 = segment0.end() - segment0.begin();
850  const int len1 = segment1.end() - segment1.begin();
851 
852  const diff_t i0_init =
853  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
854  const diff_t i1_init =
855  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
856 
857  const diff_t i0_stride =
858  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
859  const diff_t i1_stride =
860  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
861 
862  for (diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
863  {
864 
865  for (diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
866  {
867 
868  body(*(segment0.begin() + i0), *(segment1.begin() + i1), i0, i1);
869  }
870  }
871  }
872 };
873 
874 template<typename SEGMENT,
875  typename IndexMapper0,
876  typename IndexMapper1,
877  typename IndexMapper2>
879  RAJA::policy::hip::hip_indexer<
880  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
881  kernel_sync_requirement::none,
882  IndexMapper0,
883  IndexMapper1,
884  IndexMapper2>,
885  SEGMENT>
886 {
887 
888  using diff_t = typename std::iterator_traits<
889  typename SEGMENT::iterator>::difference_type;
890 
891  template<typename LaunchContextPolicy, typename BODY>
892  static RAJA_INLINE RAJA_DEVICE void exec(
894  SEGMENT const& segment0,
895  SEGMENT const& segment1,
896  SEGMENT const& segment2,
897  BODY const& body)
898  {
899  const int len0 = segment0.end() - segment0.begin();
900  const int len1 = segment1.end() - segment1.begin();
901  const int len2 = segment2.end() - segment2.begin();
902 
903  const diff_t i0_init =
904  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
905  const diff_t i1_init =
906  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
907  const diff_t i2_init =
908  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
909 
910  const diff_t i0_stride =
911  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
912  const diff_t i1_stride =
913  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
914  const diff_t i2_stride =
915  IndexMapper2::template size<diff_t>(ctx.get_indices_and_dims());
916 
917  for (diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
918  {
919 
920  for (diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
921  {
922 
923  for (diff_t i2 = i2_init; i2 < len2; i2 += i2_stride)
924  {
925 
926  body(*(segment0.begin() + i0), *(segment1.begin() + i1),
927  *(segment2.begin() + i2), i0, i1, i2);
928  }
929  }
930  }
931  }
932 };
933 
934 /*
935  HIP generic flattened loop implementations
936 */
937 template<typename SEGMENT, kernel_sync_requirement sync, typename IndexMapper0>
938 struct LoopExecute<RAJA::policy::hip::hip_flatten_indexer<
939  RAJA::iteration_mapping::DirectUnchecked,
940  sync,
941  IndexMapper0>,
942  SEGMENT>
943  : LoopExecute<RAJA::policy::hip::hip_indexer<
944  RAJA::iteration_mapping::DirectUnchecked,
945  sync,
946  IndexMapper0>,
947  SEGMENT>
948 {};
949 
950 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
951 struct LoopExecute<RAJA::policy::hip::hip_flatten_indexer<
952  RAJA::iteration_mapping::DirectUnchecked,
953  kernel_sync_requirement::none,
954  IndexMapper0,
955  IndexMapper1>,
956  SEGMENT>
957 {
958  using diff_t = typename std::iterator_traits<
959  typename SEGMENT::iterator>::difference_type;
960 
961  template<typename LaunchContextPolicy, typename BODY>
962  static RAJA_INLINE RAJA_DEVICE void exec(
964  SEGMENT const& segment,
965  BODY const& body)
966  {
967  const int i0 =
968  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
969  const int i1 =
970  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
971 
972  const diff_t i0_stride =
973  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
974 
975  const int i = i0 + i0_stride * i1;
976 
977  body(*(segment.begin() + i));
978  }
979 };
980 
981 template<typename SEGMENT,
982  typename IndexMapper0,
983  typename IndexMapper1,
984  typename IndexMapper2>
985 struct LoopExecute<RAJA::policy::hip::hip_flatten_indexer<
986  RAJA::iteration_mapping::DirectUnchecked,
987  kernel_sync_requirement::none,
988  IndexMapper0,
989  IndexMapper1,
990  IndexMapper2>,
991  SEGMENT>
992 {
993  using diff_t = typename std::iterator_traits<
994  typename SEGMENT::iterator>::difference_type;
995 
996  template<typename LaunchContextPolicy, typename BODY>
997  static RAJA_INLINE RAJA_DEVICE void exec(
999  SEGMENT const& segment,
1000  BODY const& body)
1001  {
1002  const int i0 =
1003  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1004  const int i1 =
1005  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1006  const int i2 =
1007  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
1008 
1009  const diff_t i0_stride =
1010  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
1011  const diff_t i1_stride =
1012  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
1013 
1014  const int i = i0 + i0_stride * (i1 + i1_stride * i2);
1015 
1016  body(*(segment.begin() + i));
1017  }
1018 };
1019 
1020 template<typename SEGMENT, kernel_sync_requirement sync, typename IndexMapper0>
1022  RAJA::policy::hip::hip_flatten_indexer<RAJA::iteration_mapping::Direct,
1023  sync,
1024  IndexMapper0>,
1025  SEGMENT>
1026  : LoopExecute<
1027  RAJA::policy::hip::
1028  hip_indexer<RAJA::iteration_mapping::Direct, sync, IndexMapper0>,
1029  SEGMENT>
1030 {};
1031 
1032 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
1034  RAJA::policy::hip::hip_flatten_indexer<RAJA::iteration_mapping::Direct,
1035  kernel_sync_requirement::none,
1036  IndexMapper0,
1037  IndexMapper1>,
1038  SEGMENT>
1039 {
1040  using diff_t = typename std::iterator_traits<
1041  typename SEGMENT::iterator>::difference_type;
1042 
1043  template<typename LaunchContextPolicy, typename BODY>
1044  static RAJA_INLINE RAJA_DEVICE void exec(
1046  SEGMENT const& segment,
1047  BODY const& body)
1048  {
1049  const int len = segment.end() - segment.begin();
1050 
1051  const int i0 =
1052  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1053  const int i1 =
1054  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1055 
1056  const diff_t i0_stride =
1057  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
1058 
1059  const int i = i0 + i0_stride * i1;
1060 
1061  if (i < len)
1062  {
1063  body(*(segment.begin() + i));
1064  }
1065  }
1066 };
1067 
1068 template<typename SEGMENT,
1069  typename IndexMapper0,
1070  typename IndexMapper1,
1071  typename IndexMapper2>
1073  RAJA::policy::hip::hip_flatten_indexer<RAJA::iteration_mapping::Direct,
1074  kernel_sync_requirement::none,
1075  IndexMapper0,
1076  IndexMapper1,
1077  IndexMapper2>,
1078  SEGMENT>
1079 {
1080  using diff_t = typename std::iterator_traits<
1081  typename SEGMENT::iterator>::difference_type;
1082 
1083  template<typename LaunchContextPolicy, typename BODY>
1084  static RAJA_INLINE RAJA_DEVICE void exec(
1086  SEGMENT const& segment,
1087  BODY const& body)
1088  {
1089  const int len = segment.end() - segment.begin();
1090 
1091  const int i0 =
1092  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1093  const int i1 =
1094  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1095  const int i2 =
1096  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
1097 
1098  const diff_t i0_stride =
1099  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
1100  const diff_t i1_stride =
1101  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
1102 
1103  const int i = i0 + i0_stride * (i1 + i1_stride * i2);
1104 
1105  if (i < len)
1106  {
1107  body(*(segment.begin() + i));
1108  }
1109  }
1110 };
1111 
1112 template<typename SEGMENT, kernel_sync_requirement sync, typename IndexMapper0>
1114  RAJA::policy::hip::hip_flatten_indexer<
1115  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1116  sync,
1117  IndexMapper0>,
1118  SEGMENT>
1119  : LoopExecute<
1120  RAJA::policy::hip::hip_indexer<
1121  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1122  sync,
1123  IndexMapper0>,
1124  SEGMENT>
1125 {};
1126 
1127 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
1129  RAJA::policy::hip::hip_flatten_indexer<
1130  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1131  kernel_sync_requirement::none,
1132  IndexMapper0,
1133  IndexMapper1>,
1134  SEGMENT>
1135 {
1136  using diff_t = typename std::iterator_traits<
1137  typename SEGMENT::iterator>::difference_type;
1138 
1139  template<typename LaunchContextPolicy, typename BODY>
1140  static RAJA_INLINE RAJA_DEVICE void exec(
1142  SEGMENT const& segment,
1143  BODY const& body)
1144  {
1145  const int len = segment.end() - segment.begin();
1146 
1147  const int i0 =
1148  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1149  const int i1 =
1150  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1151 
1152  const int i0_stride =
1153  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
1154  const int i1_stride =
1155  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
1156 
1157  for (int i = i0 + i0_stride * i1; i < len; i += i0_stride * i1_stride)
1158  {
1159  body(*(segment.begin() + i));
1160  }
1161  }
1162 };
1163 
1164 template<typename SEGMENT,
1165  typename IndexMapper0,
1166  typename IndexMapper1,
1167  typename IndexMapper2>
1169  RAJA::policy::hip::hip_flatten_indexer<
1170  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1171  kernel_sync_requirement::none,
1172  IndexMapper0,
1173  IndexMapper1,
1174  IndexMapper2>,
1175  SEGMENT>
1176 {
1177  using diff_t = typename std::iterator_traits<
1178  typename SEGMENT::iterator>::difference_type;
1179 
1180  template<typename LaunchContextPolicy, typename BODY>
1181  static RAJA_INLINE RAJA_DEVICE void exec(
1183  SEGMENT const& segment,
1184  BODY const& body)
1185  {
1186  const int len = segment.end() - segment.begin();
1187 
1188  const int i0 =
1189  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1190  const int i1 =
1191  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1192  const int i2 =
1193  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
1194 
1195  const int i0_stride =
1196  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
1197  const int i1_stride =
1198  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
1199  const int i2_stride =
1200  IndexMapper2::template size<diff_t>(ctx.get_indices_and_dims());
1201 
1202  for (int i = i0 + i0_stride * (i1 + i1_stride * i2); i < len;
1203  i += i0_stride * i1_stride * i2_stride)
1204  {
1205  body(*(segment.begin() + i));
1206  }
1207  }
1208 };
1209 
1210 /*
1211  HIP generic tile implementations
1212 */
1213 template<typename SEGMENT, typename IndexMapper>
1215  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
1216  kernel_sync_requirement::none,
1217  IndexMapper>,
1218  SEGMENT>
1219 {
1220 
1221  using diff_t = typename std::iterator_traits<
1222  typename SEGMENT::iterator>::difference_type;
1223 
1224  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1225  static RAJA_INLINE RAJA_DEVICE void exec(
1227  TILE_T tile_size,
1228  SEGMENT const& segment,
1229  BODY const& body)
1230  {
1231  const diff_t i =
1232  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims()) *
1233  static_cast<diff_t>(tile_size);
1234 
1235  body(segment.slice(i, static_cast<diff_t>(tile_size)));
1236  }
1237 };
1238 
1239 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
1241  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
1242  kernel_sync_requirement::none,
1243  IndexMapper0,
1244  IndexMapper1>,
1245  SEGMENT>
1246 {
1247 
1248  using diff_t = typename std::iterator_traits<
1249  typename SEGMENT::iterator>::difference_type;
1250 
1251  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1252  static RAJA_INLINE RAJA_DEVICE void exec(
1254  TILE_T tile_size0,
1255  TILE_T tile_size1,
1256  SEGMENT const& segment0,
1257  SEGMENT const& segment1,
1258  BODY const& body)
1259  {
1260  const diff_t i0 =
1261  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims()) *
1262  static_cast<diff_t>(tile_size0);
1263  const diff_t i1 =
1264  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims()) *
1265  static_cast<diff_t>(tile_size1);
1266 
1267  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1268  segment1.slice(i1, static_cast<diff_t>(tile_size1)));
1269  }
1270 };
1271 
1272 template<typename SEGMENT,
1273  typename IndexMapper0,
1274  typename IndexMapper1,
1275  typename IndexMapper2>
1277  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
1278  kernel_sync_requirement::none,
1279  IndexMapper0,
1280  IndexMapper1,
1281  IndexMapper2>,
1282  SEGMENT>
1283 {
1284 
1285  using diff_t = typename std::iterator_traits<
1286  typename SEGMENT::iterator>::difference_type;
1287 
1288  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1289  static RAJA_INLINE RAJA_DEVICE void exec(
1291  TILE_T tile_size0,
1292  TILE_T tile_size1,
1293  TILE_T tile_size2,
1294  SEGMENT const& segment0,
1295  SEGMENT const& segment1,
1296  SEGMENT const& segment2,
1297  BODY const& body)
1298  {
1299  const diff_t i0 =
1300  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims()) *
1301  static_cast<diff_t>(tile_size0);
1302  const diff_t i1 =
1303  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims()) *
1304  static_cast<diff_t>(tile_size1);
1305  const diff_t i2 =
1306  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims()) *
1307  static_cast<diff_t>(tile_size2);
1308 
1309  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1310  segment1.slice(i1, static_cast<diff_t>(tile_size1)),
1311  segment2.slice(i2, static_cast<diff_t>(tile_size2)));
1312  }
1313 };
1314 
1315 template<typename SEGMENT, typename IndexMapper>
1317  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
1318  kernel_sync_requirement::none,
1319  IndexMapper>,
1320  SEGMENT>
1321 {
1322 
1323  using diff_t = typename std::iterator_traits<
1324  typename SEGMENT::iterator>::difference_type;
1325 
1326  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1327  static RAJA_INLINE RAJA_DEVICE void exec(
1329  TILE_T tile_size,
1330  SEGMENT const& segment,
1331  BODY const& body)
1332  {
1333  const diff_t len = segment.end() - segment.begin();
1334  const diff_t i =
1335  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims()) *
1336  static_cast<diff_t>(tile_size);
1337 
1338  if (i < len)
1339  {
1340  body(segment.slice(i, static_cast<diff_t>(tile_size)));
1341  }
1342  }
1343 };
1344 
1345 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
1347  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
1348  kernel_sync_requirement::none,
1349  IndexMapper0,
1350  IndexMapper1>,
1351  SEGMENT>
1352 {
1353 
1354  using diff_t = typename std::iterator_traits<
1355  typename SEGMENT::iterator>::difference_type;
1356 
1357  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1358  static RAJA_INLINE RAJA_DEVICE void exec(
1360  TILE_T tile_size0,
1361  TILE_T tile_size1,
1362  SEGMENT const& segment0,
1363  SEGMENT const& segment1,
1364  BODY const& body)
1365  {
1366  const diff_t len0 = segment0.end() - segment0.begin();
1367  const diff_t len1 = segment1.end() - segment1.begin();
1368 
1369  const diff_t i0 =
1370  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims()) *
1371  static_cast<diff_t>(tile_size0);
1372  const diff_t i1 =
1373  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims()) *
1374  static_cast<diff_t>(tile_size1);
1375 
1376  if (i0 < len0 && i1 < len1)
1377  {
1378  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1379  segment1.slice(i1, static_cast<diff_t>(tile_size1)));
1380  }
1381  }
1382 };
1383 
1384 template<typename SEGMENT,
1385  typename IndexMapper0,
1386  typename IndexMapper1,
1387  typename IndexMapper2>
1389  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
1390  kernel_sync_requirement::none,
1391  IndexMapper0,
1392  IndexMapper1,
1393  IndexMapper2>,
1394  SEGMENT>
1395 {
1396 
1397  using diff_t = typename std::iterator_traits<
1398  typename SEGMENT::iterator>::difference_type;
1399 
1400  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1401  static RAJA_INLINE RAJA_DEVICE void exec(
1403  TILE_T tile_size0,
1404  TILE_T tile_size1,
1405  TILE_T tile_size2,
1406  SEGMENT const& segment0,
1407  SEGMENT const& segment1,
1408  SEGMENT const& segment2,
1409  BODY const& body)
1410  {
1411  const diff_t len0 = segment0.end() - segment0.begin();
1412  const diff_t len1 = segment1.end() - segment1.begin();
1413  const diff_t len2 = segment2.end() - segment2.begin();
1414 
1415  const diff_t i0 =
1416  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims()) *
1417  static_cast<diff_t>(tile_size0);
1418  const diff_t i1 =
1419  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims()) *
1420  static_cast<diff_t>(tile_size1);
1421  const diff_t i2 =
1422  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims()) *
1423  static_cast<diff_t>(tile_size2);
1424 
1425  if (i0 < len0 && i1 < len1 && i2 < len2)
1426  {
1427  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1428  segment1.slice(i1, static_cast<diff_t>(tile_size1)),
1429  segment2.slice(i2, static_cast<diff_t>(tile_size2)));
1430  }
1431  }
1432 };
1433 
1434 template<typename SEGMENT, typename IndexMapper>
1436  RAJA::policy::hip::hip_indexer<
1437  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1438  kernel_sync_requirement::none,
1439  IndexMapper>,
1440  SEGMENT>
1441 {
1442 
1443  using diff_t = typename std::iterator_traits<
1444  typename SEGMENT::iterator>::difference_type;
1445 
1446  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1447  static RAJA_INLINE RAJA_DEVICE void exec(
1449  TILE_T tile_size,
1450  SEGMENT const& segment,
1451  BODY const& body)
1452  {
1453  const diff_t len = segment.end() - segment.begin();
1454  const diff_t i_init =
1455  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims()) *
1456  static_cast<diff_t>(tile_size);
1457  const diff_t i_stride =
1458  IndexMapper::template size<diff_t>(ctx.get_indices_and_dims()) *
1459  static_cast<diff_t>(tile_size);
1460 
1461  for (diff_t i = i_init; i < len; i += i_stride)
1462  {
1463  body(segment.slice(i, static_cast<diff_t>(tile_size)));
1464  }
1465  }
1466 };
1467 
1468 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
1470  RAJA::policy::hip::hip_indexer<
1471  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1472  kernel_sync_requirement::none,
1473  IndexMapper0,
1474  IndexMapper1>,
1475  SEGMENT>
1476 {
1477 
1478  using diff_t = typename std::iterator_traits<
1479  typename SEGMENT::iterator>::difference_type;
1480 
1481  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1482  static RAJA_INLINE RAJA_DEVICE void exec(
1484  TILE_T tile_size0,
1485  TILE_T tile_size1,
1486  SEGMENT const& segment0,
1487  SEGMENT const& segment1,
1488  BODY const& body)
1489  {
1490  const diff_t len0 = segment0.end() - segment0.begin();
1491  const diff_t len1 = segment1.end() - segment1.begin();
1492 
1493  const diff_t i0_init =
1494  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims()) *
1495  static_cast<diff_t>(tile_size0);
1496  const diff_t i1_init =
1497  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims()) *
1498  static_cast<diff_t>(tile_size1);
1499 
1500  const diff_t i0_stride =
1501  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims()) *
1502  static_cast<diff_t>(tile_size0);
1503  const diff_t i1_stride =
1504  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims()) *
1505  static_cast<diff_t>(tile_size1);
1506 
1507  for (diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
1508  {
1509  for (diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
1510  {
1511  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1512  segment1.slice(i1, static_cast<diff_t>(tile_size1)));
1513  }
1514  }
1515  }
1516 };
1517 
1518 template<typename SEGMENT,
1519  typename IndexMapper0,
1520  typename IndexMapper1,
1521  typename IndexMapper2>
1523  RAJA::policy::hip::hip_indexer<
1524  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1525  kernel_sync_requirement::none,
1526  IndexMapper0,
1527  IndexMapper1,
1528  IndexMapper2>,
1529  SEGMENT>
1530 {
1531 
1532  using diff_t = typename std::iterator_traits<
1533  typename SEGMENT::iterator>::difference_type;
1534 
1535  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1536  static RAJA_INLINE RAJA_DEVICE void exec(
1538  TILE_T tile_size0,
1539  TILE_T tile_size1,
1540  TILE_T tile_size2,
1541  SEGMENT const& segment0,
1542  SEGMENT const& segment1,
1543  SEGMENT const& segment2,
1544  BODY const& body)
1545  {
1546  const diff_t len0 = segment0.end() - segment0.begin();
1547  const diff_t len1 = segment1.end() - segment1.begin();
1548  const diff_t len2 = segment2.end() - segment2.begin();
1549 
1550  const diff_t i0_init =
1551  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims()) *
1552  static_cast<diff_t>(tile_size0);
1553  const diff_t i1_init =
1554  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims()) *
1555  static_cast<diff_t>(tile_size1);
1556  const diff_t i2_init =
1557  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims()) *
1558  static_cast<diff_t>(tile_size2);
1559 
1560  const diff_t i0_stride =
1561  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims()) *
1562  static_cast<diff_t>(tile_size0);
1563  const diff_t i1_stride =
1564  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims()) *
1565  static_cast<diff_t>(tile_size1);
1566  const diff_t i2_stride =
1567  IndexMapper2::template size<diff_t>(ctx.get_indices_and_dims()) *
1568  static_cast<diff_t>(tile_size2);
1569 
1570  for (diff_t i0 = i0_init; i0 < len0; i0 += i0_stride)
1571  {
1572  for (diff_t i1 = i1_init; i1 < len1; i1 += i1_stride)
1573  {
1574  for (diff_t i2 = i2_init; i2 < len2; i2 += i2_stride)
1575  {
1576  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1577  segment1.slice(i1, static_cast<diff_t>(tile_size1)),
1578  segment2.slice(i2, static_cast<diff_t>(tile_size2)));
1579  }
1580  }
1581  }
1582  }
1583 };
1584 
1585 /*
1586  HIP generic tile_tcount implementations
1587 */
1588 template<typename SEGMENT, typename IndexMapper>
1590  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
1591  kernel_sync_requirement::none,
1592  IndexMapper>,
1593  SEGMENT>
1594 {
1595 
1596  using diff_t = typename std::iterator_traits<
1597  typename SEGMENT::iterator>::difference_type;
1598 
1599  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1600  static RAJA_INLINE RAJA_DEVICE void exec(
1602  TILE_T tile_size,
1603  SEGMENT const& segment,
1604  BODY const& body)
1605  {
1606  const diff_t t =
1607  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
1608  const diff_t i = t * static_cast<diff_t>(tile_size);
1609 
1610  body(segment.slice(i, static_cast<diff_t>(tile_size)), t);
1611  }
1612 };
1613 
1614 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
1616  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
1617  kernel_sync_requirement::none,
1618  IndexMapper0,
1619  IndexMapper1>,
1620  SEGMENT>
1621 {
1622 
1623  using diff_t = typename std::iterator_traits<
1624  typename SEGMENT::iterator>::difference_type;
1625 
1626  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1627  static RAJA_INLINE RAJA_DEVICE void exec(
1629  TILE_T tile_size0,
1630  TILE_T tile_size1,
1631  SEGMENT const& segment0,
1632  SEGMENT const& segment1,
1633  BODY const& body)
1634  {
1635  const diff_t t0 =
1636  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1637  const diff_t t1 =
1638  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1639 
1640  const diff_t i0 = t0 * static_cast<diff_t>(tile_size0);
1641  const diff_t i1 = t1 * static_cast<diff_t>(tile_size1);
1642 
1643  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1644  segment1.slice(i1, static_cast<diff_t>(tile_size1)), t0, t1);
1645  }
1646 };
1647 
1648 template<typename SEGMENT,
1649  typename IndexMapper0,
1650  typename IndexMapper1,
1651  typename IndexMapper2>
1653  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::DirectUnchecked,
1654  kernel_sync_requirement::none,
1655  IndexMapper0,
1656  IndexMapper1,
1657  IndexMapper2>,
1658  SEGMENT>
1659 {
1660 
1661  using diff_t = typename std::iterator_traits<
1662  typename SEGMENT::iterator>::difference_type;
1663 
1664  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1665  static RAJA_INLINE RAJA_DEVICE void exec(
1667  TILE_T tile_size0,
1668  TILE_T tile_size1,
1669  TILE_T tile_size2,
1670  SEGMENT const& segment0,
1671  SEGMENT const& segment1,
1672  SEGMENT const& segment2,
1673  BODY const& body)
1674  {
1675  const diff_t t0 =
1676  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1677  const diff_t t1 =
1678  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1679  const diff_t t2 =
1680  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
1681 
1682  const diff_t i0 = t0 * static_cast<diff_t>(tile_size0);
1683  const diff_t i1 = t1 * static_cast<diff_t>(tile_size1);
1684  const diff_t i2 = t2 * static_cast<diff_t>(tile_size2);
1685 
1686  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1687  segment1.slice(i1, static_cast<diff_t>(tile_size1)),
1688  segment2.slice(i2, static_cast<diff_t>(tile_size2)), t0, t1, t2);
1689  }
1690 };
1691 
1692 template<typename SEGMENT, typename IndexMapper>
1694  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
1695  kernel_sync_requirement::none,
1696  IndexMapper>,
1697  SEGMENT>
1698 {
1699 
1700  using diff_t = typename std::iterator_traits<
1701  typename SEGMENT::iterator>::difference_type;
1702 
1703  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1704  static RAJA_INLINE RAJA_DEVICE void exec(
1706  TILE_T tile_size,
1707  SEGMENT const& segment,
1708  BODY const& body)
1709  {
1710  const diff_t len = segment.end() - segment.begin();
1711  const diff_t t =
1712  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
1713  const diff_t i = t * static_cast<diff_t>(tile_size);
1714 
1715  if (i < len)
1716  {
1717  body(segment.slice(i, static_cast<diff_t>(tile_size)), t);
1718  }
1719  }
1720 };
1721 
1722 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
1724  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
1725  kernel_sync_requirement::none,
1726  IndexMapper0,
1727  IndexMapper1>,
1728  SEGMENT>
1729 {
1730 
1731  using diff_t = typename std::iterator_traits<
1732  typename SEGMENT::iterator>::difference_type;
1733 
1734  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1735  static RAJA_INLINE RAJA_DEVICE void exec(
1737  TILE_T tile_size0,
1738  TILE_T tile_size1,
1739  SEGMENT const& segment0,
1740  SEGMENT const& segment1,
1741  BODY const& body)
1742  {
1743  const diff_t len0 = segment0.end() - segment0.begin();
1744  const diff_t len1 = segment1.end() - segment1.begin();
1745 
1746  const diff_t t0 =
1747  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1748  const diff_t t1 =
1749  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1750 
1751  const diff_t i0 = t0 * static_cast<diff_t>(tile_size0);
1752  const diff_t i1 = t1 * static_cast<diff_t>(tile_size1);
1753 
1754  if (i0 < len0 && i1 < len1)
1755  {
1756  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1757  segment1.slice(i1, static_cast<diff_t>(tile_size1)), t0, t1);
1758  }
1759  }
1760 };
1761 
1762 template<typename SEGMENT,
1763  typename IndexMapper0,
1764  typename IndexMapper1,
1765  typename IndexMapper2>
1767  RAJA::policy::hip::hip_indexer<RAJA::iteration_mapping::Direct,
1768  kernel_sync_requirement::none,
1769  IndexMapper0,
1770  IndexMapper1,
1771  IndexMapper2>,
1772  SEGMENT>
1773 {
1774 
1775  using diff_t = typename std::iterator_traits<
1776  typename SEGMENT::iterator>::difference_type;
1777 
1778  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1779  static RAJA_INLINE RAJA_DEVICE void exec(
1781  TILE_T tile_size0,
1782  TILE_T tile_size1,
1783  TILE_T tile_size2,
1784  SEGMENT const& segment0,
1785  SEGMENT const& segment1,
1786  SEGMENT const& segment2,
1787  BODY const& body)
1788  {
1789  const diff_t len0 = segment0.end() - segment0.begin();
1790  const diff_t len1 = segment1.end() - segment1.begin();
1791  const diff_t len2 = segment2.end() - segment2.begin();
1792 
1793  const diff_t t0 =
1794  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1795  const diff_t t1 =
1796  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1797  const diff_t t2 =
1798  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
1799 
1800  const diff_t i0 = t0 * static_cast<diff_t>(tile_size0);
1801  const diff_t i1 = t1 * static_cast<diff_t>(tile_size1);
1802  const diff_t i2 = t2 * static_cast<diff_t>(tile_size2);
1803 
1804  if (i0 < len0 && i1 < len1 && i2 < len2)
1805  {
1806  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1807  segment1.slice(i1, static_cast<diff_t>(tile_size1)),
1808  segment2.slice(i2, static_cast<diff_t>(tile_size2)), t0, t1, t2);
1809  }
1810  }
1811 };
1812 
1813 template<typename SEGMENT, typename IndexMapper>
1815  RAJA::policy::hip::hip_indexer<
1816  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1817  kernel_sync_requirement::none,
1818  IndexMapper>,
1819  SEGMENT>
1820 {
1821 
1822  using diff_t = typename std::iterator_traits<
1823  typename SEGMENT::iterator>::difference_type;
1824 
1825  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1826  static RAJA_INLINE RAJA_DEVICE void exec(
1828  TILE_T tile_size,
1829  SEGMENT const& segment,
1830  BODY const& body)
1831  {
1832  const diff_t len = segment.end() - segment.begin();
1833  const diff_t t_init =
1834  IndexMapper::template index<diff_t>(ctx.get_indices_and_dims());
1835  const diff_t i_init = t_init * static_cast<diff_t>(tile_size);
1836  const diff_t t_stride =
1837  IndexMapper::template size<diff_t>(ctx.get_indices_and_dims());
1838  const diff_t i_stride = t_stride * static_cast<diff_t>(tile_size);
1839 
1840  for (diff_t i = i_init, t = t_init; i < len; i += i_stride, t += t_stride)
1841  {
1842  body(segment.slice(i, static_cast<diff_t>(tile_size)), t);
1843  }
1844  }
1845 };
1846 
1847 template<typename SEGMENT, typename IndexMapper0, typename IndexMapper1>
1849  RAJA::policy::hip::hip_indexer<
1850  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1851  kernel_sync_requirement::none,
1852  IndexMapper0,
1853  IndexMapper1>,
1854  SEGMENT>
1855 {
1856 
1857  using diff_t = typename std::iterator_traits<
1858  typename SEGMENT::iterator>::difference_type;
1859 
1860  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1861  static RAJA_INLINE RAJA_DEVICE void exec(
1863  TILE_T tile_size0,
1864  TILE_T tile_size1,
1865  SEGMENT const& segment0,
1866  SEGMENT const& segment1,
1867  BODY const& body)
1868  {
1869  const diff_t len0 = segment0.end() - segment0.begin();
1870  const diff_t len1 = segment1.end() - segment1.begin();
1871 
1872  const diff_t t0_init =
1873  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1874  const diff_t t1_init =
1875  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1876 
1877  const diff_t i0_init = t0_init * static_cast<diff_t>(tile_size0);
1878  const diff_t i1_init = t1_init * static_cast<diff_t>(tile_size1);
1879 
1880  const diff_t t0_stride =
1881  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
1882  const diff_t t1_stride =
1883  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
1884 
1885  const diff_t i0_stride = t0_stride * static_cast<diff_t>(tile_size0);
1886  const diff_t i1_stride = t1_stride * static_cast<diff_t>(tile_size1);
1887 
1888  for (diff_t i0 = i0_init, t0 = t0_init; i0 < len0;
1889  i0 += i0_stride, t0 += t0_stride)
1890  {
1891  for (diff_t i1 = i1_init, t1 = t1_init; i1 < len1;
1892  i1 += i1_stride, t1 += t1_stride)
1893  {
1894  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1895  segment1.slice(i1, static_cast<diff_t>(tile_size1)), t0, t1);
1896  }
1897  }
1898  }
1899 };
1900 
1901 template<typename SEGMENT,
1902  typename IndexMapper0,
1903  typename IndexMapper1,
1904  typename IndexMapper2>
1906  RAJA::policy::hip::hip_indexer<
1907  RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
1908  kernel_sync_requirement::none,
1909  IndexMapper0,
1910  IndexMapper1,
1911  IndexMapper2>,
1912  SEGMENT>
1913 {
1914 
1915  using diff_t = typename std::iterator_traits<
1916  typename SEGMENT::iterator>::difference_type;
1917 
1918  template<typename LaunchContextPolicy, typename TILE_T, typename BODY>
1919  static RAJA_INLINE RAJA_DEVICE void exec(
1921  TILE_T tile_size0,
1922  TILE_T tile_size1,
1923  TILE_T tile_size2,
1924  SEGMENT const& segment0,
1925  SEGMENT const& segment1,
1926  SEGMENT const& segment2,
1927  BODY const& body)
1928  {
1929  const diff_t len0 = segment0.end() - segment0.begin();
1930  const diff_t len1 = segment1.end() - segment1.begin();
1931  const diff_t len2 = segment2.end() - segment2.begin();
1932 
1933  const diff_t t0_init =
1934  IndexMapper0::template index<diff_t>(ctx.get_indices_and_dims());
1935  const diff_t t1_init =
1936  IndexMapper1::template index<diff_t>(ctx.get_indices_and_dims());
1937  const diff_t t2_init =
1938  IndexMapper2::template index<diff_t>(ctx.get_indices_and_dims());
1939 
1940  const diff_t i0_init = t0_init * static_cast<diff_t>(tile_size0);
1941  const diff_t i1_init = t1_init * static_cast<diff_t>(tile_size1);
1942  const diff_t i2_init = t2_init * static_cast<diff_t>(tile_size2);
1943 
1944  const diff_t t0_stride =
1945  IndexMapper0::template size<diff_t>(ctx.get_indices_and_dims());
1946  const diff_t t1_stride =
1947  IndexMapper1::template size<diff_t>(ctx.get_indices_and_dims());
1948  const diff_t t2_stride =
1949  IndexMapper2::template size<diff_t>(ctx.get_indices_and_dims());
1950 
1951  const diff_t i0_stride = t0_stride * static_cast<diff_t>(tile_size0);
1952  const diff_t i1_stride = t1_stride * static_cast<diff_t>(tile_size1);
1953  const diff_t i2_stride = t2_stride * static_cast<diff_t>(tile_size2);
1954 
1955  for (diff_t i0 = i0_init, t0 = t0_init; i0 < len0;
1956  i0 += i0_stride, t0 += t0_stride)
1957  {
1958  for (diff_t i1 = i1_init, t1 = t1_init; i1 < len1;
1959  i1 += i1_stride, t1 += t1_stride)
1960  {
1961  for (diff_t i2 = i2_init, t2 = t2_init; i2 < len2;
1962  i2 += i2_stride, t2 += t2_stride)
1963  {
1964  body(segment0.slice(i0, static_cast<diff_t>(tile_size0)),
1965  segment1.slice(i1, static_cast<diff_t>(tile_size1)),
1966  segment2.slice(i2, static_cast<diff_t>(tile_size2)), t0, t1, t2);
1967  }
1968  }
1969  }
1970  }
1971 };
1972 
1973 } // namespace RAJA
1974 #endif
Header file defining prototypes for routines used to manage memory for HIP reductions and other opera...
Definition: launch_core.hpp:192
RAJA_HOST_DEVICE RAJA_INLINE indices_and_dims_t const & get_indices_and_dims() const
Definition: launch.hpp:52
RAJA_HOST_DEVICE RAJA_INLINE LaunchContextT()
Definition: launch.hpp:47
Definition: launch_context_policy.hpp:30
Header file containing RAJA HIP policy definitions.
RAJA header file containing the core components of RAJA::launch.
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
#define RAJA_DEVICE
Definition: macros.hpp:66
Args args
Definition: WorkRunner.hpp:212
constexpr RAJA_HOST_DEVICE auto invoke_body(Params &&params, Fn &&f, Ts &&... extra)
Definition: forall.hpp:598
RAJA_HOST_DEVICE auto thread_privatize(const T &item) -> Privatizer< T >
Create a private copy of the argument to be stored on the current thread's stack in a class of the Pr...
Definition: privatizer.hpp:88
Definition: AlignedRangeIndexSetBuilders.cpp:35
LaunchContextType ctx
Definition: launch.hpp:185
__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__ void launch_new_reduce_global_fcn_fixed(const RAJA_CUDA_GRID_CONSTANT BODY body_in
ReduceParams reduce_params
Definition: launch.hpp:173
__global__ void launch_new_reduce_global_fcn(const RAJA_CUDA_GRID_CONSTANT BODY body_in, ReduceParams reduce_params)
Definition: launch.hpp:61
void launch(LaunchParams const &launch_params, ReduceParams &&... rest_of_launch_args)
Definition: launch_core.hpp:268
auto & body
Definition: launch.hpp:177
__shared__ char raja_shmem_ptr[]
Definition: launch.hpp:174
typename RAJA::detail::launch_context_type< BODY >::type LaunchContextType
Definition: launch.hpp:183
auto privatizer
Definition: launch.hpp:176
Header file containing utility methods used in HIP operations.
Header file for RAJA resource definitions.
static concepts::enable_if_t< resources::EventProxy< resources::Resource >, RAJA::expt::type_traits::is_ForallParamPack< ReduceParams > > exec(RAJA::resources::Resource res, const LaunchParams &launch_params, BODY_IN &&body_in, ReduceParams &launch_reducers)
Definition: launch.hpp:92
static concepts::enable_if_t< resources::EventProxy< resources::Resource >, RAJA::expt::type_traits::is_ForallParamPack< ReduceParams > > exec(RAJA::resources::Resource res, const LaunchParams &launch_params, BODY_IN &&body_in, ReduceParams &launch_reducers)
Definition: launch.hpp:196
Definition: launch_core.hpp:263
Definition: launch_core.hpp:163
size_t shared_mem_size
Definition: launch_core.hpp:167
Teams teams
Definition: launch_core.hpp:165
Threads threads
Definition: launch_core.hpp:166
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:962
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1181
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1044
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:997
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1084
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1140
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:479
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:342
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:440
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:282
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:285
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:310
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:370
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:373
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:511
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:399
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:402
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:560
Definition: launch_core.hpp:480
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:734
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:772
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:892
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:674
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:843
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:702
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:705
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:614
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:617
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:642
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:811
Definition: launch_core.hpp:483
int value[3]
Definition: launch_core.hpp:99
int value[3]
Definition: launch_core.hpp:124
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1355
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1358
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1447
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1536
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1324
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1327
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1289
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1482
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1222
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1225
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1401
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1252
Definition: launch_core.hpp:579
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1600
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1597
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1861
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1627
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:1735
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1919
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1704
typename std::iterator_traits< typename SEGMENT::iterator >::difference_type diff_t
Definition: launch.hpp:1701
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1665
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size0, TILE_T tile_size1, TILE_T tile_size2, SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:1779
static RAJA_INLINE RAJA_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:1826
Definition: launch_core.hpp:582
static constexpr void parampack_resolve(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:304
static constexpr void parampack_init(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:269
static RAJA_HOST_DEVICE constexpr void parampack_combine(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:286
Definition: TypeTraits.hpp:59