RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
forall.hpp
Go to the documentation of this file.
1 
15 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
16 // Copyright (c) Lawrence Livermore National Security, LLC and other
17 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
18 // files for dates and other details. No copyright assignment is required
19 // to contribute to RAJA.
20 //
21 // SPDX-License-Identifier: (BSD-3-Clause)
22 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
23 
24 #ifndef RAJA_forall_cuda_HPP
25 #define RAJA_forall_cuda_HPP
26 
27 #include "RAJA/config.hpp"
28 
29 #if defined(RAJA_ENABLE_CUDA)
30 
31 #include <algorithm>
32 
33 #include "RAJA/pattern/forall.hpp"
34 
36 
37 #include "RAJA/util/macros.hpp"
38 #include "RAJA/util/types.hpp"
39 
43 
44 #include "RAJA/index/IndexSet.hpp"
45 
46 #include "RAJA/util/resource.hpp"
47 
48 namespace RAJA
49 {
50 namespace policy
51 {
52 namespace cuda
53 {
54 
55 namespace impl
56 {
57 
73 template<typename IterationMapping,
74  typename IterationGetter,
75  typename Concretizer,
76  typename UniqueMarker>
77 struct ForallDimensionCalculator;
78 
79 // The general cases handle fixed BLOCK_SIZE > 0 and/or GRID_SIZE > 0
80 // there are specializations for named_usage::unspecified
81 // but named_usage::ignored is not supported so no specializations are provided
82 // and static_asserts in the general case catch unsupported values
83 template<named_dim dim,
84  int BLOCK_SIZE,
85  int GRID_SIZE,
86  typename Concretizer,
87  typename UniqueMarker>
88 struct ForallDimensionCalculator<
90  ::RAJA::cuda::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>,
91  Concretizer,
92  UniqueMarker>
93 {
94  static_assert(
95  BLOCK_SIZE > 0,
96  "block size must be > 0 or named_usage::unspecified with forall");
97  static_assert(
98  GRID_SIZE > 0,
99  "grid size must be > 0 or named_usage::unspecified with forall");
100 
101  using IndexGetter = ::RAJA::cuda::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
102 
103  template<typename IdxT>
104  static void set_dimensions(internal::CudaDims& dims,
105  IdxT len,
106  const void* RAJA_UNUSED_ARG(func),
107  size_t RAJA_UNUSED_ARG(dynamic_shmem_size))
108  {
109  const IdxT block_size = static_cast<IdxT>(IndexGetter::block_size);
110  const IdxT grid_size = static_cast<IdxT>(IndexGetter::grid_size);
111 
112  if (len > (block_size * grid_size))
113  {
115  "len exceeds the size of the directly mapped index space");
116  }
117 
118  internal::set_cuda_dim<dim>(dims.threads,
119  static_cast<IdxT>(IndexGetter::block_size));
120  internal::set_cuda_dim<dim>(dims.blocks,
121  static_cast<IdxT>(IndexGetter::grid_size));
122  }
123 };
124 
125 template<named_dim dim,
126  int GRID_SIZE,
127  typename Concretizer,
128  typename UniqueMarker>
129 struct ForallDimensionCalculator<
131  ::RAJA::cuda::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>,
132  Concretizer,
133  UniqueMarker>
134 {
135  static_assert(
136  GRID_SIZE > 0,
137  "grid size must be > 0 or named_usage::unspecified with forall");
138 
139  using IndexGetter =
140  ::RAJA::cuda::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
141 
142  template<typename IdxT>
143  static void set_dimensions(internal::CudaDims& dims,
144  IdxT len,
145  const void* func,
146  size_t dynamic_shmem_size)
147  {
148  ::RAJA::cuda::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
149  func, dynamic_shmem_size, len};
150 
151  const IdxT grid_size = static_cast<IdxT>(IndexGetter::grid_size);
152  const IdxT block_size = concretizer.get_block_size_to_fit_len(grid_size);
153 
154  if (block_size == IdxT(0))
155  {
157  "len exceeds the size of the directly mapped index space");
158  }
159 
160  internal::set_cuda_dim<dim>(dims.threads, block_size);
161  internal::set_cuda_dim<dim>(dims.blocks, grid_size);
162  }
163 };
164 
165 template<named_dim dim,
166  int BLOCK_SIZE,
167  typename Concretizer,
168  typename UniqueMarker>
169 struct ForallDimensionCalculator<
171  ::RAJA::cuda::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>,
172  Concretizer,
173  UniqueMarker>
174 {
175  static_assert(
176  BLOCK_SIZE > 0,
177  "block size must be > 0 or named_usage::unspecified with forall");
178 
179  using IndexGetter =
180  ::RAJA::cuda::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
181 
182  template<typename IdxT>
183  static void set_dimensions(internal::CudaDims& dims,
184  IdxT len,
185  const void* func,
186  size_t dynamic_shmem_size)
187  {
188  ::RAJA::cuda::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
189  func, dynamic_shmem_size, len};
190 
191  const IdxT block_size = static_cast<IdxT>(IndexGetter::block_size);
192  const IdxT grid_size = concretizer.get_grid_size_to_fit_len(block_size);
193 
194  internal::set_cuda_dim<dim>(dims.threads, block_size);
195  internal::set_cuda_dim<dim>(dims.blocks, grid_size);
196  }
197 };
198 
199 template<named_dim dim, typename Concretizer, typename UniqueMarker>
200 struct ForallDimensionCalculator<
202  ::RAJA::cuda::
203  IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>,
204  Concretizer,
205  UniqueMarker>
206 {
207  using IndexGetter = ::RAJA::cuda::
208  IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
209 
210  template<typename IdxT>
211  static void set_dimensions(internal::CudaDims& dims,
212  IdxT len,
213  const void* func,
214  size_t dynamic_shmem_size)
215  {
216  ::RAJA::cuda::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
217  func, dynamic_shmem_size, len};
218 
219  const auto sizes = concretizer.get_block_and_grid_size_to_fit_len();
220 
221  internal::set_cuda_dim<dim>(dims.threads, sizes.first);
222  internal::set_cuda_dim<dim>(dims.blocks, sizes.second);
223  }
224 };
225 
226 template<named_dim dim,
227  int BLOCK_SIZE,
228  int GRID_SIZE,
229  typename Concretizer,
230  typename UniqueMarker>
231 struct ForallDimensionCalculator<
232  ::RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
233  ::RAJA::cuda::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>,
234  Concretizer,
235  UniqueMarker>
236 {
237  static_assert(
238  BLOCK_SIZE > 0,
239  "block size must be > 0 or named_usage::unspecified with forall");
240  static_assert(
241  GRID_SIZE > 0,
242  "grid size must be > 0 or named_usage::unspecified with forall");
243 
244  using IndexGetter = ::RAJA::cuda::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
245 
246  template<typename IdxT>
247  static void set_dimensions(internal::CudaDims& dims,
248  IdxT RAJA_UNUSED_ARG(len),
249  const void* RAJA_UNUSED_ARG(func),
250  size_t RAJA_UNUSED_ARG(dynamic_shmem_size))
251  {
252  const IdxT block_size = static_cast<IdxT>(IndexGetter::block_size);
253  const IdxT grid_size = static_cast<IdxT>(IndexGetter::grid_size);
254 
255  internal::set_cuda_dim<dim>(dims.threads, block_size);
256  internal::set_cuda_dim<dim>(dims.blocks, grid_size);
257  }
258 };
259 
260 template<named_dim dim,
261  int GRID_SIZE,
262  typename Concretizer,
263  typename UniqueMarker>
264 struct ForallDimensionCalculator<
265  ::RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
266  ::RAJA::cuda::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>,
267  Concretizer,
268  UniqueMarker>
269 {
270  static_assert(
271  GRID_SIZE > 0,
272  "grid size must be > 0 or named_usage::unspecified with forall");
273 
274  using IndexGetter =
275  ::RAJA::cuda::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
276 
277  template<typename IdxT>
278  static void set_dimensions(internal::CudaDims& dims,
279  IdxT len,
280  const void* func,
281  size_t dynamic_shmem_size)
282  {
283  ::RAJA::cuda::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
284  func, dynamic_shmem_size, len};
285 
286  const IdxT grid_size = static_cast<IdxT>(IndexGetter::grid_size);
287  const IdxT block_size = concretizer.get_block_size_to_fit_device(grid_size);
288 
289  internal::set_cuda_dim<dim>(dims.threads, block_size);
290  internal::set_cuda_dim<dim>(dims.blocks, grid_size);
291  }
292 };
293 
294 template<named_dim dim,
295  int BLOCK_SIZE,
296  typename Concretizer,
297  typename UniqueMarker>
298 struct ForallDimensionCalculator<
299  ::RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
300  ::RAJA::cuda::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>,
301  Concretizer,
302  UniqueMarker>
303 {
304  static_assert(
305  BLOCK_SIZE > 0,
306  "block size must be > 0 or named_usage::unspecified with forall");
307 
308  using IndexGetter =
309  ::RAJA::cuda::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
310 
311  template<typename IdxT>
312  static void set_dimensions(internal::CudaDims& dims,
313  IdxT len,
314  const void* func,
315  size_t dynamic_shmem_size)
316  {
317  ::RAJA::cuda::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
318  func, dynamic_shmem_size, len};
319 
320  const IdxT block_size = static_cast<IdxT>(IndexGetter::block_size);
321  const IdxT grid_size = concretizer.get_grid_size_to_fit_device(block_size);
322 
323  internal::set_cuda_dim<dim>(dims.threads, block_size);
324  internal::set_cuda_dim<dim>(dims.blocks, grid_size);
325  }
326 };
327 
328 template<named_dim dim, typename Concretizer, typename UniqueMarker>
329 struct ForallDimensionCalculator<
330  ::RAJA::iteration_mapping::StridedLoop<named_usage::unspecified>,
331  ::RAJA::cuda::
332  IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>,
333  Concretizer,
334  UniqueMarker>
335 {
336  using IndexGetter = ::RAJA::cuda::
337  IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
338 
339  template<typename IdxT>
340  static void set_dimensions(internal::CudaDims& dims,
341  IdxT len,
342  const void* func,
343  size_t dynamic_shmem_size)
344  {
345  ::RAJA::cuda::ConcretizerImpl<IdxT, Concretizer, UniqueMarker> concretizer {
346  func, dynamic_shmem_size, len};
347 
348  const auto sizes = concretizer.get_block_and_grid_size_to_fit_device();
349 
350  internal::set_cuda_dim<dim>(dims.threads, sizes.first);
351  internal::set_cuda_dim<dim>(dims.blocks, sizes.second);
352  }
353 };
354 
355 //
357 //
358 // CUDA kernel templates.
359 //
361 //
362 
371 template<typename EXEC_POL,
372  size_t BlocksPerSM,
373  typename Iterator,
374  typename LOOP_BODY,
375  typename IndexType,
376  typename ForallParam,
377  typename IterationMapping = typename EXEC_POL::IterationMapping,
378  typename IterationGetter = typename EXEC_POL::IterationGetter,
379  std::enable_if_t<std::is_base_of<iteration_mapping::DirectBase,
380  IterationMapping>::value &&
381  (IterationGetter::block_size > 0),
382  size_t> BlockSize = IterationGetter::block_size>
383 __launch_bounds__(BlockSize, BlocksPerSM) __global__
384  void forallp_cuda_kernel(const RAJA_CUDA_GRID_CONSTANT LOOP_BODY loop_body,
385  const RAJA_CUDA_GRID_CONSTANT Iterator idx,
386  const RAJA_CUDA_GRID_CONSTANT IndexType length,
387  ForallParam f_params)
388 {
390  auto privatizer = thread_privatize(loop_body);
391  auto& body = privatizer.get_priv();
392  auto ii = IterationGetter::template index<IndexType>();
393 
394  if (ii < length)
395  {
396  RAJA::expt::invoke_body(f_params, body, idx[ii]);
397  }
398 
400 }
401 
403 template<typename EXEC_POL,
404  size_t BlocksPerSM,
405  typename Iterator,
406  typename LOOP_BODY,
407  typename IndexType,
408  typename ForallParam,
409  typename IterationMapping = typename EXEC_POL::IterationMapping,
410  typename IterationGetter = typename EXEC_POL::IterationGetter,
411  std::enable_if_t<std::is_base_of<iteration_mapping::DirectBase,
412  IterationMapping>::value &&
413  (IterationGetter::block_size <= 0),
414  size_t> RAJA_UNUSED_ARG(BlockSize) = 0>
415 __global__ void forallp_cuda_kernel(
416  const RAJA_CUDA_GRID_CONSTANT LOOP_BODY loop_body,
417  const RAJA_CUDA_GRID_CONSTANT Iterator idx,
418  const RAJA_CUDA_GRID_CONSTANT IndexType length,
419  ForallParam f_params)
420 {
422  auto privatizer = thread_privatize(loop_body);
423  auto& body = privatizer.get_priv();
424  auto ii = IterationGetter::template index<IndexType>();
425 
426  if (ii < length)
427  {
428  RAJA::expt::invoke_body(f_params, body, idx[ii]);
429  }
430 
432 }
433 
435 template<
436  typename EXEC_POL,
437  size_t BlocksPerSM,
438  typename Iterator,
439  typename LOOP_BODY,
440  typename IndexType,
441  typename ForallParam,
442  typename IterationMapping = typename EXEC_POL::IterationMapping,
443  typename IterationGetter = typename EXEC_POL::IterationGetter,
444  std::enable_if_t<std::is_base_of<iteration_mapping::StridedLoopBase,
445  IterationMapping>::value &&
446  std::is_base_of<iteration_mapping::UnsizedLoopBase,
447  IterationMapping>::value &&
448  (IterationGetter::block_size > 0),
449  size_t> BlockSize = IterationGetter::block_size>
450 __launch_bounds__(BlockSize, BlocksPerSM) __global__
451  void forallp_cuda_kernel(const RAJA_CUDA_GRID_CONSTANT LOOP_BODY loop_body,
452  const RAJA_CUDA_GRID_CONSTANT Iterator idx,
453  const RAJA_CUDA_GRID_CONSTANT IndexType length,
454  ForallParam f_params)
455 {
457  auto privatizer = thread_privatize(loop_body);
458  auto& body = privatizer.get_priv();
459 
460  for (auto ii = IterationGetter::template index<IndexType>(); ii < length;
461  ii += IterationGetter::template size<IndexType>())
462  {
463  RAJA::expt::invoke_body(f_params, body, idx[ii]);
464  }
465 
467 }
468 
470 template<
471  typename EXEC_POL,
472  size_t BlocksPerSM,
473  typename Iterator,
474  typename LOOP_BODY,
475  typename IndexType,
476  typename ForallParam,
477  typename IterationMapping = typename EXEC_POL::IterationMapping,
478  typename IterationGetter = typename EXEC_POL::IterationGetter,
479  std::enable_if_t<std::is_base_of<iteration_mapping::StridedLoopBase,
480  IterationMapping>::value &&
481  std::is_base_of<iteration_mapping::UnsizedLoopBase,
482  IterationMapping>::value &&
483  (IterationGetter::block_size <= 0),
484  size_t> RAJA_UNUSED_ARG(BlockSize) = 0>
485 __global__ void forallp_cuda_kernel(
486  const RAJA_CUDA_GRID_CONSTANT LOOP_BODY loop_body,
487  const RAJA_CUDA_GRID_CONSTANT Iterator idx,
488  const RAJA_CUDA_GRID_CONSTANT IndexType length,
489  ForallParam f_params)
490 {
492  auto privatizer = thread_privatize(loop_body);
493  auto& body = privatizer.get_priv();
494 
495  for (auto ii = IterationGetter::template index<IndexType>(); ii < length;
496  ii += IterationGetter::template size<IndexType>())
497  {
498  RAJA::expt::invoke_body(f_params, body, idx[ii]);
499  }
500 
502 }
503 
504 } // namespace impl
505 
506 //
508 //
509 // Function templates for CUDA execution over iterables.
510 //
512 //
513 
514 template<typename Iterable,
515  typename LoopBody,
516  typename IterationMapping,
517  typename IterationGetter,
518  typename Concretizer,
519  size_t BlocksPerSM,
520  bool Async,
521  typename ForallParam>
522 RAJA_INLINE concepts::enable_if_t<
523  resources::EventProxy<resources::Cuda>,
525 forall_impl(resources::Cuda cuda_res,
526  ::RAJA::policy::cuda::cuda_exec_explicit<IterationMapping,
527  IterationGetter,
528  Concretizer,
529  BlocksPerSM,
530  Async> const& pol,
531  Iterable&& iter,
532  LoopBody&& loop_body,
533  ForallParam f_params)
534 {
535  using Iterator = camp::decay<decltype(std::begin(iter))>;
536  using LOOP_BODY = camp::decay<LoopBody>;
537  using IndexType =
538  camp::decay<decltype(std::distance(std::begin(iter), std::end(iter)))>;
539  using EXEC_POL = camp::decay<decltype(pol)>;
540  using UniqueMarker =
541  ::camp::list<IterationMapping, IterationGetter, camp::num<BlocksPerSM>,
542  LOOP_BODY, Iterator, ForallParam>;
543  using DimensionCalculator =
544  impl::ForallDimensionCalculator<IterationMapping, IterationGetter,
545  Concretizer, UniqueMarker>;
546 
547  //
548  // Compute the requested iteration space size
549  //
550  Iterator begin = std::begin(iter);
551  Iterator end = std::end(iter);
552  IndexType len = std::distance(begin, end);
553 
554  // Only launch kernel if we have something to iterate over
555  if (len > 0)
556  {
557 
558  auto func = reinterpret_cast<const void*>(
559  &impl::forallp_cuda_kernel<EXEC_POL, BlocksPerSM, Iterator, LOOP_BODY,
560  IndexType, camp::decay<ForallParam>>);
561 
562  //
563  // Setup shared memory buffers
564  //
565  size_t shmem = 0;
566 
567  //
568  // Compute the kernel dimensions
569  //
570  internal::CudaDims dims(1);
571  DimensionCalculator::set_dimensions(dims, len, func, shmem);
572 
573 
574  RAJA::cuda::detail::cudaInfo launch_info;
575  launch_info.gridDim = dims.blocks;
576  launch_info.blockDim = dims.threads;
577  launch_info.res = cuda_res;
578 
579  {
580  RAJA::expt::ParamMultiplexer::parampack_init(pol, f_params, launch_info);
581 
582  //
583  // Privatize the loop_body, using make_launch_body to setup reductions
584  //
585  LOOP_BODY body = RAJA::cuda::make_launch_body(
586  func, dims.blocks, dims.threads, shmem, cuda_res,
587  std::forward<LoopBody>(loop_body));
588 
589  //
590  // Launch the kernels
591  //
592  void* args[] = {(void*)&body, (void*)&begin, (void*)&len,
593  (void*)&f_params};
594  RAJA::cuda::launch(func, dims.blocks, dims.threads, args, shmem, cuda_res,
595  Async);
596 
598  launch_info);
599  }
600  }
601 
602  return resources::EventProxy<resources::Cuda>(cuda_res);
603 }
604 
605 //
607 //
608 // The following function templates iterate over index set segments
609 // using the explicitly named segment iteration policy and execute
610 // segments as CUDA kernels.
611 //
613 //
614 
623 template<typename LoopBody,
624  typename IterationMapping,
625  typename IterationGetter,
626  typename Concretizer,
627  size_t BlocksPerSM,
628  bool Async,
629  typename... SegmentTypes>
630 RAJA_INLINE resources::EventProxy<resources::Cuda> forall_impl(
631  resources::Cuda r,
632  ExecPolicy<seq_segit,
633  ::RAJA::policy::cuda::cuda_exec_explicit<IterationMapping,
634  IterationGetter,
635  Concretizer,
636  BlocksPerSM,
637  Async>>,
638  const TypedIndexSet<SegmentTypes...>& iset,
639  LoopBody&& loop_body)
640 {
641  int num_seg = iset.getNumSegments();
642  for (int isi = 0; isi < num_seg; ++isi)
643  {
644  iset.segmentCall(
645  r, isi, detail::CallForall(),
646  ::RAJA::policy::cuda::cuda_exec_explicit<IterationMapping,
647  IterationGetter, Concretizer,
648  BlocksPerSM, true>(),
649  loop_body);
650  } // iterate over segments of index set
651 
652  if (!Async) RAJA::cuda::synchronize(r);
653  return resources::EventProxy<resources::Cuda>(r);
654 }
655 
656 } // namespace cuda
657 
658 } // namespace policy
659 
660 } // namespace RAJA
661 
662 #endif // closing endif for RAJA_ENABLE_CUDA guard
663 
664 #endif // closing endif for header file include guard
RAJA header file defining index set classes.
Header file defining prototypes for routines used to manage memory for CUDA reductions and other oper...
Header file containing RAJA CUDA policy definitions.
Header file for common RAJA internal macro definitions.
RAJA_HOST_DEVICE void RAJA_ABORT_OR_THROW(const char *str)
Definition: macros.hpp:143
#define RAJA_UNUSED_ARG(x)
Definition: macros.hpp:97
Args args
Definition: WorkRunner.hpp:212
value_type::device_call &[i_loop] iter
Definition: WorkRunner.hpp:216
constexpr RAJA_HOST_DEVICE auto invoke_body(Params &&params, Fn &&f, Ts &&... extra)
Definition: forall.hpp:598
RAJA_HOST_DEVICE auto thread_privatize(const T &item) -> Privatizer< T >
Create a private copy of the argument to be stored on the current thread's stack in a class of the Pr...
Definition: privatizer.hpp:88
seq_exec seq_segit
Definition: policy.hpp:83
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, expt::type_traits::is_ForallParamPack< ForallParam >, expt::type_traits::is_ForallParamPack_empty< ForallParam > > forall_impl(resources::Host host_res, const simd_exec &, Iterable &&iter, Func &&body, ForallParam)
Definition: forall.hpp:81
Definition: AlignedRangeIndexSetBuilders.cpp:35
named_dim
Definition: types.hpp:53
__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__ void launch_new_reduce_global_fcn_fixed(const RAJA_CUDA_GRID_CONSTANT BODY body_in
void launch(LaunchParams const &launch_params, ReduceParams &&... rest_of_launch_args)
Definition: launch_core.hpp:268
auto & body
Definition: launch.hpp:177
auto privatizer
Definition: launch.hpp:176
void synchronize()
Synchronize all current RAJA executions for the specified policy.
Definition: synchronize.hpp:44
Header file containing RAJA index set and segment iteration template methods that take an execution p...
Header file containing utility methods used in CUDA operations.
Header file for RAJA resource definitions.
static constexpr void parampack_resolve(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:304
static constexpr void parampack_init(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:269
static RAJA_HOST_DEVICE constexpr void parampack_combine(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:286
Definition: TypeTraits.hpp:59
Definition: types.hpp:143
Definition: types.hpp:209
Header file for RAJA type definitions.