RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
internal.hpp
Go to the documentation of this file.
1 
12 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
13 // Copyright (c) Lawrence Livermore National Security, LLC and other
14 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
15 // files for dates and other details. No copyright assignment is required
16 // to contribute to RAJA.
17 //
18 // SPDX-License-Identifier: (BSD-3-Clause)
19 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
20 
21 
22 #ifndef RAJA_policy_hip_kernel_internal_HPP
23 #define RAJA_policy_hip_kernel_internal_HPP
24 
25 #include "RAJA/config.hpp"
26 
27 #if defined(RAJA_ENABLE_HIP)
28 
29 #include <cassert>
30 #include <climits>
31 
32 #include "camp/camp.hpp"
33 
34 #include "RAJA/pattern/kernel.hpp"
35 
36 #include "RAJA/util/macros.hpp"
37 #include "RAJA/util/types.hpp"
38 
41 
42 namespace RAJA
43 {
44 
45 namespace internal
46 {
47 
48 struct LaunchDims
49 {
50 
51  HipDims active {0};
52  HipDims dims {0};
53  HipDims min_dims {0};
54 
55  LaunchDims() = default;
56  LaunchDims(LaunchDims const&) = default;
57  LaunchDims(LaunchDims&&) = default;
58  LaunchDims& operator=(LaunchDims const&) = default;
59  LaunchDims& operator=(LaunchDims&&) = default;
60 
61  RAJA_INLINE
62  LaunchDims(HipDims _active, HipDims _dims, HipDims _min_dims)
63  : active {_active},
64  dims {_dims},
65  min_dims {_min_dims}
66  {}
67 
68  RAJA_INLINE
69  LaunchDims max(LaunchDims const& c) const
70  {
71  LaunchDims result;
72 
73  result.active.blocks.x = std::max(c.active.blocks.x, active.blocks.x);
74  result.active.blocks.y = std::max(c.active.blocks.y, active.blocks.y);
75  result.active.blocks.z = std::max(c.active.blocks.z, active.blocks.z);
76 
77  result.dims.blocks.x = std::max(c.dims.blocks.x, dims.blocks.x);
78  result.dims.blocks.y = std::max(c.dims.blocks.y, dims.blocks.y);
79  result.dims.blocks.z = std::max(c.dims.blocks.z, dims.blocks.z);
80 
81  result.min_dims.blocks.x = std::max(c.min_dims.blocks.x, min_dims.blocks.x);
82  result.min_dims.blocks.y = std::max(c.min_dims.blocks.y, min_dims.blocks.y);
83  result.min_dims.blocks.z = std::max(c.min_dims.blocks.z, min_dims.blocks.z);
84 
85  result.active.threads.x = std::max(c.active.threads.x, active.threads.x);
86  result.active.threads.y = std::max(c.active.threads.y, active.threads.y);
87  result.active.threads.z = std::max(c.active.threads.z, active.threads.z);
88 
89  result.dims.threads.x = std::max(c.dims.threads.x, dims.threads.x);
90  result.dims.threads.y = std::max(c.dims.threads.y, dims.threads.y);
91  result.dims.threads.z = std::max(c.dims.threads.z, dims.threads.z);
92 
93  result.min_dims.threads.x =
94  std::max(c.min_dims.threads.x, min_dims.threads.x);
95  result.min_dims.threads.y =
96  std::max(c.min_dims.threads.y, min_dims.threads.y);
97  result.min_dims.threads.z =
98  std::max(c.min_dims.threads.z, min_dims.threads.z);
99 
100  return result;
101  }
102 
103  RAJA_INLINE
104  int blocks_are_active() const
105  {
106  return active.blocks.x || active.blocks.y || active.blocks.z;
107  }
108 
109  RAJA_INLINE
110  int threads_are_active() const
111  {
112  return active.threads.x || active.threads.y || active.threads.z;
113  }
114 
115  RAJA_INLINE
116  int num_blocks() const
117  {
118  if (blocks_are_active())
119  {
120  return (active.blocks.x ? dims.blocks.x : 1) *
121  (active.blocks.y ? dims.blocks.y : 1) *
122  (active.blocks.z ? dims.blocks.z : 1);
123  }
124  else
125  {
126  return 0;
127  }
128  }
129 
130  RAJA_INLINE
131  int num_threads() const
132  {
133  if (threads_are_active())
134  {
135  return (active.threads.x ? dims.threads.x : 1) *
136  (active.threads.y ? dims.threads.y : 1) *
137  (active.threads.z ? dims.threads.z : 1);
138  }
139  else
140  {
141  return 0;
142  }
143  }
144 
145  RAJA_INLINE
146  void clamp_to_min_blocks()
147  {
148  dims.blocks.x = std::max(min_dims.blocks.x, dims.blocks.x);
149  dims.blocks.y = std::max(min_dims.blocks.y, dims.blocks.y);
150  dims.blocks.z = std::max(min_dims.blocks.z, dims.blocks.z);
151  };
152 
153  RAJA_INLINE
154  void clamp_to_min_threads()
155  {
156  dims.threads.x = std::max(min_dims.threads.x, dims.threads.x);
157  dims.threads.y = std::max(min_dims.threads.y, dims.threads.y);
158  dims.threads.z = std::max(min_dims.threads.z, dims.threads.z);
159  };
160 };
161 
162 RAJA_INLINE
163 LaunchDims combine(LaunchDims const& lhs, LaunchDims const& rhs)
164 {
165  return lhs.max(rhs);
166 }
167 
168 template<camp::idx_t cur_stmt, camp::idx_t num_stmts, typename StmtList>
169 struct HipStatementListExecutorHelper
170 {
171 
172  using next_helper_t =
173  HipStatementListExecutorHelper<cur_stmt + 1, num_stmts, StmtList>;
174 
175  using cur_stmt_t = camp::at_v<StmtList, cur_stmt>;
176 
177  template<typename Data>
178  inline static RAJA_DEVICE void exec(Data& data, bool thread_active)
179  {
180  // Execute stmt
181  cur_stmt_t::exec(data, thread_active);
182 
183  // Execute next stmt
184  next_helper_t::exec(data, thread_active);
185  }
186 
187  template<typename Data>
188  inline static LaunchDims calculateDimensions(Data& data)
189  {
190  LaunchDims statement_dims = cur_stmt_t::calculateDimensions(data);
191 
192  LaunchDims next_dims = next_helper_t::calculateDimensions(data);
193 
194  return combine(statement_dims, next_dims);
195  }
196 };
197 
198 template<camp::idx_t num_stmts, typename StmtList>
199 struct HipStatementListExecutorHelper<num_stmts, num_stmts, StmtList>
200 {
201 
202  template<typename Data>
203  inline static RAJA_DEVICE void exec(Data&, bool)
204  {
205  // nop terminator
206  }
207 
208  template<typename Data>
209  inline static LaunchDims calculateDimensions(Data&)
210  {
211  return LaunchDims();
212  }
213 };
214 
215 
216 template<typename Data, typename Policy, typename Types>
217 struct HipStatementExecutor;
218 
219 template<typename Data, typename StmtList, typename Types>
220 struct HipStatementListExecutor;
221 
222 template<typename Data, typename... Stmts, typename Types>
223 struct HipStatementListExecutor<Data, StatementList<Stmts...>, Types>
224 {
225 
226  using enclosed_stmts_t =
227  camp::list<HipStatementExecutor<Data, Stmts, Types>...>;
228 
229  static constexpr size_t num_stmts = sizeof...(Stmts);
230 
231  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
232  {
233  // Execute statements in order with helper class
234  HipStatementListExecutorHelper<0, num_stmts, enclosed_stmts_t>::exec(
235  data, thread_active);
236  }
237 
238  static inline LaunchDims calculateDimensions(Data const& data)
239  {
240  // Compute this statements launch dimensions
241  return HipStatementListExecutorHelper<
242  0, num_stmts, enclosed_stmts_t>::calculateDimensions(data);
243  }
244 };
245 
246 template<typename StmtList, typename Data, typename Types>
247 using hip_statement_list_executor_t =
248  HipStatementListExecutor<Data, StmtList, Types>;
249 
250 
251 template<typename kernel_indexer>
252 struct KernelDimensionCalculator;
253 
254 // specialization for direct unchecked sequential policies
255 template<named_dim dim, kernel_sync_requirement sync>
256 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
257  iteration_mapping::DirectUnchecked,
258  sync,
259  hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>>>
260 {
261  using IndexMapper =
262  hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>;
263 
264  template<typename IdxT>
265  static LaunchDims get_dimensions(IdxT len)
266  {
267  if (len != static_cast<IdxT>(1))
268  {
269  RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked "
270  "mapped index space");
271  }
272 
273  return LaunchDims {};
274  }
275 };
276 
277 // specialization for direct unchecked thread policies
278 template<named_dim dim, kernel_sync_requirement sync>
279 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
280  iteration_mapping::DirectUnchecked,
281  sync,
282  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>>>
283 {
284  using IndexMapper =
285  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>;
286 
287  template<typename IdxT>
288  static LaunchDims get_dimensions(IdxT len)
289  {
290  LaunchDims dims;
291 
292  // BEWARE: if calculated block_size is too high then the kernel launch will
293  // fail
294  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
295  set_hip_dim<dim>(dims.dims.threads, static_cast<hip_dim_member_t>(len));
296  set_hip_dim<dim>(dims.min_dims.threads, static_cast<hip_dim_member_t>(len));
297 
298  return dims;
299  }
300 };
301 
303 template<named_dim dim, int BLOCK_SIZE, kernel_sync_requirement sync>
304 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
305  iteration_mapping::DirectUnchecked,
306  sync,
307  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>>>
308 {
309  static_assert(BLOCK_SIZE > 0,
310  "block size must be > 0, named_usage::unspecified, or "
311  "named_usage::ignored with kernel");
312 
313  using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>;
314 
315  template<typename IdxT>
316  static LaunchDims get_dimensions(IdxT len)
317  {
318  if (len != static_cast<IdxT>(IndexMapper::block_size))
319  {
320  RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked "
321  "mapped index space");
322  }
323 
324  LaunchDims dims;
325 
326  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
327  set_hip_dim<dim>(dims.dims.threads,
328  static_cast<hip_dim_member_t>(IndexMapper::block_size));
329  set_hip_dim<dim>(dims.min_dims.threads,
330  static_cast<hip_dim_member_t>(IndexMapper::block_size));
331 
332  return dims;
333  }
334 };
335 
336 // specialization for direct unchecked block policies
337 template<named_dim dim, kernel_sync_requirement sync>
338 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
339  iteration_mapping::DirectUnchecked,
340  sync,
341  hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>>>
342 {
343  using IndexMapper =
344  hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>;
345 
346  template<typename IdxT>
347  static LaunchDims get_dimensions(IdxT len)
348  {
349  LaunchDims dims;
350 
351  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
352  set_hip_dim<dim>(dims.dims.blocks, static_cast<hip_dim_member_t>(len));
353  set_hip_dim<dim>(dims.min_dims.blocks, static_cast<hip_dim_member_t>(len));
354 
355  return dims;
356  }
357 };
358 
360 template<named_dim dim, int GRID_SIZE, kernel_sync_requirement sync>
361 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
362  iteration_mapping::DirectUnchecked,
363  sync,
364  hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>>>
365 {
366  static_assert(GRID_SIZE > 0,
367  "grid size must be > 0, named_usage::unspecified, or "
368  "named_usage::ignored with kernel");
369 
370  using IndexMapper = hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>;
371 
372  template<typename IdxT>
373  static LaunchDims get_dimensions(IdxT len)
374  {
375  if (len != static_cast<IdxT>(IndexMapper::grid_size))
376  {
377  RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked "
378  "mapped index space");
379  }
380 
381  LaunchDims dims;
382 
383  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
384  set_hip_dim<dim>(dims.dims.blocks,
385  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
386  set_hip_dim<dim>(dims.min_dims.blocks,
387  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
388 
389  return dims;
390  }
391 };
392 
393 // specialization for direct unchecked global policies
394 template<named_dim dim, kernel_sync_requirement sync>
395 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
396  iteration_mapping::DirectUnchecked,
397  sync,
398  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>>>
399 {
400  using IndexMapper =
401  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
402 
403  template<typename IdxT>
404  static LaunchDims get_dimensions(IdxT len)
405  {
406  if (len != static_cast<IdxT>(0))
407  {
408  RAJA_ABORT_OR_THROW("must know one of block_size or grid_size");
409  }
410 
411  return {};
412  }
413 };
414 
416 template<named_dim dim, int GRID_SIZE, kernel_sync_requirement sync>
417 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
418  iteration_mapping::DirectUnchecked,
419  sync,
420  hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>>>
421 {
422  static_assert(GRID_SIZE > 0,
423  "grid size must be > 0, named_usage::unspecified, or "
424  "named_usage::ignored with kernel");
425 
426  using IndexMapper =
427  hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
428 
429  template<typename IdxT>
430  static LaunchDims get_dimensions(IdxT len)
431  {
432  // BEWARE: if calculated block_size is too high then the kernel launch will
433  // fail
434  const IdxT block_size =
435  RAJA_DIVIDE_CEILING_INT(len, static_cast<IdxT>(IndexMapper::grid_size));
436  if (len != (block_size * static_cast<IdxT>(IndexMapper::grid_size)))
437  {
438  RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked "
439  "mapped index space");
440  }
441 
442  LaunchDims dims;
443 
444  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
445  set_hip_dim<dim>(dims.dims.threads,
446  static_cast<hip_dim_member_t>(block_size));
447  set_hip_dim<dim>(dims.min_dims.threads,
448  static_cast<hip_dim_member_t>(block_size));
449 
450  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
451  set_hip_dim<dim>(dims.dims.blocks,
452  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
453  set_hip_dim<dim>(dims.min_dims.blocks,
454  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
455 
456  return dims;
457  }
458 };
459 
461 template<named_dim dim, int BLOCK_SIZE, kernel_sync_requirement sync>
462 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
463  iteration_mapping::DirectUnchecked,
464  sync,
465  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>>>
466 {
467  static_assert(BLOCK_SIZE > 0,
468  "block size must be > 0, named_usage::unspecified, or "
469  "named_usage::ignored with kernel");
470 
471  using IndexMapper =
472  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
473 
474  template<typename IdxT>
475  static LaunchDims get_dimensions(IdxT len)
476  {
477  const IdxT grid_size = RAJA_DIVIDE_CEILING_INT(
478  len, static_cast<IdxT>(IndexMapper::block_size));
479  if (len != (static_cast<IdxT>(IndexMapper::block_size) * grid_size))
480  {
481  RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked "
482  "mapped index space");
483  }
484 
485  LaunchDims dims;
486 
487  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
488  set_hip_dim<dim>(dims.dims.threads,
489  static_cast<hip_dim_member_t>(IndexMapper::block_size));
490  set_hip_dim<dim>(dims.min_dims.threads,
491  static_cast<hip_dim_member_t>(IndexMapper::block_size));
492 
493  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
494  set_hip_dim<dim>(dims.dims.blocks,
495  static_cast<hip_dim_member_t>(grid_size));
496  set_hip_dim<dim>(dims.min_dims.blocks,
497  static_cast<hip_dim_member_t>(grid_size));
498 
499  return dims;
500  }
501 };
502 
504 template<named_dim dim,
505  int BLOCK_SIZE,
506  int GRID_SIZE,
508 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
509  iteration_mapping::DirectUnchecked,
510  sync,
511  hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>>>
512 {
513  static_assert(BLOCK_SIZE > 0,
514  "block size must be > 0, named_usage::unspecified, or "
515  "named_usage::ignored with kernel");
516  static_assert(GRID_SIZE > 0,
517  "grid size must be > 0, named_usage::unspecified, or "
518  "named_usage::ignored with kernel");
519 
520  using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
521 
522  template<typename IdxT>
523  static LaunchDims get_dimensions(IdxT len)
524  {
525  if (len != (static_cast<IdxT>(IndexMapper::block_size) *
526  static_cast<IdxT>(IndexMapper::grid_size)))
527  {
528  RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked "
529  "mapped index space");
530  }
531 
532  LaunchDims dims;
533 
534  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
535  set_hip_dim<dim>(dims.dims.threads,
536  static_cast<hip_dim_member_t>(IndexMapper::block_size));
537  set_hip_dim<dim>(dims.min_dims.threads,
538  static_cast<hip_dim_member_t>(IndexMapper::block_size));
539 
540  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
541  set_hip_dim<dim>(dims.dims.blocks,
542  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
543  set_hip_dim<dim>(dims.min_dims.blocks,
544  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
545 
546  return dims;
547  }
548 };
549 
550 // specialization for direct sequential policies
551 template<named_dim dim, kernel_sync_requirement sync>
552 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
553  iteration_mapping::Direct,
554  sync,
555  hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>>>
556 {
557  using IndexMapper =
558  hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>;
559 
560  template<typename IdxT>
561  static LaunchDims get_dimensions(IdxT len)
562  {
563  if (len > static_cast<IdxT>(1))
564  {
566  "len exceeds the size of the directly mapped index space");
567  }
568 
569  return LaunchDims {};
570  }
571 };
572 
573 // specialization for direct thread policies
574 template<named_dim dim, kernel_sync_requirement sync>
575 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
576  iteration_mapping::Direct,
577  sync,
578  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>>>
579 {
580  using IndexMapper =
581  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>;
582 
583  template<typename IdxT>
584  static LaunchDims get_dimensions(IdxT len)
585  {
586  LaunchDims dims;
587 
588  // BEWARE: if calculated block_size is too high then the kernel launch will
589  // fail
590  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
591  set_hip_dim<dim>(dims.dims.threads, static_cast<hip_dim_member_t>(len));
592  set_hip_dim<dim>(dims.min_dims.threads, static_cast<hip_dim_member_t>(len));
593 
594  return dims;
595  }
596 };
597 
599 template<named_dim dim, int BLOCK_SIZE, kernel_sync_requirement sync>
600 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
601  iteration_mapping::Direct,
602  sync,
603  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>>>
604 {
605  static_assert(BLOCK_SIZE > 0,
606  "block size must be > 0, named_usage::unspecified, or "
607  "named_usage::ignored with kernel");
608 
609  using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>;
610 
611  template<typename IdxT>
612  static LaunchDims get_dimensions(IdxT len)
613  {
614  constexpr auto zero = static_cast<IdxT>(0);
615 
616  if (len > static_cast<IdxT>(IndexMapper::block_size))
617  {
619  "len exceeds the size of the directly mapped index space");
620  }
621 
622  LaunchDims dims;
623 
624  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
625  set_hip_dim<dim>(dims.dims.threads,
626  static_cast<hip_dim_member_t>(
627  (len > zero) ? IndexMapper::block_size : 0));
628  set_hip_dim<dim>(dims.min_dims.threads,
629  static_cast<hip_dim_member_t>(IndexMapper::block_size));
630 
631  return dims;
632  }
633 };
634 
635 // specialization for direct block policies
636 template<named_dim dim, kernel_sync_requirement sync>
637 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
638  iteration_mapping::Direct,
639  sync,
640  hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>>>
641 {
642  using IndexMapper =
643  hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>;
644 
645  template<typename IdxT>
646  static LaunchDims get_dimensions(IdxT len)
647  {
648  LaunchDims dims;
649 
650  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
651  set_hip_dim<dim>(dims.dims.blocks, static_cast<hip_dim_member_t>(len));
652  set_hip_dim<dim>(dims.min_dims.blocks, static_cast<hip_dim_member_t>(len));
653 
654  return dims;
655  }
656 };
657 
659 template<named_dim dim, int GRID_SIZE, kernel_sync_requirement sync>
660 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
661  iteration_mapping::Direct,
662  sync,
663  hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>>>
664 {
665  static_assert(GRID_SIZE > 0,
666  "grid size must be > 0, named_usage::unspecified, or "
667  "named_usage::ignored with kernel");
668 
669  using IndexMapper = hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>;
670 
671  template<typename IdxT>
672  static LaunchDims get_dimensions(IdxT len)
673  {
674  constexpr auto zero = static_cast<IdxT>(0);
675 
676  if (len > static_cast<IdxT>(IndexMapper::grid_size))
677  {
679  "len exceeds the size of the directly mapped index space");
680  }
681 
682  LaunchDims dims;
683 
684  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
685  set_hip_dim<dim>(dims.dims.blocks,
686  static_cast<hip_dim_member_t>(
687  (len > zero) ? IndexMapper::grid_size : 0));
688  set_hip_dim<dim>(dims.min_dims.blocks,
689  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
690 
691  return dims;
692  }
693 };
694 
695 // specialization for direct global policies
696 template<named_dim dim, kernel_sync_requirement sync>
697 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
698  iteration_mapping::Direct,
699  sync,
700  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>>>
701 {
702  using IndexMapper =
703  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
704 
705  template<typename IdxT>
706  static LaunchDims get_dimensions(IdxT len)
707  {
708  if (len > static_cast<IdxT>(0))
709  {
710  RAJA_ABORT_OR_THROW("must know one of block_size or grid_size");
711  }
712 
713  return LaunchDims {};
714  }
715 };
716 
718 template<named_dim dim, int GRID_SIZE, kernel_sync_requirement sync>
719 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
720  iteration_mapping::Direct,
721  sync,
722  hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>>>
723 {
724  static_assert(GRID_SIZE > 0,
725  "grid size must be > 0, named_usage::unspecified, or "
726  "named_usage::ignored with kernel");
727 
728  using IndexMapper =
729  hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
730 
731  template<typename IdxT>
732  static LaunchDims get_dimensions(IdxT len)
733  {
734  constexpr auto zero = static_cast<IdxT>(0);
735 
736  // BEWARE: if calculated block_size is too high then the kernel launch will
737  // fail
738  const IdxT block_size =
739  RAJA_DIVIDE_CEILING_INT(len, static_cast<IdxT>(IndexMapper::grid_size));
740 
741  LaunchDims dims;
742 
743  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
744  set_hip_dim<dim>(dims.dims.threads,
745  static_cast<hip_dim_member_t>(block_size));
746  set_hip_dim<dim>(dims.min_dims.threads,
747  static_cast<hip_dim_member_t>(block_size));
748 
749  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
750  set_hip_dim<dim>(dims.dims.blocks,
751  static_cast<hip_dim_member_t>(
752  (len > zero) ? IndexMapper::grid_size : 0));
753  set_hip_dim<dim>(dims.min_dims.blocks,
754  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
755 
756  return dims;
757  }
758 };
759 
761 template<named_dim dim, int BLOCK_SIZE, kernel_sync_requirement sync>
762 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
763  iteration_mapping::Direct,
764  sync,
765  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>>>
766 {
767  static_assert(BLOCK_SIZE > 0,
768  "block size must be > 0, named_usage::unspecified, or "
769  "named_usage::ignored with kernel");
770 
771  using IndexMapper =
772  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
773 
774  template<typename IdxT>
775  static LaunchDims get_dimensions(IdxT len)
776  {
777  constexpr auto zero = static_cast<IdxT>(0);
778 
779  const IdxT grid_size = RAJA_DIVIDE_CEILING_INT(
780  len, static_cast<IdxT>(IndexMapper::block_size));
781 
782  LaunchDims dims;
783 
784  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
785  set_hip_dim<dim>(dims.dims.threads,
786  static_cast<hip_dim_member_t>(
787  (len > zero) ? IndexMapper::block_size : 0));
788  set_hip_dim<dim>(dims.min_dims.threads,
789  static_cast<hip_dim_member_t>(IndexMapper::block_size));
790 
791  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
792  set_hip_dim<dim>(dims.dims.blocks,
793  static_cast<hip_dim_member_t>(grid_size));
794  set_hip_dim<dim>(dims.min_dims.blocks,
795  static_cast<hip_dim_member_t>(grid_size));
796 
797  return dims;
798  }
799 };
800 
802 template<named_dim dim,
803  int BLOCK_SIZE,
804  int GRID_SIZE,
806 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
807  iteration_mapping::Direct,
808  sync,
809  hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>>>
810 {
811  static_assert(BLOCK_SIZE > 0,
812  "block size must be > 0, named_usage::unspecified, or "
813  "named_usage::ignored with kernel");
814  static_assert(GRID_SIZE > 0,
815  "grid size must be > 0, named_usage::unspecified, or "
816  "named_usage::ignored with kernel");
817 
818  using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
819 
820  template<typename IdxT>
821  static LaunchDims get_dimensions(IdxT len)
822  {
823  constexpr auto zero = static_cast<IdxT>(0);
824 
825  if (len > (static_cast<IdxT>(IndexMapper::block_size) *
826  static_cast<IdxT>(IndexMapper::grid_size)))
827  {
829  "len exceeds the size of the directly mapped index space");
830  }
831 
832  LaunchDims dims;
833 
834  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
835  set_hip_dim<dim>(dims.dims.threads,
836  static_cast<hip_dim_member_t>(
837  (len > zero) ? IndexMapper::block_size : 0));
838  set_hip_dim<dim>(dims.min_dims.threads,
839  static_cast<hip_dim_member_t>(IndexMapper::block_size));
840 
841  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
842  set_hip_dim<dim>(dims.dims.blocks,
843  static_cast<hip_dim_member_t>(
844  (len > zero) ? IndexMapper::grid_size : 0));
845  set_hip_dim<dim>(dims.min_dims.blocks,
846  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
847 
848  return dims;
849  }
850 };
851 
852 // specialization for strided loop sequential policies
853 template<named_dim dim, kernel_sync_requirement sync>
854 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
855  iteration_mapping::StridedLoop<named_usage::unspecified>,
856  sync,
857  hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>>>
858 {
859  using IndexMapper =
860  hip::IndexGlobal<dim, named_usage::ignored, named_usage::ignored>;
861 
862  template<typename IdxT>
863  static LaunchDims get_dimensions(IdxT RAJA_UNUSED_ARG(len))
864  {
865  return LaunchDims {};
866  }
867 };
868 
869 // specialization for strided loop thread policies
870 template<named_dim dim, kernel_sync_requirement sync>
871 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
872  iteration_mapping::StridedLoop<named_usage::unspecified>,
873  sync,
874  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>>>
875 {
876  using IndexMapper =
877  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::ignored>;
878 
879  template<typename IdxT>
880  static LaunchDims get_dimensions(IdxT len)
881  {
882  LaunchDims dims;
883 
884  // BEWARE: if calculated block_size is too high then the kernel launch will
885  // fail
886  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
887  set_hip_dim<dim>(dims.dims.threads, static_cast<hip_dim_member_t>(len));
888  set_hip_dim<dim>(dims.min_dims.threads, static_cast<hip_dim_member_t>(1));
889 
890  return dims;
891  }
892 };
893 
895 template<named_dim dim, int BLOCK_SIZE, kernel_sync_requirement sync>
896 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
897  iteration_mapping::StridedLoop<named_usage::unspecified>,
898  sync,
899  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>>>
900 {
901  static_assert(BLOCK_SIZE > 0,
902  "block size must be > 0, named_usage::unspecified, or "
903  "named_usage::ignored with kernel");
904 
905  using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::ignored>;
906 
907  template<typename IdxT>
908  static LaunchDims get_dimensions(IdxT len)
909  {
910  constexpr auto zero = static_cast<IdxT>(0);
911 
912  LaunchDims dims;
913 
914  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
915  set_hip_dim<dim>(dims.dims.threads,
916  static_cast<hip_dim_member_t>(
917  (len > zero) ? IndexMapper::block_size : 0));
918  set_hip_dim<dim>(dims.min_dims.threads,
919  static_cast<hip_dim_member_t>(IndexMapper::block_size));
920 
921  return dims;
922  }
923 };
924 
925 // specialization for strided loop block policies
926 template<named_dim dim, kernel_sync_requirement sync>
927 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
928  iteration_mapping::StridedLoop<named_usage::unspecified>,
929  sync,
930  hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>>>
931 {
932  using IndexMapper =
933  hip::IndexGlobal<dim, named_usage::ignored, named_usage::unspecified>;
934 
935  template<typename IdxT>
936  static LaunchDims get_dimensions(IdxT len)
937  {
938  LaunchDims dims;
939 
940  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
941  set_hip_dim<dim>(dims.dims.blocks, static_cast<hip_dim_member_t>(len));
942  set_hip_dim<dim>(dims.min_dims.blocks, static_cast<hip_dim_member_t>(1));
943 
944  return dims;
945  }
946 };
947 
949 template<named_dim dim, int GRID_SIZE, kernel_sync_requirement sync>
950 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
951  iteration_mapping::StridedLoop<named_usage::unspecified>,
952  sync,
953  hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>>>
954 {
955  static_assert(GRID_SIZE > 0,
956  "grid size must be > 0, named_usage::unspecified, or "
957  "named_usage::ignored with kernel");
958 
959  using IndexMapper = hip::IndexGlobal<dim, named_usage::ignored, GRID_SIZE>;
960 
961  template<typename IdxT>
962  static LaunchDims get_dimensions(IdxT len)
963  {
964  constexpr auto zero = static_cast<IdxT>(0);
965 
966  LaunchDims dims;
967 
968  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
969  set_hip_dim<dim>(dims.dims.blocks,
970  static_cast<hip_dim_member_t>(
971  (len > zero) ? IndexMapper::grid_size : 0));
972  set_hip_dim<dim>(dims.min_dims.blocks,
973  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
974 
975  return dims;
976  }
977 };
978 
979 // specialization for strided loop global policies
980 template<named_dim dim, kernel_sync_requirement sync>
981 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
982  iteration_mapping::StridedLoop<named_usage::unspecified>,
983  sync,
984  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>>>
985 {
986  using IndexMapper =
987  hip::IndexGlobal<dim, named_usage::unspecified, named_usage::unspecified>;
988 
989  template<typename IdxT>
990  static LaunchDims get_dimensions(IdxT len)
991  {
992  constexpr auto zero = static_cast<IdxT>(0);
993 
994  LaunchDims dims;
995 
996  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
997  set_hip_dim<dim>(dims.dims.threads,
998  static_cast<hip_dim_member_t>((len > zero) ? 1 : 0));
999  set_hip_dim<dim>(dims.min_dims.threads, static_cast<hip_dim_member_t>(1));
1000 
1001  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
1002  set_hip_dim<dim>(dims.dims.blocks,
1003  static_cast<hip_dim_member_t>((len > zero) ? 1 : 0));
1004  set_hip_dim<dim>(dims.min_dims.blocks, static_cast<hip_dim_member_t>(1));
1005 
1006  return dims;
1007  }
1008 };
1009 
1011 template<named_dim dim, int GRID_SIZE, kernel_sync_requirement sync>
1012 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
1013  iteration_mapping::StridedLoop<named_usage::unspecified>,
1014  sync,
1015  hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>>>
1016 {
1017  static_assert(GRID_SIZE > 0,
1018  "grid size must be > 0, named_usage::unspecified, or "
1019  "named_usage::ignored with kernel");
1020 
1021  using IndexMapper =
1022  hip::IndexGlobal<dim, named_usage::unspecified, GRID_SIZE>;
1023 
1024  template<typename IdxT>
1025  static LaunchDims get_dimensions(IdxT len)
1026  {
1027  constexpr auto zero = static_cast<IdxT>(0);
1028 
1029  // BEWARE: if calculated block_size is too high then the kernel launch will
1030  // fail
1031  const IdxT block_size =
1032  RAJA_DIVIDE_CEILING_INT(len, static_cast<IdxT>(IndexMapper::grid_size));
1033 
1034  LaunchDims dims;
1035 
1036  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
1037  set_hip_dim<dim>(dims.dims.threads,
1038  static_cast<hip_dim_member_t>(block_size));
1039  set_hip_dim<dim>(dims.min_dims.threads, static_cast<hip_dim_member_t>(1));
1040 
1041  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
1042  set_hip_dim<dim>(dims.dims.blocks,
1043  static_cast<hip_dim_member_t>(
1044  (len > zero) ? IndexMapper::grid_size : 0));
1045  set_hip_dim<dim>(dims.min_dims.blocks,
1046  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
1047 
1048  return dims;
1049  }
1050 };
1051 
1053 template<named_dim dim, int BLOCK_SIZE, kernel_sync_requirement sync>
1054 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
1055  iteration_mapping::StridedLoop<named_usage::unspecified>,
1056  sync,
1057  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>>>
1058 {
1059  static_assert(BLOCK_SIZE > 0,
1060  "block size must be > 0, named_usage::unspecified, or "
1061  "named_usage::ignored with kernel");
1062 
1063  using IndexMapper =
1064  hip::IndexGlobal<dim, BLOCK_SIZE, named_usage::unspecified>;
1065 
1066  template<typename IdxT>
1067  static LaunchDims get_dimensions(IdxT len)
1068  {
1069  constexpr auto zero = static_cast<IdxT>(0);
1070 
1071  const IdxT grid_size = RAJA_DIVIDE_CEILING_INT(
1072  len, static_cast<IdxT>(IndexMapper::block_size));
1073 
1074  LaunchDims dims;
1075 
1076  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
1077  set_hip_dim<dim>(dims.dims.threads,
1078  static_cast<hip_dim_member_t>(
1079  (len > zero) ? IndexMapper::block_size : 0));
1080  set_hip_dim<dim>(dims.min_dims.threads,
1081  static_cast<hip_dim_member_t>(IndexMapper::block_size));
1082 
1083  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
1084  set_hip_dim<dim>(dims.dims.blocks,
1085  static_cast<hip_dim_member_t>(grid_size));
1086  set_hip_dim<dim>(dims.min_dims.blocks, static_cast<hip_dim_member_t>(1));
1087 
1088  return dims;
1089  }
1090 };
1091 
1093 template<named_dim dim,
1094  int BLOCK_SIZE,
1095  int GRID_SIZE,
1097 struct KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
1098  iteration_mapping::StridedLoop<named_usage::unspecified>,
1099  sync,
1100  hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>>>
1101 {
1102  static_assert(BLOCK_SIZE > 0,
1103  "block size must be > 0, named_usage::unspecified, or "
1104  "named_usage::ignored with kernel");
1105  static_assert(GRID_SIZE > 0,
1106  "grid size must be > 0, named_usage::unspecified, or "
1107  "named_usage::ignored with kernel");
1108 
1109  using IndexMapper = hip::IndexGlobal<dim, BLOCK_SIZE, GRID_SIZE>;
1110 
1111  template<typename IdxT>
1112  static LaunchDims get_dimensions(IdxT len)
1113  {
1114  constexpr auto zero = static_cast<IdxT>(0);
1115 
1116  LaunchDims dims;
1117 
1118  set_hip_dim<dim>(dims.active.threads, static_cast<hip_dim_member_t>(true));
1119  set_hip_dim<dim>(dims.dims.threads,
1120  static_cast<hip_dim_member_t>(
1121  (len > zero) ? IndexMapper::block_size : 0));
1122  set_hip_dim<dim>(dims.min_dims.threads,
1123  static_cast<hip_dim_member_t>(IndexMapper::block_size));
1124 
1125  set_hip_dim<dim>(dims.active.blocks, static_cast<hip_dim_member_t>(true));
1126  set_hip_dim<dim>(dims.dims.blocks,
1127  static_cast<hip_dim_member_t>(
1128  (len > zero) ? IndexMapper::grid_size : 0));
1129  set_hip_dim<dim>(dims.min_dims.blocks,
1130  static_cast<hip_dim_member_t>(IndexMapper::grid_size));
1131 
1132  return dims;
1133  }
1134 };
1135 
1136 } // namespace internal
1137 
1138 } // namespace RAJA
1139 
1140 #endif // closing endif for RAJA_ENABLE_HIP guard
1141 
1142 #endif // closing endif for header file include guard
Header file defining prototypes for routines used to manage memory for HIP reductions and other opera...
Header file containing RAJA HIP policy definitions.
Header file for common RAJA internal macro definitions.
RAJA_HOST_DEVICE void RAJA_ABORT_OR_THROW(const char *str)
Definition: macros.hpp:143
#define RAJA_UNUSED_ARG(x)
Definition: macros.hpp:97
#define RAJA_DIVIDE_CEILING_INT(dividend, divisor)
Definition: macros.hpp:122
#define RAJA_DEVICE
Definition: macros.hpp:66
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
Definition: AlignedRangeIndexSetBuilders.cpp:35
named_dim
Definition: types.hpp:53
kernel_sync_requirement
Definition: types.hpp:63
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result max(Args... args)
Definition: foldl.hpp:155
RAJA header file containing user interface for RAJA::kernel.
Header file for RAJA type definitions.