RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
For.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 
21 #ifndef RAJA_policy_hip_kernel_For_HPP
22 #define RAJA_policy_hip_kernel_For_HPP
23 
24 #include "RAJA/config.hpp"
25 
27 
28 namespace RAJA
29 {
30 
31 namespace internal
32 {
33 
34 /*
35  * Executor for work sharing inside HipKernel.
36  * Mapping without checking from IndexMapper to indices
37  * Assigns the loop index to offset ArgumentId
38  * Meets all sync requirements
39  */
40 template<typename Data,
41  camp::idx_t ArgumentId,
42  typename IndexMapper,
44  typename... EnclosedStmts,
45  typename Types>
46 struct HipStatementExecutor<
47  Data,
48  statement::For<
49  ArgumentId,
50  RAJA::policy::hip::
51  hip_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>,
52  EnclosedStmts...>,
53  Types>
54 {
55 
56  using stmt_list_t = StatementList<EnclosedStmts...>;
57 
58  // Set the argument type for this loop
60 
62  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
63 
65 
66  using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
67  RAJA::policy::hip::
68  hip_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>>;
69 
70  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
71  {
72  const diff_t i = IndexMapper::template index<diff_t>();
73 
74  // Assign the index to the argument
75  data.template assign_offset<ArgumentId>(i);
76 
77  // execute enclosed statements
78  enclosed_stmts_t::exec(data, thread_active);
79  }
80 
81  static inline LaunchDims calculateDimensions(Data const& data)
82  {
83  const diff_t len = segment_length<ArgumentId>(data);
84 
85  LaunchDims dims = DimensionCalculator::get_dimensions(len);
86 
87  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
88 
89  return combine(dims, enclosed_dims);
90  }
91 };
92 
93 /*
94  * Executor for work sharing inside HipKernel.
95  * Mapping directly from IndexMapper to indices
96  * Assigns the loop index to offset ArgumentId
97  * Meets all sync requirements
98  */
99 template<typename Data,
100  camp::idx_t ArgumentId,
101  typename IndexMapper,
103  typename... EnclosedStmts,
104  typename Types>
105 struct HipStatementExecutor<
106  Data,
107  statement::For<ArgumentId,
108  RAJA::policy::hip::hip_indexer<iteration_mapping::Direct,
109  sync,
110  IndexMapper>,
111  EnclosedStmts...>,
112  Types>
113 {
114 
115  using stmt_list_t = StatementList<EnclosedStmts...>;
116 
117  // Set the argument type for this loop
119 
121  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
122 
124 
125  using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
126  RAJA::policy::hip::
127  hip_indexer<iteration_mapping::Direct, sync, IndexMapper>>;
128 
129  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
130  {
131  const diff_t len = segment_length<ArgumentId>(data);
132  const diff_t i = IndexMapper::template index<diff_t>();
133 
134  // execute enclosed statements if any thread will
135  // but mask off threads without work
136  const bool have_work = (i < len);
137 
138  // Assign the index to the argument
139  data.template assign_offset<ArgumentId>(i);
140 
141  // execute enclosed statements
142  enclosed_stmts_t::exec(data, thread_active && have_work);
143  }
144 
145  static inline LaunchDims calculateDimensions(Data const& data)
146  {
147  const diff_t len = segment_length<ArgumentId>(data);
148 
149  LaunchDims dims = DimensionCalculator::get_dimensions(len);
150 
151  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
152 
153  return combine(dims, enclosed_dims);
154  }
155 };
156 
157 /*
158  * Executor for work sharing inside HipKernel.
159  * Provides a strided loop for IndexMapper.
160  * Assigns the loop index to offset ArgumentId.
161  * Meets all sync requirements
162  */
163 template<typename Data,
164  camp::idx_t ArgumentId,
165  typename IndexMapper,
166  typename... EnclosedStmts,
167  typename Types>
168 struct HipStatementExecutor<
169  Data,
170  statement::For<ArgumentId,
171  RAJA::policy::hip::hip_indexer<
172  iteration_mapping::StridedLoop<named_usage::unspecified>,
173  kernel_sync_requirement::sync,
174  IndexMapper>,
175  EnclosedStmts...>,
176  Types>
177 {
178 
179  using stmt_list_t = StatementList<EnclosedStmts...>;
180 
181  // Set the argument type for this loop
183 
185  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
186 
188 
190  RAJA::internal::KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
193  IndexMapper>>;
194 
195  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
196  {
197  // grid stride loop
198  const diff_t len = segment_length<ArgumentId>(data);
199  const diff_t i_init = IndexMapper::template index<diff_t>();
200  const diff_t i_stride = IndexMapper::template size<diff_t>();
201 
202  // Iterate through in chunks
203  // threads will have the same numbers of iterations
204  for (diff_t ii = 0; ii < len; ii += i_stride)
205  {
206  const diff_t i = ii + i_init;
207 
208  // execute enclosed statements if any thread will
209  // but mask off threads without work
210  const bool have_work = (i < len);
211 
212  // Assign the index to the argument
213  data.template assign_offset<ArgumentId>(i);
214 
215  // execute enclosed statements
216  enclosed_stmts_t::exec(data, thread_active && have_work);
217  }
218  }
219 
220  static inline LaunchDims calculateDimensions(Data const& data)
221  {
222  diff_t len = segment_length<ArgumentId>(data);
223 
224  LaunchDims dims = DimensionCalculator::get_dimensions(len);
225 
226  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
227 
228  return combine(dims, enclosed_dims);
229  }
230 };
231 
232 /*
233  * Executor for work sharing inside HipKernel.
234  * Provides a strided loop for IndexMapper.
235  * Assigns the loop index to offset ArgumentId.
236  * Meets no sync requirements
237  */
238 template<typename Data,
239  camp::idx_t ArgumentId,
240  typename IndexMapper,
241  typename... EnclosedStmts,
242  typename Types>
243 struct HipStatementExecutor<
244  Data,
245  statement::For<ArgumentId,
246  RAJA::policy::hip::hip_indexer<
247  iteration_mapping::StridedLoop<named_usage::unspecified>,
248  kernel_sync_requirement::none,
249  IndexMapper>,
250  EnclosedStmts...>,
251  Types>
252 {
253 
254  using stmt_list_t = StatementList<EnclosedStmts...>;
255 
256  // Set the argument type for this loop
258 
260  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
261 
263 
265  RAJA::internal::KernelDimensionCalculator<RAJA::policy::hip::hip_indexer<
268  IndexMapper>>;
269 
270  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
271  {
272  // grid stride loop
273  const diff_t len = segment_length<ArgumentId>(data);
274  const diff_t i_init = IndexMapper::template index<diff_t>();
275  const diff_t i_stride = IndexMapper::template size<diff_t>();
276 
277  // Iterate through one at a time
278  // threads will have different numbers of iterations
279  for (diff_t i = i_init; i < len; i += i_stride)
280  {
281 
282  // Assign the index to the argument
283  data.template assign_offset<ArgumentId>(i);
284 
285  // execute enclosed statements
286  enclosed_stmts_t::exec(data, thread_active);
287  }
288  }
289 
290  static inline LaunchDims calculateDimensions(Data const& data)
291  {
292  const diff_t len = segment_length<ArgumentId>(data);
293 
294  LaunchDims dims = DimensionCalculator::get_dimensions(len);
295 
296  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
297 
298  return combine(dims, enclosed_dims);
299  }
300 };
301 
302 /*
303  * Executor for sequential loops inside of a HipKernel.
304  */
305 template<typename Data,
306  camp::idx_t ArgumentId,
307  typename... EnclosedStmts,
308  typename Types>
309 struct HipStatementExecutor<
310  Data,
311  statement::For<ArgumentId, seq_exec, EnclosedStmts...>,
312  Types>
313  : HipStatementExecutor<
314  Data,
315  statement::For<
316  ArgumentId,
317  RAJA::policy::hip::hip_indexer<
318  iteration_mapping::StridedLoop<named_usage::unspecified>,
319  kernel_sync_requirement::none,
320  hip::IndexGlobal<named_dim::x,
321  named_usage::ignored,
322  named_usage::ignored>>,
323  EnclosedStmts...>,
324  Types>
325 {};
326 
327 /*
328  * Executor for thread work sharing loop inside HipKernel.
329  * Mapping directly from a warp lane
330  * Assigns the loop index to offset ArgumentId
331  */
332 template<typename Data,
333  camp::idx_t ArgumentId,
334  typename Mask,
335  typename... EnclosedStmts,
336  typename Types>
337 struct HipStatementExecutor<Data,
338  statement::For<ArgumentId,
339  RAJA::hip_warp_masked_direct<Mask>,
340  EnclosedStmts...>,
341  Types>
342 {
343 
344  using stmt_list_t = StatementList<EnclosedStmts...>;
345 
346  // Set the argument type for this loop
348 
350  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
351 
352  using mask_t = Mask;
353 
355 
357  RAJA::internal::KernelDimensionCalculator<hip_warp_direct>;
358 
359  static_assert(mask_t::max_masked_size <=
360  RAJA::policy::hip::device_constants.WARP_SIZE,
361  "BitMask is too large for HIP warp size");
362 
363  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
364  {
365  const diff_t len = segment_length<ArgumentId>(data);
366 
367  const diff_t i = mask_t::maskValue((diff_t)threadIdx.x);
368 
369  // assign thread id directly to offset
370  data.template assign_offset<ArgumentId>(i);
371 
372  // execute enclosed statements if in bounds
373  enclosed_stmts_t::exec(data, thread_active && (i < len));
374  }
375 
376  static inline LaunchDims calculateDimensions(Data const& data)
377  {
378  diff_t len = segment_length<ArgumentId>(data);
379 
380  LaunchDims dims = DimensionCalculator::get_dimensions(len);
381 
382  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
383 
384  return combine(dims, enclosed_dims);
385  }
386 };
387 
388 /*
389  * Executor for thread work sharing loop inside HipKernel.
390  * Mapping directly from a warp lane
391  * Assigns the loop index to offset ArgumentId
392  */
393 template<typename Data,
394  camp::idx_t ArgumentId,
395  typename Mask,
396  typename... EnclosedStmts,
397  typename Types>
398 struct HipStatementExecutor<Data,
399  statement::For<ArgumentId,
400  RAJA::hip_warp_masked_loop<Mask>,
401  EnclosedStmts...>,
402  Types>
403 {
404 
405  using stmt_list_t = StatementList<EnclosedStmts...>;
406 
407  // Set the argument type for this loop
409 
411  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
412 
413  using mask_t = Mask;
414 
416 
418  RAJA::internal::KernelDimensionCalculator<hip_warp_loop>;
419 
420  static_assert(mask_t::max_masked_size <=
421  RAJA::policy::hip::device_constants.WARP_SIZE,
422  "BitMask is too large for HIP warp size");
423 
424  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
425  {
426  // masked size strided loop
427  const diff_t len = segment_length<ArgumentId>(data);
428  const diff_t i_init = mask_t::maskValue((diff_t)threadIdx.x);
429  const diff_t i_stride = (diff_t)mask_t::max_masked_size;
430 
431  // Iterate through grid stride of chunks
432  for (diff_t ii = 0; ii < len; ii += i_stride)
433  {
434  const diff_t i = ii + i_init;
435 
436  // execute enclosed statements if any thread will
437  // but mask off threads without work
438  bool have_work = i < len;
439 
440  // Assign the x thread to the argument
441  data.template assign_offset<ArgumentId>(i);
442 
443  // execute enclosed statements
444  enclosed_stmts_t::exec(data, thread_active && have_work);
445  }
446  }
447 
448  static inline LaunchDims calculateDimensions(Data const& data)
449  {
450  diff_t len = segment_length<ArgumentId>(data);
451 
452  LaunchDims dims = DimensionCalculator::get_dimensions(len);
453 
454  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
455 
456  return combine(dims, enclosed_dims);
457  }
458 };
459 
460 /*
461  * Executor for thread work sharing loop inside HipKernel.
462  * Mapping directly from raw threadIdx.x
463  * Assigns the loop index to offset ArgumentId
464  */
465 template<typename Data,
466  camp::idx_t ArgumentId,
467  typename Mask,
468  typename... EnclosedStmts,
469  typename Types>
470 struct HipStatementExecutor<Data,
471  statement::For<ArgumentId,
472  RAJA::hip_thread_masked_direct<Mask>,
473  EnclosedStmts...>,
474  Types>
475 {
476 
477  using stmt_list_t = StatementList<EnclosedStmts...>;
478 
479  // Set the argument type for this loop
481 
483  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
484 
485  using mask_t = Mask;
486 
488 
489  using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
490  hip_thread_size_x_direct<mask_t::max_input_size>>;
491 
492  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
493  {
494  const diff_t len = segment_length<ArgumentId>(data);
495 
496  const diff_t i = mask_t::maskValue((diff_t)threadIdx.x);
497 
498  // assign thread id directly to offset
499  data.template assign_offset<ArgumentId>(i);
500 
501  // execute enclosed statements if in bounds
502  enclosed_stmts_t::exec(data, thread_active && (i < len));
503  }
504 
505  static inline LaunchDims calculateDimensions(Data const& data)
506  {
507  const diff_t len = segment_length<ArgumentId>(data);
508 
509  LaunchDims dims = DimensionCalculator::get_dimensions(len);
510 
511  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
512 
513  return combine(dims, enclosed_dims);
514  }
515 };
516 
517 /*
518  * Executor for thread work sharing loop inside HipKernel.
519  * Mapping directly from a warp lane
520  * Assigns the loop index to offset ArgumentId
521  */
522 template<typename Data,
523  camp::idx_t ArgumentId,
524  typename Mask,
525  typename... EnclosedStmts,
526  typename Types>
527 struct HipStatementExecutor<Data,
528  statement::For<ArgumentId,
529  RAJA::hip_thread_masked_loop<Mask>,
530  EnclosedStmts...>,
531  Types>
532 {
533 
534  using stmt_list_t = StatementList<EnclosedStmts...>;
535 
536  // Set the argument type for this loop
538 
540  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
541 
542  using mask_t = Mask;
543 
545 
546  using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
547  hip_thread_size_x_loop<mask_t::max_input_size>>;
548 
549  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
550  {
551  // masked size strided loop
552  const diff_t len = segment_length<ArgumentId>(data);
553  const diff_t i_init = mask_t::maskValue((diff_t)threadIdx.x);
554  const diff_t i_stride = (diff_t)mask_t::max_masked_size;
555 
556  // Iterate through grid stride of chunks
557  for (diff_t ii = 0; ii < len; ii += i_stride)
558  {
559  const diff_t i = ii + i_init;
560 
561  // execute enclosed statements if any thread will
562  // but mask off threads without work
563  bool have_work = i < len;
564 
565  // Assign the x thread to the argument
566  data.template assign_offset<ArgumentId>(i);
567 
568  // execute enclosed statements
569  enclosed_stmts_t::exec(data, thread_active && have_work);
570  }
571  }
572 
573  static inline LaunchDims calculateDimensions(Data const& data)
574  {
575  diff_t len = segment_length<ArgumentId>(data);
576 
577  LaunchDims dims = DimensionCalculator::get_dimensions(len);
578 
579  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
580 
581  return combine(dims, enclosed_dims);
582  }
583 };
584 
585 } // namespace internal
586 } // end namespace RAJA
587 
588 
589 #endif /* RAJA_policy_hip_kernel_For_HPP */
#define RAJA_DEVICE
Definition: macros.hpp:66
setSegmentType< Types, Segment, camp::at_v< typename camp::decay< Data >::index_types_t, Segment > > setSegmentTypeFromData
Definition: LoopTypes.hpp:95
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
typename std::iterator_traits< typename camp::at_v< typename Data::segment_tuple_t::TList, ArgumentId >::iterator >::difference_type segment_diff_type
Definition: LoopData.hpp:184
Definition: AlignedRangeIndexSetBuilders.cpp:35
kernel_sync_requirement
Definition: types.hpp:63
RAJA header file containing constructs used to run kernel traversals on GPU with HIP.
RAJA::internal::KernelDimensionCalculator< hip_thread_size_x_loop< mask_t::max_input_size > > DimensionCalculator
Definition: For.hpp:547
RAJA::internal::KernelDimensionCalculator< hip_thread_size_x_direct< mask_t::max_input_size > > DimensionCalculator
Definition: For.hpp:490
RAJA::internal::KernelDimensionCalculator< RAJA::policy::hip::hip_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper > > DimensionCalculator
Definition: For.hpp:268
RAJA::internal::KernelDimensionCalculator< RAJA::policy::hip::hip_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper > > DimensionCalculator
Definition: For.hpp:193
RAJA::internal::KernelDimensionCalculator< RAJA::policy::hip::hip_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper > > DimensionCalculator
Definition: For.hpp:68
RAJA::internal::KernelDimensionCalculator< RAJA::policy::hip::hip_indexer< iteration_mapping::Direct, sync, IndexMapper > > DimensionCalculator
Definition: For.hpp:127
Definition: types.hpp:209