RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
ForICount.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 
21 #ifndef RAJA_policy_hip_kernel_ForICount_HPP
22 #define RAJA_policy_hip_kernel_ForICount_HPP
23 
24 #include "RAJA/config.hpp"
25 
27 
28 namespace RAJA
29 {
30 
31 namespace internal
32 {
33 
34 /*
35  * Executor for work sharing inside HipKernel.
36  * Provides a direct unchecked mapping.
37  * Assigns the loop index to offset ArgumentId
38  * Assigns the loop index to param ParamId
39  * Meets all sync requirements
40  */
41 template<typename Data,
42  camp::idx_t ArgumentId,
43  typename ParamId,
44  typename IndexMapper,
46  typename... EnclosedStmts,
47  typename Types>
48 struct HipStatementExecutor<
49  Data,
50  statement::ForICount<
51  ArgumentId,
52  ParamId,
53  RAJA::policy::hip::
54  hip_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>,
55  EnclosedStmts...>,
56  Types>
57  : HipStatementExecutor<
58  Data,
59  statement::For<
60  ArgumentId,
61  RAJA::policy::hip::hip_indexer<iteration_mapping::DirectUnchecked,
62  sync,
63  IndexMapper>,
64  EnclosedStmts...>,
65  Types>
66 {
67 
68  using Base = HipStatementExecutor<
69  Data,
71  ArgumentId,
72  RAJA::policy::hip::hip_indexer<iteration_mapping::DirectUnchecked,
73  sync,
74  IndexMapper>,
75  EnclosedStmts...>,
76  Types>;
77 
78  using typename Base::diff_t;
79  using typename Base::enclosed_stmts_t;
80 
81  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
82  {
83  // grid stride loop
84  const diff_t i = IndexMapper::template index<diff_t>();
85 
86  // Assign the index to the argument and param
87  data.template assign_offset<ArgumentId>(i);
88  data.template assign_param<ParamId>(i);
89 
90  // execute enclosed statements
91  enclosed_stmts_t::exec(data, thread_active);
92  }
93 };
94 
95 /*
96  * Executor for work sharing inside HipKernel.
97  * Provides a direct mapping.
98  * Assigns the loop index to offset ArgumentId
99  * Assigns the loop index to param ParamId
100  * Meets all sync requirements
101  */
102 template<typename Data,
103  camp::idx_t ArgumentId,
104  typename ParamId,
105  typename IndexMapper,
107  typename... EnclosedStmts,
108  typename Types>
109 struct HipStatementExecutor<
110  Data,
111  statement::ForICount<
112  ArgumentId,
113  ParamId,
114  RAJA::policy::hip::
115  hip_indexer<iteration_mapping::Direct, sync, IndexMapper>,
116  EnclosedStmts...>,
117  Types>
118  : HipStatementExecutor<
119  Data,
120  statement::For<
121  ArgumentId,
122  RAJA::policy::hip::
123  hip_indexer<iteration_mapping::Direct, sync, IndexMapper>,
124  EnclosedStmts...>,
125  Types>
126 {
127 
128  using Base = HipStatementExecutor<
129  Data,
130  statement::For<ArgumentId,
131  RAJA::policy::hip::hip_indexer<iteration_mapping::Direct,
132  sync,
133  IndexMapper>,
134  EnclosedStmts...>,
135  Types>;
136 
137  using typename Base::diff_t;
138  using typename Base::enclosed_stmts_t;
139 
140  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
141  {
142  // grid stride loop
143  const diff_t len = segment_length<ArgumentId>(data);
144  const diff_t i = IndexMapper::template index<diff_t>();
145 
146  // execute enclosed statements if any thread will
147  // but mask off threads without work
148  const bool have_work = (i < len);
149 
150  // Assign the index to the argument and param
151  data.template assign_offset<ArgumentId>(i);
152  data.template assign_param<ParamId>(i);
153 
154  // execute enclosed statements
155  enclosed_stmts_t::exec(data, thread_active && have_work);
156  }
157 };
158 
159 /*
160  * Executor for work sharing inside HipKernel.
161  * Provides a strided loop.
162  * Assigns the loop index to offset ArgumentId
163  * Assigns the loop index to param ParamId
164  * Meets all sync requirements
165  */
166 template<typename Data,
167  camp::idx_t ArgumentId,
168  typename ParamId,
169  typename IndexMapper,
170  typename... EnclosedStmts,
171  typename Types>
172 struct HipStatementExecutor<
173  Data,
174  statement::ForICount<
175  ArgumentId,
176  ParamId,
177  RAJA::policy::hip::hip_indexer<
178  iteration_mapping::StridedLoop<named_usage::unspecified>,
179  kernel_sync_requirement::sync,
180  IndexMapper>,
181  EnclosedStmts...>,
182  Types>
183  : public HipStatementExecutor<
184  Data,
185  statement::For<
186  ArgumentId,
187  RAJA::policy::hip::hip_indexer<
188  iteration_mapping::StridedLoop<named_usage::unspecified>,
189  kernel_sync_requirement::sync,
190  IndexMapper>,
191  EnclosedStmts...>,
192  Types>
193 {
194 
195  using Base = HipStatementExecutor<
196  Data,
198  ArgumentId,
199  RAJA::policy::hip::hip_indexer<
202  IndexMapper>,
203  EnclosedStmts...>,
204  Types>;
205 
206  using typename Base::diff_t;
207  using typename Base::enclosed_stmts_t;
208 
209  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
210  {
211  // grid stride loop
212  const diff_t len = segment_length<ArgumentId>(data);
213  const diff_t i_init = IndexMapper::template index<diff_t>();
214  const diff_t i_stride = IndexMapper::template size<diff_t>();
215 
216  // Iterate through in chunks
217  // threads will have the same numbers of iterations
218  for (diff_t ii = 0; ii < len; ii += i_stride)
219  {
220  const diff_t i = ii + i_init;
221 
222  // execute enclosed statements if any thread will
223  // but mask off threads without work
224  const bool have_work = (i < len);
225 
226  // Assign the index to the argument and param
227  data.template assign_offset<ArgumentId>(i);
228  data.template assign_param<ParamId>(i);
229 
230  // execute enclosed statements
231  enclosed_stmts_t::exec(data, thread_active && have_work);
232  }
233  }
234 };
235 
236 /*
237  * Executor for work sharing inside HipKernel.
238  * Provides a strided loop.
239  * Assigns the loop index to offset ArgumentId
240  * Assigns the loop index to param ParamId
241  * Meets no sync requirements
242  */
243 template<typename Data,
244  camp::idx_t ArgumentId,
245  typename ParamId,
246  typename IndexMapper,
247  typename... EnclosedStmts,
248  typename Types>
249 struct HipStatementExecutor<
250  Data,
251  statement::ForICount<
252  ArgumentId,
253  ParamId,
254  RAJA::policy::hip::hip_indexer<
255  iteration_mapping::StridedLoop<named_usage::unspecified>,
256  kernel_sync_requirement::none,
257  IndexMapper>,
258  EnclosedStmts...>,
259  Types>
260  : public HipStatementExecutor<
261  Data,
262  statement::For<
263  ArgumentId,
264  RAJA::policy::hip::hip_indexer<
265  iteration_mapping::StridedLoop<named_usage::unspecified>,
266  kernel_sync_requirement::none,
267  IndexMapper>,
268  EnclosedStmts...>,
269  Types>
270 {
271 
272  using Base = HipStatementExecutor<
273  Data,
275  ArgumentId,
276  RAJA::policy::hip::hip_indexer<
279  IndexMapper>,
280  EnclosedStmts...>,
281  Types>;
282 
283  using typename Base::diff_t;
284  using typename Base::enclosed_stmts_t;
285 
286  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
287  {
288  // grid stride loop
289  const diff_t len = segment_length<ArgumentId>(data);
290  const diff_t i_init = IndexMapper::template index<diff_t>();
291  const diff_t i_stride = IndexMapper::template size<diff_t>();
292 
293  // Iterate through one at a time
294  // threads will have the different numbers of iterations
295  for (diff_t i = i_init; i < len; i += i_stride)
296  {
297 
298  // Assign the index to the argument and param
299  data.template assign_offset<ArgumentId>(i);
300  data.template assign_param<ParamId>(i);
301 
302  // execute enclosed statements
303  enclosed_stmts_t::exec(data, thread_active);
304  }
305  }
306 };
307 
308 /*
309  * Executor for sequential loops inside of a HipKernel.
310  *
311  * This is specialized since it need to execute the loop immediately.
312  * Assigns the loop index to offset ArgumentId
313  * Assigns the loop index to param ParamId
314  */
315 template<typename Data,
316  camp::idx_t ArgumentId,
317  typename ParamId,
318  typename... EnclosedStmts,
319  typename Types>
320 struct HipStatementExecutor<
321  Data,
322  statement::ForICount<ArgumentId, ParamId, seq_exec, EnclosedStmts...>,
323  Types>
324  : HipStatementExecutor<
325  Data,
326  statement::ForICount<
327  ArgumentId,
328  RAJA::policy::hip::hip_indexer<
329  iteration_mapping::StridedLoop<named_usage::unspecified>,
330  kernel_sync_requirement::none,
331  hip::IndexGlobal<named_dim::x,
332  named_usage::ignored,
333  named_usage::ignored>>,
334  EnclosedStmts...>,
335  Types>
336 {};
337 
338 /*
339  * Executor for thread work sharing loop inside HipKernel.
340  * Mapping directly from a warp lane
341  * Assigns the loop index to offset ArgumentId
342  */
343 template<typename Data,
344  camp::idx_t ArgumentId,
345  typename ParamId,
346  typename Mask,
347  typename... EnclosedStmts,
348  typename Types>
349 struct HipStatementExecutor<
350  Data,
351  statement::ForICount<ArgumentId,
352  ParamId,
353  RAJA::hip_warp_masked_direct<Mask>,
354  EnclosedStmts...>,
355  Types>
356  : public HipStatementExecutor<
357  Data,
358  statement::For<ArgumentId,
359  RAJA::hip_warp_masked_direct<Mask>,
360  EnclosedStmts...>,
361  Types>
362 {
363 
364  using Base =
365  HipStatementExecutor<Data,
366  statement::For<ArgumentId,
367  RAJA::hip_warp_masked_direct<Mask>,
368  EnclosedStmts...>,
369  Types>;
370 
371  using typename Base::diff_t;
372 
373  using stmt_list_t = StatementList<EnclosedStmts...>;
374 
375  // Set the argument type for this loop
377 
379  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
380 
381  using mask_t = Mask;
382 
383  static_assert(mask_t::max_masked_size <=
384  RAJA::policy::hip::device_constants.WARP_SIZE,
385  "BitMask is too large for HIP warp size");
386 
387  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
388  {
389  const diff_t len = segment_length<ArgumentId>(data);
390 
391  const diff_t i = mask_t::maskValue((diff_t)threadIdx.x);
392 
393  // assign thread id directly to offset
394  data.template assign_offset<ArgumentId>(i);
395  data.template assign_param<ParamId>(i);
396 
397  // execute enclosed statements if in bounds
398  enclosed_stmts_t::exec(data, thread_active && (i < len));
399  }
400 };
401 
402 /*
403  * Executor for thread work sharing loop inside HipKernel.
404  * Mapping directly from a warp lane
405  * Assigns the loop index to offset ArgumentId
406  */
407 template<typename Data,
408  camp::idx_t ArgumentId,
409  typename ParamId,
410  typename Mask,
411  typename... EnclosedStmts,
412  typename Types>
413 struct HipStatementExecutor<
414  Data,
415  statement::ForICount<ArgumentId,
416  ParamId,
417  RAJA::hip_warp_masked_loop<Mask>,
418  EnclosedStmts...>,
419  Types>
420  : public HipStatementExecutor<
421  Data,
422  statement::For<ArgumentId,
423  RAJA::hip_warp_masked_loop<Mask>,
424  EnclosedStmts...>,
425  Types>
426 {
427 
428  using Base =
429  HipStatementExecutor<Data,
430  statement::For<ArgumentId,
431  RAJA::hip_warp_masked_loop<Mask>,
432  EnclosedStmts...>,
433  Types>;
434 
435  using typename Base::diff_t;
436 
437  using stmt_list_t = StatementList<EnclosedStmts...>;
438 
439  // Set the argument type for this loop
441 
443  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
444 
445  using mask_t = Mask;
446 
447  static_assert(mask_t::max_masked_size <=
448  RAJA::policy::hip::device_constants.WARP_SIZE,
449  "BitMask is too large for HIP warp size");
450 
451  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
452  {
453  // masked size strided loop
454  const diff_t len = segment_length<ArgumentId>(data);
455  const diff_t i_init = mask_t::maskValue((diff_t)threadIdx.x);
456  const diff_t i_stride = (diff_t)mask_t::max_masked_size;
457 
458  // Iterate through grid stride of chunks
459  for (diff_t ii = 0; ii < len; ii += i_stride)
460  {
461  const diff_t i = ii + i_init;
462 
463  // execute enclosed statements if any thread will
464  // but mask off threads without work
465  bool have_work = i < len;
466 
467  // Assign the x thread to the argument and param
468  data.template assign_offset<ArgumentId>(i);
469  data.template assign_param<ParamId>(i);
470 
471  // execute enclosed statements
472  enclosed_stmts_t::exec(data, thread_active && have_work);
473  }
474  }
475 };
476 
477 /*
478  * Executor for thread work sharing loop inside HipKernel.
479  * Mapping directly from a warp lane
480  * Assigns the loop index to offset ArgumentId
481  */
482 template<typename Data,
483  camp::idx_t ArgumentId,
484  typename ParamId,
485  typename Mask,
486  typename... EnclosedStmts,
487  typename Types>
488 struct HipStatementExecutor<
489  Data,
490  statement::ForICount<ArgumentId,
491  ParamId,
492  RAJA::hip_thread_masked_direct<Mask>,
493  EnclosedStmts...>,
494  Types>
495  : public HipStatementExecutor<
496  Data,
497  statement::For<ArgumentId,
498  RAJA::hip_thread_masked_direct<Mask>,
499  EnclosedStmts...>,
500  Types>
501 {
502 
503  using Base =
504  HipStatementExecutor<Data,
505  statement::For<ArgumentId,
506  RAJA::hip_thread_masked_direct<Mask>,
507  EnclosedStmts...>,
508  Types>;
509 
510  using typename Base::diff_t;
511 
512  using stmt_list_t = StatementList<EnclosedStmts...>;
513 
514  // Set the argument type for this loop
516 
518  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
519 
520  using mask_t = Mask;
521 
522  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
523  {
524  const diff_t len = segment_length<ArgumentId>(data);
525 
526  const diff_t i = mask_t::maskValue((diff_t)threadIdx.x);
527 
528  // assign thread id directly to offset
529  data.template assign_offset<ArgumentId>(i);
530  data.template assign_param<ParamId>(i);
531 
532  // execute enclosed statements if in bounds
533  enclosed_stmts_t::exec(data, thread_active && (i < len));
534  }
535 };
536 
537 /*
538  * Executor for thread work sharing loop inside HipKernel.
539  * Mapping directly from a warp lane
540  * Assigns the loop index to offset ArgumentId
541  */
542 template<typename Data,
543  camp::idx_t ArgumentId,
544  typename ParamId,
545  typename Mask,
546  typename... EnclosedStmts,
547  typename Types>
548 struct HipStatementExecutor<
549  Data,
550  statement::ForICount<ArgumentId,
551  ParamId,
552  RAJA::hip_thread_masked_loop<Mask>,
553  EnclosedStmts...>,
554  Types>
555  : public HipStatementExecutor<
556  Data,
557  statement::For<ArgumentId,
558  RAJA::hip_thread_masked_loop<Mask>,
559  EnclosedStmts...>,
560  Types>
561 {
562 
563  using Base =
564  HipStatementExecutor<Data,
565  statement::For<ArgumentId,
566  RAJA::hip_thread_masked_loop<Mask>,
567  EnclosedStmts...>,
568  Types>;
569 
570  using typename Base::diff_t;
571 
572  using stmt_list_t = StatementList<EnclosedStmts...>;
573 
574  // Set the argument type for this loop
576 
578  HipStatementListExecutor<Data, stmt_list_t, NewTypes>;
579 
580  using mask_t = Mask;
581 
582  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
583  {
584  // masked size strided loop
585  const diff_t len = segment_length<ArgumentId>(data);
586  const diff_t i_init = mask_t::maskValue((diff_t)threadIdx.x);
587  const diff_t i_stride = (diff_t)mask_t::max_masked_size;
588 
589  // Iterate through grid stride of chunks
590  for (diff_t ii = 0; ii < len; ii += i_stride)
591  {
592  const diff_t i = ii + i_init;
593 
594  // execute enclosed statements if any thread will
595  // but mask off threads without work
596  bool have_work = i < len;
597 
598  // Assign the x thread to the argument
599  data.template assign_offset<ArgumentId>(i);
600  data.template assign_param<ParamId>(i);
601 
602  // execute enclosed statements
603  enclosed_stmts_t::exec(data, thread_active && have_work);
604  }
605  }
606 };
607 
608 } // namespace internal
609 } // end namespace RAJA
610 
611 
612 #endif /* RAJA_policy_hip_kernel_ForICount_HPP */
#define RAJA_DEVICE
Definition: macros.hpp:66
setSegmentType< Types, Segment, camp::at_v< typename camp::decay< Data >::index_types_t, Segment > > setSegmentTypeFromData
Definition: LoopTypes.hpp:95
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
Definition: AlignedRangeIndexSetBuilders.cpp:35
kernel_sync_requirement
Definition: types.hpp:63
RAJA header file containing constructs used to run kernel traversals on GPU with HIP.
Definition: types.hpp:143
Definition: types.hpp:209
Definition: For.hpp:49