RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
ForICount.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 
21 #ifndef RAJA_policy_sycl_kernel_ForICount_HPP
22 #define RAJA_policy_sycl_kernel_ForICount_HPP
23 
24 #include "RAJA/config.hpp"
25 
27 
28 namespace RAJA
29 {
30 
31 namespace internal
32 {
33 
34 
35 /*
36  * Executor for local work sharing loop inside SyclKernel.
37  * Mapping directly from local id to indices
38  * Assigns the loop iterate to offset ArgumentId
39  * Assigns the loop count to param ParamId
40  */
41 template<typename Data,
42  camp::idx_t ArgumentId,
43  typename ParamId,
44  int ThreadDim,
45  typename... EnclosedStmts,
46  typename Types>
47 struct SyclStatementExecutor<
48  Data,
49  statement::ForICount<ArgumentId,
50  ParamId,
51  RAJA::sycl_local_012_direct<ThreadDim>,
52  EnclosedStmts...>,
53  Types>
54  : public SyclStatementExecutor<
55  Data,
56  statement::For<ArgumentId,
57  RAJA::sycl_local_012_direct<ThreadDim>,
58  EnclosedStmts...>,
59  Types>
60 {
61 
62  using Base = SyclStatementExecutor<
63  Data,
64  statement::For<ArgumentId,
65  RAJA::sycl_local_012_direct<ThreadDim>,
66  EnclosedStmts...>,
67  Types>;
68 
69  using typename Base::diff_t;
70  using typename Base::enclosed_stmts_t;
71 
72  static inline RAJA_DEVICE void exec(Data& data,
73  ::sycl::nd_item<3> item,
74  bool thread_active)
75  {
76  diff_t len = segment_length<ArgumentId>(data);
77  auto i = item.get_local_id(ThreadDim);
78 
79  // assign thread id directly to offset
80  data.template assign_offset<ArgumentId>(i);
81  data.template assign_param<ParamId>(i);
82 
83  // execute enclosed statements if in bounds
84  enclosed_stmts_t::exec(data, item, thread_active && (i < len));
85  }
86 };
87 
88 /*
89  * Executor for local work sharing loop inside SyclKernel.
90  * Assigns the loop index to offset ArgumentId
91  */
92 template<typename Data,
93  camp::idx_t ArgumentId,
94  typename ParamId,
95  typename Mask,
96  typename... EnclosedStmts,
97  typename Types>
98 struct SyclStatementExecutor<
99  Data,
100  statement::ForICount<ArgumentId,
101  ParamId,
102  RAJA::sycl_local_masked_direct<Mask>,
103  EnclosedStmts...>,
104  Types>
105  : public SyclStatementExecutor<
106  Data,
107  statement::For<ArgumentId,
108  RAJA::sycl_local_masked_direct<Mask>,
109  EnclosedStmts...>,
110  Types>
111 {
112 
113  using Base =
114  SyclStatementExecutor<Data,
115  statement::For<ArgumentId,
116  RAJA::sycl_local_masked_direct<Mask>,
117  EnclosedStmts...>,
118  Types>;
119 
120  using typename Base::diff_t;
121 
122  using stmt_list_t = StatementList<EnclosedStmts...>;
123 
124  // Set the argument type for this loop
126 
128  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
129 
130  using mask_t = Mask;
131 
132  static inline RAJA_DEVICE void exec(Data& data,
133  ::sycl::nd_item<3> item,
134  bool thread_active)
135  {
136  diff_t len = segment_length<ArgumentId>(data);
137  auto i0 = item.get_local_id(0);
138  diff_t i = mask_t::maskValue(i0);
139 
140  // assign thread id directly to offset
141  data.template assign_offset<ArgumentId>(i);
142  data.template assign_param<ParamId>(i);
143 
144  // execute enclosed statements if in bounds
145  enclosed_stmts_t::exec(data, item, thread_active && (i < len));
146  }
147 };
148 
149 /*
150  * Executor for local work sharing loop inside SyclKernel.
151  * Assigns the loop index to offset ArgumentId
152  */
153 template<typename Data,
154  camp::idx_t ArgumentId,
155  typename ParamId,
156  typename Mask,
157  typename... EnclosedStmts,
158  typename Types>
159 struct SyclStatementExecutor<
160  Data,
161  statement::ForICount<ArgumentId,
162  ParamId,
163  RAJA::sycl_local_masked_loop<Mask>,
164  EnclosedStmts...>,
165  Types>
166  : public SyclStatementExecutor<
167  Data,
168  statement::For<ArgumentId,
169  RAJA::sycl_local_masked_loop<Mask>,
170  EnclosedStmts...>,
171  Types>
172 {
173 
174  using Base =
175  SyclStatementExecutor<Data,
176  statement::For<ArgumentId,
177  RAJA::sycl_local_masked_loop<Mask>,
178  EnclosedStmts...>,
179  Types>;
180 
181  using typename Base::diff_t;
182 
183  using stmt_list_t = StatementList<EnclosedStmts...>;
184 
185  // Set the argument type for this loop
187 
189  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
190 
191  using mask_t = Mask;
192 
193  static inline RAJA_DEVICE void exec(Data& data,
194  ::sycl::nd_item<3> item,
195  bool thread_active)
196  {
197  // masked size strided loop
198  diff_t len = segment_length<ArgumentId>(data);
199  auto i0 = item.get_local_id(0);
200  diff_t i_init = mask_t::maskValue(i0);
201  diff_t i_stride = (diff_t)mask_t::max_masked_size;
202 
203  // Iterate through grid stride of chunks
204  for (diff_t ii = 0; ii < len; ii += i_stride)
205  {
206  diff_t i = ii + i_init;
207 
208  // execute enclosed statements if any thread will
209  // but mask off threads without work
210  bool have_work = i < len;
211 
212  // Assign the x thread to the argument
213  data.template assign_offset<ArgumentId>(i);
214  data.template assign_param<ParamId>(i);
215 
216  // execute enclosed statements
217  enclosed_stmts_t::exec(data, item, thread_active && have_work);
218  }
219  }
220 };
221 
222 /*
223  * Executor for thread work sharing loop inside SyclKernel.
224  * Provides a block-stride loop (stride of blockDim.xyz) for
225  * each thread in xyz.
226  * Assigns the loop iterate to offset ArgumentId
227  * Assigns the loop offset to param ParamId
228  */
229 template<typename Data,
230  camp::idx_t ArgumentId,
231  typename ParamId,
232  int ThreadDim,
233  typename... EnclosedStmts,
234  typename Types>
235 struct SyclStatementExecutor<
236  Data,
237  statement::ForICount<ArgumentId,
238  ParamId,
239  RAJA::sycl_local_012_loop<ThreadDim>,
240  EnclosedStmts...>,
241  Types>
242  : public SyclStatementExecutor<
243  Data,
244  statement::For<ArgumentId,
245  RAJA::sycl_local_012_loop<ThreadDim>,
246  EnclosedStmts...>,
247  Types>
248 {
249 
250  using Base =
251  SyclStatementExecutor<Data,
252  statement::For<ArgumentId,
253  RAJA::sycl_local_012_loop<ThreadDim>,
254  EnclosedStmts...>,
255  Types>;
256 
257  using typename Base::diff_t;
258  using typename Base::enclosed_stmts_t;
259 
260  static inline RAJA_DEVICE void exec(Data& data,
261  ::sycl::nd_item<3> item,
262  bool thread_active)
263  {
264  // block stride loop
265  diff_t len = segment_length<ArgumentId>(data);
266  auto i_init = item.get_local_id(ThreadDim);
267  auto i_stride = item.get_local_range(ThreadDim);
268 
269  // Iterate through grid stride of chunks
270  for (diff_t ii = 0; ii < len; ii += i_stride)
271  {
272  diff_t i = ii + i_init;
273 
274  // execute enclosed statements if any thread will
275  // but mask off threads without work
276  bool have_work = i < len;
277 
278  // Assign the x thread to the argument
279  data.template assign_offset<ArgumentId>(i);
280  data.template assign_param<ParamId>(i);
281 
282  // execute enclosed statements
283  enclosed_stmts_t::exec(data, item, thread_active && have_work);
284  }
285  }
286 };
287 
288 /*
289  * Executor for group work sharing inside SyclKernel.
290  * Provides a direct mapping of each block in 012.
291  * Assigns the loop index to offset ArgumentId
292  * Assigns the loop index to param ParamId
293  */
294 template<typename Data,
295  camp::idx_t ArgumentId,
296  typename ParamId,
297  int BlockDim,
298  typename... EnclosedStmts,
299  typename Types>
300 struct SyclStatementExecutor<
301  Data,
302  statement::ForICount<ArgumentId,
303  ParamId,
304  RAJA::sycl_group_012_direct<BlockDim>,
305  EnclosedStmts...>,
306  Types>
307  : public SyclStatementExecutor<
308  Data,
309  statement::For<ArgumentId,
310  RAJA::sycl_group_012_direct<BlockDim>,
311  EnclosedStmts...>,
312  Types>
313 {
314 
315  using Base = SyclStatementExecutor<
316  Data,
317  statement::For<ArgumentId,
318  RAJA::sycl_group_012_direct<BlockDim>,
319  EnclosedStmts...>,
320  Types>;
321 
322  using typename Base::diff_t;
323  using typename Base::enclosed_stmts_t;
324 
325  static inline RAJA_DEVICE void exec(Data& data,
326  ::sycl::nd_item<3> item,
327  bool thread_active)
328  {
329  // grid stride loop
330  diff_t len = segment_length<ArgumentId>(data);
331  auto i = item.get_group(BlockDim);
332 
333  if (i < len)
334  {
335 
336  // Assign the x thread to the argument
337  data.template assign_offset<ArgumentId>(i);
338  data.template assign_param<ParamId>(i);
339 
340  // execute enclosed statements
341  enclosed_stmts_t::exec(data, item, thread_active);
342  }
343  }
344 };
345 
346 /*
347  * Executor for group work sharing inside SyclKernel.
348  * Provides a group-stride loop for
349  * each block in 012.
350  * Assigns the loop index to offset ArgumentId
351  * Assigns the loop index to param ParamId
352  */
353 template<typename Data,
354  camp::idx_t ArgumentId,
355  typename ParamId,
356  int BlockDim,
357  typename... EnclosedStmts,
358  typename Types>
359 struct SyclStatementExecutor<
360  Data,
361  statement::ForICount<ArgumentId,
362  ParamId,
363  RAJA::sycl_group_012_loop<BlockDim>,
364  EnclosedStmts...>,
365  Types>
366  : public SyclStatementExecutor<
367  Data,
368  statement::For<ArgumentId,
369  RAJA::sycl_group_012_loop<BlockDim>,
370  EnclosedStmts...>,
371  Types>
372 {
373 
374  using Base =
375  SyclStatementExecutor<Data,
376  statement::For<ArgumentId,
377  RAJA::sycl_group_012_loop<BlockDim>,
378  EnclosedStmts...>,
379  Types>;
380 
381  using typename Base::diff_t;
382  using typename Base::enclosed_stmts_t;
383 
384  static inline RAJA_DEVICE void exec(Data& data,
385  ::sycl::nd_item<3> item,
386  bool thread_active)
387  {
388  // grid stride loop
389  diff_t len = segment_length<ArgumentId>(data);
390  auto i_init = item.get_group(BlockDim);
391  auto i_stride = item.get_group_range(BlockDim);
392 
393  // Iterate through grid stride of chunks
394  for (diff_t i = i_init; i < len; i += i_stride)
395  {
396 
397  // Assign the x thread to the argument
398  data.template assign_offset<ArgumentId>(i);
399  data.template assign_param<ParamId>(i);
400 
401  // execute enclosed statements
402  enclosed_stmts_t::exec(data, item, thread_active);
403  }
404  }
405 };
406 
407 /*
408  * Executor for sequential loops inside of a SyclKernel.
409  *
410  * This is specialized since it need to execute the loop immediately.
411  * Assigns the loop index to offset ArgumentId
412  * Assigns the loop index to param ParamId
413  */
414 template<typename Data,
415  camp::idx_t ArgumentId,
416  typename ParamId,
417  typename... EnclosedStmts,
418  typename Types>
419 struct SyclStatementExecutor<
420  Data,
421  statement::ForICount<ArgumentId, ParamId, seq_exec, EnclosedStmts...>,
422  Types>
423  : public SyclStatementExecutor<
424  Data,
425  statement::For<ArgumentId, seq_exec, EnclosedStmts...>,
426  Types>
427 {
428 
429  using Base = SyclStatementExecutor<
430  Data,
431  statement::For<ArgumentId, seq_exec, EnclosedStmts...>,
432  Types>;
433 
434  using typename Base::diff_t;
435  using typename Base::enclosed_stmts_t;
436 
437  static inline RAJA_DEVICE void exec(Data& data,
438  ::sycl::nd_item<3> item,
439  bool thread_active)
440  {
441  diff_t len = segment_length<ArgumentId>(data);
442 
443  for (diff_t i = 0; i < len; ++i)
444  {
445  // Assign i to the argument
446  data.template assign_offset<ArgumentId>(i);
447  data.template assign_param<ParamId>(i);
448 
449  // execute enclosed statements
450  enclosed_stmts_t::exec(data, item, thread_active);
451  }
452  }
453 };
454 
455 
456 } // namespace internal
457 } // end namespace RAJA
458 
459 
460 #endif /* RAJA_policy_sycl_kernel_ForICount_HPP */
#define RAJA_DEVICE
Definition: macros.hpp:66
setSegmentType< Types, Segment, camp::at_v< typename camp::decay< Data >::index_types_t, Segment > > setSegmentTypeFromData
Definition: LoopTypes.hpp:95
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA header file containing constructs used to run kernel traversals on GPU with SYCL.
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: ForICount.hpp:72
SyclStatementExecutor< Data, statement::For< ArgumentId, RAJA::sycl_local_012_direct< ThreadDim >, EnclosedStmts... >, Types > Base
Definition: ForICount.hpp:67
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: ForICount.hpp:325
SyclStatementExecutor< Data, statement::For< ArgumentId, RAJA::sycl_group_012_direct< BlockDim >, EnclosedStmts... >, Types > Base
Definition: ForICount.hpp:320
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: ForICount.hpp:260
SyclStatementExecutor< Data, statement::For< ArgumentId, RAJA::sycl_local_012_loop< ThreadDim >, EnclosedStmts... >, Types > Base
Definition: ForICount.hpp:255
SyclStatementExecutor< Data, statement::For< ArgumentId, RAJA::sycl_local_masked_loop< Mask >, EnclosedStmts... >, Types > Base
Definition: ForICount.hpp:179
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: ForICount.hpp:193
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: ForICount.hpp:384
SyclStatementExecutor< Data, statement::For< ArgumentId, RAJA::sycl_group_012_loop< BlockDim >, EnclosedStmts... >, Types > Base
Definition: ForICount.hpp:379
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: ForICount.hpp:132
SyclStatementExecutor< Data, statement::For< ArgumentId, RAJA::sycl_local_masked_direct< Mask >, EnclosedStmts... >, Types > Base
Definition: ForICount.hpp:118
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: ForICount.hpp:437
SyclStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:471
Definition: policy.hpp:78
Definition: For.hpp:49