RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
For.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_policy_sycl_kernel_For_HPP
21 #define RAJA_policy_sycl_kernel_For_HPP
22 
23 #include "RAJA/config.hpp"
24 
26 
27 namespace RAJA
28 {
29 
30 namespace internal
31 {
32 
33 // SyclStatementExecutors
34 //
35 
36 /*
37  * Executor for local work sharing inside SyclKernel.
38  * Mapping directly to indicies
39  * Assigns the global index to offset ArgumentId
40  */
41 template<typename Data,
42  camp::idx_t ArgumentId,
43  int Dim,
44  int Local_Size,
45  typename... EnclosedStmts,
46  typename Types>
47 struct SyclStatementExecutor<
48  Data,
49  statement::For<ArgumentId,
50  RAJA::sycl_global_012<Dim, Local_Size>,
51  EnclosedStmts...>,
52  Types>
53 {
54 
55  using stmt_list_t = StatementList<EnclosedStmts...>;
56 
57  // Set the argument type for this loop
59 
61  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
62 
64 
65  static inline RAJA_DEVICE void exec(Data& data,
66  ::sycl::nd_item<3> item,
67  bool thread_active)
68  {
69  auto len = segment_length<ArgumentId>(data);
70  auto i = item.get_global_id(Dim);
71 
72  // Assign the x thread to the argument
73  data.template assign_offset<ArgumentId>(i);
74 
75  // execute enclosed statements
76  enclosed_stmts_t::exec(data, item, thread_active && (i < len));
77  }
78 
79  static inline LaunchDims calculateDimensions(Data const& data)
80  {
81  auto len = segment_length<ArgumentId>(data);
82 
83  // Set Global Space for Dimension and Local Size
84  LaunchDims dims;
85  if (Dim == 0)
86  {
87  dims.global.x = len;
88  dims.local.x = Local_Size;
89  }
90  if (Dim == 1)
91  {
92  dims.global.y = len;
93  dims.local.y = Local_Size;
94  }
95  if (Dim == 2)
96  {
97  dims.global.z = len;
98  dims.local.z = Local_Size;
99  }
100 
101  // combine with enclosed statements
102  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
103  return dims.max(enclosed_dims);
104  }
105 };
106 
107 /*
108  * Executor for group work sharing inside SyclKernel.
109  * Mapping directly to indicies
110  * Assigns the loop index to offset ArgumentId
111  */
112 template<typename Data,
113  camp::idx_t ArgumentId,
114  int Dim,
115  typename... EnclosedStmts,
116  typename Types>
117 struct SyclStatementExecutor<Data,
118  statement::For<ArgumentId,
119  RAJA::sycl_group_012_direct<Dim>,
120  EnclosedStmts...>,
121  Types>
122 {
123 
124  using stmt_list_t = StatementList<EnclosedStmts...>;
125 
126  // Set the argument type for this loop
128 
130  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
131 
133 
134  static inline RAJA_DEVICE void exec(Data& data,
135  ::sycl::nd_item<3> item,
136  bool thread_active)
137  {
138  auto len = segment_length<ArgumentId>(data);
139  auto i = item.get_group(Dim);
140 
141  // Assign the x thread to the argument
142  data.template assign_offset<ArgumentId>(i);
143 
144  // execute enclosed statements
145  enclosed_stmts_t::exec(data, item, thread_active && (i < len));
146  }
147 
148  static inline LaunchDims calculateDimensions(Data const& data)
149  {
150  auto len = segment_length<ArgumentId>(data);
151 
152  // request one block per element in the segment
153  LaunchDims dims;
154  if (Dim == 0)
155  {
156  dims.group.x = len;
157  }
158  if (Dim == 1)
159  {
160  dims.group.y = len;
161  }
162  if (Dim == 2)
163  {
164  dims.group.z = len;
165  }
166 
167  // combine with enclosed statements
168  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
169  return dims.max(enclosed_dims);
170  }
171 };
172 
173 /*
174  * Executor for work group sharing inside SyclKernel.
175  * Provides a group-stride loop (stride of grid range) for
176  * each group in dims.
177  * Assigns the loop index to offset ArgumentId
178  */
179 template<typename Data,
180  camp::idx_t ArgumentId,
181  int Dim,
182  typename... EnclosedStmts,
183  typename Types>
184 struct SyclStatementExecutor<Data,
185  statement::For<ArgumentId,
186  RAJA::sycl_group_012_loop<Dim>,
187  EnclosedStmts...>,
188  Types>
189 {
190 
191  using stmt_list_t = StatementList<EnclosedStmts...>;
192 
193  // Set the argument type for this loop
195 
197  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
198 
200 
201  static inline RAJA_DEVICE void exec(Data& data,
202  ::sycl::nd_item<3> item,
203  bool thread_active)
204  {
205  auto len = segment_length<ArgumentId>(data);
206  auto i0 = item.get_group(Dim);
207  auto i_stride = item.get_group_range(Dim);
208 
209  for (auto i = i0; i < len; i += i_stride)
210  {
211 
212  // Assign the x thread to the argument
213  data.template assign_offset<ArgumentId>(i);
214 
215  // execute enclosed statements
216  enclosed_stmts_t::exec(data, item, thread_active);
217  }
218  }
219 
220  static inline LaunchDims calculateDimensions(Data const& data)
221  {
222  auto len = segment_length<ArgumentId>(data);
223 
224  // request one block per element in the segment
225  LaunchDims dims;
226  if (Dim == 0)
227  {
228  dims.group.x = len;
229  }
230  if (Dim == 1)
231  {
232  dims.group.y = len;
233  }
234  if (Dim == 2)
235  {
236  dims.group.z = len;
237  }
238 
239  // combine with enclosed statements
240  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
241  return dims.max(enclosed_dims);
242  }
243 };
244 
245 /*
246  * Executor for block work sharing inside SyclKernel.
247  * Mapping directly to indicies
248  * Assigns the loop index to offset ArgumentId
249  */
250 template<typename Data,
251  camp::idx_t ArgumentId,
252  int Dim,
253  typename... EnclosedStmts,
254  typename Types>
255 struct SyclStatementExecutor<Data,
256  statement::For<ArgumentId,
257  RAJA::sycl_local_012_direct<Dim>,
258  EnclosedStmts...>,
259  Types>
260 {
261 
262  using stmt_list_t = StatementList<EnclosedStmts...>;
263 
264  // Set the argument type for this loop
266 
268  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
269 
271 
272  static inline RAJA_DEVICE void exec(Data& data,
273  ::sycl::nd_item<3> item,
274  bool thread_active)
275  {
276  auto len = segment_length<ArgumentId>(data);
277  auto i = item.get_local_id(Dim);
278 
279  // assign thread id directly to offset
280  data.template assign_offset<ArgumentId>(i);
281 
282  // execute enclosed statements if in bounds
283  enclosed_stmts_t::exec(data, item, thread_active && (i < len));
284  }
285 
286  static inline LaunchDims calculateDimensions(Data const& data)
287  {
288  auto len = segment_length<ArgumentId>(data);
289 
290  // request one block per element in the segment
291  LaunchDims dims;
292  if (Dim == 0)
293  {
294  dims.local.x = len;
295  }
296  if (Dim == 1)
297  {
298  dims.local.y = len;
299  }
300  if (Dim == 2)
301  {
302  dims.local.z = len;
303  }
304 
305  // combine with enclosed statements
306  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
307  return dims.max(enclosed_dims);
308  }
309 };
310 
311 /*
312  * Executor for local item sharing loop inside SyclKernel.
313  * Provides a local-stride loop (stride of work item local range)
314  * for each item in dim.
315  * Assigns the loop index to offset ArgumentId
316  */
317 template<typename Data,
318  camp::idx_t ArgumentId,
319  int Dim,
320  typename... EnclosedStmts,
321  typename Types>
322 struct SyclStatementExecutor<Data,
323  statement::For<ArgumentId,
324  RAJA::sycl_local_012_loop<Dim>,
325  EnclosedStmts...>,
326  Types>
327 {
328 
329  using stmt_list_t = StatementList<EnclosedStmts...>;
330 
331  // Set the argument type for this loop
333 
335  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
336 
338 
339  static inline RAJA_DEVICE void exec(Data& data,
340  ::sycl::nd_item<3> item,
341  bool thread_active)
342  {
343  auto len = segment_length<ArgumentId>(data);
344  auto i0 = item.get_local_id(Dim);
345  auto i_stride = item.get_local_range(Dim);
346  auto i = i0;
347 
348  for (; i < len; i += i_stride)
349  {
350 
351  // Assign the x thread to the argument
352  data.template assign_offset<ArgumentId>(i);
353 
354  // execute enclosed statements
355  enclosed_stmts_t::exec(data, item, thread_active);
356  }
357  // do we need one more masked iteration?
358  if (i - i0 < len)
359  {
360  // execute enclosed statements one more time, but masking them off
361  // this is because there's at least one thread that isn't masked off
362  // that is still executing the above loop
363  enclosed_stmts_t::exec(data, item, false);
364  }
365  }
366 
367  static inline LaunchDims calculateDimensions(Data const& data)
368  {
369  auto len = segment_length<ArgumentId>(data);
370 
371  // request one block per element in the segment
372  LaunchDims dims;
373  if (Dim == 0)
374  {
375  dims.local.x = len;
376  }
377  if (Dim == 1)
378  {
379  dims.local.y = len;
380  }
381  if (Dim == 2)
382  {
383  dims.local.z = len;
384  }
385 
386  // combine with enclosed statements
387  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
388  return dims.max(enclosed_dims);
389  }
390 };
391 
392 /*
393  * Executor for block work sharing inside SyclKernel.
394  * Mapping directly to indicies
395  * Assigns the loop index to offset ArgumentId
396  */
397 template<typename Data,
398  camp::idx_t ArgumentId,
399  int Local_Size,
400  typename... EnclosedStmts,
401  typename Types>
402 struct SyclStatementExecutor<
403  Data,
404  statement::For<ArgumentId, RAJA::sycl_exec<Local_Size>, EnclosedStmts...>,
405  Types>
406 {
407 
408  using stmt_list_t = StatementList<EnclosedStmts...>;
409 
410  // Set the argument type for this loop
412 
414  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
415 
417 
418  static inline RAJA_DEVICE void exec(Data& data, ::sycl::nd_item<3> item)
419  {
420  auto len = segment_length<ArgumentId>(data);
421  auto i = item.get_global_id(0);
422 
423  if (i < len)
424  {
425 
426  // Assign the x thread to the argument
427  data.template assign_offset<ArgumentId>(i);
428 
429  // execute enclosed statements
430  enclosed_stmts_t::exec(data, item);
431  }
432  }
433 
434  static inline LaunchDims calculateDimensions(Data const& data)
435  {
436  auto len = segment_length<ArgumentId>(data);
437 
438  // request one block per element in the segment
439  LaunchDims dims;
440  dims.local.x = Local_Size;
441  dims.global.x = len;
442 
443  // combine with enclosed statements
444  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
445  return dims.max(enclosed_dims);
446  }
447 };
448 
449 /*
450  * Executor for sequential loops inside of a SyclKernel.
451  *
452  * This is specialized since it need to execute the loop immediately.
453  * Assigns the loop index to offset ArgumentId
454  */
455 template<typename Data,
456  camp::idx_t ArgumentId,
457  typename... EnclosedStmts,
458  typename Types>
459 struct SyclStatementExecutor<
460  Data,
461  statement::For<ArgumentId, seq_exec, EnclosedStmts...>,
462  Types>
463 {
464 
465  using stmt_list_t = StatementList<EnclosedStmts...>;
466 
467  // Set the argument type for this loop
469 
471  SyclStatementListExecutor<Data, stmt_list_t, NewTypes>;
472 
474 
475  static inline RAJA_DEVICE void exec(Data& data,
476  ::sycl::nd_item<3> item,
477  bool thread_active)
478  {
479 
480  using idx_type =
481  camp::decay<decltype(camp::get<ArgumentId>(data.offset_tuple))>;
482 
483  idx_type len = segment_length<ArgumentId>(data);
484 
485  for (idx_type i = 0; i < len; ++i)
486  {
487  // Assign i to the argument
488  data.template assign_offset<ArgumentId>(i);
489 
490  // execute enclosed statements
491  enclosed_stmts_t::exec(data, item, thread_active);
492  }
493  }
494 
495  static inline LaunchDims calculateDimensions(Data const& data)
496  {
497  return enclosed_stmts_t::calculateDimensions(data);
498  }
499 };
500 
501 
502 } // namespace internal
503 } // end namespace RAJA
504 
505 
506 #endif
#define RAJA_DEVICE
Definition: macros.hpp:66
setSegmentType< Types, Segment, camp::at_v< typename camp::decay< Data >::index_types_t, Segment > > setSegmentTypeFromData
Definition: LoopTypes.hpp:95
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
typename std::iterator_traits< typename camp::at_v< typename Data::segment_tuple_t::TList, ArgumentId >::iterator >::difference_type segment_diff_type
Definition: LoopData.hpp:184
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA header file containing constructs used to run kernel traversals on GPU with SYCL.
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: For.hpp:339
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: For.hpp:65
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: For.hpp:134
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: For.hpp:272
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: For.hpp:475
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:468
SyclStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:471
static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item< 3 > item, bool thread_active)
Definition: For.hpp:201