RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
Tile.hpp
Go to the documentation of this file.
1 
12 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
13 // Copyright (c) Lawrence Livermore National Security, LLC and other
14 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
15 // files for dates and other details. No copyright assignment is required
16 // to contribute to RAJA.
17 //
18 // SPDX-License-Identifier: (BSD-3-Clause)
19 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
20 
21 
22 #ifndef RAJA_policy_sycl_kernel_Tile_HPP
23 #define RAJA_policy_sycl_kernel_Tile_HPP
24 
25 #include "RAJA/config.hpp"
26 
27 #if defined(RAJA_ENABLE_SYCL)
28 
29 #include <iostream>
30 #include <type_traits>
31 
32 #include "camp/camp.hpp"
33 #include "camp/concepts.hpp"
34 #include "camp/tuple.hpp"
35 
36 #include "RAJA/util/macros.hpp"
37 #include "RAJA/util/types.hpp"
38 
41 
42 namespace RAJA
43 {
44 namespace internal
45 {
46 
52 template<typename Data,
53  camp::idx_t ArgumentId,
54  typename TPol,
55  typename... EnclosedStmts,
56  typename Types>
57 struct SyclStatementExecutor<
58  Data,
59  statement::Tile<ArgumentId, TPol, seq_exec, EnclosedStmts...>,
60  Types>
61 {
62 
63  using stmt_list_t = StatementList<EnclosedStmts...>;
64  using enclosed_stmts_t = SyclStatementListExecutor<Data, stmt_list_t, Types>;
65  using diff_t = segment_diff_type<ArgumentId, Data>;
66 
67  static inline RAJA_DEVICE void exec(Data& data,
68  ::sycl::nd_item<3> item,
69  bool thread_active)
70  {
71  // Get the segment referenced by this Tile statement
72  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
73 
74  // Keep copy of original segment, so we can restore it
75  using segment_t = camp::decay<decltype(segment)>;
76  segment_t orig_segment = segment;
77 
78  diff_t chunk_size = TPol::chunk_size;
79 
80  // compute trip count
81  diff_t len = segment.end() - segment.begin();
82 
83  // Iterate through tiles
84  for (diff_t i = 0; i < len; i += chunk_size)
85  {
86 
87  // Assign our new tiled segment
88  segment = orig_segment.slice(i, chunk_size);
89 
90  // execute enclosed statements
91  enclosed_stmts_t::exec(data, item, thread_active);
92  }
93 
94  // Set range back to original values
95  segment = orig_segment;
96  }
97 
98  static inline LaunchDims calculateDimensions(Data const& data)
99  {
100 
101  // privatize data, so we can mess with the segments
102  using data_t = camp::decay<Data>;
103  data_t private_data = data;
104 
105  // Get original segment
106  auto& segment = camp::get<ArgumentId>(private_data.segment_tuple);
107 
108  // restrict to first tile
109  segment = segment.slice(0, TPol::chunk_size);
110 
111  // compute dimensions of children with segment restricted to tile
112  LaunchDims enclosed_dims =
113  enclosed_stmts_t::calculateDimensions(private_data);
114 
115  return enclosed_dims;
116  }
117 };
118 
124 template<typename Data,
125  camp::idx_t ArgumentId,
126  camp::idx_t chunk_size,
127  int BlockDim,
128  typename... EnclosedStmts,
129  typename Types>
130 struct SyclStatementExecutor<Data,
131  statement::Tile<ArgumentId,
132  RAJA::tile_fixed<chunk_size>,
133  sycl_group_012_direct<BlockDim>,
134  EnclosedStmts...>,
135  Types>
136 {
137 
138  using stmt_list_t = StatementList<EnclosedStmts...>;
139 
140  using enclosed_stmts_t = SyclStatementListExecutor<Data, stmt_list_t, Types>;
141 
142  using diff_t = segment_diff_type<ArgumentId, Data>;
143 
144  static inline RAJA_DEVICE void exec(Data& data,
145  ::sycl::nd_item<3> item,
146  bool thread_active)
147  {
148  // Get the segment referenced by this Tile statement
149  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
150 
151  using segment_t = camp::decay<decltype(segment)>;
152 
153  // compute trip count
154  diff_t len = segment.end() - segment.begin();
155  // diff_t i = get_sycl_dim<BlockDim>(blockIdx) * chunk_size; // TODO
156  diff_t i =
157  item.get_group(BlockDim) *
158  chunk_size; // get_sycl_dim<BlockDim>(blockIdx) * chunk_size; // TODO
159 
160  // check have chunk
161  if (i < len)
162  {
163 
164  // Keep copy of original segment, so we can restore it
165  segment_t orig_segment = segment;
166 
167  // Assign our new tiled segment
168  segment = orig_segment.slice(i, chunk_size);
169 
170  // execute enclosed statements
171  enclosed_stmts_t::exec(data, item, thread_active);
172 
173  // Set range back to original values
174  segment = orig_segment;
175  }
176  }
177 
178  static inline LaunchDims calculateDimensions(Data const& data)
179  {
180 
181  // Compute how many blocks
182  diff_t len = segment_length<ArgumentId>(data);
183  diff_t num_blocks = len / chunk_size;
184  if (num_blocks * chunk_size < len)
185  {
186  num_blocks++;
187  }
188 
189  LaunchDims dims;
190  set_sycl_dim<BlockDim>(dims.group, num_blocks);
191 
192  // since we are direct-mapping, we REQUIRE len
193  set_sycl_dim<BlockDim>(dims.min_groups, num_blocks);
194 
195 
196  // privatize data, so we can mess with the segments
197  using data_t = camp::decay<Data>;
198  data_t private_data = data;
199 
200  // Get original segment
201  auto& segment = camp::get<ArgumentId>(private_data.segment_tuple);
202 
203  // restrict to first tile
204  segment = segment.slice(0, chunk_size);
205 
206 
207  LaunchDims enclosed_dims =
208  enclosed_stmts_t::calculateDimensions(private_data);
209 
210  return dims.max(enclosed_dims);
211  }
212 };
213 
219 template<typename Data,
220  camp::idx_t ArgumentId,
221  camp::idx_t chunk_size,
222  int BlockDim,
223  typename... EnclosedStmts,
224  typename Types>
225 struct SyclStatementExecutor<Data,
226  statement::Tile<ArgumentId,
227  RAJA::tile_fixed<chunk_size>,
228  sycl_group_012_loop<BlockDim>,
229  EnclosedStmts...>,
230  Types>
231 {
232 
233  using stmt_list_t = StatementList<EnclosedStmts...>;
234 
235  using enclosed_stmts_t = SyclStatementListExecutor<Data, stmt_list_t, Types>;
236 
237  using diff_t = segment_diff_type<ArgumentId, Data>;
238 
239  static inline RAJA_DEVICE void exec(Data& data,
240  ::sycl::nd_item<3> item,
241  bool thread_active)
242  {
243  // Get the segment referenced by this Tile statement
244  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
245 
246  // Keep copy of original segment, so we can restore it
247  using segment_t = camp::decay<decltype(segment)>;
248  segment_t orig_segment = segment;
249 
250  // compute trip count
251  diff_t len = segment.end() - segment.begin();
252  diff_t i_init = item.get_group(BlockDim) * chunk_size; // TODO
253  diff_t i_stride = item.get_group_range(BlockDim) * chunk_size; // TODO
254 
255  // Iterate through grid stride of chunks
256  for (diff_t i = i_init; i < len; i += i_stride)
257  {
258 
259  // Assign our new tiled segment
260  segment = orig_segment.slice(i, chunk_size);
261 
262  // execute enclosed statements
263  enclosed_stmts_t::exec(data, item, thread_active);
264  }
265 
266  // Set range back to original values
267  segment = orig_segment;
268  }
269 
270  static inline LaunchDims calculateDimensions(Data const& data)
271  {
272 
273  // Compute how many blocks
274  diff_t len = segment_length<ArgumentId>(data);
275  diff_t num_blocks = len / chunk_size;
276  if (num_blocks * chunk_size < len)
277  {
278  num_blocks++;
279  }
280 
281  LaunchDims dims;
282  set_sycl_dim<BlockDim>(dims.group, num_blocks);
283 
284 
285  // privatize data, so we can mess with the segments
286  using data_t = camp::decay<Data>;
287  data_t private_data = data;
288 
289  // Get original segment
290  auto& segment = camp::get<ArgumentId>(private_data.segment_tuple);
291 
292  // restrict to first tile
293  segment = segment.slice(0, chunk_size);
294 
295 
296  LaunchDims enclosed_dims =
297  enclosed_stmts_t::calculateDimensions(private_data);
298 
299  return dims.max(enclosed_dims);
300  }
301 };
302 
308 template<typename Data,
309  camp::idx_t ArgumentId,
310  camp::idx_t chunk_size,
311  int ThreadDim,
312  typename... EnclosedStmts,
313  typename Types>
314 struct SyclStatementExecutor<Data,
315  statement::Tile<ArgumentId,
316  RAJA::tile_fixed<chunk_size>,
317  sycl_local_012_direct<ThreadDim>,
318  EnclosedStmts...>,
319  Types>
320 {
321 
322  using stmt_list_t = StatementList<EnclosedStmts...>;
323 
324  using enclosed_stmts_t = SyclStatementListExecutor<Data, stmt_list_t, Types>;
325 
326  using diff_t = segment_diff_type<ArgumentId, Data>;
327 
328  static inline RAJA_DEVICE void exec(Data& data,
329  ::sycl::nd_item<3> item,
330  bool thread_active)
331  {
332  // Get the segment referenced by this Tile statement
333  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
334 
335  // Keep copy of original segment, so we can restore it
336  using segment_t = camp::decay<decltype(segment)>;
337  segment_t orig_segment = segment;
338 
339  // compute trip count
340  diff_t len = segment.end() - segment.begin();
341  diff_t i = item.get_local_id(ThreadDim) * chunk_size;
342 
343  // execute enclosed statements if any thread will
344  // but mask off threads without work
345  bool have_work = i < len;
346 
347  // Assign our new tiled segment
348  diff_t slice_size = have_work ? chunk_size : 0;
349  segment = orig_segment.slice(i, slice_size);
350 
351  // execute enclosed statements
352  enclosed_stmts_t::exec(data, item, thread_active && have_work);
353 
354  // Set range back to original values
355  segment = orig_segment;
356  }
357 
358  static inline LaunchDims calculateDimensions(Data const& data)
359  {
360 
361  // Compute how many blocks
362  diff_t len = segment_length<ArgumentId>(data);
363  diff_t num_threads = len / chunk_size;
364  if (num_threads * chunk_size < len)
365  {
366  num_threads++;
367  }
368 
369  LaunchDims dims;
370  set_sycl_dim<ThreadDim>(dims.local, num_threads);
371  set_sycl_dim<ThreadDim>(dims.min_locals, num_threads);
372 
373  // privatize data, so we can mess with the segments
374  using data_t = camp::decay<Data>;
375  data_t private_data = data;
376 
377  // Get original segment
378  auto& segment = camp::get<ArgumentId>(private_data.segment_tuple);
379 
380  // restrict to first tile
381  segment = segment.slice(0, chunk_size);
382 
383 
384  LaunchDims enclosed_dims =
385  enclosed_stmts_t::calculateDimensions(private_data);
386 
387  return (dims.max(enclosed_dims));
388  }
389 };
390 
396 template<typename Data,
397  camp::idx_t ArgumentId,
398  camp::idx_t chunk_size,
399  int ThreadDim,
400  typename... EnclosedStmts,
401  typename Types>
402 struct SyclStatementExecutor<Data,
403  statement::Tile<ArgumentId,
404  RAJA::tile_fixed<chunk_size>,
405  sycl_local_012_loop<ThreadDim>,
406  EnclosedStmts...>,
407  Types>
408 {
409 
410  using stmt_list_t = StatementList<EnclosedStmts...>;
411 
412  using enclosed_stmts_t = SyclStatementListExecutor<Data, stmt_list_t, Types>;
413 
414  using diff_t = segment_diff_type<ArgumentId, Data>;
415 
416  static inline RAJA_DEVICE void exec(Data& data,
417  ::sycl::nd_item<3> item,
418  bool thread_active)
419  {
420  // Get the segment referenced by this Tile statement
421  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
422 
423  // Keep copy of original segment, so we can restore it
424  using segment_t = camp::decay<decltype(segment)>;
425  segment_t orig_segment = segment;
426 
427  // compute trip count
428  diff_t len = segment_length<ArgumentId>(data);
429  diff_t i_init = item.get_local_id(ThreadDim) * chunk_size;
430  diff_t i_stride = item.get_group_range(ThreadDim) * chunk_size;
431 
432  // Iterate through grid stride of chunks
433  for (diff_t ii = 0; ii < len; ii += i_stride)
434  {
435  diff_t i = ii + i_init;
436 
437  // execute enclosed statements if any thread will
438  // but mask off threads without work
439  bool have_work = i < len;
440 
441  // Assign our new tiled segment
442  diff_t slice_size = have_work ? chunk_size : 0;
443  segment = orig_segment.slice(i, slice_size);
444 
445  // execute enclosed statements
446  enclosed_stmts_t::exec(data, item, thread_active && have_work);
447  }
448 
449  // Set range back to original values
450  segment = orig_segment;
451  }
452 
453  static inline LaunchDims calculateDimensions(Data const& data)
454  {
455 
456  // Compute how many blocks
457  diff_t len = segment_length<ArgumentId>(data);
458  diff_t num_threads = len / chunk_size;
459  if (num_threads * chunk_size < len)
460  {
461  num_threads++;
462  }
463  num_threads = std::max(num_threads, (diff_t)1);
464 
465  LaunchDims dims;
466  set_sycl_dim<ThreadDim>(dims.local, num_threads);
467  set_sycl_dim<ThreadDim>(dims.min_locals, 1);
468 
469  // privatize data, so we can mess with the segments
470  using data_t = camp::decay<Data>;
471  data_t private_data = data;
472 
473  // Get original segment
474  auto& segment = camp::get<ArgumentId>(private_data.segment_tuple);
475 
476  // restrict to first tile
477  segment = segment.slice(0, chunk_size);
478 
479 
480  LaunchDims enclosed_dims =
481  enclosed_stmts_t::calculateDimensions(private_data);
482 
483  return (dims.max(enclosed_dims));
484  }
485 };
486 
487 
488 } // end namespace internal
489 } // end namespace RAJA
490 
491 #endif // RAJA_ENABLE_SYCL
492 #endif /* RAJA_policy_sycl_kernel_Tile_HPP */
Header file for common RAJA internal macro definitions.
#define RAJA_DEVICE
Definition: macros.hpp:66
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result max(Args... args)
Definition: foldl.hpp:155
Header file for tile wrapper and iterator.
Header file for loop kernel internals.
Header file for RAJA type definitions.