RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
TileTCount.hpp
Go to the documentation of this file.
1 
12 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
13 // Copyright (c) Lawrence Livermore National Security, LLC and other
14 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
15 // files for dates and other details. No copyright assignment is required
16 // to contribute to RAJA.
17 //
18 // SPDX-License-Identifier: (BSD-3-Clause)
19 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
20 
21 
22 #ifndef RAJA_policy_hip_kernel_TileTCount_HPP
23 #define RAJA_policy_hip_kernel_TileTCount_HPP
24 
25 #include "RAJA/config.hpp"
26 
27 #if defined(RAJA_ENABLE_HIP)
28 
29 #include <iostream>
30 #include <type_traits>
31 
32 #include "camp/camp.hpp"
33 #include "camp/concepts.hpp"
34 #include "camp/tuple.hpp"
35 
36 #include "RAJA/util/macros.hpp"
37 #include "RAJA/util/types.hpp"
38 
41 
42 namespace RAJA
43 {
44 namespace internal
45 {
46 
53 template<typename Data,
54  camp::idx_t ArgumentId,
55  typename ParamId,
56  camp::idx_t chunk_size,
57  typename IndexMapper,
59  typename... EnclosedStmts,
60  typename Types>
61 struct HipStatementExecutor<
62  Data,
63  statement::TileTCount<
64  ArgumentId,
65  ParamId,
66  RAJA::tile_fixed<chunk_size>,
67  RAJA::policy::hip::
68  hip_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>,
69  EnclosedStmts...>,
70  Types>
71  : public HipStatementExecutor<
72  Data,
73  statement::Tile<
74  ArgumentId,
75  RAJA::tile_fixed<chunk_size>,
76  RAJA::policy::hip::hip_indexer<iteration_mapping::DirectUnchecked,
77  sync,
78  IndexMapper>,
79  EnclosedStmts...>,
80  Types>
81 {
82 
83  using Base = HipStatementExecutor<
84  Data,
85  statement::Tile<
86  ArgumentId,
88  RAJA::policy::hip::hip_indexer<iteration_mapping::DirectUnchecked,
89  sync,
90  IndexMapper>,
91  EnclosedStmts...>,
92  Types>;
93 
94  using typename Base::diff_t;
95  using typename Base::enclosed_stmts_t;
96 
97  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
98  {
99  // Get the segment referenced by this Tile statement
100  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
101 
102  using segment_t = camp::decay<decltype(segment)>;
103 
104  // compute trip count
105  const diff_t t = IndexMapper::template index<diff_t>();
106  const diff_t i = t * static_cast<diff_t>(chunk_size);
107 
108  // Keep copy of original segment, so we can restore it
109  segment_t orig_segment = segment;
110 
111  // Assign our new tiled segment
112  segment = orig_segment.slice(i, static_cast<diff_t>(chunk_size));
113  data.template assign_param<ParamId>(t);
114 
115  // execute enclosed statements
116  enclosed_stmts_t::exec(data, thread_active);
117 
118  // Set range back to original values
119  segment = orig_segment;
120  }
121 };
122 
129 template<typename Data,
130  camp::idx_t ArgumentId,
131  typename ParamId,
132  camp::idx_t chunk_size,
133  typename IndexMapper,
135  typename... EnclosedStmts,
136  typename Types>
137 struct HipStatementExecutor<
138  Data,
139  statement::TileTCount<
140  ArgumentId,
141  ParamId,
142  RAJA::tile_fixed<chunk_size>,
143  RAJA::policy::hip::
144  hip_indexer<iteration_mapping::Direct, sync, IndexMapper>,
145  EnclosedStmts...>,
146  Types>
147  : public HipStatementExecutor<
148  Data,
149  statement::Tile<
150  ArgumentId,
151  RAJA::tile_fixed<chunk_size>,
152  RAJA::policy::hip::
153  hip_indexer<iteration_mapping::Direct, sync, IndexMapper>,
154  EnclosedStmts...>,
155  Types>
156 {
157 
158  using Base = HipStatementExecutor<
159  Data,
160  statement::Tile<ArgumentId,
162  RAJA::policy::hip::hip_indexer<iteration_mapping::Direct,
163  sync,
164  IndexMapper>,
165  EnclosedStmts...>,
166  Types>;
167 
168  using typename Base::diff_t;
169  using typename Base::enclosed_stmts_t;
170 
171  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
172  {
173  // Get the segment referenced by this Tile statement
174  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
175 
176  using segment_t = camp::decay<decltype(segment)>;
177 
178  // compute trip count
179  const diff_t len = segment.end() - segment.begin();
180  const diff_t t = IndexMapper::template index<diff_t>();
181  const diff_t i = t * static_cast<diff_t>(chunk_size);
182 
183  // execute enclosed statements if any thread will
184  // but mask off threads without work
185  const bool have_work = (i < len);
186 
187  // Keep copy of original segment, so we can restore it
188  segment_t orig_segment = segment;
189 
190  // Assign our new tiled segment
191  segment = orig_segment.slice(i, static_cast<diff_t>(chunk_size));
192  data.template assign_param<ParamId>(t);
193 
194  // execute enclosed statements
195  enclosed_stmts_t::exec(data, thread_active && have_work);
196 
197  // Set range back to original values
198  segment = orig_segment;
199  }
200 };
201 
208 template<typename Data,
209  camp::idx_t ArgumentId,
210  typename ParamId,
211  camp::idx_t chunk_size,
212  typename IndexMapper,
213  typename... EnclosedStmts,
214  typename Types>
215 struct HipStatementExecutor<
216  Data,
217  statement::TileTCount<
218  ArgumentId,
219  ParamId,
220  RAJA::tile_fixed<chunk_size>,
221  RAJA::policy::hip::hip_indexer<
222  iteration_mapping::StridedLoop<named_usage::unspecified>,
223  kernel_sync_requirement::sync,
224  IndexMapper>,
225  EnclosedStmts...>,
226  Types>
227  : public HipStatementExecutor<
228  Data,
229  statement::Tile<
230  ArgumentId,
231  RAJA::tile_fixed<chunk_size>,
232  RAJA::policy::hip::hip_indexer<
233  iteration_mapping::StridedLoop<named_usage::unspecified>,
234  kernel_sync_requirement::sync,
235  IndexMapper>,
236  EnclosedStmts...>,
237  Types>
238 {
239 
240  using Base = HipStatementExecutor<
241  Data,
242  statement::Tile<
243  ArgumentId,
245  RAJA::policy::hip::hip_indexer<
246  iteration_mapping::StridedLoop<named_usage::unspecified>,
248  IndexMapper>,
249  EnclosedStmts...>,
250  Types>;
251 
252  using typename Base::diff_t;
253  using typename Base::enclosed_stmts_t;
254 
255  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
256  {
257  // Get the segment referenced by this Tile statement
258  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
259 
260  // Keep copy of original segment, so we can restore it
261  using segment_t = camp::decay<decltype(segment)>;
262  segment_t orig_segment = segment;
263 
264  // compute trip count
265  const diff_t len = segment.end() - segment.begin();
266  const diff_t t_init = IndexMapper::template index<diff_t>();
267  const diff_t i_init = t_init * static_cast<diff_t>(chunk_size);
268  const diff_t t_stride = IndexMapper::template size<diff_t>();
269  const diff_t i_stride = t_stride * static_cast<diff_t>(chunk_size);
270 
271  // Iterate through in chunks
272  // threads will have the same numbers of iterations
273  for (diff_t ii = 0, t = t_init; ii < len; ii += i_stride, t += t_stride)
274  {
275  const diff_t i = ii + i_init;
276 
277  // execute enclosed statements if any thread will
278  // but mask off threads without work
279  const bool have_work = (i < len);
280 
281  // Assign our new tiled segment
282  segment = orig_segment.slice(i, static_cast<diff_t>(chunk_size));
283  data.template assign_param<ParamId>(t);
284 
285  // execute enclosed statements
286  enclosed_stmts_t::exec(data, thread_active && have_work);
287  }
288 
289  // Set range back to original values
290  segment = orig_segment;
291  }
292 };
293 
300 template<typename Data,
301  camp::idx_t ArgumentId,
302  typename ParamId,
303  camp::idx_t chunk_size,
304  typename IndexMapper,
305  typename... EnclosedStmts,
306  typename Types>
307 struct HipStatementExecutor<
308  Data,
309  statement::TileTCount<
310  ArgumentId,
311  ParamId,
312  RAJA::tile_fixed<chunk_size>,
313  RAJA::policy::hip::hip_indexer<
314  iteration_mapping::StridedLoop<named_usage::unspecified>,
315  kernel_sync_requirement::none,
316  IndexMapper>,
317  EnclosedStmts...>,
318  Types>
319  : public HipStatementExecutor<
320  Data,
321  statement::Tile<
322  ArgumentId,
323  RAJA::tile_fixed<chunk_size>,
324  RAJA::policy::hip::hip_indexer<
325  iteration_mapping::StridedLoop<named_usage::unspecified>,
326  kernel_sync_requirement::none,
327  IndexMapper>,
328  EnclosedStmts...>,
329  Types>
330 {
331 
332  using Base = HipStatementExecutor<
333  Data,
334  statement::Tile<
335  ArgumentId,
337  RAJA::policy::hip::hip_indexer<
338  iteration_mapping::StridedLoop<named_usage::unspecified>,
340  IndexMapper>,
341  EnclosedStmts...>,
342  Types>;
343 
344  using typename Base::diff_t;
345  using typename Base::enclosed_stmts_t;
346 
347  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
348  {
349  // Get the segment referenced by this Tile statement
350  auto& segment = camp::get<ArgumentId>(data.segment_tuple);
351 
352  // Keep copy of original segment, so we can restore it
353  using segment_t = camp::decay<decltype(segment)>;
354  segment_t orig_segment = segment;
355 
356  // compute trip count
357  const diff_t len = segment.end() - segment.begin();
358  const diff_t t_init = IndexMapper::template index<diff_t>();
359  const diff_t i_init = t_init * static_cast<diff_t>(chunk_size);
360  const diff_t t_stride = IndexMapper::template size<diff_t>();
361  const diff_t i_stride = t_stride * static_cast<diff_t>(chunk_size);
362 
363  // Iterate through one at a time
364  // threads will have the different numbers of iterations
365  for (diff_t i = i_init, t = t_init; i < len; i += i_stride, t += t_stride)
366  {
367 
368  // Assign our new tiled segment
369  segment = orig_segment.slice(i, static_cast<diff_t>(chunk_size));
370  data.template assign_param<ParamId>(t);
371 
372  // execute enclosed statements
373  enclosed_stmts_t::exec(data, thread_active);
374  }
375 
376  // Set range back to original values
377  segment = orig_segment;
378  }
379 };
380 
386 template<typename Data,
387  camp::idx_t ArgumentId,
388  typename ParamId,
389  typename TPol,
390  typename... EnclosedStmts,
391  typename Types>
392 struct HipStatementExecutor<
393  Data,
394  statement::
395  TileTCount<ArgumentId, ParamId, TPol, seq_exec, EnclosedStmts...>,
396  Types>
397  : HipStatementExecutor<
398  Data,
399  statement::TileTCount<
400  ArgumentId,
401  ParamId,
402  TPol,
403  RAJA::policy::hip::hip_indexer<
404  iteration_mapping::StridedLoop<named_usage::unspecified>,
405  kernel_sync_requirement::none,
406  hip::IndexGlobal<named_dim::x,
407  named_usage::ignored,
408  named_usage::ignored>>,
409  EnclosedStmts...>,
410  Types>
411 {};
412 
413 } // end namespace internal
414 } // end namespace RAJA
415 
416 #endif // RAJA_ENABLE_HIP
417 #endif /* RAJA_policy_hip_kernel_TileTCount_HPP */
Header file for common RAJA internal macro definitions.
#define RAJA_DEVICE
Definition: macros.hpp:66
Definition: AlignedRangeIndexSetBuilders.cpp:35
kernel_sync_requirement
Definition: types.hpp:63
Header file for tile wrapper and iterator.
Header file for loop kernel internals.
Tag for a tiling loop.
Definition: Tile.hpp:72
Header file for RAJA type definitions.