RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
Collapse.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_policy_openmp_kernel_collapse_HPP
21 #define RAJA_policy_openmp_kernel_collapse_HPP
22 
23 #include "RAJA/config.hpp"
24 
25 #if defined(RAJA_ENABLE_OPENMP)
26 
28 
33 
34 #include "RAJA/util/macros.hpp"
35 #include "RAJA/util/types.hpp"
36 
38 
39 namespace RAJA
40 {
41 
// Execution policy tag used with statement::Collapse. It is built from
// make_policy_pattern_t with the openmp policy, the forall pattern and the
// RAJA::policy::omp::For trait, and carries no state of its own. The
// StatementExecutor specializations later in this file are keyed on this type.
42 struct omp_parallel_collapse_exec
43  : make_policy_pattern_t<RAJA::Policy::openmp,
44  RAJA::Pattern::forall,
45  RAJA::policy::omp::For>
46 {};
47 
48 namespace internal
49 {
50 
52 // Collapsing two loops
54 
// StatementExecutor specialization for statement::Collapse with the
// omp_parallel_collapse_exec policy and exactly two loop arguments
// (Arg0, Arg1). Both exec() overloads run the two segments as a nested
// for-loop pair under a single "omp parallel for" carrying RAJA_COLLAPSE(2),
// and execute EnclosedStmts once per (i0, i1) pair.
// NOTE(review): RAJA_COLLAPSE is not defined in the visible part of this
// file (it is only #undef'd at the end); presumably it expands to an OpenMP
// collapse(n) clause -- confirm.
55 template<camp::idx_t Arg0,
56  camp::idx_t Arg1,
57  typename... EnclosedStmts,
58  typename Types>
59 struct StatementExecutor<statement::Collapse<omp_parallel_collapse_exec,
60  ArgList<Arg0, Arg1>,
61  EnclosedStmts...>,
62  Types>
63 {
64 
65 
    // Overload that carries a reduction over data.param_tuple.
    // NOTE(review): the enable_if condition that selects this overload is
    // missing from this listing (the line after "enable_if<" was dropped);
    // presumably it tests whether Data carries reducer parameters -- confirm
    // against the original header.
66  template<typename Data>
67  static RAJA_INLINE concepts::enable_if<
69  exec(Data&& data)
70  {
    // Iteration counts of the two collapsed segments.
71  const auto l0 = segment_length<Arg0>(data);
72  const auto l1 = segment_length<Arg1>(data);
73  // NOTE: these are here to avoid a use-after-scope detected by address
74  // sanitizer, probably a false positive, but the result should be
75  // essentially identical
    // The loop indices are declared outside the loops (taking their type from
    // the lengths) and listed in the private() clause below; the initial
    // values are overwritten by the for-init statements.
76  auto i0 = l0;
77  auto i1 = l1;
78 
79  // Set the argument types for this loop
80  using NewTypes0 = setSegmentTypeFromData<Types, Arg0, Data>;
81  using NewTypes1 = setSegmentTypeFromData<NewTypes0, Arg1, Data>;
82 
    // Work on a local copy of the parameter tuple: it is initialized here,
    // named in the OpenMP reduction clause, and resolved after the loop.
84  auto reducers_tuple = data.param_tuple;
85  RAJA::expt::detail::init_params<omp_parallel_collapse_exec>(reducers_tuple);
    // firstprivate(privatizer) below gives every thread its own copy of this
    // object; get_priv() is then called on that per-thread copy.
86  auto privatizer = thread_privatize(data);
87  using EXEC_POL = omp_parallel_collapse_exec;
    // Marks EXEC_POL as used before the macro and resolve_params reference it.
88  RAJA_UNUSED_VAR(EXEC_POL {});
    // NOTE(review): presumably declares the user-defined OpenMP reduction
    // named "combine" used in the pragma below -- the macro body is not
    // visible here, confirm.
89  RAJA_OMP_DECLARE_TUPLE_REDUCTION_COMBINE;
90 #pragma omp parallel for private(i0, i1) firstprivate(privatizer) \
91  RAJA_COLLAPSE(2) reduction(combine : reducers_tuple)
92  for (i0 = 0; i0 < l0; ++i0)
93  {
94  for (i1 = 0; i1 < l1; ++i1)
95 
96  {
    // Record the current (i0, i1) offsets in the thread's private loop data,
    // then run the enclosed statements against it.
97  auto& private_data = privatizer.get_priv();
98  private_data.template assign_offset<Arg0>(i0);
99  private_data.template assign_offset<Arg1>(i1);
100  execute_statement_list<camp::list<EnclosedStmts...>, NewTypes1>(
101  private_data);
102  // Note: we don't want to do this copy, but it is necessary for now
103  // due to limitations of the LoopData interface in OpenMP combine
104  // calls. See note in policy/openmp/forall.hpp for more detail.
105  reducers_tuple = private_data.param_tuple;
106  }
107  }
    // Finalize the parameters once the parallel loop has completed.
108  RAJA::expt::detail::resolve_params<EXEC_POL>(reducers_tuple);
109  }
110 
    // Overload without any reduction handling: same collapsed loop nest, but
    // no param_tuple copy, no reduction clause and no init/resolve calls.
    // NOTE(review): the enable_if condition is missing from this listing;
    // only "concepts::negate<" is visible, so this is presumably the
    // complement of the overload above -- confirm.
111  template<typename Data>
112  static RAJA_INLINE concepts::enable_if<concepts::negate<
114  exec(Data&& data)
115  {
    // Iteration counts of the two collapsed segments.
116  const auto l0 = segment_length<Arg0>(data);
117  const auto l1 = segment_length<Arg1>(data);
118  // NOTE: these are here to avoid a use-after-scope detected by address
119  // sanitizer, probably a false positive, but the result should be
120  // essentially identical
121  auto i0 = l0;
122  auto i1 = l1;
123 
124  // Set the argument types for this loop
125  using NewTypes0 = setSegmentTypeFromData<Types, Arg0, Data>;
126  using NewTypes1 = setSegmentTypeFromData<NewTypes0, Arg1, Data>;
127 
    // Copied into each thread through the firstprivate clause below.
129  auto privatizer = thread_privatize(data);
130 #pragma omp parallel for private(i0, i1) firstprivate(privatizer) \
131  RAJA_COLLAPSE(2)
132  for (i0 = 0; i0 < l0; ++i0)
133  {
134  for (i1 = 0; i1 < l1; ++i1)
135  {
    // Set the offsets for this iteration and execute the enclosed statements.
136  auto& private_data = privatizer.get_priv();
137  private_data.template assign_offset<Arg0>(i0);
138  private_data.template assign_offset<Arg1>(i1);
139  execute_statement_list<camp::list<EnclosedStmts...>, NewTypes1>(
140  private_data);
141  }
142  }
143  }
144 };
145 
// StatementExecutor specialization for statement::Collapse with the
// omp_parallel_collapse_exec policy and exactly three loop arguments
// (Arg0, Arg1, Arg2). Both exec() overloads run the three segments as a
// triply nested for-loop under a single "omp parallel for" carrying
// RAJA_COLLAPSE(3), and execute EnclosedStmts once per (i0, i1, i2) triple.
// NOTE(review): RAJA_COLLAPSE is not defined in the visible part of this
// file (it is only #undef'd at the end); presumably it expands to an OpenMP
// collapse(n) clause -- confirm.
146 template<camp::idx_t Arg0,
147  camp::idx_t Arg1,
148  camp::idx_t Arg2,
149  typename... EnclosedStmts,
150  typename Types>
151 struct StatementExecutor<statement::Collapse<omp_parallel_collapse_exec,
152  ArgList<Arg0, Arg1, Arg2>,
153  EnclosedStmts...>,
154  Types>
155 {
156 
157 
    // Overload that carries a reduction over data.param_tuple.
    // NOTE(review): the enable_if condition that selects this overload is
    // missing from this listing (the line after "enable_if<" was dropped);
    // presumably it tests whether Data carries reducer parameters -- confirm
    // against the original header.
158  template<typename Data>
159  static RAJA_INLINE concepts::enable_if<
161  exec(Data&& data)
162  {
    // Iteration counts of the three collapsed segments.
163  const auto l0 = segment_length<Arg0>(data);
164  const auto l1 = segment_length<Arg1>(data);
165  const auto l2 = segment_length<Arg2>(data);
    // Loop indices are declared outside the loops (taking their type from the
    // lengths) and listed in the private() clause below; the initial values
    // are overwritten by the for-init statements. The two-loop specialization
    // in this file documents the same pattern as an address-sanitizer
    // workaround.
166  auto i0 = l0;
167  auto i1 = l1;
168  auto i2 = l2;
169 
170  // Set the argument types for this loop
171  using NewTypes0 = setSegmentTypeFromData<Types, Arg0, Data>;
172  using NewTypes1 = setSegmentTypeFromData<NewTypes0, Arg1, Data>;
173  using NewTypes2 = setSegmentTypeFromData<NewTypes1, Arg2, Data>;
174 
    // Work on a local copy of the parameter tuple: it is initialized here,
    // named in the OpenMP reduction clause, and resolved after the loop.
175  auto reducers_tuple = data.param_tuple;
176  RAJA::expt::detail::init_params<omp_parallel_collapse_exec>(reducers_tuple);
    // firstprivate(privatizer) below gives every thread its own copy of this
    // object; get_priv() is then called on that per-thread copy.
178  auto privatizer = thread_privatize(data);
    // NOTE(review): the two-loop specialization follows this alias with
    // RAJA_UNUSED_VAR(EXEC_POL {}); that call is absent here -- confirm
    // whether the omission is intentional.
179  using EXEC_POL = omp_parallel_collapse_exec;
    // NOTE(review): presumably declares the user-defined OpenMP reduction
    // named "combine" used in the pragma below -- the macro body is not
    // visible here, confirm.
180  RAJA_OMP_DECLARE_TUPLE_REDUCTION_COMBINE;
181 #pragma omp parallel for private(i0, i1, i2) firstprivate(privatizer) \
182  RAJA_COLLAPSE(3) reduction(combine : reducers_tuple)
183  for (i0 = 0; i0 < l0; ++i0)
184  {
185  for (i1 = 0; i1 < l1; ++i1)
186  {
187  for (i2 = 0; i2 < l2; ++i2)
188  {
    // Record the current (i0, i1, i2) offsets in the thread's private loop
    // data, then run the enclosed statements against it.
189  auto& private_data = privatizer.get_priv();
190  private_data.template assign_offset<Arg0>(i0);
191  private_data.template assign_offset<Arg1>(i1);
192  private_data.template assign_offset<Arg2>(i2);
193  execute_statement_list<camp::list<EnclosedStmts...>, NewTypes2>(
194  private_data);
    // Copy the private data's parameter tuple back into the reduction
    // variable after every iteration (the two-loop specialization notes this
    // copy as a current limitation of the LoopData interface).
195  reducers_tuple = private_data.param_tuple;
196  }
197  }
198  }
199 
    // Finalize the parameters once the parallel loop has completed.
200  RAJA::expt::detail::resolve_params<EXEC_POL>(reducers_tuple);
201  }
202 
    // Overload without any reduction handling: same collapsed loop nest, but
    // no param_tuple copy, no reduction clause and no init/resolve calls.
    // NOTE(review): the enable_if condition is missing from this listing;
    // only "concepts::negate<" is visible, so this is presumably the
    // complement of the overload above -- confirm.
203  template<typename Data>
204  static RAJA_INLINE concepts::enable_if<concepts::negate<
206  exec(Data&& data)
207  {
    // Iteration counts of the three collapsed segments.
208  const auto l0 = segment_length<Arg0>(data);
209  const auto l1 = segment_length<Arg1>(data);
210  const auto l2 = segment_length<Arg2>(data);
    // Indices declared outside the loops and made private in the pragma.
211  auto i0 = l0;
212  auto i1 = l1;
213  auto i2 = l2;
214 
215  // Set the argument types for this loop
216  using NewTypes0 = setSegmentTypeFromData<Types, Arg0, Data>;
217  using NewTypes1 = setSegmentTypeFromData<NewTypes0, Arg1, Data>;
218  using NewTypes2 = setSegmentTypeFromData<NewTypes1, Arg2, Data>;
219 
    // Copied into each thread through the firstprivate clause below.
221  auto privatizer = thread_privatize(data);
222 #pragma omp parallel for private(i0, i1, i2) firstprivate(privatizer) \
223  RAJA_COLLAPSE(3)
224  for (i0 = 0; i0 < l0; ++i0)
225  {
226  for (i1 = 0; i1 < l1; ++i1)
227  {
228  for (i2 = 0; i2 < l2; ++i2)
229  {
    // Set the offsets for this iteration and execute the enclosed statements.
230  auto& private_data = privatizer.get_priv();
231  private_data.template assign_offset<Arg0>(i0);
232  private_data.template assign_offset<Arg1>(i1);
233  private_data.template assign_offset<Arg2>(i2);
234  execute_statement_list<camp::list<EnclosedStmts...>, NewTypes2>(
235  private_data);
236  }
237  }
238  }
239  }
240 };
241 
242 
243 } // namespace internal
244 } // namespace RAJA
245 
246 #undef RAJA_COLLAPSE
247 
248 #endif // closing endif for RAJA_ENABLE_OPENMP guard
249 
250 #endif // closing endif for header file include guard
Header file for common RAJA internal macro definitions.
RAJA_HOST_DEVICE RAJA_INLINE void RAJA_UNUSED_VAR(T &&...) noexcept
Definition: macros.hpp:120
RAJA_HOST_DEVICE auto thread_privatize(const T &item) -> Privatizer< T >
Create a private copy of the argument to be stored on the current thread's stack in a class of the Pr...
Definition: privatizer.hpp:88
RAJA_INLINE void execute_statement_list(Data &&data)
Definition: StatementList.hpp:84
Definition: AlignedRangeIndexSetBuilders.cpp:35
PolicyBaseT< Pol, Pat, Launch::undefined, Platform::undefined, Args... > make_policy_pattern_t
Definition: PolicyBase.hpp:168
auto privatizer
Definition: launch.hpp:176
Header file containing RAJA OpenMP policy definitions.
Header file for kernel statement collapse struct.
Header file for kernel lambda executor.
Header file for loop kernel internals.
Definition: TypeTraits.hpp:36
Header file for RAJA type definitions.