RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
forall.hpp
Go to the documentation of this file.
1 
14 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
15 // Copyright (c) Lawrence Livermore National Security, LLC and other
16 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
17 // files for dates and other details. No copyright assignment is required
18 // to contribute to RAJA.
19 //
20 // SPDX-License-Identifier: (BSD-3-Clause)
21 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
22 
23 #ifndef RAJA_forall_openmp_HPP
24 #define RAJA_forall_openmp_HPP
25 
26 #include "RAJA/config.hpp"
27 
28 #if defined(RAJA_ENABLE_OPENMP)
29 
30 #include <iostream>
31 #include <type_traits>
32 
33 #include <omp.h>
34 
35 #include "RAJA/util/types.hpp"
37 
38 #include "RAJA/index/IndexSet.hpp"
41 
43 
44 #include "RAJA/pattern/forall.hpp"
45 #include "RAJA/pattern/region.hpp"
46 
48 
50 
51 namespace RAJA
52 {
53 
54 namespace policy
55 {
56 namespace omp
57 {
58 
// Executes `loop_body` over `iter` for the omp_parallel_exec<InnerPolicy>
// policy and returns an EventProxy constructed from `host_res`.
//
// NOTE(review): this listing skips several source lines (the remaining
// enable_if_t arguments after line 64, the beginning of the `if` condition
// before line 74, and lines 77/96/98). The exact overload constraints and
// branch condition are therefore not visible here -- confirm against the
// real header before relying on this description.
//
// Two paths are visible:
//  * first branch: opens a RAJA::region<RAJA::omp_parallel_region>, builds a
//    privatized copy of loop_body with thread_privatize, and forwards to the
//    forall_impl overload selected by `InnerPolicy {}`.
//  * else branch: runs an explicit `omp parallel` + `omp for` loop that
//    reduces a copy of loop_body.data.param_tuple with the `combine`
//    reduction identifier (compile-time error when RAJA_COMPILER_MSVC is
//    defined).
59 template<typename Iterable,
60  typename Func,
61  typename InnerPolicy,
62  typename ForallParam>
63 RAJA_INLINE concepts::enable_if_t<
64  resources::EventProxy<resources::Host>,
67 forall_impl(resources::Host host_res,
68  const omp_parallel_exec<InnerPolicy>&,
69  Iterable&& iter,
70  Func&& loop_body,
71  ForallParam f_params)
72 {
74  camp::decay<Func>>::value)
75  {
  // The lambda captures by reference and is executed by RAJA::region; inside
  // it the body is privatized before being handed to the inner policy.
76  RAJA::region<RAJA::omp_parallel_region>([&]() {
78  auto body = thread_privatize(loop_body);
79  forall_impl(host_res, InnerPolicy {}, iter, body.get_priv(), f_params);
80  });
81  return resources::EventProxy<resources::Host>(host_res);
82  }
83  else
84  {
85 // This branch handles the case of an OpenMP reduction through the RAJA::kernel
86 // abstraction. MSVC is not supported in this case.
87 #if defined(RAJA_COMPILER_MSVC)
88  static_assert(false, "MSVC does not support an OpenMP reduction through "
89  "the RAJA::kernel abstraction");
90 #else
  // Work on a copy of the parameter tuple carried by loop_body; it is
  // initialized here and resolved after the parallel region finishes.
91  auto reducers_tuple = loop_body.data.param_tuple;
92 
93  using EXEC_POL = camp::decay<InnerPolicy>;
94  RAJA::expt::detail::init_params<EXEC_POL>(reducers_tuple);
95 
97  RAJA_UNUSED_VAR(EXEC_POL {});
  // Presumably declares the `combine` reduction identifier used by the
  // `omp for reduction(combine : ...)` clause below -- TODO confirm against
  // the macro definition.
99  RAJA_OMP_DECLARE_TUPLE_REDUCTION_COMBINE;
100 
  // NOTE(review): begin_it and distance_it are not declared in the visible
  // lines; presumably they come from a RAJA_EXTRACT_BED_IT(iter) line that
  // this listing dropped -- confirm.
101 #pragma omp parallel
102  {
103  auto body = thread_privatize(loop_body);
104 #pragma omp for reduction(combine : reducers_tuple)
105  for (decltype(distance_it) i = 0; i < distance_it; ++i)
106  {
107  body.get_priv()(begin_it[i]);
108  // Note: this is inefficient. However, the structure of loop data
109  // requires us to perform this manual copy. This is because body
110  // performs the local reduction on its own copy of the reducers, not the
111  // OpenMP managed copy of the reducers_tuple. Alternatively, we could
112  // have OpenMP use LoopData as the combination object, but this would
113  // require additional changes to make LoopData trivially constructable
114  // (a requirement for OpenMP combinations.)
115  reducers_tuple = body.get_priv().data.param_tuple;
116  }
117  }
118  RAJA::expt::detail::resolve_params<EXEC_POL>(reducers_tuple);
119 #endif
120  }
121  return resources::EventProxy<resources::Host>(host_res);
122 }
123 
127 
128 namespace internal
129 {
130 
132 
133 //
134 // omp for (Auto)
135 //
// Applies `loop_body` to every element of `iter` inside a bare `omp for`
// worksharing loop with no schedule clause (tag type: policy::omp::Auto).
// The pragma is `omp for`, not `omp parallel for`, so this function does not
// itself create a thread team.
//
// NOTE(review): begin_it and distance_it are not declared in the visible
// lines; presumably they come from a RAJA_EXTRACT_BED_IT(iter) line (source
// line 141) that this listing dropped -- confirm.
136 template<typename Iterable, typename Func>
137 RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Auto&,
138  Iterable&& iter,
139  Func&& loop_body)
140 {
142 #pragma omp for
143  for (decltype(distance_it) i = 0; i < distance_it; ++i)
144  {
145  loop_body(begin_it[i]);
146  }
147 }
148 
149 //
150 // omp for schedule(static, ChunkSize)
151 //
// Applies `loop_body` to every element of `iter` inside an `omp for` loop
// with a static schedule (tag type: policy::omp::Static<ChunkSize>).
// ChunkSize is a compile-time value: when it is positive the chunk size is
// passed to the schedule clause, otherwise plain schedule(static) is used.
//
// NOTE(review): begin_it and distance_it are not declared in the visible
// lines; presumably they come from a RAJA_EXTRACT_BED_IT(iter) line (source
// line 157) that this listing dropped -- confirm.
152 template<typename Iterable, typename Func, int ChunkSize>
153 RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Static<ChunkSize>&,
154  Iterable&& iter,
155  Func&& loop_body)
156 {
  // The branch is resolved at compile time, so only one of the two pragmas
  // is instantiated for a given ChunkSize.
158  if constexpr (ChunkSize > 0)
159  {
160 #pragma omp for schedule(static, ChunkSize)
161  for (decltype(distance_it) i = 0; i < distance_it; ++i)
162  {
163  loop_body(begin_it[i]);
164  }
165  }
166  else
167  {
168 #pragma omp for schedule(static)
169  for (decltype(distance_it) i = 0; i < distance_it; ++i)
170  {
171  loop_body(begin_it[i]);
172  }
173  }
174 }
175 
176 //
177 // omp for schedule(dynamic) / schedule(dynamic, ChunkSize)
178 //
// Applies `loop_body` to every element of `iter` inside an `omp for` loop
// with a dynamic schedule (tag type: policy::omp::Dynamic<ChunkSize>).
// ChunkSize is a compile-time value: when it is positive the chunk size is
// passed to the schedule clause, otherwise plain schedule(dynamic) is used.
//
// NOTE(review): begin_it and distance_it are not declared in the visible
// lines; presumably they come from a RAJA_EXTRACT_BED_IT(iter) line (source
// line 184) that this listing dropped -- confirm.
179 template<typename Iterable, typename Func, int ChunkSize>
180 RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Dynamic<ChunkSize>&,
181  Iterable&& iter,
182  Func&& loop_body)
183 {
  // The branch is resolved at compile time, so only one of the two pragmas
  // is instantiated for a given ChunkSize.
185  if constexpr (ChunkSize > 0)
186  {
187 #pragma omp for schedule(dynamic, ChunkSize)
188  for (decltype(distance_it) i = 0; i < distance_it; ++i)
189  {
190  loop_body(begin_it[i]);
191  }
192  }
193  else
194  {
195 #pragma omp for schedule(dynamic)
196  for (decltype(distance_it) i = 0; i < distance_it; ++i)
197  {
198  loop_body(begin_it[i]);
199  }
200  }
201 }
202 
203 //
204 // omp for schedule(guided)/schedule(guided, ChunkSize)
205 //
// Applies `loop_body` to every element of `iter` inside an `omp for` loop
// with a guided schedule (tag type: policy::omp::Guided<ChunkSize>).
// ChunkSize is a compile-time value: when it is positive the chunk size is
// passed to the schedule clause, otherwise plain schedule(guided) is used.
//
// NOTE(review): begin_it and distance_it are not declared in the visible
// lines; presumably they come from a RAJA_EXTRACT_BED_IT(iter) line (source
// line 211) that this listing dropped -- confirm.
206 template<typename Iterable, typename Func, int ChunkSize>
207 RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Guided<ChunkSize>&,
208  Iterable&& iter,
209  Func&& loop_body)
210 {
  // The branch is resolved at compile time, so only one of the two pragmas
  // is instantiated for a given ChunkSize.
212  if constexpr (ChunkSize > 0)
213  {
214 #pragma omp for schedule(guided, ChunkSize)
215  for (decltype(distance_it) i = 0; i < distance_it; ++i)
216  {
217  loop_body(begin_it[i]);
218  }
219  }
220  else
221  {
222 #pragma omp for schedule(guided)
223  for (decltype(distance_it) i = 0; i < distance_it; ++i)
224  {
225  loop_body(begin_it[i]);
226  }
227  }
228 }
229 
230 //
231 // omp for schedule(runtime)
232 //
// Applies `loop_body` to every element of `iter` inside an
// `omp for schedule(runtime)` loop (tag type: policy::omp::Runtime), i.e. the
// schedule is taken from the OpenMP runtime setting rather than fixed here.
// The generic Policy overload of forall_impl in this file delegates to this
// function after calling omp_set_schedule.
//
// NOTE(review): begin_it and distance_it are not declared in the visible
// lines; presumably they come from a RAJA_EXTRACT_BED_IT(iter) line (source
// line 238) that this listing dropped -- confirm.
233 template<typename Iterable, typename Func>
234 RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Runtime&,
235  Iterable&& iter,
236  Func&& loop_body)
237 {
239 #pragma omp for schedule(runtime)
240  for (decltype(distance_it) i = 0; i < distance_it; ++i)
241  {
242  loop_body(begin_it[i]);
243  }
244 }
245 
246 // TODO :: not implemented in forall param interface ...
247 #if !defined(RAJA_COMPILER_MSVC)
248 // dynamic & guided
// Fallback overload for any schedule tag type `Policy` that exposes the
// static members Policy::schedule and Policy::chunk_size. It temporarily
// installs that schedule with omp_set_schedule, runs the loop through the
// Runtime overload (schedule(runtime)), and then restores the schedule that
// was active on entry. Only compiled when RAJA_COMPILER_MSVC is not defined
// (see the surrounding #if).
249 template<typename Policy, typename Iterable, typename Func>
250 RAJA_INLINE void forall_impl(const Policy&, Iterable&& iter, Func&& loop_body)
251 {
  // Remember the schedule kind and chunk size in effect before the call.
252  omp_sched_t prev_sched;
253  int prev_chunk;
254  omp_get_schedule(&prev_sched, &prev_chunk);
  // Install the policy's schedule so the schedule(runtime) loop picks it up.
255  omp_set_schedule(Policy::schedule, Policy::chunk_size);
256  forall_impl(::RAJA::policy::omp::Runtime {}, std::forward<Iterable>(iter),
257  std::forward<Func>(loop_body));
  // Put the previous schedule back once the loop has run.
258  omp_set_schedule(prev_sched, prev_chunk);
259 }
260 #endif
261 
262 
264 
265 //
266 // omp for nowait (Auto)
267 //
// Applies `loop_body` to every element of `iter` inside an `omp for nowait`
// loop with no schedule clause (tag type: policy::omp::Auto). The `nowait`
// clause omits the implicit barrier at the end of the worksharing loop.
//
// NOTE(review): begin_it and distance_it are not declared in the visible
// lines; presumably they come from a RAJA_EXTRACT_BED_IT(iter) line (source
// line 273) that this listing dropped -- confirm.
268 template<typename Iterable, typename Func>
269 RAJA_INLINE void forall_impl_nowait(const ::RAJA::policy::omp::Auto&,
270  Iterable&& iter,
271  Func&& loop_body)
272 {
274 #pragma omp for nowait
275  for (decltype(distance_it) i = 0; i < distance_it; ++i)
276  {
277  loop_body(begin_it[i]);
278  }
279 }
280 
281 //
282 // omp for schedule(static) nowait/omp for schedule(static, ChunkSize) nowait
283 //
// Applies `loop_body` to every element of `iter` inside an `omp for` loop
// with a static schedule and the `nowait` clause (tag type:
// policy::omp::Static<ChunkSize>). When ChunkSize is positive it is passed
// to the schedule clause; otherwise plain schedule(static) is used.
//
// NOTE(review): begin_it and distance_it are not declared in the visible
// lines; presumably they come from a RAJA_EXTRACT_BED_IT(iter) line (source
// line 290) that this listing dropped -- confirm.
284 template<typename Iterable, typename Func, int ChunkSize>
285 RAJA_INLINE void forall_impl_nowait(
286  const ::RAJA::policy::omp::Static<ChunkSize>&,
287  Iterable&& iter,
288  Func&& loop_body)
289 {
  // The branch is resolved at compile time, so only one of the two pragmas
  // is instantiated for a given ChunkSize.
291  if constexpr (ChunkSize > 0)
292  {
293 #pragma omp for schedule(static, ChunkSize) nowait
294  for (decltype(distance_it) i = 0; i < distance_it; ++i)
295  {
296  loop_body(begin_it[i]);
297  }
298  }
299  else
300  {
301 #pragma omp for schedule(static) nowait
302  for (decltype(distance_it) i = 0; i < distance_it; ++i)
303  {
304  loop_body(begin_it[i]);
305  }
306  }
307 }
308 
309 // TODO :: not implemented in param interface...
310 #if !defined(RAJA_COMPILER_MSVC)
311 // dynamic & guided
// Fallback nowait overload for any schedule tag type `Policy` that exposes
// the static members Policy::schedule and Policy::chunk_size. It saves the
// current OpenMP schedule, installs the policy's schedule with
// omp_set_schedule, runs the loop, and restores the saved schedule. Only
// compiled when RAJA_COMPILER_MSVC is not defined (see the surrounding #if).
//
// NOTE(review): source line 321, which names the function the forwarded
// arguments are passed to, is missing from this listing -- check the real
// header for the actual callee.
312 template<typename Policy, typename Iterable, typename Func>
313 RAJA_INLINE void forall_impl_nowait(const Policy&,
314  Iterable&& iter,
315  Func&& loop_body)
316 {
  // Remember the schedule kind and chunk size in effect before the call.
317  omp_sched_t prev_sched;
318  int prev_chunk;
319  omp_get_schedule(&prev_sched, &prev_chunk);
320  omp_set_schedule(Policy::schedule, Policy::chunk_size);
322  std::forward<Iterable>(iter),
323  std::forward<Func>(loop_body));
  // Put the previous schedule back once the loop has run.
324  omp_set_schedule(prev_sched, prev_chunk);
325 }
326 #endif
327 
328 } // end namespace internal
329 
// Entry point for the omp_for_schedule_exec<Schedule> policy: dispatches to
// internal::forall_impl using a default-constructed `Schedule` tag, perfectly
// forwarding `iter` and `loop_body`, and returns an EventProxy constructed
// from `host_res`. The ForallParam argument is unnamed and not used in the
// body.
//
// NOTE(review): the remaining enable_if_t arguments (source lines 336-337)
// are missing from this listing, so the exact constraint on ForallParam is
// not visible here -- confirm against the real header.
330 template<typename Schedule,
331  typename Iterable,
332  typename Func,
333  typename ForallParam>
334 RAJA_INLINE concepts::enable_if_t<
335  resources::EventProxy<resources::Host>,
338 forall_impl(resources::Host host_res,
339  const omp_for_schedule_exec<Schedule>&,
340  Iterable&& iter,
341  Func&& loop_body,
342  ForallParam)
343 {
344  internal::forall_impl(Schedule {}, std::forward<Iterable>(iter),
345  std::forward<Func>(loop_body));
346  return resources::EventProxy<resources::Host>(host_res);
347 }
348 
// Entry point for the omp_for_nowait_schedule_exec<Schedule> policy:
// dispatches to internal::forall_impl_nowait using a default-constructed
// `Schedule` tag, perfectly forwarding `iter` and `loop_body`, and returns an
// EventProxy constructed from `host_res`. The ForallParam argument is unnamed
// and not used in the body.
//
// NOTE(review): the remaining enable_if_t arguments (source lines 355-356)
// are missing from this listing, so the exact constraint on ForallParam is
// not visible here -- confirm against the real header.
349 template<typename Schedule,
350  typename Iterable,
351  typename Func,
352  typename ForallParam>
353 RAJA_INLINE concepts::enable_if_t<
354  resources::EventProxy<resources::Host>,
357 forall_impl(resources::Host host_res,
358  const omp_for_nowait_schedule_exec<Schedule>&,
359  Iterable&& iter,
360  Func&& loop_body,
361  ForallParam)
362 {
363  internal::forall_impl_nowait(Schedule {}, std::forward<Iterable>(iter),
364  std::forward<Func>(loop_body));
365  return resources::EventProxy<resources::Host>(host_res);
366 }
367 
368 //
370 //
371 // The following function templates iterate over index set
372 // segments using OpenMP execution. Segment execution is defined by
373 // the segment execution policy template parameter.
374 //
376 //
377 
391 /*
392  * TODO: Fix this!!!
393  */
394 
395 /*
396 template <typename SEG_EXEC_POLICY_T, typename LOOP_BODY, typename ...
397 SEG_TYPES>
398 RAJA_INLINE void forall(
399  ExecPolicy<omp_taskgraph_segit, SEG_EXEC_POLICY_T>,
400  const IndexSet<SEG_TYPES ...>& iset,
401  LOOP_BODY loop_body)
402 {
403  if (!iset.dependencyGraphSet()) {
404  std::cerr << "\n RAJA IndexSet dependency graph not set , "
405  << "FILE: " << __FILE__ << " line: " << __LINE__ << std::endl;
406  RAJA_ABORT_OR_THROW("IndexSet dependency graph");
407  }
408 
409  IndexSet& ncis = (*const_cast<IndexSet*>(&iset));
410 
411  int num_seg = ncis.getNumSegments();
412 
413 #pragma omp parallel for schedule(static, 1)
414  for (int isi = 0; isi < num_seg; ++isi) {
415  IndexSetSegInfo* seg_info = ncis.getSegmentInfo(isi);
416  DepGraphNode* task = seg_info->getDepGraphNode();
417 
418  task->wait();
419 
420  executeRangeList_forall<SEG_EXEC_POLICY_T>(seg_info, loop_body);
421 
422  task->reset();
423 
424  if (task->numDepTasks() != 0) {
425  for (int ii = 0; ii < task->numDepTasks(); ++ii) {
426  // Alternatively, we could get the return value of this call
427  // and actively launch the task if we are the last dependent
428  // task. In that case, we would not need the semaphore spin
429  // loop above.
430  int seg = task->depTaskNum(ii);
431  DepGraphNode* dep = ncis.getSegmentInfo(seg)->getDepGraphNode();
432  dep->satisfyOne();
433  }
434  }
435 
436  } // iterate over segments of index set
437 }
438 */
439 
440 } // namespace omp
441 
442 } // namespace policy
443 
444 } // namespace RAJA
445 
446 #endif // closing endif for if defined(RAJA_ENABLE_OPENMP)
447 
448 #endif // closing endif for header file include guard
RAJA header file defining index set classes.
Header file containing definition of RAJA list segment class.
Header file containing definitions of RAJA range segment classes.
Header file containing type traits needed by kernel implementation.
RAJA_HOST_DEVICE RAJA_INLINE void RAJA_UNUSED_VAR(T &&...) noexcept
Definition: macros.hpp:120
value_type::device_call &[i_loop] iter
Definition: WorkRunner.hpp:216
RAJA_HOST_DEVICE auto thread_privatize(const T &item) -> Privatizer< T >
Create a private copy of the argument to be stored on the current thread's stack in a class of the Pr...
Definition: privatizer.hpp:88
RAJA_INLINE void forall_impl_nowait(const ::RAJA::policy::omp::Static< ChunkSize > &p, Iterable &&iter, Func &&loop_body, ForallParam &&f_params)
Definition: forall.hpp:257
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, RAJA::expt::type_traits::is_ForallParamPack< ForallParam >, concepts::negate< RAJA::expt::type_traits::is_ForallParamPack_empty< ForallParam > > > forall_impl(resources::Host host_res, const omp_parallel_exec< InnerPolicy > &, Iterable &&iter, Func &&loop_body, ForallParam f_params)
Definition: forall.hpp:325
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, expt::type_traits::is_ForallParamPack< ForallParam >, expt::type_traits::is_ForallParamPack_empty< ForallParam > > forall_impl(resources::Host host_res, const simd_exec &, Iterable &&iter, Func &&body, ForallParam)
Definition: forall.hpp:81
Definition: AlignedRangeIndexSetBuilders.cpp:35
Policy
Definition: PolicyBase.hpp:32
auto & body
Definition: launch.hpp:177
Header file containing RAJA OpenMP policy definitions.
#define RAJA_EXTRACT_BED_IT(CONTAINER)
Definition: forall.hpp:32
Header file containing RAJA index set and segment iteration template methods that take an execution p...
Header file containing the RAJA Region API call.
Definition: TypeTraits.hpp:59
Definition: TypeTraits.hpp:49
Definition: policy.hpp:119
Header file for RAJA type definitions.