RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
forall.hpp
Go to the documentation of this file.
1 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
2 // Copyright (c) Lawrence Livermore National Security, LLC and other
3 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
4 // files for dates and other details. No copyright assignment is required
5 // to contribute to RAJA.
6 //
7 // SPDX-License-Identifier: (BSD-3-Clause)
8 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
9 
10 #ifndef RAJA_target_forall_openmp_HPP
11 #define RAJA_target_forall_openmp_HPP
12 
13 #include "RAJA/config.hpp"
14 
15 #if defined(RAJA_ENABLE_TARGET_OPENMP)
16 
17 #include <omp.h>
18 
19 #include "RAJA/util/types.hpp"
20 
22 
24 
25 namespace RAJA
26 {
27 
28 namespace policy
29 {
30 
31 namespace omp
32 {
33 
37 
// forall_impl overload for omp_target_parallel_for_exec<ThreadsPerTeam>.
//
// Copies loop_body into a local `body` and runs it once per index in
// [0, distance_it) under an OpenMP "target teams distribute parallel for"
// construct with schedule(static, 1) and a num_teams(numteams) clause.
//
// Template parameters:
//   ThreadsPerTeam  requested threads per team; capped at omp::MAXNUMTHREADS
//                   and then used to derive numteams.
//   Iterable, Func, ForallParam  types of iter, loop_body and f_params.
//
// Parameters:
//   omp_res    resource handle; in the visible lines it is only used to
//              construct the returned EventProxy.
//   p          execution policy object; its decayed type is aliased as
//              EXEC_POL.
//   iter       iterable to traverse. NOTE(review): begin_it and distance_it
//              are not declared in the visible lines -- presumably a line
//              omitted from this listing (line 62) derives them from iter;
//              confirm against the real header.
//   loop_body  callable; copied into `body`, listed in the map(to : ...)
//              clause, and copied again into `ib` on every iteration.
//   f_params   parameter pack. When is_forall_param_empty is false the loop
//              carries reduction(combine : f_params) and the body is called
//              via RAJA::expt::invoke_body(f_params, ib, begin_it[i]);
//              otherwise the body is called directly as ib(begin_it[i]).
//
// Returns a resources::EventProxy<resources::Omp> constructed from omp_res.
38 template<size_t ThreadsPerTeam,
39  typename Iterable,
40  typename Func,
41  typename ForallParam>
42 RAJA_INLINE concepts::enable_if_t<
43  resources::EventProxy<resources::Omp>,
// NOTE(review): listing line 44 is absent from this rendering; it presumably
// holds the remaining enable_if_t condition and the closing '>' -- confirm.
45 forall_impl(resources::Omp omp_res,
46  const omp_target_parallel_for_exec<ThreadsPerTeam>& p,
47  Iterable&& iter,
48  Func&& loop_body,
49  ForallParam f_params)
50 {
51  using EXEC_POL = camp::decay<decltype(p)>;
52  constexpr bool is_forall_param_empty =
// NOTE(review): listing line 53 (the initializer of is_forall_param_empty) is
// absent from this rendering.
54  if constexpr (!is_forall_param_empty)
55  {
// NOTE(review): listing line 56 is absent; presumably the parameter pack is
// initialized here for the non-empty case -- confirm.
57  }
58 
// Work on a by-value copy of the loop body with reference qualifiers removed.
59  using Body = typename std::remove_reference<decltype(loop_body)>::type;
60  Body body = loop_body;
61 
63 
64  // Cap threads per team at omp::MAXNUMTHREADS (described by the original comment as the CUDA threads-per-block limit).
65  int tperteam = ThreadsPerTeam;
66  if (tperteam > omp::MAXNUMTHREADS)
67  {
68  tperteam = omp::MAXNUMTHREADS;
69  }
70 
71  // Calculate the number of teams from the user-defined threads per team.
72  // The data size (presumably distance_it) is the distance between begin() and end() of the iterable.
73  auto numteams = RAJA_DIVIDE_CEILING_INT(distance_it, tperteam);
74  if (numteams > tperteam)
75  {
76  // OpenMP target reducers write one result per team into a Threads-sized array,
77  // so NumTeams <= Threads must hold to prevent out-of-bounds array access.
78  numteams = tperteam;
79  }
80 
81  // thread_limit(tperteam) is left unused due to an XL seg fault (when tperteam !=
82  // distance).
// The loop index is declared outside the loops with the type of distance_it
// and is reused by both branches below.
83  auto i = distance_it;
84  if constexpr (is_forall_param_empty)
85  {
86 #pragma omp target teams distribute parallel for num_teams(numteams) \
87  schedule(static, 1) map(to : body, begin_it)
88  for (i = 0; i < distance_it; ++i)
89  {
90  Body ib = body;
91  ib(begin_it[i]);
92  }
93  }
94  else
95  {
96  RAJA_OMP_DECLARE_REDUCTION_COMBINE
97 #pragma omp target teams distribute parallel for num_teams(numteams) \
98  schedule(static, 1) map(to : body, begin_it) reduction(combine : f_params)
99  for (i = 0; i < distance_it; ++i)
100  {
101  Body ib = body;
102  RAJA::expt::invoke_body(f_params, ib, begin_it[i]);
103  }
104 
// NOTE(review): listing line 105 is absent; presumably the parameter pack is
// resolved here after the reduction loop -- confirm.
106  }
107 
108  return resources::EventProxy<resources::Omp>(omp_res);
109 }
110 
// forall_impl overload for omp_target_parallel_for_exec_nt.
//
// Copies loop_body into a local `body` and runs it once per index in
// [0, distance_it) under an OpenMP "target teams distribute parallel for"
// construct with schedule(static, 1). Unlike the ThreadsPerTeam overload, no
// num_teams clause is given, and body / begin_it are passed through
// firstprivate(...) rather than map(to : ...).
//
// Parameters:
//   omp_res    resource handle; in the visible lines it is only used to
//              construct the returned EventProxy.
//   p          execution policy object; its decayed type is aliased as
//              EXEC_POL.
//   iter       iterable to traverse. NOTE(review): begin_it and distance_it
//              are not declared in the visible lines -- presumably a line
//              omitted from this listing (line 132) derives them from iter;
//              confirm against the real header.
//   loop_body  callable; copied into `body` and copied again into `ib` on
//              every iteration.
//   f_params   parameter pack. When is_forall_param_empty is false the loop
//              carries reduction(combine : f_params) and the body is called
//              via RAJA::expt::invoke_body(f_params, ib, begin_it[i]);
//              otherwise the body is called directly as ib(begin_it[i]).
//
// Returns a resources::EventProxy<resources::Omp> constructed from omp_res.
111 template<typename Iterable, typename Func, typename ForallParam>
112 RAJA_INLINE concepts::enable_if_t<
113  resources::EventProxy<resources::Omp>,
// NOTE(review): listing line 114 is absent from this rendering; it presumably
// holds the remaining enable_if_t condition and the closing '>' -- confirm.
115 forall_impl(resources::Omp omp_res,
116  const omp_target_parallel_for_exec_nt& p,
117  Iterable&& iter,
118  Func&& loop_body,
119  ForallParam f_params)
120 {
121  using EXEC_POL = camp::decay<decltype(p)>;
122  constexpr bool is_forall_param_empty =
// NOTE(review): listing line 123 (the initializer of is_forall_param_empty)
// is absent from this rendering.
124  if constexpr (!is_forall_param_empty)
125  {
// NOTE(review): listing line 126 is absent; presumably the parameter pack is
// initialized here for the non-empty case -- confirm.
127  }
128 
// Work on a by-value copy of the loop body with reference qualifiers removed.
129  using Body = typename std::remove_reference<decltype(loop_body)>::type;
130  Body body = loop_body;
131 
133 
// Non-empty parameter pack: reduction loop that routes each call through
// invoke_body so f_params is passed along with the element.
134  if constexpr (!is_forall_param_empty)
135  {
136  RAJA_OMP_DECLARE_REDUCTION_COMBINE;
137 #pragma omp target teams distribute parallel for schedule(static, 1) \
138  firstprivate(body, begin_it) reduction(combine : f_params)
139  for (decltype(distance_it) i = 0; i < distance_it; ++i)
140  {
141  Body ib = body;
142  RAJA::expt::invoke_body(f_params, ib, begin_it[i]);
143  }
144 
// NOTE(review): listing line 145 is absent; presumably the parameter pack is
// resolved here after the reduction loop -- confirm.
146  }
147  else
148  {
// Empty parameter pack: plain loop, the body is invoked directly per element.
149 #pragma omp target teams distribute parallel for schedule(static, 1) \
150  firstprivate(body, begin_it)
151  for (decltype(distance_it) i = 0; i < distance_it; ++i)
152  {
153  Body ib = body;
154  ib(begin_it[i]);
155  }
156  }
157  return resources::EventProxy<resources::Omp>(omp_res);
158 }
159 
160 } // namespace omp
161 
162 } // namespace policy
163 
164 } // namespace RAJA
165 
166 #endif // closing endif for if defined(RAJA_ENABLE_TARGET_OPENMP)
167 
168 #endif // closing endif for header file include guard
#define RAJA_DIVIDE_CEILING_INT(dividend, divisor)
Definition: macros.hpp:122
value_type::device_call &[i_loop] iter
Definition: WorkRunner.hpp:216
constexpr RAJA_HOST_DEVICE auto invoke_body(Params &&params, Fn &&f, Ts &&... extra)
Definition: forall.hpp:598
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, RAJA::expt::type_traits::is_ForallParamPack< ForallParam >, concepts::negate< RAJA::expt::type_traits::is_ForallParamPack_empty< ForallParam > > > forall_impl(resources::Host host_res, const omp_parallel_exec< InnerPolicy > &, Iterable &&iter, Func &&loop_body, ForallParam f_params)
Definition: forall.hpp:325
Definition: AlignedRangeIndexSetBuilders.cpp:35
auto & body
Definition: launch.hpp:177
Header file containing RAJA OpenMP policy definitions.
#define RAJA_EXTRACT_BED_IT(CONTAINER)
Definition: forall.hpp:32
static constexpr void parampack_resolve(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:304
static constexpr void parampack_init(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:269
Definition: TypeTraits.hpp:59
Header file for RAJA type definitions.