RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
launch.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_pattern_launch_openmp_HPP
21 #define RAJA_pattern_launch_openmp_HPP
22 
25 
26 namespace RAJA
27 {
28 
29 template<>
31 {
32 
33  template<typename ReduceParams, typename BODY>
34  static concepts::enable_if_t<
35  resources::EventProxy<resources::Resource>,
37  exec(RAJA::resources::Resource res,
38  LaunchParams const& launch_params,
39  BODY const& body,
40  ReduceParams& f_params)
41  {
43  constexpr bool has_reducers =
45  using EXEC_POL = RAJA::omp_launch_t;
46  EXEC_POL pol {};
47  using BodyType = decltype(thread_privatize(body));
48 
49  using LaunchContextType =
51 
52  auto parallel_section = [&](ReduceParams& f_params, auto func) {
54 
55  auto loop_body = thread_privatize(body);
56  static_assert(std::is_invocable<decltype(func), ReduceParams&, BodyType&,
57  LaunchContextType&>::value,
58  "Internal RAJA error: Check the parallel kernel passed to "
59  "OpenMP Parallel section in openmp/launch.hpp");
60 
61  ctx.shared_mem_ptr = (char*)malloc(launch_params.shared_mem_size);
62 
63  func(f_params, loop_body, ctx);
64 
65  free(ctx.shared_mem_ptr);
66  ctx.shared_mem_ptr = nullptr;
67  };
68 
69  // Init reducers if present
71 
72  // reducer object must be named f_params as expected by macro below
73  if constexpr (has_reducers)
74  {
75  RAJA_OMP_DECLARE_REDUCTION_COMBINE;
76 #pragma omp parallel reduction(combine : f_params)
77  {
78  // This "extra lambda" has to be declared within the scope of the OpenMP
79  // pragma so that the reduction parameter pack it operates on is the
80  // version tracked by the combine OpenMP syntax
81  auto parallel_kernel = [&](ReduceParams& f_params, BodyType& body,
83  expt::invoke_body(f_params, body.get_priv(), ctx);
84  };
85  parallel_section(f_params, parallel_kernel);
86  }
87  }
88  else
89  {
90  RAJA::region<RAJA::omp_parallel_region>([&]() {
91  auto parallel_kernel = [&](ReduceParams&, BodyType& body,
93  body.get_priv()(ctx);
94  };
95  parallel_section(f_params, parallel_kernel);
96  });
97  }
98  // Resolve reducers if present
100 
101  return resources::EventProxy<resources::Resource>(res);
102  }
103 };
104 
105 template<typename SEGMENT>
107 {
108 
109  template<typename LaunchContextPolicy, typename BODY>
110  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
112  SEGMENT const& segment,
113  BODY const& body)
114  {
115 
116  int len = segment.end() - segment.begin();
117  RAJA::region<RAJA::omp_parallel_region>([&]() {
119  auto loop_body = thread_privatize(body);
120 #pragma omp for
121  for (int i = 0; i < len; i++)
122  {
123 
124  loop_body.get_priv()(*(segment.begin() + i));
125  }
126  });
127  }
128 
129  template<typename LaunchContextPolicy, typename BODY>
130  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
132  SEGMENT const& segment0,
133  SEGMENT const& segment1,
134  BODY const& body)
135  {
136 
137  const int len1 = segment1.end() - segment1.begin();
138  const int len0 = segment0.end() - segment0.begin();
139 
140  RAJA::region<RAJA::omp_parallel_region>([&]() {
142  auto loop_body = thread_privatize(body);
143 
144 #pragma omp for
145  for (int j = 0; j < len1; j++)
146  {
147  for (int i = 0; i < len0; i++)
148  {
149 
150  loop_body.get_priv()(*(segment0.begin() + i),
151  *(segment1.begin() + j));
152  }
153  }
154  });
155  }
156 
157  template<typename LaunchContextPolicy, typename BODY>
158  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
160  SEGMENT const& segment0,
161  SEGMENT const& segment1,
162  SEGMENT const& segment2,
163  BODY const& body)
164  {
165 
166  const int len2 = segment2.end() - segment2.begin();
167  const int len1 = segment1.end() - segment1.begin();
168  const int len0 = segment0.end() - segment0.begin();
169 
170  RAJA::region<RAJA::omp_parallel_region>([&]() {
172  auto loop_body = thread_privatize(body);
173 
174 #pragma omp for
175  for (int k = 0; k < len2; k++)
176  {
177  for (int j = 0; j < len1; j++)
178  {
179  for (int i = 0; i < len0; i++)
180  {
181  loop_body.get_priv()(*(segment0.begin() + i),
182  *(segment1.begin() + j),
183  *(segment2.begin() + k));
184  }
185  }
186  }
187  });
188  }
189 };
190 
191 template<typename SEGMENT>
192 struct LoopExecute<omp_for_exec, SEGMENT>
193 {
194 
195  template<typename LaunchContextPolicy, typename BODY>
196  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
198  SEGMENT const& segment,
199  BODY const& body)
200  {
201 
202  int len = segment.end() - segment.begin();
203 #pragma omp for
204  for (int i = 0; i < len; i++)
205  {
206 
207  body(*(segment.begin() + i));
208  }
209  }
210 
211  template<typename LaunchContextPolicy, typename BODY>
212  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
214  SEGMENT const& segment0,
215  SEGMENT const& segment1,
216  BODY const& body)
217  {
218 
219  const int len1 = segment1.end() - segment1.begin();
220  const int len0 = segment0.end() - segment0.begin();
221 
222 #pragma omp for
223  for (int j = 0; j < len1; j++)
224  {
225  for (int i = 0; i < len0; i++)
226  {
227 
228  body(*(segment0.begin() + i), *(segment1.begin() + j));
229  }
230  }
231  }
232 
233  template<typename LaunchContextPolicy, typename BODY>
234  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
236  SEGMENT const& segment0,
237  SEGMENT const& segment1,
238  SEGMENT const& segment2,
239  BODY const& body)
240  {
241 
242  const int len2 = segment2.end() - segment2.begin();
243  const int len1 = segment1.end() - segment1.begin();
244  const int len0 = segment0.end() - segment0.begin();
245 
246 #pragma omp for
247  for (int k = 0; k < len2; k++)
248  {
249  for (int j = 0; j < len1; j++)
250  {
251  for (int i = 0; i < len0; i++)
252  {
253  body(*(segment0.begin() + i), *(segment1.begin() + j),
254  *(segment2.begin() + k));
255  }
256  }
257  }
258  }
259 };
260 
261 //
262 // Return local index
263 //
264 template<typename SEGMENT>
266 {
267 
268  template<typename LaunchContextPolicy, typename BODY>
269  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
271  SEGMENT const& segment,
272  BODY const& body)
273  {
274 
275  int len = segment.end() - segment.begin();
276 
277 #pragma omp for
278  for (int i = 0; i < len; i++)
279  {
280  body(*(segment.begin() + i), i);
281  }
282  }
283 
284  template<typename LaunchContextPolicy, typename BODY>
285  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
287  SEGMENT const& segment0,
288  SEGMENT const& segment1,
289  BODY const& body)
290  {
291 
292  const int len1 = segment1.end() - segment1.begin();
293  const int len0 = segment0.end() - segment0.begin();
294 
295 #pragma omp for
296  for (int j = 0; j < len1; j++)
297  {
298  for (int i = 0; i < len0; i++)
299  {
300 
301  body(*(segment0.begin() + i), *(segment1.begin() + j), i, j);
302  }
303  }
304  }
305 
306  template<typename LaunchContextPolicy, typename BODY>
307  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
309  SEGMENT const& segment0,
310  SEGMENT const& segment1,
311  SEGMENT const& segment2,
312  BODY const& body)
313  {
314 
315  const int len2 = segment2.end() - segment2.begin();
316  const int len1 = segment1.end() - segment1.begin();
317  const int len0 = segment0.end() - segment0.begin();
318 
319 #pragma omp for
320  for (int k = 0; k < len2; k++)
321  {
322  for (int j = 0; j < len1; j++)
323  {
324  for (int i = 0; i < len0; i++)
325  {
326  body(*(segment0.begin() + i), *(segment1.begin() + j),
327  *(segment2.begin() + k), i, j, k);
328  }
329  }
330  }
331  }
332 };
333 
334 // policy for perfectly nested loops
335 struct omp_parallel_nested_for_exec;
336 
337 template<typename SEGMENT>
338 struct LoopExecute<omp_parallel_nested_for_exec, SEGMENT>
339 {
340 
341  template<typename LaunchContextPolicy, typename BODY>
342  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
344  SEGMENT const& segment0,
345  SEGMENT const& segment1,
346  BODY const& body)
347  {
348 
349  const int len1 = segment1.end() - segment1.begin();
350  const int len0 = segment0.end() - segment0.begin();
351 
352  RAJA::region<RAJA::omp_parallel_region>([&]() {
354  auto loop_body = thread_privatize(body);
355 
356 #pragma omp for RAJA_COLLAPSE(2)
357  for (int j = 0; j < len1; j++)
358  {
359  for (int i = 0; i < len0; i++)
360  {
361 
362  loop_body.get_priv()(*(segment0.begin() + i),
363  *(segment1.begin() + j));
364  }
365  }
366  });
367  }
368 
369  template<typename LaunchContextPolicy, typename BODY>
370  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
372  SEGMENT const& segment0,
373  SEGMENT const& segment1,
374  SEGMENT const& segment2,
375  BODY const& body)
376  {
377 
378  const int len2 = segment2.end() - segment2.begin();
379  const int len1 = segment1.end() - segment1.begin();
380  const int len0 = segment0.end() - segment0.begin();
381 
382  RAJA::region<RAJA::omp_parallel_region>([&]() {
384  auto loop_body = thread_privatize(body);
385 
386 #pragma omp for RAJA_COLLAPSE(3)
387  for (int k = 0; k < len2; k++)
388  {
389  for (int j = 0; j < len1; j++)
390  {
391  for (int i = 0; i < len0; i++)
392  {
393  loop_body.get_priv()(*(segment0.begin() + i),
394  *(segment1.begin() + j),
395  *(segment2.begin() + k));
396  }
397  }
398  }
399  });
400  }
401 };
402 
403 // Return local index
404 template<typename SEGMENT>
405 struct LoopICountExecute<omp_parallel_nested_for_exec, SEGMENT>
406 {
407 
408  template<typename LaunchContextPolicy, typename BODY>
409  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
411  SEGMENT const& segment0,
412  SEGMENT const& segment1,
413  BODY const& body)
414  {
415 
416  const int len1 = segment1.end() - segment1.begin();
417  const int len0 = segment0.end() - segment0.begin();
418 
419  RAJA::region<RAJA::omp_parallel_region>([&]() {
421  auto loop_body = thread_privatize(body);
422 
423 #pragma omp for RAJA_COLLAPSE(2)
424  for (int j = 0; j < len1; j++)
425  {
426  for (int i = 0; i < len0; i++)
427  {
428 
429  loop_body.get_priv()(*(segment0.begin() + i), *(segment1.begin() + j),
430  i, j);
431  }
432  }
433  });
434  }
435 
436  template<typename LaunchContextPolicy, typename BODY>
437  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
439  SEGMENT const& segment0,
440  SEGMENT const& segment1,
441  SEGMENT const& segment2,
442  BODY const& body)
443  {
444 
445  const int len2 = segment2.end() - segment2.begin();
446  const int len1 = segment1.end() - segment1.begin();
447  const int len0 = segment0.end() - segment0.begin();
448 
449  RAJA::region<RAJA::omp_parallel_region>([&]() {
451  auto loop_body = thread_privatize(body);
452 
453 #pragma omp for RAJA_COLLAPSE(3)
454  for (int k = 0; k < len2; k++)
455  {
456  for (int j = 0; j < len1; j++)
457  {
458  for (int i = 0; i < len0; i++)
459  {
460  loop_body.get_priv()(*(segment0.begin() + i),
461  *(segment1.begin() + j),
462  *(segment2.begin() + k), i, j, k);
463  }
464  }
465  }
466  });
467  }
468 };
469 
470 template<typename SEGMENT>
472 {
473 
474  template<typename LaunchContextPolicy, typename BODY, typename TILE_T>
475  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
477  TILE_T tile_size,
478  SEGMENT const& segment,
479  BODY const& body)
480  {
481 
482  int len = segment.end() - segment.begin();
483 
484  RAJA::region<RAJA::omp_parallel_region>([&]() {
486  auto loop_body = thread_privatize(body);
487 
488 #pragma omp for
489  for (int i = 0; i < len; i += tile_size)
490  {
491  loop_body.get_priv()(segment.slice(i, tile_size));
492  }
493  });
494  }
495 };
496 
497 template<typename SEGMENT>
499 {
500 
501  template<typename LaunchContextPolicy, typename BODY, typename TILE_T>
502  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
504  TILE_T tile_size,
505  SEGMENT const& segment,
506  BODY const& body)
507  {
508 
509  const int len = segment.end() - segment.begin();
510  const int numTiles = (len - 1) / tile_size + 1;
511 
512  RAJA::region<RAJA::omp_parallel_region>([&]() {
514  auto loop_body = thread_privatize(body);
515 
516 #pragma omp parallel for
517  for (int i = 0; i < numTiles; i++)
518  {
519  const int i_tile_size = i * tile_size;
520  loop_body.get_priv()(segment.slice(i_tile_size, tile_size), i);
521  }
522  });
523  }
524 };
525 
526 template<typename SEGMENT>
527 struct TileExecute<omp_for_exec, SEGMENT>
528 {
529 
530  template<typename LaunchContextPolicy, typename BODY, typename TILE_T>
531  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
533  TILE_T tile_size,
534  SEGMENT const& segment,
535  BODY const& body)
536  {
537 
538  int len = segment.end() - segment.begin();
539 #pragma omp for
540  for (int i = 0; i < len; i += tile_size)
541  {
542  body(segment.slice(i, tile_size));
543  }
544  }
545 };
546 
547 template<typename SEGMENT>
549 {
550 
551  template<typename LaunchContextPolicy, typename BODY, typename TILE_T>
552  static RAJA_INLINE RAJA_HOST_DEVICE void exec(
554  TILE_T tile_size,
555  SEGMENT const& segment,
556  BODY const& body)
557  {
558 
559  const int len = segment.end() - segment.begin();
560  const int numTiles = (len - 1) / tile_size + 1;
561 
562 #pragma omp for
563  for (int i = 0; i < numTiles; i++)
564  {
565  const int i_tile_size = i * tile_size;
566  body(segment.slice(i_tile_size, tile_size), i);
567  }
568  }
569 };
570 
571 } // namespace RAJA
572 #endif
Definition: launch_context_policy.hpp:30
RAJA header file containing the core components of RAJA::launch.
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
#define RAJA_UNUSED_ARG(x)
Definition: macros.hpp:97
constexpr RAJA_HOST_DEVICE auto invoke_body(Params &&params, Fn &&f, Ts &&... extra)
Definition: forall.hpp:598
RAJA_HOST_DEVICE auto thread_privatize(const T &item) -> Privatizer< T >
Create a private copy of the argument to be stored on the current thread's stack in a class of the Pr...
Definition: privatizer.hpp:88
omp_for_schedule_exec< Auto > omp_for_exec
Definition: policy.hpp:187
omp_parallel_exec< omp_for_exec > omp_parallel_for_exec
Definition: policy.hpp:239
Definition: AlignedRangeIndexSetBuilders.cpp:35
LaunchContextType ctx
Definition: launch.hpp:185
auto & body
Definition: launch.hpp:177
typename RAJA::detail::launch_context_type< BODY >::type LaunchContextType
Definition: launch.hpp:183
Header file containing RAJA OpenMP policy definitions.
static concepts::enable_if_t< resources::EventProxy< resources::Resource >, RAJA::expt::type_traits::is_ForallParamPack< ReduceParams > > exec(RAJA::resources::Resource res, LaunchParams const &launch_params, BODY const &body, ReduceParams &f_params)
Definition: launch.hpp:37
Definition: launch_core.hpp:263
Definition: launch_core.hpp:163
size_t shared_mem_size
Definition: launch_core.hpp:167
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:212
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:234
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:196
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:110
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:158
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:130
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:370
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:342
Definition: launch_core.hpp:480
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:285
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:269
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:307
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, SEGMENT const &segment2, BODY const &body)
Definition: launch.hpp:437
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), SEGMENT const &segment0, SEGMENT const &segment1, BODY const &body)
Definition: launch.hpp:409
Definition: launch_core.hpp:483
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:531
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:475
Definition: launch_core.hpp:579
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:552
static RAJA_INLINE RAJA_HOST_DEVICE void exec(LaunchContextT< LaunchContextPolicy > const RAJA_UNUSED_ARG(&ctx), TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch.hpp:502
Definition: launch_core.hpp:582
static constexpr void parampack_resolve(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:304
static constexpr void parampack_init(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:269
Definition: TypeTraits.hpp:59
Definition: policy.hpp:146