RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
reduce.hpp
Go to the documentation of this file.
1 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
2 // Copyright (c) Lawrence Livermore National Security, LLC and other
3 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
4 // files for dates and other details. No copyright assignment is required
5 // to contribute to RAJA.
6 //
7 // SPDX-License-Identifier: (BSD-3-Clause)
8 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
9 
10 #ifndef RAJA_omp_target_reduce_HPP
11 #define RAJA_omp_target_reduce_HPP
12 
13 #include "RAJA/config.hpp"
14 
15 #if defined(RAJA_ENABLE_TARGET_OPENMP)
16 
17 // #include <cassert> // Leaving out until XL is fixed 2/25/2019.
18 
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <new>

#include <omp.h>
22 
23 #include "RAJA/util/types.hpp"
24 
25 #include "RAJA/pattern/reduce.hpp"
26 
28 
29 namespace RAJA
30 {
31 
32 namespace omp
33 {
34 #pragma omp declare target
35 
36 template<typename T, typename I>
37 struct minloc
38 {
39  RAJA_HOST_DEVICE static constexpr T identity()
40  {
42  }
43 
44  RAJA_HOST_DEVICE RAJA_INLINE void operator()(T& val,
45  I& loc,
46  const T v,
47  const I l)
48  {
49  if (v < val)
50  {
51  loc = l;
52  val = v;
53  }
54  }
55 };
56 
57 template<typename T, typename I>
58 struct maxloc
59 {
60  RAJA_HOST_DEVICE static constexpr T identity()
61  {
63  }
64 
65  RAJA_HOST_DEVICE RAJA_INLINE void operator()(T& val,
66  I& loc,
67  const T v,
68  const I l)
69  {
70  if (v > val)
71  {
72  loc = l;
73  val = v;
74  }
75  }
76 };
77 
78 #pragma omp end declare target
79 
80 // Alias for clarity. Reduction size operates on number of omp teams.
81 // Ideally, MaxNumTeams = ThreadsPerTeam in omp_target_parallel_for_exec.
82 static constexpr int MaxNumTeams = policy::omp::MAXNUMTHREADS;
83 
85 struct Offload_Info
86 {
87  int hostID {omp_get_initial_device()};
88  int deviceID {omp_get_default_device()};
89  bool isMapped {false};
90 
91  Offload_Info() = default;
92 
93  Offload_Info(const Offload_Info& other)
94  : hostID {other.hostID},
95  deviceID {other.deviceID},
96  isMapped {other.isMapped}
97  {}
98 };
99 
102 template<typename T>
103 struct Reduce_Data
104 {
105  mutable T value;
106  T* device;
107  T* host;
108 
110  Reduce_Data() = delete;
111 
116  Reduce_Data(T initValue, T identityValue, Offload_Info& info)
117  : value(initValue),
118  device {reinterpret_cast<T*>(
119  omp_target_alloc(omp::MaxNumTeams * sizeof(T), info.deviceID))},
120  host {new T[omp::MaxNumTeams]}
121  {
122  if (!host)
123  {
124  printf("Unable to allocate space on host\n");
125  exit(1);
126  }
127  if (!device)
128  {
129  printf("Unable to allocate space on device\n");
130  exit(1);
131  }
132  std::fill_n(host, omp::MaxNumTeams, identityValue);
133  hostToDevice(info);
134  }
135 
136  void reset(T initValue) { value = initValue; }
137 
139  Reduce_Data(const Reduce_Data&) = default;
140 
142  RAJA_INLINE void hostToDevice(Offload_Info& info)
143  {
144  // precondition: host and device are valid pointers
145  if (omp_target_memcpy(reinterpret_cast<void*>(device),
146  reinterpret_cast<void*>(host),
147  omp::MaxNumTeams * sizeof(T), 0, 0, info.deviceID,
148  info.hostID) != 0)
149  {
150  printf("Unable to copy memory from host to device\n");
151  exit(1);
152  }
153  }
154 
156  RAJA_INLINE void deviceToHost(Offload_Info& info)
157  {
158  // precondition: host and device are valid pointers
159  if (omp_target_memcpy(reinterpret_cast<void*>(host),
160  reinterpret_cast<void*>(device),
161  omp::MaxNumTeams * sizeof(T), 0, 0, info.hostID,
162  info.deviceID) != 0)
163  {
164  printf("Unable to copy memory from device to host\n");
165  exit(1);
166  }
167  }
168 
170  RAJA_INLINE void cleanup(Offload_Info& info)
171  {
172  if (device)
173  {
174  omp_target_free(reinterpret_cast<void*>(device), info.deviceID);
175  device = nullptr;
176  }
177  if (host)
178  {
179  delete[] host;
180  host = nullptr;
181  }
182  }
183 };
184 
185 } // end namespace omp
186 
189 template<typename Reducer, typename T>
190 struct TargetReduce
191 {
192  TargetReduce() = delete;
193  TargetReduce(const TargetReduce&) = default;
194 
195  explicit TargetReduce(T init_val_, T identity_ = Reducer::identity())
196  : info(),
197  val(identity_, identity_, info),
198  initVal(init_val_),
199  finalVal(identity_)
200  {}
201 
202  void reset(T init_val_, T identity_ = Reducer::identity())
203  {
204  operator T();
205  val.reset(identity_);
206  initVal = init_val_;
207  finalVal = identity_;
208  }
209 
210 #ifdef __ibmxl__ // TODO: implicit declare target doesn't pick this up
211 #pragma omp declare target
212 #endif
214  ~TargetReduce()
215  {
216  // assert ( omp_get_num_teams() <= omp::MaxNumTeams ); // Leaving out until
217  // XL is fixed 2/25/2019.
218  if (!omp_is_initial_device())
219  {
220 #pragma omp critical
221  {
222  int tid = omp_get_team_num();
223  Reducer {}(val.device[tid], val.value);
224  }
225  }
226  }
227 #ifdef __ibmxl__ // TODO: implicit declare target doesn't pick this up
228 #pragma omp end declare target
229 #endif
230 
232  operator T()
233  {
234  if (!info.isMapped)
235  {
236  val.deviceToHost(info);
237 
238  for (int i = 0; i < omp::MaxNumTeams; ++i)
239  {
240  Reducer {}(val.value, val.host[i]);
241  }
242  val.cleanup(info);
243  info.isMapped = true;
244  }
245  finalVal = Reducer::identity();
246  Reducer {}(finalVal, initVal);
247  Reducer {}(finalVal, val.value);
248  return finalVal;
249  }
250 
252  T get() { return operator T(); }
253 
255  TargetReduce& reduce(T rhsVal)
256  {
257  Reducer {}(val.value, rhsVal);
258  return *this;
259  }
260 
262  const TargetReduce& reduce(T rhsVal) const
263  {
264  Reducer {}(val.value, rhsVal);
265  return *this;
266  }
267 
268 private:
270  omp::Offload_Info info;
272  omp::Reduce_Data<T> val;
273  T initVal;
274  T finalVal;
275 };
276 
279 template<typename Reducer, typename T, typename IndexType>
280 struct TargetReduceLoc
281 {
282  TargetReduceLoc() = delete;
283  TargetReduceLoc(const TargetReduceLoc&) = default;
284 
285  explicit TargetReduceLoc(
286  T init_val_,
287  IndexType init_loc,
288  T identity_val_ = Reducer::identity(),
289  IndexType identity_loc_ =
291  : info(),
292  val(identity_val_, identity_val_, info),
293  loc(identity_loc_, identity_loc_, info),
294  initVal(init_val_),
295  finalVal(identity_val_),
296  initLoc(init_loc),
297  finalLoc(identity_loc_)
298  {}
299 
300  void reset(T init_val_,
301  IndexType init_loc_,
302  T identity_val_ = Reducer::identity(),
303  IndexType identity_loc_ =
305  {
306  operator T();
307  val.reset(identity_val_);
308  loc.reset(identity_loc_);
309  initVal = init_val_;
310  finalVal = identity_val_;
311  initLoc = init_loc_;
312  finalLoc = identity_loc_;
313  }
314 
316  ~TargetReduceLoc()
317  {
318  // assert ( omp_get_num_teams() <= omp::MaxNumTeams ); // Leaving out until
319  // XL is fixed 2/25/2019.
320  if (!omp_is_initial_device())
321  {
322 #pragma omp critical
323  {
324  int tid = omp_get_team_num();
325  Reducer {}(val.device[tid], loc.device[tid], val.value, loc.value);
326  }
327  }
328  }
329 
331  operator T()
332  {
333  if (!info.isMapped)
334  {
335  val.deviceToHost(info);
336  loc.deviceToHost(info);
337  for (int i = 0; i < omp::MaxNumTeams; ++i)
338  {
339  Reducer {}(val.value, loc.value, val.host[i], loc.host[i]);
340  }
341  val.cleanup(info);
342  loc.cleanup(info);
343  info.isMapped = true;
344  }
345  finalVal = Reducer::identity();
346  finalLoc = IndexType(RAJA::reduce::detail::DefaultLoc<IndexType>().value());
347  Reducer {}(finalVal, finalLoc, initVal, initLoc);
348  Reducer {}(finalVal, finalLoc, val.value, loc.value);
349  return finalVal;
350  }
351 
353  T get() { return operator T(); }
354 
357  IndexType getLoc()
358  {
359  if (!info.isMapped) get();
360  // return loc.value;
361  return (finalLoc);
362  }
363 
365  TargetReduceLoc& reduce(T rhsVal, IndexType rhsLoc)
366  {
367  Reducer {}(val.value, loc.value, rhsVal, rhsLoc);
368  return *this;
369  }
370 
372  const TargetReduceLoc& reduce(T rhsVal, IndexType rhsLoc) const
373  {
374  Reducer {}(val.value, loc.value, rhsVal, rhsLoc);
375  return *this;
376  }
377 
378 private:
380  omp::Offload_Info info;
382  omp::Reduce_Data<T> val;
384  omp::Reduce_Data<IndexType> loc;
385  T initVal;
386  T finalVal;
387  IndexType initLoc;
388  IndexType finalLoc;
389 };
390 
392 template<typename T>
393 class ReduceSum<omp_target_reduce, T>
394  : public TargetReduce<RAJA::reduce::sum<T>, T>
395 {
396 public:
397  using self = ReduceSum<omp_target_reduce, T>;
398  using parent = TargetReduce<RAJA::reduce::sum<T>, T>;
399  using parent::parent;
400 
402  self& operator+=(T rhsVal)
403  {
404  parent::reduce(rhsVal);
405  return *this;
406  }
407 
409  const self& operator+=(T rhsVal) const
410  {
411  parent::reduce(rhsVal);
412  return *this;
413  }
414 };
415 
417 template<typename T>
418 class ReduceBitOr<omp_target_reduce, T>
419  : public TargetReduce<RAJA::reduce::or_bit<T>, T>
420 {
421 public:
422  using self = ReduceBitOr<omp_target_reduce, T>;
423  using parent = TargetReduce<RAJA::reduce::or_bit<T>, T>;
424  using parent::parent;
425 
427  self& operator|=(T rhsVal)
428  {
429  parent::reduce(rhsVal);
430  return *this;
431  }
432 
434  const self& operator|=(T rhsVal) const
435  {
436  parent::reduce(rhsVal);
437  return *this;
438  }
439 };
440 
442 template<typename T>
443 class ReduceBitAnd<omp_target_reduce, T>
444  : public TargetReduce<RAJA::reduce::and_bit<T>, T>
445 {
446 public:
447  using self = ReduceBitAnd<omp_target_reduce, T>;
448  using parent = TargetReduce<RAJA::reduce::and_bit<T>, T>;
449  using parent::parent;
450 
452  self& operator&=(T rhsVal)
453  {
454  parent::reduce(rhsVal);
455  return *this;
456  }
457 
459  const self& operator&=(T rhsVal) const
460  {
461  parent::reduce(rhsVal);
462  return *this;
463  }
464 };
465 
467 template<typename T>
468 class ReduceMin<omp_target_reduce, T>
469  : public TargetReduce<RAJA::reduce::min<T>, T>
470 {
471 public:
472  using self = ReduceMin<omp_target_reduce, T>;
473  using parent = TargetReduce<RAJA::reduce::min<T>, T>;
474  using parent::parent;
475 
477  self& min(T rhsVal)
478  {
479  parent::reduce(rhsVal);
480  return *this;
481  }
482 
484  const self& min(T rhsVal) const
485  {
486  parent::reduce(rhsVal);
487  return *this;
488  }
489 };
490 
492 template<typename T>
493 class ReduceMax<omp_target_reduce, T>
494  : public TargetReduce<RAJA::reduce::max<T>, T>
495 {
496 public:
497  using self = ReduceMax<omp_target_reduce, T>;
498  using parent = TargetReduce<RAJA::reduce::max<T>, T>;
499  using parent::parent;
500 
502  self& max(T rhsVal)
503  {
504  parent::reduce(rhsVal);
505  return *this;
506  }
507 
509  const self& max(T rhsVal) const
510  {
511  parent::reduce(rhsVal);
512  return *this;
513  }
514 };
515 
517 template<typename T, typename IndexType>
518 class ReduceMinLoc<omp_target_reduce, T, IndexType>
519  : public TargetReduceLoc<omp::minloc<T, IndexType>, T, IndexType>
520 {
521 public:
522  using self = ReduceMinLoc<omp_target_reduce, T, IndexType>;
523  using parent = TargetReduceLoc<omp::minloc<T, IndexType>, T, IndexType>;
524  using parent::parent;
525 
527  self& minloc(T rhsVal, IndexType rhsLoc)
528  {
529  parent::reduce(rhsVal, rhsLoc);
530  return *this;
531  }
532 
534  const self& minloc(T rhsVal, IndexType rhsLoc) const
535  {
536  parent::reduce(rhsVal, rhsLoc);
537  return *this;
538  }
539 };
540 
542 template<typename T, typename IndexType>
543 class ReduceMaxLoc<omp_target_reduce, T, IndexType>
544  : public TargetReduceLoc<omp::maxloc<T, IndexType>, T, IndexType>
545 {
546 public:
547  using self = ReduceMaxLoc<omp_target_reduce, T, IndexType>;
548  using parent = TargetReduceLoc<omp::maxloc<T, IndexType>, T, IndexType>;
549  using parent::parent;
550 
552  self& maxloc(T rhsVal, IndexType rhsLoc)
553  {
554  parent::reduce(rhsVal, rhsLoc);
555  return *this;
556  }
557 
559  const self& maxloc(T rhsVal, IndexType rhsLoc) const
560  {
561  parent::reduce(rhsVal, rhsLoc);
562  return *this;
563  }
564 };
565 
566 
567 } // namespace RAJA
568 
569 #endif // closing endif for RAJA_ENABLE_TARGET_OPENMP guard
570 
571 #endif // closing endif for header file include guard
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result min(Args... args)
Definition: foldl.hpp:161
RAJA_HOST_DEVICE constexpr RAJA_INLINE RAJA::zip_tuple_element_t< I, zip_tuple< is_val, Ts... > > & get(zip_tuple< is_val, Ts... > &z) noexcept
Definition: zip_tuple.hpp:56
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result max(Args... args)
Definition: foldl.hpp:155
Header file containing RAJA OpenMP policy definitions.
Header file providing RAJA reduction declarations.
Definition: reduce.hpp:115
Header file for RAJA type definitions.