10 #ifndef RAJA_omp_target_reduce_HPP
11 #define RAJA_omp_target_reduce_HPP
13 #include "RAJA/config.hpp"
15 #if defined(RAJA_ENABLE_TARGET_OPENMP)
// OpenMP declare-target region holding two class templates parameterised
// on a value type T and an index type I. Only the template heads survive
// in this listing.
// NOTE(review): presumably these are the minloc / maxloc functors that
// ReduceMinLoc / ReduceMaxLoc name as omp::minloc and omp::maxloc --
// confirm against the full header.
34 #pragma omp declare target
36 template<
typename T,
typename I>
57 template<
typename T,
typename I>
78 #pragma omp end declare target
// Element count of every per-team array in this file: it sizes the host
// and device buffers allocated by Reduce_Data and bounds the host-side
// combine loops. The value is taken from policy::omp::MAXNUMTHREADS.
82 static constexpr
int MaxNumTeams = policy::omp::MAXNUMTHREADS;
// Device bookkeeping shared by a reducer and its Reduce_Data buffers.
// hostID and deviceID are read from the OpenMP runtime
// (omp_get_initial_device / omp_get_default_device) when the object is
// default-initialised.
87 int hostID {omp_get_initial_device()};
88 int deviceID {omp_get_default_device()};
// Starts false; the reducers in this file set it to true after the
// per-team results have been combined on the host.
89 bool isMapped {
false};
91 Offload_Info() =
default;
// Copy constructor: member-wise copy of both ids and the isMapped flag.
93 Offload_Info(
const Offload_Info& other)
94 : hostID {other.hostID},
95 deviceID {other.deviceID},
96 isMapped {other.isMapped}
// Default construction is disabled; the only constructor visible here
// takes an initial value, an identity value and an Offload_Info.
110 Reduce_Data() =
delete;
// Allocates omp::MaxNumTeams elements of T on the device identified by
// info.deviceID (omp_target_alloc) and the same number on the host
// (new[]), then fills the host array with identityValue.
// How initValue is used is not visible on the surviving lines.
116 Reduce_Data(T initValue, T identityValue, Offload_Info& info)
118 device {reinterpret_cast<T*>(
119 omp_target_alloc(omp::MaxNumTeams * sizeof(T), info.deviceID))},
120 host {new T[omp::MaxNumTeams]}
// Failure messages for the host and device allocations. The conditions
// guarding them, and whatever follows each message, are on lines missing
// from this listing.
// NOTE(review): if the host check is a null test it cannot fire, because
// a plain new[] reports failure by throwing -- confirm.
124 printf(
"Unable to allocate space on host\n");
129 printf(
"Unable to allocate space on device\n");
// Every host slot starts at the identity value.
132 std::fill_n(host, omp::MaxNumTeams, identityValue);
// Overwrites the running value with initValue. The host and device
// arrays are not touched by this function.
136 void reset(T initValue) { value = initValue; }
// Defaulted (member-wise) copy.
// NOTE(review): device and host are initialised from raw allocations in
// the constructor, so a copy presumably aliases the same buffers rather
// than duplicating them -- confirm that cleanup() runs once per
// allocation.
139 Reduce_Data(
const Reduce_Data&) =
default;
// Copies omp::MaxNumTeams elements of T from the host array to the
// device array with omp_target_memcpy: destination = device,
// source = host, both offsets 0, destination device = info.deviceID.
// The source-device argument and the test applied to the return value
// are on lines missing from this listing; the printf is the message
// emitted on that failure path.
142 RAJA_INLINE
void hostToDevice(Offload_Info& info)
145 if (omp_target_memcpy(
reinterpret_cast<void*
>(device),
146 reinterpret_cast<void*
>(host),
147 omp::MaxNumTeams *
sizeof(T), 0, 0, info.deviceID,
150 printf(
"Unable to copy memory from host to device\n");
// Copies omp::MaxNumTeams elements of T from the device array back to
// the host array with omp_target_memcpy: destination = host,
// source = device, both offsets 0, destination device = info.hostID.
// The source-device argument and the test applied to the return value
// are on lines missing from this listing; the printf is the message
// emitted on that failure path.
156 RAJA_INLINE
void deviceToHost(Offload_Info& info)
159 if (omp_target_memcpy(
reinterpret_cast<void*
>(host),
160 reinterpret_cast<void*
>(device),
161 omp::MaxNumTeams *
sizeof(T), 0, 0, info.hostID,
164 printf(
"Unable to copy memory from device to host\n");
// Releases the device buffer with omp_target_free on info.deviceID.
// Any guard around the call, and whether the host array is released
// here as well, are not visible in this listing.
170 RAJA_INLINE
void cleanup(Offload_Info& info)
174 omp_target_free(
reinterpret_cast<void*
>(device), info.deviceID);
// Shared implementation for the value-only reducers specialised further
// down. Reducer provides a static identity() and is used in this file
// as Reducer{}(accumulator, x) to combine x into accumulator.
189 template<
typename Reducer,
typename T>
// Default construction is disabled; copies are member-wise.
192 TargetReduce() =
delete;
193 TargetReduce(
const TargetReduce&) =
default;
// identity_ defaults to Reducer::identity(). The per-team storage val is
// built with identity_ for both its initial and identity arguments;
// init_val_ is not handed to val on the visible lines.
// NOTE(review): presumably init_val_ is kept in initVal, which the
// finalisation step combines in separately -- confirm.
195 explicit TargetReduce(T init_val_, T identity_ = Reducer::identity())
197 val(identity_, identity_, info),
// Re-arms the reducer: the running value inside val and the cached
// finalVal both return to identity_ (default Reducer::identity()).
// What happens to init_val_ is on lines missing from this listing.
202 void reset(T init_val_, T identity_ = Reducer::identity())
205 val.reset(identity_);
207 finalVal = identity_;
// Declare-target region; the enclosing function signature is missing
// from this listing.
211 #pragma omp declare target
// Only when not executing on the initial (host) device: combine this
// copy's running value into the device slot indexed by the current team
// number.
// NOTE(review): assumes omp_get_team_num() < omp::MaxNumTeams, the
// allocated length of val.device -- TODO confirm the launch policy
// guarantees it.
218 if (!omp_is_initial_device())
222 int tid = omp_get_team_num();
223 Reducer {}(val.device[tid], val.value);
228 #pragma omp end declare target
// Host-side finalisation. The enclosing signature and any guard around
// the gather step are missing from this listing.
// 1. Bring the per-team device slots back to the host array.
236 val.deviceToHost(info);
// 2. Combine every host slot into the running value.
238 for (
int i = 0; i < omp::MaxNumTeams; ++i)
240 Reducer {}(val.value, val.host[i]);
// 3. Record that the gather has been performed.
243 info.isMapped =
true;
// 4. Rebuild finalVal from the identity, then combine in initVal and the
//    gathered running value.
245 finalVal = Reducer::identity();
246 Reducer {}(finalVal, initVal);
247 Reducer {}(finalVal, val.value);
// Returns the reduced value by calling this class's conversion operator
// to T.
252 T
get() {
return operator T(); }
// Combines rhsVal into the running value with Reducer. The return
// statement is missing from this listing.
255 TargetReduce&
reduce(T rhsVal)
257 Reducer {}(val.value, rhsVal);
// const overload performing the same combine.
// NOTE(review): this writes val.value through a const object, so val is
// presumably declared mutable -- confirm.
262 const TargetReduce&
reduce(T rhsVal)
const
264 Reducer {}(val.value, rhsVal);
// Device ids and the isMapped flag for this reducer.
270 omp::Offload_Info info;
// Running value plus the per-team host and device arrays.
272 omp::Reduce_Data<T> val;
// Shared implementation for the value-plus-location reducers specialised
// further down. Reducer is used in this file as
// Reducer{}(accVal, accLoc, val, loc) and provides a static identity().
279 template<
typename Reducer,
typename T,
typename IndexType>
280 struct TargetReduceLoc
// Default construction is disabled; copies are member-wise.
282 TargetReduceLoc() =
delete;
283 TargetReduceLoc(
const TargetReduceLoc&) =
default;
// The leading parameters and the default for identity_loc_ are on lines
// missing from this listing. The visible initialisers seed both per-team
// stores (val, loc) and both cached results (finalVal, finalLoc) from
// the identity value and identity location.
285 explicit TargetReduceLoc(
288 T identity_val_ = Reducer::identity(),
289 IndexType identity_loc_ =
292 val(identity_val_, identity_val_, info),
293 loc(identity_loc_, identity_loc_, info),
295 finalVal(identity_val_),
297 finalLoc(identity_loc_)
// Re-arms the reducer: the running value and location inside val / loc
// and the cached finalVal / finalLoc all return to the supplied identity
// value and identity location. Part of the parameter list and the
// handling of init_val_ are on lines missing from this listing.
300 void reset(T init_val_,
302 T identity_val_ = Reducer::identity(),
303 IndexType identity_loc_ =
307 val.reset(identity_val_);
308 loc.reset(identity_loc_);
310 finalVal = identity_val_;
312 finalLoc = identity_loc_;
// Only when not executing on the initial (host) device: combine this
// copy's running value and location into the device slots indexed by
// the current team number. The enclosing function signature is missing
// from this listing.
// NOTE(review): assumes omp_get_team_num() < omp::MaxNumTeams, the
// allocated length of the device arrays -- TODO confirm.
320 if (!omp_is_initial_device())
324 int tid = omp_get_team_num();
325 Reducer {}(val.device[tid], loc.device[tid], val.value, loc.value);
// Host-side finalisation. The enclosing signature and any guard around
// the gather step are missing from this listing.
// 1. Bring the per-team value and location slots back to the host.
335 val.deviceToHost(info);
336 loc.deviceToHost(info);
// 2. Combine every host slot pair into the running value / location.
337 for (
int i = 0; i < omp::MaxNumTeams; ++i)
339 Reducer {}(val.value, loc.value, val.host[i], loc.host[i]);
// 3. Record that the gather has been performed.
343 info.isMapped =
true;
// 4. Rebuild the final result: finalVal restarts from the identity, then
//    the initial pair and the gathered pair are combined in.
// NOTE(review): no matching reset of finalLoc is visible between these
// lines; it may be on a line missing from this listing -- confirm.
345 finalVal = Reducer::identity();
347 Reducer {}(finalVal, finalLoc, initVal, initLoc);
348 Reducer {}(finalVal, finalLoc, val.value, loc.value);
// Returns the reduced value by calling this class's conversion operator
// to T.
353 T
get() {
return operator T(); }
// Calls get() when the isMapped flag is still false, i.e. before the
// per-team results have been gathered. The enclosing function is missing
// from this listing.
// NOTE(review): presumably the location accessor -- confirm.
359 if (!info.isMapped)
get();
// Combines the candidate (rhsVal, rhsLoc) into the running value and
// location with Reducer. The return statement is missing from this
// listing.
365 TargetReduceLoc&
reduce(T rhsVal, IndexType rhsLoc)
367 Reducer {}(val.value, loc.value, rhsVal, rhsLoc);
// const overload performing the same combine.
// NOTE(review): this writes val.value / loc.value through a const
// object, so val and loc are presumably declared mutable -- confirm.
372 const TargetReduceLoc&
reduce(T rhsVal, IndexType rhsLoc)
const
374 Reducer {}(val.value, loc.value, rhsVal, rhsLoc);
// Device ids and the isMapped flag for this reducer.
380 omp::Offload_Info info;
// Running value plus per-team host and device arrays of T.
382 omp::Reduce_Data<T> val;
// Running location plus per-team host and device arrays of IndexType.
384 omp::Reduce_Data<IndexType> loc;
// Sum reducer for the omp_target_reduce policy. Storage and constructors
// are inherited from TargetReduce instantiated with RAJA::reduce::sum<T>.
// The template head, braces and return statements are missing from this
// listing.
393 class ReduceSum<omp_target_reduce, T>
394 :
public TargetReduce<RAJA::reduce::sum<T>, T>
397 using self = ReduceSum<omp_target_reduce, T>;
398 using parent = TargetReduce<RAJA::reduce::sum<T>, T>;
399 using parent::parent;
// operator+= forwards rhsVal to parent::reduce.
402 self& operator+=(T rhsVal)
404 parent::reduce(rhsVal);
// const overload, same forwarding.
409 const self& operator+=(T rhsVal)
const
411 parent::reduce(rhsVal);
// Bitwise-OR reducer for the omp_target_reduce policy. Storage and
// constructors are inherited from TargetReduce instantiated with
// RAJA::reduce::or_bit<T>. The template head, braces and return
// statements are missing from this listing.
418 class ReduceBitOr<omp_target_reduce, T>
419 :
public TargetReduce<RAJA::reduce::or_bit<T>, T>
422 using self = ReduceBitOr<omp_target_reduce, T>;
423 using parent = TargetReduce<RAJA::reduce::or_bit<T>, T>;
424 using parent::parent;
// operator|= forwards rhsVal to parent::reduce.
427 self& operator|=(T rhsVal)
429 parent::reduce(rhsVal);
// const overload, same forwarding.
434 const self& operator|=(T rhsVal)
const
436 parent::reduce(rhsVal);
// Bitwise-AND reducer for the omp_target_reduce policy. Storage and
// constructors are inherited from TargetReduce instantiated with
// RAJA::reduce::and_bit<T>. The template head, braces and return
// statements are missing from this listing.
443 class ReduceBitAnd<omp_target_reduce, T>
444 :
public TargetReduce<RAJA::reduce::and_bit<T>, T>
447 using self = ReduceBitAnd<omp_target_reduce, T>;
448 using parent = TargetReduce<RAJA::reduce::and_bit<T>, T>;
449 using parent::parent;
// operator&= forwards rhsVal to parent::reduce.
452 self& operator&=(T rhsVal)
454 parent::reduce(rhsVal);
// const overload, same forwarding.
459 const self& operator&=(T rhsVal)
const
461 parent::reduce(rhsVal);
// Minimum reducer for the omp_target_reduce policy. Storage and
// constructors are inherited from TargetReduce instantiated with
// RAJA::reduce::min<T>. The template head, braces, return statements and
// the signature of the first forwarding member are missing from this
// listing.
468 class ReduceMin<omp_target_reduce, T>
469 :
public TargetReduce<RAJA::reduce::min<T>, T>
472 using self = ReduceMin<omp_target_reduce, T>;
473 using parent = TargetReduce<RAJA::reduce::min<T>, T>;
474 using parent::parent;
// Body of a member whose signature is not visible; it forwards rhsVal to
// parent::reduce.
479 parent::reduce(rhsVal);
// const min(): forwards rhsVal to parent::reduce.
484 const self&
min(T rhsVal)
const
486 parent::reduce(rhsVal);
// Maximum reducer for the omp_target_reduce policy. Storage and
// constructors are inherited from TargetReduce instantiated with
// RAJA::reduce::max<T>. The template head, braces, return statements and
// the signature of the first forwarding member are missing from this
// listing.
493 class ReduceMax<omp_target_reduce, T>
494 :
public TargetReduce<RAJA::reduce::max<T>, T>
497 using self = ReduceMax<omp_target_reduce, T>;
498 using parent = TargetReduce<RAJA::reduce::max<T>, T>;
499 using parent::parent;
// Body of a member whose signature is not visible; it forwards rhsVal to
// parent::reduce.
504 parent::reduce(rhsVal);
// const max(): forwards rhsVal to parent::reduce.
509 const self&
max(T rhsVal)
const
511 parent::reduce(rhsVal);
// Minimum-with-location reducer for the omp_target_reduce policy.
// Storage and constructors are inherited from TargetReduceLoc
// instantiated with omp::minloc<T, IndexType>. Braces and return
// statements are missing from this listing.
517 template<
typename T,
typename IndexType>
518 class ReduceMinLoc<omp_target_reduce, T, IndexType>
519 :
public TargetReduceLoc<omp::minloc<T, IndexType>, T, IndexType>
522 using self = ReduceMinLoc<omp_target_reduce, T, IndexType>;
523 using parent = TargetReduceLoc<omp::minloc<T, IndexType>, T, IndexType>;
524 using parent::parent;
// minloc(): forwards the candidate value and its location to
// parent::reduce.
527 self& minloc(T rhsVal, IndexType rhsLoc)
529 parent::reduce(rhsVal, rhsLoc);
// const overload, same forwarding.
534 const self& minloc(T rhsVal, IndexType rhsLoc)
const
536 parent::reduce(rhsVal, rhsLoc);
// Maximum-with-location reducer for the omp_target_reduce policy.
// Storage and constructors are inherited from TargetReduceLoc
// instantiated with omp::maxloc<T, IndexType>. Braces and return
// statements are missing from this listing.
542 template<
typename T,
typename IndexType>
543 class ReduceMaxLoc<omp_target_reduce, T, IndexType>
544 :
public TargetReduceLoc<omp::maxloc<T, IndexType>, T, IndexType>
547 using self = ReduceMaxLoc<omp_target_reduce, T, IndexType>;
548 using parent = TargetReduceLoc<omp::maxloc<T, IndexType>, T, IndexType>;
549 using parent::parent;
// maxloc(): forwards the candidate value and its location to
// parent::reduce.
552 self& maxloc(T rhsVal, IndexType rhsLoc)
554 parent::reduce(rhsVal, rhsLoc);
// const overload, same forwarding.
559 const self& maxloc(T rhsVal, IndexType rhsLoc)
const
561 parent::reduce(rhsVal, rhsLoc);
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result min(Args... args)
Definition: foldl.hpp:161
RAJA_HOST_DEVICE constexpr RAJA_INLINE RAJA::zip_tuple_element_t< I, zip_tuple< is_val, Ts... > > & get(zip_tuple< is_val, Ts... > &z) noexcept
Definition: zip_tuple.hpp:56
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result max(Args... args)
Definition: foldl.hpp:155
Header file containing RAJA OpenMP policy definitions.
Header file providing RAJA reduction declarations.
Definition: reduce.hpp:115
Header file for RAJA type definitions.