RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
reduce.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_util_reduce_HPP
21 #define RAJA_util_reduce_HPP
22 
23 #include "RAJA/config.hpp"
24 
25 #include <climits>
26 #include <iterator>
27 #include <new>
28 #include <type_traits>
29 
31 
32 #include "RAJA/util/macros.hpp"
33 #include "RAJA/util/concepts.hpp"
34 #include "RAJA/util/math.hpp"
35 #include "RAJA/util/Operators.hpp"
36 
37 namespace RAJA
38 {
39 
43 template<typename T, typename BinaryOp>
45 {
46  RAJA_HOST_DEVICE RAJA_INLINE constexpr explicit LeftFoldReduce(
47  T init = BinaryOp::identity(),
48  BinaryOp op = BinaryOp {}) noexcept
49  : m_storage(std::move(op), std::move(init))
50  {}
51 
55  RAJA_HOST_DEVICE RAJA_INLINE constexpr void reset(
56  T init = BinaryOp::identity()) noexcept
57  {
58  m_storage.m_accumulated_value = std::move(init);
59  }
60 
64  RAJA_HOST_DEVICE RAJA_INLINE constexpr T get_and_reset(
65  T init = BinaryOp::identity())
66  {
67  T value = get();
68 
69  reset(std::move(init));
70 
71  return value;
72  }
73 
77  RAJA_HOST_DEVICE RAJA_INLINE constexpr T get() const
78  {
79  return m_storage.m_accumulated_value;
80  }
81 
85  RAJA_HOST_DEVICE RAJA_INLINE constexpr void combine(T value)
86  {
87  m_storage.m_accumulated_value = m_storage.get_op()(
88  std::move(m_storage.m_accumulated_value), std::move(value));
89  }
90 
94  RAJA_HOST_DEVICE RAJA_INLINE constexpr void operator+=(T value)
95  {
96  combine(std::move(value));
97  }
98 
99 private:
100  // use a struct derived from BinaryOp to avoid extra storage when BinaryOp
101  // is an empty class
102  struct Storage : BinaryOp
103  {
104  T m_accumulated_value;
105 
106  RAJA_HOST_DEVICE RAJA_INLINE constexpr Storage(BinaryOp op, T init)
107  : BinaryOp(std::move(op)),
108  m_accumulated_value(std::move(init))
109  {}
110 
111  RAJA_HOST_DEVICE RAJA_INLINE constexpr BinaryOp& get_op() noexcept
112  {
113  return *this;
114  }
115 
116  RAJA_HOST_DEVICE RAJA_INLINE constexpr BinaryOp const& get_op()
117  const noexcept
118  {
119  return *this;
120  }
121  };
122 
123  Storage m_storage;
124 };
125 
129 template<typename T,
130  typename BinaryOp,
131  typename SizeType = size_t,
132  SizeType t_num_levels = CHAR_BIT * sizeof(SizeType)>
134 {
135  static_assert(std::is_unsigned<SizeType>::value, "SizeType must be unsigned");
136  static_assert(
137  t_num_levels <= CHAR_BIT * sizeof(SizeType),
138  "SizeType must be large enough to act at a bitset for num_levels");
139 
140  static constexpr SizeType num_levels = t_num_levels;
141 
142  RAJA_HOST_DEVICE RAJA_INLINE constexpr explicit BinaryTreeReduce(
143  T init = BinaryOp::identity(),
144  BinaryOp op = BinaryOp {}) noexcept
145  : m_storage(std::move(op))
146  {
147  combine(std::move(init));
148  }
149 
153  RAJA_HOST_DEVICE RAJA_INLINE constexpr void reset(
154  T init = BinaryOp::identity()) noexcept
155  {
156  m_storage.m_count = 0;
157 
158  combine(std::move(init));
159  }
160 
164  RAJA_HOST_DEVICE RAJA_INLINE constexpr T get_and_reset(
165  T init = BinaryOp::identity())
166  {
167  T value = get();
168 
169  reset(std::move(init));
170 
171  return value;
172  }
173 
177  RAJA_HOST_DEVICE RAJA_INLINE constexpr T get() const
178  {
179  // accumulate all values
180  T value = BinaryOp::identity();
181 
182  for (SizeType count = m_storage.m_count, level = 0, mask = 1; count;
183  ++level, mask <<= 1)
184  {
185  if (count & mask)
186  {
187  value =
188  m_storage.get_op()(std::move(value), m_storage.m_tree_stack[level]);
189 
190  count ^= mask;
191  }
192  }
193 
194  return value;
195  }
196 
200  RAJA_HOST_DEVICE RAJA_INLINE constexpr void combine(T value)
201  {
202  // accumulate values and store in the first unused level found
203  // reset values from used levels along the way
204  SizeType level = 0;
205  for (SizeType mask = 1; m_storage.m_count & mask; ++level, mask <<= 1)
206  {
207  value = m_storage.get_op()(std::move(m_storage.m_tree_stack[level]),
208  std::move(value));
209  }
210 
211  m_storage.m_tree_stack[level] = std::move(value);
212 
213  ++m_storage.m_count;
214  }
215 
219  RAJA_HOST_DEVICE RAJA_INLINE constexpr void operator+=(T value)
220  {
221  combine(std::move(value));
222  }
223 
224 private:
225  // use a struct derived from BinaryOp to avoid extra storage when BinaryOp
226  // is an empty class
227  struct Storage : BinaryOp
228  {
229  // A counter of the number of inputs combined.
230  // The bits of count indicate which levels of tree stack have a value
231  SizeType m_count = 0;
232 
233  // Each level in tree stack has a value that holds the accumulation of
234  // 2^level values or is unused and has no value.
235  T m_tree_stack[num_levels];
236 
237  RAJA_HOST_DEVICE RAJA_INLINE constexpr Storage(BinaryOp op)
238  : BinaryOp(std::move(op))
239  {}
240 
241  RAJA_HOST_DEVICE RAJA_INLINE constexpr BinaryOp& get_op() noexcept
242  {
243  return *this;
244  }
245 
246  RAJA_HOST_DEVICE RAJA_INLINE constexpr BinaryOp const& get_op()
247  const noexcept
248  {
249  return *this;
250  }
251  };
252 
253  Storage m_storage;
254 };
255 
257 {
258  Default,
259  Volatile
260 };
261 
272 template<typename T,
274 struct KahanSum
275 {
276  static_assert(std::is_floating_point_v<T>, "T must be a floating point type");
277 
278  RAJA_HOST_DEVICE RAJA_INLINE constexpr explicit KahanSum(
279  T init = T()) noexcept
280  : m_accumulated_value(std::move(init)),
281  m_accumulated_carry(T())
282  {}
283 
287  RAJA_HOST_DEVICE RAJA_INLINE constexpr void reset(T init = T()) noexcept
288  {
289  m_accumulated_value = std::move(init);
290  m_accumulated_carry = T();
291  }
292 
296  RAJA_HOST_DEVICE RAJA_INLINE constexpr T get_and_reset(T init = T())
297  {
298  T value = get();
299 
300  reset(std::move(init));
301 
302  return value;
303  }
304 
308  RAJA_HOST_DEVICE RAJA_INLINE constexpr T get() const
309  {
310  return m_accumulated_value;
311  }
312 
316  RAJA_HOST_DEVICE RAJA_INLINE constexpr void combine(T val)
317  {
318  if constexpr (sum_impl == KahanSumImplementation::Default)
319  {
320 
321  T y = val - m_accumulated_carry;
322  T t = m_accumulated_value + y;
323  T z = t - m_accumulated_value;
324  m_accumulated_carry = z - y;
325  m_accumulated_value = t;
326  }
327  else if constexpr (sum_impl == KahanSumImplementation::Volatile)
328  {
329 
330  // volatile used to prevent compiler optimizations that assume
331  // floating-point operations are associative
332  T y = val - m_accumulated_carry;
333  volatile T t = m_accumulated_value + y;
334  volatile T z = t - m_accumulated_value;
335  m_accumulated_carry = z - y;
336  m_accumulated_value = t;
337  }
338  }
339 
343  RAJA_HOST_DEVICE RAJA_INLINE constexpr void operator+=(T val)
344  {
345  combine(std::move(val));
346  }
347 
348 private:
349  T m_accumulated_value;
350  T m_accumulated_carry;
351 };
352 
353 template<typename T, typename BinaryOp>
355  std::conditional_t<RAJA::operators::is_fp_associative<T>::value,
358 
364 template<typename Container,
365  typename T = detail::ContainerVal<Container>,
366  typename BinaryOp = operators::plus<T>>
367 RAJA_HOST_DEVICE RAJA_INLINE constexpr concepts::
368  enable_if_t<T, type_traits::is_range<Container>>
369  left_fold_reduce(Container&& c,
370  T init = BinaryOp::identity(),
371  BinaryOp op = BinaryOp {})
372 {
373  using std::begin;
374  using std::end;
375  static_assert(type_traits::is_binary_function<BinaryOp, T, T, T>::value,
376  "BinaryOp must model BinaryFunction");
377 
378  auto begin_it = begin(c);
379  auto end_it = end(c);
380 
381  LeftFoldReduce<T, BinaryOp> reducer(std::move(init), std::move(op));
382 
383  for (; begin_it != end_it; ++begin_it)
384  {
385  reducer.combine(*begin_it);
386  }
387 
388  return reducer.get_and_reset();
389 }
390 
396 template<typename Container,
397  typename T = detail::ContainerVal<Container>,
398  typename BinaryOp = operators::plus<T>>
399 RAJA_HOST_DEVICE RAJA_INLINE constexpr concepts::
400  enable_if_t<T, type_traits::is_range<Container>>
401  binary_tree_reduce(Container&& c,
402  T init = BinaryOp::identity(),
403  BinaryOp op = BinaryOp {})
404 {
405  using std::begin;
406  using std::distance;
407  using std::end;
408  static_assert(type_traits::is_binary_function<BinaryOp, T, T, T>::value,
409  "BinaryOp must model BinaryFunction");
410 
411  auto begin_it = begin(c);
412  auto end_it = end(c);
413  using SizeType = std::make_unsigned_t<decltype(distance(begin_it, end_it))>;
414 
415  BinaryTreeReduce<T, BinaryOp, SizeType> reducer(std::move(init),
416  std::move(op));
417 
418  for (; begin_it != end_it; ++begin_it)
419  {
420  reducer.combine(*begin_it);
421  }
422 
423  return reducer.get_and_reset();
424 }
425 
431 template<typename Container, typename T = detail::ContainerVal<Container>>
432 RAJA_HOST_DEVICE RAJA_INLINE constexpr concepts::
433  enable_if_t<T, type_traits::is_range<Container>, std::is_floating_point<T>>
434  kahan_sum(Container&& c, T init = T())
435 {
436  using std::begin;
437  using std::end;
438 
439  auto begin_it = begin(c);
440  auto end_it = end(c);
441 
442  KahanSum<T> reducer(std::move(init));
443 
444  for (; begin_it != end_it; ++begin_it)
445  {
446  reducer.combine(*begin_it);
447  }
448 
449  return reducer.get_and_reset();
450 }
451 
459 template<typename Container, typename T = detail::ContainerVal<Container>>
460 RAJA_HOST_DEVICE RAJA_INLINE constexpr concepts::
461  enable_if_t<T, type_traits::is_range<Container>, std::is_floating_point<T>>
462  kahan_sum_volatile(Container&& c, T init = T())
463 {
464  using std::begin;
465  using std::end;
466 
467  auto begin_it = begin(c);
468  auto end_it = end(c);
469 
470  KahanSum<T, KahanSumImplementation::Volatile> reducer(std::move(init));
471 
472  for (; begin_it != end_it; ++begin_it)
473  {
474  reducer.combine(*begin_it);
475  }
476 
477  return reducer.get_and_reset();
478 }
479 
486 template<typename Container,
487  typename T = detail::ContainerVal<Container>,
488  typename BinaryOp = operators::plus<T>>
489 RAJA_HOST_DEVICE RAJA_INLINE constexpr concepts::
490  enable_if_t<T, type_traits::is_range<Container>>
491  high_accuracy_reduce(Container&& c,
492  T init = BinaryOp::identity(),
493  BinaryOp op = BinaryOp {})
494 {
495  using std::begin;
496  using std::end;
497  static_assert(type_traits::is_binary_function<BinaryOp, T, T, T>::value,
498  "BinaryOp must model BinaryFunction");
499 
500  auto begin_it = begin(c);
501  auto end_it = end(c);
502 
503  HighAccuracyReduce<T, BinaryOp> reducer(std::move(init), std::move(op));
504 
505  for (; begin_it != end_it; ++begin_it)
506  {
507  reducer.combine(*begin_it);
508  }
509 
510  return reducer.get_and_reset();
511 }
512 
518 template<typename Container,
519  typename T = detail::ContainerVal<Container>,
520  typename BinaryOp = operators::plus<T>>
521 RAJA_HOST_DEVICE RAJA_INLINE constexpr concepts::
522  enable_if_t<T, type_traits::is_range<Container>>
523  accumulate(Container&& c,
524  T init = BinaryOp::identity(),
525  BinaryOp op = BinaryOp {})
526 {
527  using std::begin;
528  using std::end;
529  static_assert(type_traits::is_binary_function<BinaryOp, T, T, T>::value,
530  "BinaryOp must model BinaryFunction");
531 
532  auto begin_it = begin(c);
533  auto end_it = end(c);
534 
535  LeftFoldReduce<T, BinaryOp> reducer(std::move(init), std::move(op));
536 
537  for (; begin_it != end_it; ++begin_it)
538  {
539  reducer.combine(*begin_it);
540  }
541 
542  return reducer.get_and_reset();
543 }
544 
545 } // namespace RAJA
546 
547 #endif
Header file for RAJA operator definitions.
Header file for RAJA algorithm definitions.
Header file for RAJA concept definitions.
Header file for common RAJA internal macro definitions.
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
Header file providing RAJA math templates.
camp::decay< decltype(*camp::val< camp::iterator_from< Container > >())> ContainerVal
Definition: algorithm.hpp:51
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE concepts::enable_if_t< T, type_traits::is_range< Container > > high_accuracy_reduce(Container &&c, T init=BinaryOp::identity(), BinaryOp op=BinaryOp {})
Reduce given range to a single value using an algorithm with high accuracy when floating point round ...
Definition: reduce.hpp:491
RAJA_HOST_DEVICE constexpr RAJA_INLINE concepts::enable_if_t< T, type_traits::is_range< Container > > left_fold_reduce(Container &&c, T init=BinaryOp::identity(), BinaryOp op=BinaryOp {})
Accumulate given range to a single value using a left fold algorithm in O(N) operations and O(1) extr...
Definition: reduce.hpp:369
RAJA_HOST_DEVICE constexpr RAJA_INLINE concepts::enable_if_t< T, type_traits::is_range< Container > > accumulate(Container &&c, T init=BinaryOp::identity(), BinaryOp op=BinaryOp {})
Accumulate given range to a single value using a left fold algorithm in O(N) operations and O(1) extr...
Definition: reduce.hpp:523
KahanSumImplementation
Definition: reduce.hpp:257
RAJA_HOST_DEVICE constexpr RAJA_INLINE RAJA::zip_tuple_element_t< I, zip_tuple< is_val, Ts... > > & get(zip_tuple< is_val, Ts... > &z) noexcept
Definition: zip_tuple.hpp:56
std::conditional_t< RAJA::operators::is_fp_associative< T >::value, BinaryTreeReduce< T, BinaryOp >, LeftFoldReduce< T, BinaryOp > > HighAccuracyReduce
Definition: reduce.hpp:357
RAJA_HOST_DEVICE constexpr RAJA_INLINE concepts::enable_if_t< T, type_traits::is_range< Container > > binary_tree_reduce(Container &&c, T init=BinaryOp::identity(), BinaryOp op=BinaryOp {})
Reduce given range to a single value using a binary tree algorithm in O(N) operations and O(lg(N)) ex...
Definition: reduce.hpp:401
RAJA_HOST_DEVICE constexpr RAJA_INLINE concepts::enable_if_t< T, type_traits::is_range< Container >, std::is_floating_point< T > > kahan_sum_volatile(Container &&c, T init=T())
Accumulate given range to a single value using a kahan summation algorithm in O(N) operations and O(1...
Definition: reduce.hpp:462
RAJA_HOST_DEVICE constexpr RAJA_INLINE concepts::enable_if_t< T, type_traits::is_range< Container >, std::is_floating_point< T > > kahan_sum(Container &&c, T init=T())
Accumulate given range to a single value using a kahan summation algorithm in O(N) operations and O(1...
Definition: reduce.hpp:434
Definition: ListSegment.hpp:416
Reduce class that does a reduction with a binary tree.
Definition: reduce.hpp:134
RAJA_HOST_DEVICE constexpr RAJA_INLINE void operator+=(T value)
combine a value into the reducer
Definition: reduce.hpp:219
RAJA_HOST_DEVICE constexpr RAJA_INLINE BinaryTreeReduce(T init=BinaryOp::identity(), BinaryOp op=BinaryOp {}) noexcept
Definition: reduce.hpp:142
RAJA_HOST_DEVICE constexpr RAJA_INLINE void reset(T init=BinaryOp::identity()) noexcept
reset the combined value of the reducer to the identity
Definition: reduce.hpp:153
RAJA_HOST_DEVICE constexpr RAJA_INLINE void combine(T value)
combine a value into the reducer
Definition: reduce.hpp:200
RAJA_HOST_DEVICE constexpr RAJA_INLINE T get_and_reset(T init=BinaryOp::identity())
return the combined value and reset the reducer
Definition: reduce.hpp:164
RAJA_HOST_DEVICE constexpr RAJA_INLINE T get() const
return the combined value
Definition: reduce.hpp:177
Reduce class that does a reduction with a left fold.
Definition: reduce.hpp:275
RAJA_HOST_DEVICE constexpr RAJA_INLINE KahanSum(T init=T()) noexcept
Definition: reduce.hpp:278
RAJA_HOST_DEVICE constexpr RAJA_INLINE void reset(T init=T()) noexcept
reset the combined value of the reducer to the identity
Definition: reduce.hpp:287
RAJA_HOST_DEVICE constexpr RAJA_INLINE T get_and_reset(T init=T())
return the combined value and reset the reducer
Definition: reduce.hpp:296
RAJA_HOST_DEVICE constexpr RAJA_INLINE T get() const
return the combined value
Definition: reduce.hpp:308
RAJA_HOST_DEVICE constexpr RAJA_INLINE void combine(T val)
combine a value into the reducer
Definition: reduce.hpp:316
RAJA_HOST_DEVICE constexpr RAJA_INLINE void operator+=(T val)
combine a value into the reducer
Definition: reduce.hpp:343
Reduce class that does a reduction with a left fold.
Definition: reduce.hpp:45
RAJA_HOST_DEVICE constexpr RAJA_INLINE LeftFoldReduce(T init=BinaryOp::identity(), BinaryOp op=BinaryOp {}) noexcept
Definition: reduce.hpp:46
RAJA_HOST_DEVICE constexpr RAJA_INLINE void reset(T init=BinaryOp::identity()) noexcept
reset the combined value of the reducer to the identity
Definition: reduce.hpp:55
RAJA_HOST_DEVICE constexpr RAJA_INLINE void combine(T value)
combine a value into the reducer
Definition: reduce.hpp:85
RAJA_HOST_DEVICE constexpr RAJA_INLINE T get() const
return the combined value
Definition: reduce.hpp:77
RAJA_HOST_DEVICE constexpr RAJA_INLINE void operator+=(T value)
combine a value into the reducer
Definition: reduce.hpp:94
RAJA_HOST_DEVICE constexpr RAJA_INLINE T get_and_reset(T init=BinaryOp::identity())
return the combined value and reset the reducer
Definition: reduce.hpp:64
Definition: Operators.hpp:367