RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
multi_reduce.hpp
Go to the documentation of this file.
1 
14 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
15 // Copyright (c) Lawrence Livermore National Security, LLC and other
16 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
17 // files for dates and other details. No copyright assignment is required
18 // to contribute to RAJA.
19 //
20 // SPDX-License-Identifier: (BSD-3-Clause)
21 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
22 
23 #ifndef RAJA_omp_multi_reduce_HPP
24 #define RAJA_omp_multi_reduce_HPP
25 
26 #include "RAJA/config.hpp"
27 
28 #if defined(RAJA_ENABLE_OPENMP)
29 
30 #include <memory>
31 #include <vector>
32 
33 #include <omp.h>
34 
35 #include "RAJA/util/types.hpp"
36 #include "RAJA/util/reduce.hpp"
37 #include "RAJA/util/RepeatView.hpp"
38 
40 
43 
45 
46 namespace RAJA
47 {
48 
49 namespace detail
50 {
51 
// Primary template for the OpenMP multi-reduce data holder. It is only
// declared here; this file provides partial specializations selected by the
// `tuning` parameter (RAJA::omp::MultiReduceTuning with the
// combine_on_destruction and combine_on_get algorithms).
//   T               - value type stored in each bin
//   t_MultiReduceOp - functor type invoked as Op{}(accumulator, value)
//   tuning          - tuning tag choosing the specialization
61 template<typename T, typename t_MultiReduceOp, typename tuning>
62 struct MultiReduceDataOMP;
63 
73 template<typename T, typename t_MultiReduceOp>
74 struct MultiReduceDataOMP<
75  T,
76  t_MultiReduceOp,
77  RAJA::omp::MultiReduceTuning<
78  RAJA::omp::multi_reduce_algorithm::combine_on_destruction>>
79 {
80  using value_type = T;
81  using MultiReduceOp = t_MultiReduceOp;
82 
83  MultiReduceDataOMP() = delete;
84 
85  template<typename Container,
86  std::enable_if_t<
87  !std::is_same<Container, MultiReduceDataOMP>::value>* = nullptr>
88  MultiReduceDataOMP(Container const& container, T identity)
89  : m_parent(nullptr),
90  m_num_bins(container.size()),
91  m_identity(identity),
92  m_data(nullptr)
93  {
94  m_data = create_data(container, m_num_bins);
95  }
96 
97  MultiReduceDataOMP(MultiReduceDataOMP const& other)
98  : m_parent(other.m_parent ? other.m_parent : &other),
99  m_num_bins(other.m_num_bins),
100  m_identity(other.m_identity),
101  m_data(nullptr)
102  {
103  m_data =
104  create_data(RepeatView<value_type>(other.m_identity, other.m_num_bins),
105  other.m_num_bins);
106  }
107 
108  MultiReduceDataOMP(MultiReduceDataOMP&&) = delete;
109  MultiReduceDataOMP& operator=(MultiReduceDataOMP const&) = delete;
110  MultiReduceDataOMP& operator=(MultiReduceDataOMP&&) = delete;
111 
112  ~MultiReduceDataOMP()
113  {
114  if (m_data)
115  {
116  if (m_parent && (m_num_bins != size_t(0)))
117  {
118 #pragma omp critical(ompMultiReduceCritical)
119  {
120  for (size_t bin = 0; bin < m_num_bins; ++bin)
121  {
122  MultiReduceOp {}(m_parent->m_data[bin], m_data[bin]);
123  }
124  }
125  }
126  destroy_data(m_data, m_num_bins);
127  }
128  }
129 
130  template<typename Container>
131  void reset(Container const& container, T identity)
132  {
133  m_identity = identity;
134  size_t new_num_bins = container.size();
135  if (new_num_bins != m_num_bins)
136  {
137  destroy_data(m_data, m_num_bins);
138  m_num_bins = new_num_bins;
139  m_data = create_data(container, m_num_bins);
140  }
141  else
142  {
143  size_t bin = 0;
144  for (auto const& value : container)
145  {
146  m_data[bin] = value;
147  ++bin;
148  }
149  }
150  }
151 
152  size_t num_bins() const { return m_num_bins; }
153 
154  T identity() const { return m_identity; }
155 
156  void combine(size_t bin, T const& val) { MultiReduceOp {}(m_data[bin], val); }
157 
158  T get(size_t bin) const { return m_data[bin]; }
159 
160 private:
161  MultiReduceDataOMP const* m_parent;
162  size_t m_num_bins;
163  T m_identity;
164  T* m_data;
165 
166  template<typename Container>
167  static T* create_data(Container const& container, size_t num_bins)
168  {
169  if (num_bins == size_t(0))
170  {
171  return nullptr;
172  }
173  auto data =
174  RAJA::allocate_aligned_type<T>(RAJA::DATA_ALIGN, num_bins * sizeof(T));
175  size_t bin = 0;
176  for (auto const& value : container)
177  {
178  new (&data[bin]) T(value);
179  ++bin;
180  }
181  return data;
182  }
183 
184  static void destroy_data(T*& data, size_t num_bins)
185  {
186  if (num_bins == size_t(0))
187  {
188  return;
189  }
190  for (size_t bin = num_bins; bin > 0; --bin)
191  {
192  data[bin - 1].~T();
193  }
194  RAJA::free_aligned(data);
195  data = nullptr;
196  }
197 };
198 
208 template<typename T, typename t_MultiReduceOp>
209 struct MultiReduceDataOMP<
210  T,
211  t_MultiReduceOp,
212  RAJA::omp::MultiReduceTuning<
213  RAJA::omp::multi_reduce_algorithm::combine_on_get>>
214 {
215  using value_type = T;
216  using MultiReduceOp = t_MultiReduceOp;
217 
218  MultiReduceDataOMP() = delete;
219 
220  template<typename Container,
221  std::enable_if_t<
222  !std::is_same<Container, MultiReduceDataOMP>::value>* = nullptr>
223  MultiReduceDataOMP(Container const& container, T identity)
224  : m_parent(nullptr),
225  m_max_threads(omp_get_max_threads()),
226  m_num_bins(container.size()),
227  m_padded_threads(pad_threads(m_max_threads)),
228  m_padded_bins(pad_bins(m_num_bins)),
229  m_identity(identity),
230  m_data(nullptr)
231  {
232  m_data = create_data(container, identity, m_num_bins, m_max_threads,
233  m_padded_bins, m_padded_threads);
234  }
235 
236  MultiReduceDataOMP(MultiReduceDataOMP const& other)
237  : m_parent(other.m_parent ? other.m_parent : &other),
238  m_num_bins(other.m_num_bins),
239  m_padded_threads(other.m_padded_threads),
240  m_padded_bins(other.m_padded_bins),
241  m_identity(other.m_identity),
242  m_data(other.m_data)
243  {}
244 
245  MultiReduceDataOMP(MultiReduceDataOMP&&) = delete;
246  MultiReduceDataOMP& operator=(MultiReduceDataOMP const&) = delete;
247  MultiReduceDataOMP& operator=(MultiReduceDataOMP&&) = delete;
248 
249  ~MultiReduceDataOMP()
250  {
251  if (m_data)
252  {
253  if (!m_parent)
254  {
255  destroy_data(m_data, m_num_bins, m_max_threads, m_padded_bins,
256  m_padded_threads);
257  }
258  }
259  }
260 
261  template<typename Container>
262  void reset(Container const& container, T identity)
263  {
264  m_identity = identity;
265  size_t new_num_bins = container.size();
266  if (new_num_bins != m_num_bins)
267  {
268  destroy_data(m_data, m_num_bins, m_max_threads, m_padded_bins,
269  m_padded_threads);
270  m_num_bins = new_num_bins;
271  m_padded_bins = pad_bins(m_num_bins);
272  m_data = create_data(container, identity, m_num_bins, m_max_threads,
273  m_padded_bins, m_padded_threads);
274  }
275  else
276  {
277  if (m_max_threads > 0)
278  {
279  {
280  size_t thread_idx = 0;
281  size_t bin = 0;
282  for (auto const& value : container)
283  {
284  m_data[index_data(bin, thread_idx, m_padded_bins,
285  m_padded_threads)] = value;
286  ++bin;
287  }
288  }
289  for (size_t thread_idx = 1; thread_idx < m_max_threads; ++thread_idx)
290  {
291  for (size_t bin = 0; bin < m_num_bins; ++bin)
292  {
293  m_data[index_data(bin, thread_idx, m_padded_bins,
294  m_padded_threads)] = identity;
295  }
296  }
297  }
298  }
299  }
300 
301  size_t num_bins() const { return m_num_bins; }
302 
303  T identity() const { return m_identity; }
304 
305  void combine(size_t bin, T const& val)
306  {
307  size_t thread_idx = omp_get_thread_num();
308  MultiReduceOp {}(
309  m_data[index_data(bin, thread_idx, m_padded_bins, m_padded_threads)],
310  val);
311  }
312 
313  T get(size_t bin) const
314  {
316  reducer(m_identity);
317  for (size_t thread_idx = 0; thread_idx < m_max_threads; ++thread_idx)
318  {
319  reducer.combine(
320  m_data[index_data(bin, thread_idx, m_padded_bins, m_padded_threads)]);
321  }
322  return reducer.get_and_reset();
323  }
324 
325 private:
326  MultiReduceDataOMP const* m_parent;
327  size_t m_max_threads;
328  size_t m_num_bins;
329  size_t m_padded_threads;
330  size_t m_padded_bins;
331  T m_identity;
332  T* m_data;
333 
334  static constexpr size_t pad_bins(size_t num_bins)
335  {
336  size_t num_cache_lines =
337  RAJA_DIVIDE_CEILING_INT(num_bins * sizeof(T), RAJA::DATA_ALIGN);
338  return RAJA_DIVIDE_CEILING_INT(num_cache_lines * RAJA::DATA_ALIGN,
339  sizeof(T));
340  }
341 
342  static constexpr size_t pad_threads(size_t max_threads)
343  {
344  return max_threads;
345  }
346 
347  static constexpr size_t index_data(size_t bin,
348  size_t thread_idx,
349  size_t padded_bins,
350  size_t RAJA_UNUSED_ARG(padded_threads))
351  {
352  return bin + thread_idx * padded_bins;
353  }
354 
355  template<typename Container>
356  static T* create_data(Container const& container,
357  T identity,
358  size_t num_bins,
359  size_t max_threads,
360  size_t padded_bins,
361  size_t padded_threads)
362  {
363  if (num_bins == size_t(0))
364  {
365  return nullptr;
366  }
367  auto data = RAJA::allocate_aligned_type<T>(
368  RAJA::DATA_ALIGN, padded_threads * padded_bins * sizeof(T));
369  if (max_threads > 0)
370  {
371  {
372  size_t thread_idx = 0;
373  size_t bin = 0;
374  for (auto const& value : container)
375  {
376  new (&data[index_data(bin, thread_idx, padded_bins, padded_threads)])
377  T(value);
378  ++bin;
379  }
380  }
381  for (size_t thread_idx = 1; thread_idx < max_threads; ++thread_idx)
382  {
383  for (size_t bin = 0; bin < num_bins; ++bin)
384  {
385  new (&data[index_data(bin, thread_idx, padded_bins, padded_threads)])
386  T(identity);
387  }
388  }
389  }
390  return data;
391  }
392 
393  static void destroy_data(T*& data,
394  size_t num_bins,
395  size_t max_threads,
396  size_t padded_bins,
397  size_t padded_threads)
398  {
399  if (num_bins == size_t(0))
400  {
401  return;
402  }
403  for (size_t thread_idx = max_threads; thread_idx > 0; --thread_idx)
404  {
405  for (size_t bin = num_bins; bin > 0; --bin)
406  {
407  data[index_data(bin - 1, thread_idx - 1, padded_bins, padded_threads)]
408  .~T();
409  }
410  }
411  RAJA::free_aligned(data);
412  data = nullptr;
413  }
414 };
415 
416 } // namespace detail
417 
// Invokes RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA) with the OpenMP
// multi-reduce policy as POL and detail::MultiReduceDataOMP as DATA.
// NOTE(review): the macro is defined in another header; presumably it
// declares the multi-reducer front-end types for this policy - confirm there.
418 RAJA_DECLARE_ALL_MULTI_REDUCERS(policy::omp::omp_multi_reduce_policy,
419  detail::MultiReduceDataOMP)
420 
421 } // namespace RAJA
422 
423 #endif // closing endif for RAJA_ENABLE_OPENMP guard
424 
425 #endif // closing endif for header file include guard
Header file defining prototypes for routines used to manage memory for CPU reductions and other opera...
Header file for RAJA RepeatView constructs.
#define RAJA_UNUSED_ARG(x)
Definition: macros.hpp:97
#define RAJA_DIVIDE_CEILING_INT(dividend, divisor)
Definition: macros.hpp:122
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE RAJA::zip_tuple_element_t< I, zip_tuple< is_val, Ts... > > & get(zip_tuple< is_val, Ts... > &z) noexcept
Definition: zip_tuple.hpp:56
std::conditional_t< RAJA::operators::is_fp_associative< T >::value, BinaryTreeReduce< T, BinaryOp >, LeftFoldReduce< T, BinaryOp > > HighAccuracyReduce
Definition: reduce.hpp:357
void free_aligned(void *ptr)
Definition: MemUtils_CPU.hpp:80
Header file containing RAJA OpenMP policy definitions.
Base types used in common for RAJA reducer objects.
#define RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA)
Definition: multi_reduce.hpp:49
Header file providing RAJA reduction declarations.
Header file for RAJA type definitions.
Header file providing RAJA sort templates.