23 #ifndef RAJA_omp_multi_reduce_HPP
24 #define RAJA_omp_multi_reduce_HPP
26 #include "RAJA/config.hpp"
28 #if defined(RAJA_ENABLE_OPENMP)
// Primary template for the OpenMP multi-reduce data holder. It is only
// declared here; the partial specializations that follow are selected by the
// `tuning` template parameter.
61 template<
typename T,
typename t_MultiReduceOp,
typename tuning>
62 struct MultiReduceDataOMP;
// Partial specialization for the combine_on_destruction algorithm. In the
// visible code each copy builds its own bin array, and its destructor merges
// those bins into the object recorded in m_parent.
73 template<
typename T,
typename t_MultiReduceOp>
74 struct MultiReduceDataOMP<
77 RAJA::omp::MultiReduceTuning<
78 RAJA::omp::multi_reduce_algorithm::combine_on_destruction>>
// Binary operator type; it is default-constructed and applied as
// MultiReduceOp{}(accumulator, value) wherever bins are combined.
81 using MultiReduceOp = t_MultiReduceOp;
// Default construction is disabled; objects are built from a container of
// initial bin values or by copy construction.
83 MultiReduceDataOMP() =
delete;
// Constructs from a container of initial values: the bin count is
// container.size() and the bin storage comes from create_data(), which
// copy-constructs one element per container value.
// The is_same check excludes MultiReduceDataOMP itself as Container,
// presumably so this template is never preferred over the copy constructor.
85 template<
typename Container,
87 !std::is_same<Container, MultiReduceDataOMP>::value>* =
nullptr>
88 MultiReduceDataOMP(Container
const& container, T identity)
90 m_num_bins(container.size()),
94 m_data = create_data(container, m_num_bins);
// Copy constructor. The copy records a parent pointer: other's parent when
// other has one, otherwise other itself, so a copy of a copy still refers to
// the first object in the chain. Bin values are not copied from other;
// create_data() is handed a RepeatView built from other.m_identity and
// other.m_num_bins (presumably yielding the identity for every bin).
97 MultiReduceDataOMP(MultiReduceDataOMP
const& other)
98 : m_parent(other.m_parent ? other.m_parent : &other),
99 m_num_bins(other.m_num_bins),
100 m_identity(other.m_identity),
104 create_data(RepeatView<value_type>(other.m_identity, other.m_num_bins),
// Move construction and both assignment operators are disabled; copy
// construction is the only way to duplicate an object.
108 MultiReduceDataOMP(MultiReduceDataOMP&&) =
delete;
109 MultiReduceDataOMP& operator=(MultiReduceDataOMP
const&) =
delete;
110 MultiReduceDataOMP& operator=(MultiReduceDataOMP&&) =
delete;
// Destructor. When this object has a parent and at least one bin, each bin
// is combined into the parent's matching bin with MultiReduceOp. The loop
// runs inside a named OpenMP critical section, which serializes these
// updates across threads. The local storage is then passed to
// destroy_data().
112 ~MultiReduceDataOMP()
116 if (m_parent && (m_num_bins !=
size_t(0)))
118 #pragma omp critical(ompMultiReduceCritical)
120 for (
size_t bin = 0; bin < m_num_bins; ++bin)
122 MultiReduceOp {}(m_parent->m_data[bin], m_data[bin]);
126 destroy_data(m_data, m_num_bins);
// Re-initializes the reducer from `container` and stores the new identity.
// When the bin count differs from the current one, the old storage is
// destroyed and rebuilt with create_data().
// The trailing loop walks the container values; its body is not visible in
// this listing (presumably it overwrites the existing bins when the count is
// unchanged -- TODO confirm).
130 template<
typename Container>
131 void reset(Container
const& container, T identity)
133 m_identity = identity;
134 size_t new_num_bins = container.size();
135 if (new_num_bins != m_num_bins)
137 destroy_data(m_data, m_num_bins);
138 m_num_bins = new_num_bins;
139 m_data = create_data(container, m_num_bins);
144 for (
auto const& value : container)
// Returns the number of bins currently held (m_num_bins).
152 size_t num_bins()
const {
return m_num_bins; }
// Returns the stored identity value (m_identity) by value.
154 T identity()
const {
return m_identity; }
// Folds `val` into this object's own storage at index `bin` using a
// default-constructed MultiReduceOp. No bounds check on `bin` is performed
// here.
156 void combine(
size_t bin, T
const& val) { MultiReduceOp {}(m_data[bin], val); }
// Returns a copy of the value stored for `bin` in this object's own storage.
158 T
get(
size_t bin)
const {
return m_data[bin]; }
// Pointer set by the copy constructor to the object this one descends from.
// The destructor combines this object's bins into m_parent->m_data when the
// pointer is non-null.
161 MultiReduceDataOMP
const* m_parent;
// Allocates storage for num_bins elements of T through
// RAJA::allocate_aligned_type with RAJA::DATA_ALIGN, then copy-constructs
// one element per container value with placement new.
// A zero bin count is special-cased by the first check; what it returns is
// not visible in this listing.
// NOTE(review): the fill loop is driven by the container, not by num_bins --
// assumes container.size() == num_bins, as at the visible call sites.
166 template<
typename Container>
167 static T* create_data(Container
const& container,
size_t num_bins)
169 if (num_bins ==
size_t(0))
174 RAJA::allocate_aligned_type<T>(RAJA::DATA_ALIGN, num_bins *
sizeof(T));
176 for (
auto const& value : container)
178 new (&data[bin]) T(value);
// Tears down storage produced by create_data(). A zero bin count is
// special-cased by the first check. Bins are visited from last to first; the
// loop body and the release of the allocation are not visible in this
// listing. `data` is taken by reference, presumably so it can be reset after
// the storage is released -- TODO confirm.
184 static void destroy_data(T*& data,
size_t num_bins)
186 if (num_bins ==
size_t(0))
190 for (
size_t bin = num_bins; bin > 0; --bin)
// Partial specialization for the combine_on_get algorithm. Storage is one
// flat array addressed by (bin, thread) through index_data(): combine()
// targets the calling thread's slot, and get() folds the slots of all
// threads for a bin.
208 template<
typename T,
typename t_MultiReduceOp>
209 struct MultiReduceDataOMP<
212 RAJA::omp::MultiReduceTuning<
213 RAJA::omp::multi_reduce_algorithm::combine_on_get>>
// Element type held in every bin.
215 using value_type = T;
// Binary operator type used when bins are combined.
216 using MultiReduceOp = t_MultiReduceOp;
// Default construction is disabled; objects are built from a container of
// initial bin values or by copy construction.
218 MultiReduceDataOMP() =
delete;
// Constructs from a container of initial values. The thread count is taken
// from omp_get_max_threads() at construction time and the bin count from
// container.size(); both are padded (pad_threads / pad_bins) before
// create_data() builds the storage.
// The is_same check excludes MultiReduceDataOMP itself as Container,
// presumably so this template is never preferred over the copy constructor.
220 template<
typename Container,
222 !std::is_same<Container, MultiReduceDataOMP>::value>* =
nullptr>
223 MultiReduceDataOMP(Container
const& container, T identity)
225 m_max_threads(omp_get_max_threads()),
226 m_num_bins(container.size()),
227 m_padded_threads(pad_threads(m_max_threads)),
228 m_padded_bins(pad_bins(m_num_bins)),
229 m_identity(identity),
232 m_data = create_data(container, identity, m_num_bins, m_max_threads,
233 m_padded_bins, m_padded_threads);
// Copy constructor. Records the parent (other's parent when it has one,
// otherwise other itself) and copies the bin count, padded sizes and
// identity from other.
// NOTE(review): m_max_threads does not appear in the visible initializer
// list, yet get() loops up to m_max_threads -- confirm it is initialized for
// copies.
236 MultiReduceDataOMP(MultiReduceDataOMP
const& other)
237 : m_parent(other.m_parent ? other.m_parent : &other),
238 m_num_bins(other.m_num_bins),
239 m_padded_threads(other.m_padded_threads),
240 m_padded_bins(other.m_padded_bins),
241 m_identity(other.m_identity),
// Move construction and both assignment operators are disabled; copy
// construction is the only way to duplicate an object.
245 MultiReduceDataOMP(MultiReduceDataOMP&&) =
delete;
246 MultiReduceDataOMP& operator=(MultiReduceDataOMP
const&) =
delete;
247 MultiReduceDataOMP& operator=(MultiReduceDataOMP&&) =
delete;
// Destructor. The visible statement passes the storage and its dimensions to
// destroy_data(); any condition guarding that call is not visible in this
// listing.
249 ~MultiReduceDataOMP()
255 destroy_data(m_data, m_num_bins, m_max_threads, m_padded_bins,
// Re-initializes the reducer from `container` and stores the new identity.
// When the bin count changes, the storage is destroyed, the padded bin count
// is recomputed and the storage is recreated with create_data().
// The remaining code, when there is at least one thread, assigns the
// container values to the thread-0 slots and assigns `identity` to every bin
// of threads 1..m_max_threads-1. Presumably this is the unchanged-count
// path; the branch structure is not fully visible in this listing.
261 template<
typename Container>
262 void reset(Container
const& container, T identity)
264 m_identity = identity;
265 size_t new_num_bins = container.size();
266 if (new_num_bins != m_num_bins)
268 destroy_data(m_data, m_num_bins, m_max_threads, m_padded_bins,
270 m_num_bins = new_num_bins;
271 m_padded_bins = pad_bins(m_num_bins);
272 m_data = create_data(container, identity, m_num_bins, m_max_threads,
273 m_padded_bins, m_padded_threads);
277 if (m_max_threads > 0)
280 size_t thread_idx = 0;
282 for (
auto const& value : container)
284 m_data[index_data(bin, thread_idx, m_padded_bins,
285 m_padded_threads)] = value;
289 for (
size_t thread_idx = 1; thread_idx < m_max_threads; ++thread_idx)
291 for (
size_t bin = 0; bin < m_num_bins; ++bin)
293 m_data[index_data(bin, thread_idx, m_padded_bins,
294 m_padded_threads)] = identity;
// Returns the number of bins currently held (m_num_bins).
301 size_t num_bins()
const {
return m_num_bins; }
// Returns the stored identity value (m_identity) by value.
303 T identity()
const {
return m_identity; }
// Combines `val` into the slot for `bin` that belongs to the calling OpenMP
// thread (omp_get_thread_num()), so different threads address different
// elements. The operator application itself is on a line not visible in this
// listing.
305 void combine(
size_t bin, T
const& val)
307 size_t thread_idx = omp_get_thread_num();
309 m_data[index_data(bin, thread_idx, m_padded_bins, m_padded_threads)],
// Returns the combined value for `bin`: the slot of every thread in
// [0, m_max_threads) is fed to `reducer` (declared on a line not visible in
// this listing) and the result is taken from reducer.get_and_reset().
313 T
get(
size_t bin)
const
317 for (
size_t thread_idx = 0; thread_idx < m_max_threads; ++thread_idx)
320 m_data[index_data(bin, thread_idx, m_padded_bins, m_padded_threads)]);
322 return reducer.get_and_reset();
// Pointer set by the copy constructor to the object this one descends from.
326 MultiReduceDataOMP
const* m_parent;
// Thread count captured from omp_get_max_threads() by the container
// constructor; bounds the per-thread loops in reset() and get().
327 size_t m_max_threads;
// Padded thread count, computed by pad_threads(m_max_threads).
329 size_t m_padded_threads;
// Padded bin count, computed by pad_bins(m_num_bins); used as the row stride
// in index_data().
330 size_t m_padded_bins;
// Computes the padded bin count for `num_bins`. The visible code begins by
// computing a cache-line count; the rest of the body is not visible in this
// listing (presumably the bins are rounded up to whole cache lines -- TODO
// confirm).
334 static constexpr
size_t pad_bins(
size_t num_bins)
336 size_t num_cache_lines =
// Computes the padded thread count for `max_threads`. The body is not
// visible in this listing.
342 static constexpr
size_t pad_threads(
size_t max_threads)
// Maps (bin, thread_idx) to a flat offset into the storage: the bins of one
// thread are contiguous, and consecutive threads are padded_bins elements
// apart.
347 static constexpr
size_t index_data(
size_t bin,
352 return bin + thread_idx * padded_bins;
// Allocates storage for padded_threads * padded_bins elements of T through
// RAJA::allocate_aligned_type with RAJA::DATA_ALIGN, then constructs the
// used slots with placement new: the thread-0 slots in the loop over the
// container values, and the slots of threads 1..max_threads-1 for every bin.
// The constructor arguments are on lines not visible in this listing
// (presumably the container value and the identity respectively).
// A zero bin count is special-cased by the first check.
355 template<
typename Container>
356 static T* create_data(Container
const& container,
361 size_t padded_threads)
363 if (num_bins ==
size_t(0))
367 auto data = RAJA::allocate_aligned_type<T>(
368 RAJA::DATA_ALIGN, padded_threads * padded_bins *
sizeof(T));
372 size_t thread_idx = 0;
374 for (
auto const& value : container)
376 new (&data[index_data(bin, thread_idx, padded_bins, padded_threads)])
381 for (
size_t thread_idx = 1; thread_idx < max_threads; ++thread_idx)
383 for (
size_t bin = 0; bin < num_bins; ++bin)
385 new (&data[index_data(bin, thread_idx, padded_bins, padded_threads)])
// Tears down storage produced by create_data(). A zero bin count is
// special-cased by the first check. Threads and bins are visited in reverse
// order over the slots that were constructed; the statement applied to each
// element continues on a line not visible in this listing (presumably an
// explicit destructor call -- TODO confirm), as does the release of the
// allocation.
393 static void destroy_data(T*& data,
397 size_t padded_threads)
399 if (num_bins ==
size_t(0))
403 for (
size_t thread_idx = max_threads; thread_idx > 0; --thread_idx)
405 for (
size_t bin = num_bins; bin > 0; --bin)
407 data[index_data(bin - 1, thread_idx - 1, padded_bins, padded_threads)]
419 detail::MultiReduceDataOMP)
Header file defining prototypes for routines used to manage memory for CPU reductions and other operations.
Header file for RAJA RepeatView constructs.
#define RAJA_UNUSED_ARG(x)
Definition: macros.hpp:97
#define RAJA_DIVIDE_CEILING_INT(dividend, divisor)
Definition: macros.hpp:122
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE RAJA::zip_tuple_element_t< I, zip_tuple< is_val, Ts... > > & get(zip_tuple< is_val, Ts... > &z) noexcept
Definition: zip_tuple.hpp:56
std::conditional_t< RAJA::operators::is_fp_associative< T >::value, BinaryTreeReduce< T, BinaryOp >, LeftFoldReduce< T, BinaryOp > > HighAccuracyReduce
Definition: reduce.hpp:357
void free_aligned(void *ptr)
Definition: MemUtils_CPU.hpp:80
Header file containing RAJA OpenMP policy definitions.
Base types used in common for RAJA reducer objects.
#define RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA)
Definition: multi_reduce.hpp:49
Header file providing RAJA reduction declarations.
Header file for RAJA type definitions.
Header file providing RAJA sort templates.