20 #ifndef RAJA_sort_openmp_HPP
21 #define RAJA_sort_openmp_HPP
23 #include "RAJA/config.hpp"
54 #if defined(RAJA_ENABLE_OPENMP_TASK_INTERNAL)
59 template<
typename Sorter,
typename Iter,
typename Compare>
60 inline void sort_task(Sorter sorter,
68 const diff_type n = i_end - i_begin;
70 if (n <= iterates_per_task)
73 sorter(begin + i_begin, begin + i_end, comp);
78 const diff_type i_middle = i_begin + n / 2;
81 sort_task(sorter, begin, i_begin, i_middle, iterates_per_task, comp);
84 sort_task(sorter, begin, i_middle, i_end, iterates_per_task, comp);
101 template<
typename Sorter,
typename Iter,
typename Compare>
110 const diff_type num_threads = omp_get_num_threads();
112 const diff_type thread_id = omp_get_thread_num();
114 const diff_type i_begin =
firstIndex(n, num_threads, thread_id);
116 const diff_type i_end =
firstIndex(n, num_threads, thread_id + 1);
119 sorter(begin + i_begin, begin + i_end, comp);
123 for (diff_type middle_offset = 1; middle_offset < num_threads;
127 diff_type end_offset = 2 * middle_offset;
130 n, num_threads,
std::min(thread_id + middle_offset, num_threads));
132 n, num_threads,
std::min(thread_id + end_offset, num_threads));
136 if (thread_id % end_offset == 0)
143 begin + i_end, comp);
154 template<
typename Sorter,
typename Iter,
typename Compare>
155 inline void sort(Sorter sorter, Iter begin, Iter end, Compare comp)
161 const diff_type n = end - begin;
163 if (n <= min_iterates_per_task)
166 sorter(begin, end, comp);
171 const diff_type max_threads = omp_get_max_threads();
173 #if defined(RAJA_ENABLE_OPENMP_TASK_INTERNAL)
175 const diff_type iterates_per_task =
176 std::max(n / (2 * max_threads), min_iterates_per_task);
178 const diff_type requested_num_threads =
179 std::min((n + iterates_per_task - 1) / iterates_per_task, max_threads);
182 #pragma omp parallel num_threads(static_cast<int>(requested_num_threads))
185 sort_task(sorter, begin, 0, n, iterates_per_task, comp);
190 const diff_type requested_num_threads =
std::min(
191 (n + min_iterates_per_task - 1) / min_iterates_per_task, max_threads);
194 #pragma omp parallel num_threads(static_cast<int>(requested_num_threads))
210 template<
typename ExecPolicy,
typename Iter,
typename Compare>
211 concepts::enable_if_t<resources::EventProxy<resources::Host>,
221 return resources::EventProxy<resources::Host>(host_res);
227 template<
typename ExecPolicy,
typename Iter,
typename Compare>
228 concepts::enable_if_t<resources::EventProxy<resources::Host>,
238 return resources::EventProxy<resources::Host>(host_res);
248 concepts::enable_if_t<resources::EventProxy<resources::Host>,
257 auto begin =
RAJA::zip(keys_begin, vals_begin);
258 auto end =
RAJA::zip(keys_end, vals_begin + (keys_end - keys_begin));
261 RAJA::compare_first<zip_ref>(comp));
263 return resources::EventProxy<resources::Host>(host_res);
274 concepts::enable_if_t<resources::EventProxy<resources::Host>,
283 auto begin =
RAJA::zip(keys_begin, vals_begin);
284 auto end =
RAJA::zip(keys_end, vals_begin + (keys_end - keys_begin));
287 RAJA::compare_first<zip_ref>(comp));
289 return resources::EventProxy<resources::Host>(host_res);
Header file for RAJA algorithm definitions.
Header file for RAJA concept definitions.
Header file for common RAJA internal macro definitions.
RAJA_HOST_DEVICE RAJA_INLINE void RAJA_UNUSED_VAR(T &&...) noexcept
Definition: macros.hpp:120
void RAJA_INLINE inplace_merge(Iter first, Iter middle, Iter last, Compare comp)
merge a range with midpoint using comparison function with local range/2 copy
Definition: sort.hpp:441
typename ::std::iterator_traits< Iter >::reference IterRef
Definition: algorithm.hpp:41
typename ::std::iterator_traits< Iter >::difference_type IterDiff
Definition: algorithm.hpp:44
RAJA_INLINE DiffType firstIndex(DiffType n, CountType num_threads, CountType thread_id)
Definition: algorithm.hpp:62
void sort(Sorter sorter, Iter begin, Iter end, Compare comp)
sort given range using sorter and comparison function
Definition: sort.hpp:155
constexpr int get_min_iterates_per_task()
Definition: sort.hpp:52
void sort_parallel_region(Sorter sorter, Iter begin, RAJA::detail::IterDiff< Iter > n, Compare comp)
sort given range using sorter and comparison function by manually assigning work to threads
Definition: sort.hpp:102
concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< ExecPolicy > > stable_pairs(resources::Host host_res, const ExecPolicy &, KeyIter keys_begin, KeyIter keys_end, ValIter vals_begin, Compare comp)
Definition: sort.hpp:276
concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< ExecPolicy > > stable(resources::Host host_res, const ExecPolicy &, Iter begin, Iter end, Compare comp)
stable sort given range using comparison function
Definition: sort.hpp:230
concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< ExecPolicy > > unstable(resources::Host host_res, const ExecPolicy &, Iter begin, Iter end, Compare comp)
sort given range using comparison function
Definition: sort.hpp:213
concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< ExecPolicy > > unstable_pairs(resources::Host host_res, const ExecPolicy &, KeyIter keys_begin, KeyIter keys_end, ValIter vals_begin, Compare comp)
sort given range of pairs using comparison function on keys
Definition: sort.hpp:250
concepts::enable_if_t< resources::EventProxy< Res >, type_traits::is_execution_policy< ExecPolicy >, type_traits::is_resource< Res >, std::is_constructible< camp::resources::Resource, Res >, type_traits::is_range< Container > > sort(ExecPolicy &&p, Res r, Container &&c, Compare comp=Compare {})
sort execution pattern
Definition: sort.hpp:61
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result min(Args... args)
Definition: foldl.hpp:161
RAJA_HOST_DEVICE constexpr RAJA_INLINE auto zip(Args &&... args) -> ZipIterator< camp::decay< Args >... >
Zip multiple iterators together to iterate them simultaneously with a single ZipIterator object.
Definition: zip.hpp:242
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result max(Args... args)
Definition: foldl.hpp:155
Header file containing RAJA OpenMP policy definitions.
Header file providing RAJA sort declarations.
Functional that performs a stable sort with the given arguments, calls RAJA::merge_sort.
Definition: sort.hpp:67
Functional that performs an unstable sort with the given arguments, uses RAJA::intro_sort.
Definition: sort.hpp:54
Definition: IndexSet.hpp:70
Definition: PolicyBase.hpp:215
Tuple used by ZipIterator for storing multiple references and values. Acts like a reference to its me...
Definition: zip_tuple.hpp:247