RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
sort.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_sort_openmp_HPP
21 #define RAJA_sort_openmp_HPP
22 
23 #include "RAJA/config.hpp"
24 
25 #include <algorithm>
26 #include <functional>
27 #include <iterator>
28 
29 #include <omp.h>
30 
31 #include "RAJA/util/macros.hpp"
32 
33 #include "RAJA/util/concepts.hpp"
34 
38 
39 namespace RAJA
40 {
41 namespace impl
42 {
43 namespace sort
44 {
45 
46 namespace detail
47 {
48 namespace openmp
49 {
50 
51 // this number is arbitrary
52 constexpr int get_min_iterates_per_task() { return 128; }
53 
54 #if defined(RAJA_ENABLE_OPENMP_TASK_INTERNAL)
59 template<typename Sorter, typename Iter, typename Compare>
60 inline void sort_task(Sorter sorter,
61  Iter begin,
64  RAJA::detail::IterDiff<Iter> iterates_per_task,
65  Compare comp)
66 {
67  using diff_type = RAJA::detail::IterDiff<Iter>;
68  const diff_type n = i_end - i_begin;
69 
70  if (n <= iterates_per_task)
71  {
72 
73  sorter(begin + i_begin, begin + i_end, comp);
74  }
75  else
76  {
77 
78  const diff_type i_middle = i_begin + n / 2;
79 
80 #pragma omp task
81  sort_task(sorter, begin, i_begin, i_middle, iterates_per_task, comp);
82 
83 #pragma omp task
84  sort_task(sorter, begin, i_middle, i_end, iterates_per_task, comp);
85 
86 #pragma omp taskwait
87 
88  // std::inplace_merge(begin + i_begin, begin + i_middle, begin + i_end,
89  // comp);
90  RAJA::detail::inplace_merge(begin + i_begin, begin + i_middle,
91  begin + i_end, comp);
92  }
93 }
94 
95 #else
96 
101 template<typename Sorter, typename Iter, typename Compare>
102 inline void sort_parallel_region(Sorter sorter,
103  Iter begin,
105  Compare comp)
106 {
108  using diff_type = RAJA::detail::IterDiff<Iter>;
109 
110  const diff_type num_threads = omp_get_num_threads();
111 
112  const diff_type thread_id = omp_get_thread_num();
113 
114  const diff_type i_begin = firstIndex(n, num_threads, thread_id);
115  {
116  const diff_type i_end = firstIndex(n, num_threads, thread_id + 1);
117 
118  // this thread sorts range [i_begin, i_end)
119  sorter(begin + i_begin, begin + i_end, comp);
120  }
121 
122  // hierarchically merge ranges
123  for (diff_type middle_offset = 1; middle_offset < num_threads;
124  middle_offset *= 2)
125  {
126 
127  diff_type end_offset = 2 * middle_offset;
128 
129  const diff_type i_middle = firstIndex(
130  n, num_threads, std::min(thread_id + middle_offset, num_threads));
131  const diff_type i_end = firstIndex(
132  n, num_threads, std::min(thread_id + end_offset, num_threads));
133 
134 #pragma omp barrier
135 
136  if (thread_id % end_offset == 0)
137  {
138 
139  // this thread merges ranges [i_begin, i_middle) and [i_middle, i_end)
140  // std::inplace_merge(begin + i_begin, begin + i_middle, begin + i_end,
141  // comp);
142  RAJA::detail::inplace_merge(begin + i_begin, begin + i_middle,
143  begin + i_end, comp);
144  }
145  }
146 }
147 
148 #endif
149 
150 
154 template<typename Sorter, typename Iter, typename Compare>
155 inline void sort(Sorter sorter, Iter begin, Iter end, Compare comp)
156 {
157  using diff_type = RAJA::detail::IterDiff<Iter>;
158 
159  constexpr diff_type min_iterates_per_task = get_min_iterates_per_task();
160 
161  const diff_type n = end - begin;
162 
163  if (n <= min_iterates_per_task)
164  {
165 
166  sorter(begin, end, comp);
167  }
168  else
169  {
170 
171  const diff_type max_threads = omp_get_max_threads();
172 
173 #if defined(RAJA_ENABLE_OPENMP_TASK_INTERNAL)
174 
175  const diff_type iterates_per_task =
176  std::max(n / (2 * max_threads), min_iterates_per_task);
177 
178  const diff_type requested_num_threads =
179  std::min((n + iterates_per_task - 1) / iterates_per_task, max_threads);
180  RAJA_UNUSED_VAR(requested_num_threads); // avoid warning in hip device code
181 
182 #pragma omp parallel num_threads(static_cast<int>(requested_num_threads))
183 #pragma omp master
184  {
185  sort_task(sorter, begin, 0, n, iterates_per_task, comp);
186  }
187 
188 #else
189 
190  const diff_type requested_num_threads = std::min(
191  (n + min_iterates_per_task - 1) / min_iterates_per_task, max_threads);
192  RAJA_UNUSED_VAR(requested_num_threads); // avoid warning in hip device code
193 
194 #pragma omp parallel num_threads(static_cast<int>(requested_num_threads))
195  {
196  sort_parallel_region(sorter, begin, n, comp);
197  }
198 
199 #endif
200  }
201 }
202 
203 } // namespace openmp
204 
205 } // namespace detail
206 
210 template<typename ExecPolicy, typename Iter, typename Compare>
211 concepts::enable_if_t<resources::EventProxy<resources::Host>,
213 unstable(resources::Host host_res,
214  const ExecPolicy&,
215  Iter begin,
216  Iter end,
217  Compare comp)
218 {
219  detail::openmp::sort(detail::UnstableSorter {}, begin, end, comp);
220 
221  return resources::EventProxy<resources::Host>(host_res);
222 }
223 
227 template<typename ExecPolicy, typename Iter, typename Compare>
228 concepts::enable_if_t<resources::EventProxy<resources::Host>,
230 stable(resources::Host host_res,
231  const ExecPolicy&,
232  Iter begin,
233  Iter end,
234  Compare comp)
235 {
236  detail::openmp::sort(detail::StableSorter {}, begin, end, comp);
237 
238  return resources::EventProxy<resources::Host>(host_res);
239 }
240 
244 template<typename ExecPolicy,
245  typename KeyIter,
246  typename ValIter,
247  typename Compare>
248 concepts::enable_if_t<resources::EventProxy<resources::Host>,
250 unstable_pairs(resources::Host host_res,
251  const ExecPolicy&,
252  KeyIter keys_begin,
253  KeyIter keys_end,
254  ValIter vals_begin,
255  Compare comp)
256 {
257  auto begin = RAJA::zip(keys_begin, vals_begin);
258  auto end = RAJA::zip(keys_end, vals_begin + (keys_end - keys_begin));
259  using zip_ref = RAJA::detail::IterRef<camp::decay<decltype(begin)>>;
261  RAJA::compare_first<zip_ref>(comp));
262 
263  return resources::EventProxy<resources::Host>(host_res);
264 }
265 
270 template<typename ExecPolicy,
271  typename KeyIter,
272  typename ValIter,
273  typename Compare>
274 concepts::enable_if_t<resources::EventProxy<resources::Host>,
276 stable_pairs(resources::Host host_res,
277  const ExecPolicy&,
278  KeyIter keys_begin,
279  KeyIter keys_end,
280  ValIter vals_begin,
281  Compare comp)
282 {
283  auto begin = RAJA::zip(keys_begin, vals_begin);
284  auto end = RAJA::zip(keys_end, vals_begin + (keys_end - keys_begin));
285  using zip_ref = RAJA::detail::IterRef<camp::decay<decltype(begin)>>;
287  RAJA::compare_first<zip_ref>(comp));
288 
289  return resources::EventProxy<resources::Host>(host_res);
290 }
291 
292 } // namespace sort
293 
294 } // namespace impl
295 
296 } // namespace RAJA
297 
298 #endif
Header file for RAJA algorithm definitions.
Header file for RAJA concept definitions.
Header file for common RAJA internal macro definitions.
RAJA_HOST_DEVICE RAJA_INLINE void RAJA_UNUSED_VAR(T &&...) noexcept
Definition: macros.hpp:120
void RAJA_INLINE inplace_merge(Iter first, Iter middle, Iter last, Compare comp)
merge a range with midpoint using comparison function with local range/2 copy
Definition: sort.hpp:441
typename ::std::iterator_traits< Iter >::reference IterRef
Definition: algorithm.hpp:41
typename ::std::iterator_traits< Iter >::difference_type IterDiff
Definition: algorithm.hpp:44
RAJA_INLINE DiffType firstIndex(DiffType n, CountType num_threads, CountType thread_id)
Definition: algorithm.hpp:62
void sort(Sorter sorter, Iter begin, Iter end, Compare comp)
sort given range using sorter and comparison function
Definition: sort.hpp:155
constexpr int get_min_iterates_per_task()
Definition: sort.hpp:52
void sort_parallel_region(Sorter sorter, Iter begin, RAJA::detail::IterDiff< Iter > n, Compare comp)
sort given range using sorter and comparison function by manually assigning work to threads
Definition: sort.hpp:102
concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< ExecPolicy > > stable_pairs(resources::Host host_res, const ExecPolicy &, KeyIter keys_begin, KeyIter keys_end, ValIter vals_begin, Compare comp)
Definition: sort.hpp:276
concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< ExecPolicy > > stable(resources::Host host_res, const ExecPolicy &, Iter begin, Iter end, Compare comp)
stable sort given range using comparison function
Definition: sort.hpp:230
concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< ExecPolicy > > unstable(resources::Host host_res, const ExecPolicy &, Iter begin, Iter end, Compare comp)
sort given range using comparison function
Definition: sort.hpp:213
concepts::enable_if_t< resources::EventProxy< resources::Host >, type_traits::is_openmp_policy< ExecPolicy > > unstable_pairs(resources::Host host_res, const ExecPolicy &, KeyIter keys_begin, KeyIter keys_end, ValIter vals_begin, Compare comp)
sort given range of pairs using comparison function on keys
Definition: sort.hpp:250
concepts::enable_if_t< resources::EventProxy< Res >, type_traits::is_execution_policy< ExecPolicy >, type_traits::is_resource< Res >, std::is_constructible< camp::resources::Resource, Res >, type_traits::is_range< Container > > sort(ExecPolicy &&p, Res r, Container &&c, Compare comp=Compare {})
sort execution pattern
Definition: sort.hpp:61
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result min(Args... args)
Definition: foldl.hpp:161
RAJA_HOST_DEVICE constexpr RAJA_INLINE auto zip(Args &&... args) -> ZipIterator< camp::decay< Args >... >
Zip multiple iterators together to iterate them simultaneously with a single ZipIterator object.
Definition: zip.hpp:242
RAJA_HOST_DEVICE constexpr RAJA_INLINE Result max(Args... args)
Definition: foldl.hpp:155
Header file containing RAJA OpenMP policy definitions.
Header file providing RAJA sort declarations.
Functional that performs a stable sort with the given arguments, calls RAJA::merge_sort.
Definition: sort.hpp:67
Functional that performs an unstable sort with the given arguments, uses RAJA::intro_sort.
Definition: sort.hpp:54
Definition: IndexSet.hpp:70
Definition: PolicyBase.hpp:215
Tuple used by ZipIterator for storing multiple references and values. Acts like a reference to its me...
Definition: zip_tuple.hpp:247