20 #ifndef RAJA_hip_WorkGroup_WorkRunner_HPP
21 #define RAJA_hip_WorkGroup_WorkRunner_HPP
23 #include "RAJA/config.hpp"
40 template<
size_t BLOCK_SIZE,
42 typename DISPATCH_POLICY_T,
53 RAJA::hip_work<BLOCK_SIZE, Async>,
61 RAJA::hip_work<BLOCK_SIZE, Async>,
75 template<
typename WorkContainer>
81 base::run(storage, r, std::forward<Args>(
args)...);
83 IndexType num_loops = std::distance(std::begin(storage), std::end(storage));
86 if (num_loops > 0 && BLOCK_SIZE > 0)
102 template<
size_t BLOCK_SIZE,
104 typename DISPATCH_POLICY_T,
105 typename ALLOCATOR_T,
115 RAJA::hip_work<BLOCK_SIZE, Async>,
116 RAJA::reverse_ordered,
123 RAJA::hip_work<BLOCK_SIZE, Async>,
137 template<
typename WorkContainer>
143 base::run(storage, r, std::forward<Args>(
args)...);
145 IndexType num_loops = std::distance(std::begin(storage), std::end(storage));
148 if (num_loops > 0 && BLOCK_SIZE > 0)
164 template<
typename Segment_type,
170 template<
typename segment_in,
typename body_in>
172 : m_segment(
std::forward<segment_in>(segment)),
173 m_body(
std::forward<body_in>(
body))
180 const index_type i_begin = threadIdx.x + blockIdx.x * blockDim.x;
181 const index_type stride = blockDim.x * gridDim.x;
182 const auto begin = m_segment.begin();
183 const auto end = m_segment.end();
184 const index_type len(end - begin);
185 for (index_type i = i_begin; i < len; i += stride)
187 m_body(begin[i], std::forward<Args>(
args)...);
192 Segment_type m_segment;
196 template<
size_t BLOCK_SIZE,
197 typename StorageIter,
202 void hip_unordered_y_block_global(const StorageIter
iter, Args...
args)
204 const index_type i_loop = blockIdx.y;
207 value_type::device_call(&
iter[i_loop],
args...);
216 template<
size_t BLOCK_SIZE,
218 typename DISPATCH_POLICY_T,
219 typename ALLOCATOR_T,
223 RAJA::hip_work<BLOCK_SIZE, Async>,
224 RAJA::policy::hip::unordered_hip_loop_y_block_iter_x_threadblock_average,
232 RAJA::policy::hip::unordered_hip_loop_y_block_iter_x_threadblock_average;
243 typename camp::at<T, camp::num<0>>
::type,
244 typename camp::at<T, camp::num<1>>
::type,
264 RAJA::hip_work<BLOCK_SIZE, true>,
274 o.m_total_iterations = 0;
279 m_total_iterations = o.m_total_iterations;
281 o.m_total_iterations = 0;
287 template<
typename WorkContainer,
typename Iterable,
typename LoopBody>
290 LoopBody&& loop_body)
292 using Iterator = camp::decay<decltype(std::begin(
iter))>;
293 using LOOP_BODY = camp::decay<LoopBody>;
294 using ITERABLE = camp::decay<Iterable>;
296 camp::decay<decltype(std::distance(std::begin(
iter), std::end(
iter)))>;
303 Iterator begin = std::begin(
iter);
304 Iterator end = std::end(
iter);
305 IndexType len = std::distance(begin, end);
308 if (len > 0 && BLOCK_SIZE > 0)
311 m_total_iterations += len;
321 storage.template emplace<holder>(
323 std::forward<Iterable>(
iter), std::forward<LoopBody>(loop_body));
330 template<
typename WorkContainer>
335 using Iterator = camp::decay<decltype(std::begin(storage))>;
336 using IndexType = camp::decay<decltype(std::distance(std::begin(storage),
337 std::end(storage)))>;
338 using value_type =
typename WorkContainer::value_type;
342 auto func = hip_unordered_y_block_global<BLOCK_SIZE, Iterator, value_type,
348 Iterator begin = std::begin(storage);
349 Iterator end = std::end(storage);
350 IndexType num_loops = std::distance(begin, end);
353 if (num_loops > 0 && BLOCK_SIZE > 0)
357 m_total_iterations /
static_cast<index_type>(num_loops);
363 hip_dim_t blockSize {
static_cast<hip_dim_member_t
>(block_size), 1, 1};
365 static_cast<hip_dim_member_t
>((average_iterations + block_size - 1) /
367 static_cast<hip_dim_member_t
>(num_loops), 1};
379 void* func_args[] = {(
void*)&begin, (
void*)&
args...};
389 void clear() { m_total_iterations = 0; }
392 index_type m_total_iterations = 0;
395 #if !defined(RAJA_ENABLE_HIP_INDIRECT_FUNCTION_CALL)
398 template<
size_t BLOCK_SIZE,
400 typename ALLOCATOR_T,
404 RAJA::hip_work<BLOCK_SIZE, Async>,
405 RAJA::policy::hip::unordered_hip_loop_y_block_iter_x_threadblock_average,
411 template<size_t BLOCK_SIZE,
413 typename ALLOCATOR_T,
417 RAJA::hip_work<BLOCK_SIZE, Async>,
418 RAJA::policy::hip::unordered_hip_loop_y_block_iter_x_threadblock_average,
419 RAJA::indirect_virtual_function_dispatch,
Header file defining prototypes for routines used to manage memory for HIP reductions and other opera...
Header file containing RAJA HIP policy definitions.
#define RAJA_DEVICE
Definition: macros.hpp:66
__launch_bounds__(BLOCK_SIZE, BLOCKS_PER_SM) __global__ void cuda_unordered_y_block_global(const RAJA_CUDA_GRID_CONSTANT StorageIter iter
Args args
Definition: WorkRunner.hpp:212
value_type::device_call &[i_loop] iter
Definition: WorkRunner.hpp:216
typename dispatcher_transform_types< dispatch_policy, holder_type >::type dispatcher_transform_types_t
Definition: Dispatcher.hpp:72
Definition: AlignedRangeIndexSetBuilders.cpp:35
void launch(LaunchParams const &launch_params, ReduceParams &&... rest_of_launch_args)
Definition: launch_core.hpp:268
auto & body
Definition: launch.hpp:177
void synchronize()
Synchronize all current RAJA executions for the specified policy.
Definition: synchronize.hpp:44
Definition: ListSegment.hpp:416
Header file providing RAJA WorkStorage.
Definition: Dispatcher.hpp:85
Definition: WorkRunner.hpp:169
RAJA_DEVICE RAJA_INLINE void operator()(Args... args) const
Definition: WorkRunner.hpp:176
HoldHipDeviceXThreadblockLoop(segment_in &&segment, body_in &&body)
Definition: WorkRunner.hpp:171
Definition: WorkRunner.hpp:162
typename resources::get_resource< FORALL_EXEC_POLICY >::type resource_type
Definition: WorkRunner.hpp:169
int per_run_storage
Definition: WorkRunner.hpp:231
Definition: WorkRunner.hpp:252
Definition: WorkRunner.hpp:300
per_run_storage run(WorkContainer const &storage, typename base::resource_type r, Args... args) const
Definition: WorkRunner.hpp:76
INDEX_T IndexType
Definition: WorkRunner.hpp:68
WorkRunner & operator=(WorkRunner &&o)
Definition: WorkRunner.hpp:277
typename holder_type::template type< T > holder_type_t
Definition: WorkRunner.hpp:251
RAJA::policy::hip::unordered_hip_loop_y_block_iter_x_threadblock_average order_policy
Definition: WorkRunner.hpp:232
dispatcher_transform_types_t< dispatch_policy, holder_type > dispatcher_holder_policy
Definition: WorkRunner.hpp:260
RAJA::hip_work< BLOCK_SIZE, Async > exec_policy
Definition: WorkRunner.hpp:230
void enqueue(WorkContainer &storage, Iterable &&iter, LoopBody &&loop_body)
Definition: WorkRunner.hpp:288
WorkRunner(WorkRunner const &)=delete
DISPATCH_POLICY_T dispatch_policy
Definition: WorkRunner.hpp:233
INDEX_T index_type
Definition: WorkRunner.hpp:235
resources::Hip resource_type
Definition: WorkRunner.hpp:236
WorkRunner & operator=(WorkRunner const &)=delete
void clear()
Definition: WorkRunner.hpp:389
int per_run_storage
Definition: WorkRunner.hpp:328
exec_policy dispatcher_exec_policy
Definition: WorkRunner.hpp:255
WorkRunner(WorkRunner &&o)
Definition: WorkRunner.hpp:272
ALLOCATOR_T Allocator
Definition: WorkRunner.hpp:234
per_run_storage run(WorkContainer const &storage, resource_type r, Args... args) const
Definition: WorkRunner.hpp:331
per_run_storage run(WorkContainer const &storage, typename base::resource_type r, Args... args) const
Definition: WorkRunner.hpp:138
INDEX_T IndexType
Definition: WorkRunner.hpp:130
Definition: WorkRunner.hpp:149
Dispatch using function pointers to make indirect function calls.
Definition: WorkGroup.hpp:77
Definition: WorkGroup.hpp:45
Definition: WorkGroup.hpp:53