RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
reduce.hpp
Go to the documentation of this file.
1 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
2 // Copyright (c) Lawrence Livermore National Security, LLC and other
3 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
4 // files for dates and other details. No copyright assignment is required
5 // to contribute to RAJA.
6 //
7 // SPDX-License-Identifier: (BSD-3-Clause)
8 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
9 
10 #ifndef NEW_REDUCE_CUDA_REDUCE_HPP
11 #define NEW_REDUCE_CUDA_REDUCE_HPP
12 
13 #if defined(RAJA_CUDA_ACTIVE)
14 
15 #include <cuda.h>
19 
21 
22 namespace RAJA
23 {
24 namespace expt
25 {
26 namespace detail
27 {
28 
29 // Init
30 template<typename EXEC_POL, typename OP, typename T, typename VOp>
31 camp::concepts::enable_if<RAJA::type_traits::is_cuda_policy<EXEC_POL>>
32 param_init(EXEC_POL const&,
33  Reducer<OP, T, VOp>& red,
34  RAJA::cuda::detail::cudaInfo& ci)
35 {
36  red.devicetarget =
37  RAJA::cuda::pinned_mempool_type::getInstance().template malloc<T>(1);
38  red.device_mem.allocate(ci.gridDim.x * ci.gridDim.y * ci.gridDim.z);
39  red.device_count = RAJA::cuda::device_zeroed_mempool_type::getInstance()
40  .template malloc<unsigned int>(1);
41 }
42 
43 // Combine
44 template<typename EXEC_POL, typename OP, typename T, typename VOp>
45 RAJA_HOST_DEVICE camp::concepts::enable_if<
47 param_combine(EXEC_POL const&, Reducer<OP, T, VOp>& red)
48 {
49  RAJA::cuda::impl::expt::grid_reduce<typename EXEC_POL::IterationGetter, OP>(
50  red.devicetarget, red.getVal(), red.device_mem, red.device_count);
51 }
52 
53 // Resolve
54 template<typename EXEC_POL, typename OP, typename T, typename VOp>
55 camp::concepts::enable_if<RAJA::type_traits::is_cuda_policy<EXEC_POL>>
56 param_resolve(EXEC_POL const&,
57  Reducer<OP, T, VOp>& red,
58  RAJA::cuda::detail::cudaInfo& ci)
59 {
60  // complete reduction
61  ci.res.wait();
62 
63  red.combineTarget(*red.devicetarget);
64 
65  // free memory
66  RAJA::cuda::device_zeroed_mempool_type::getInstance().free(red.device_count);
67  red.device_count = nullptr;
68  red.device_mem.deallocate();
69  RAJA::cuda::pinned_mempool_type::getInstance().free(red.devicetarget);
70  red.devicetarget = nullptr;
71 }
72 
73 } // namespace detail
74 } // namespace expt
75 } // namespace RAJA
76 
77 #endif
78 
79 #endif // NEW_REDUCE_CUDA_REDUCE_HPP
Header file defining prototypes for routines used to manage memory for CUDA reductions and other operations.
Header file containing RAJA CUDA policy definitions.
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
camp::concepts::enable_if< std::is_same< EXEC_POL, RAJA::seq_exec > > param_init(EXEC_POL const &, RAJA::detail::Name &)
Definition: kernel_name.hpp:24
camp::concepts::enable_if< concepts::negate< is_instance_of_Reducer< camp::decay< T > > >, concepts::negate< std::is_same< T, RAJA::detail::Name > > > param_combine(EXEC_POL const &, T &, const T &)
Definition: forall.hpp:97
camp::concepts::enable_if< std::is_same< EXEC_POL, RAJA::seq_exec > > param_resolve(EXEC_POL const &, RAJA::detail::Name &)
Definition: kernel_name.hpp:40
Definition: AlignedRangeIndexSetBuilders.cpp:35
Header file containing RAJA reduction templates for CUDA execution.
Definition: PolicyBase.hpp:224