RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
Reduce.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 
21 #ifndef RAJA_policy_cuda_kernel_Reduce_HPP
22 #define RAJA_policy_cuda_kernel_Reduce_HPP
23 
24 #include "RAJA/config.hpp"
25 
27 
28 namespace RAJA
29 {
30 
31 namespace internal
32 {
33 
34 
35 //
36 // Executor that handles reductions across a single CUDA thread block
37 //
38 template<typename Data,
39  template<typename...> class ReduceOperator,
40  typename ParamId,
41  typename... EnclosedStmts,
42  typename Types>
43 struct CudaStatementExecutor<Data,
44  statement::Reduce<RAJA::cuda_block_reduce,
45  ReduceOperator,
46  ParamId,
47  EnclosedStmts...>,
48  Types>
49 {
50 
51  using stmt_list_t = StatementList<EnclosedStmts...>;
52 
53  using enclosed_stmts_t = CudaStatementListExecutor<Data, stmt_list_t, Types>;
54 
55  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
56  {
57  // block reduce on the specified parameter
58  auto value = data.template get_param<ParamId>();
59  using value_t = decltype(value);
60  value_t ident = value_t();
61 
62  // if this thread isn't active, just set it to the identity
63  if (!thread_active)
64  {
65  value = ident;
66  }
67 
68  // Call out existing block reduction algorithm that we use for
69  // reduction objects
70  using combiner_t =
72  value_t new_value =
73  RAJA::cuda::impl::block_reduce<combiner_t>(value, ident);
74 
75 
76  // execute enclosed statements, and mask off everyone but thread 0
77  thread_active = threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0;
78  if (thread_active)
79  {
80  // Only update to new value on root thread
81  data.template assign_param<ParamId>(new_value);
82  }
83  enclosed_stmts_t::exec(data, thread_active);
84  }
85 
86  static inline LaunchDims calculateDimensions(Data const& data)
87  {
88  // combine with enclosed statements
89  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
90  return enclosed_dims;
91  }
92 };
93 
94 //
95 // Executor that handles reductions across a single CUDA thread warp
96 //
97 template<typename Data,
98  template<typename...> class ReduceOperator,
99  typename ParamId,
100  typename... EnclosedStmts,
101  typename Types>
102 struct CudaStatementExecutor<Data,
103  statement::Reduce<RAJA::cuda_warp_reduce,
104  ReduceOperator,
105  ParamId,
106  EnclosedStmts...>,
107  Types>
108 {
109 
110  using stmt_list_t = StatementList<EnclosedStmts...>;
111 
112  using enclosed_stmts_t = CudaStatementListExecutor<Data, stmt_list_t, Types>;
113 
114  static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
115  {
116  // block reduce on the specified parameter
117  auto value = data.template get_param<ParamId>();
118  using value_t = decltype(value);
119  value_t ident = value_t();
120 
121  // if this thread isn't active, just set it to the identity
122  if (!thread_active)
123  {
124  value = ident;
125  }
126 
127  // Call warp reduction routine
128  using combiner_t =
130  value_t new_value = RAJA::cuda::impl::warp_reduce<combiner_t>(value, ident);
131  data.template assign_param<ParamId>(new_value);
132 
133  // execute enclosed statements, and mask off everyone but lane 0
134  thread_active = threadIdx.x == 0;
135  if (thread_active)
136  {
137  // Only update to new value on root thread
138  data.template assign_param<ParamId>(new_value);
139  }
140  enclosed_stmts_t::exec(data, thread_active);
141  }
142 
143  static inline LaunchDims calculateDimensions(Data const& data)
144  {
145  // combine with enclosed statements
146  LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(data);
147  return enclosed_dims;
148  }
149 };
150 
151 
152 } // namespace internal
153 } // end namespace RAJA
154 
155 
156 #endif /* RAJA_policy_cuda_kernel_Reduce_HPP */
#define RAJA_DEVICE
Definition: macros.hpp:66
constexpr auto Reduce(T *target)
Definition: reducer.hpp:231
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA header file containing constructs used to run kernel traversals on GPU with CUDA.
Definition: reduce.hpp:70