20 #ifndef RAJA_pattern_tensor_TensorTileExec_HPP
21 #define RAJA_pattern_tensor_TensorTileExec_HPP
23 #include "RAJA/config.hpp"
38 template<
typename STORAGE,
typename DIM_SEQ,
typename IDX_SEQ>
41 template<
typename STORAGE,
typename DIM_SEQ>
47 template<
typename STORAGE, camp::idx_t DIM0, camp::idx_t... DIM_REST>
// Runtime tiling along dimension DIM0.
// NOTE(review): this is a fragment of a rendered listing -- the leading integers
// are the listing's own line numbers and the function signature, braces and the
// declarations of `part_tile` / `inner_t` are not present here.
53 template<
typename OTILE,
typename TTYPE,
typename BODY>
// Extent of the original tile along DIM0, read once up front.
59 auto const orig_begin = otile.m_begin[DIM0];
60 auto const orig_size = otile.m_size[DIM0];
// Full-step pass: start at orig_begin and advance tile.m_begin[DIM0] by
// STORAGE::s_dim_elem(DIM0) for as long as one whole step still ends at or
// before orig_begin + orig_size.
63 for (
tile.m_begin[DIM0] = orig_begin;
65 tile.m_begin[DIM0] + STORAGE::s_dim_elem(DIM0) <=
66 orig_begin + orig_size;
68 tile.m_begin[DIM0] += STORAGE::s_dim_elem(DIM0))
// Remainder pass: taken only when the loop stopped short of the end of the
// original extent, i.e. a leftover narrower than one full step exists.
76 if (
tile.m_begin[DIM0] < orig_begin + orig_size)
// Save the current size of part_tile along DIM0 so it can be restored below.
83 auto tmp_size = part_tile.m_size[DIM0];
// Shrink part_tile along DIM0 to exactly the leftover extent.
86 part_tile.m_size[DIM0] = orig_begin + orig_size -
tile.m_begin[DIM0];
// Run the inner executor on the shrunken tile.
// NOTE(review): presumably inner_t handles the remaining dimensions -- confirm
// against the full header.
89 inner_t::exec(otile, part_tile,
body);
// Restore the size that was overwritten for the remainder pass.
92 part_tile.m_size[DIM0] = tmp_size;
// Reset the begin index along DIM0 to its starting value before leaving.
96 tile.m_begin[DIM0] = orig_begin;
// Compile-time counterpart of the runtime tiling along DIM0.
// NOTE(review): fragment of a rendered listing -- several lines (the signature,
// the tail of the iter_count expression, the alias name on listing line 121 and
// the rest of IdxSeq) are not present here.
99 template<
typename OTILE,
typename TTYPE,
typename BODY>
// Begin/size of the original tile and begin of the current tile along DIM0,
// all obtained as constant expressions from the tile types.
106 auto constexpr orig_begin = OTILE::begin_type::value_at(DIM0);
107 auto constexpr orig_size = OTILE::size_type::value_at(DIM0);
109 auto constexpr tile_begin = TTYPE::begin_type::value_at(DIM0);
// Step taken along DIM0 per iteration.
111 auto constexpr step_size = STORAGE::s_dim_elem(DIM0);
// Number of iterations along DIM0. The guard requires tile_begin to lie inside
// [orig_begin, orig_begin + orig_size). The visible numerator adds
// step_size - 1 to the remaining extent before dividing.
// NOTE(review): the divisor and the else-branch are cut off; this looks like a
// round-up division by step_size -- confirm against the full header.
113 auto constexpr iter_count =
114 (tile_begin >= orig_begin) && (tile_begin < (orig_begin + orig_size))
115 ? ((orig_begin + orig_size) - tile_begin + step_size - 1) /
// iter_count wrapped as an integral_constant of the tile's index type.
121 camp::integral_constant<typename TTYPE::index_type, iter_count>;
// Dimension sequence still to be processed, starting with DIM0.
122 using DimSeq = camp::idx_seq<DIM0, DIM_REST...>;
// Generated sequence over the tile's index type (remaining arguments not visible).
123 using IdxSeq =
typename camp::detail::gen_seq<
typename TTYPE::index_type,
133 template<
typename STORAGE>
137 template<
typename OTILE,
typename TTYPE,
typename BODY>
147 template<
typename OTILE,
typename TTYPE,
typename BODY>
// Runtime helper that expands the dimension index pack and starts tiling.
// NOTE(review): fragment of a rendered listing -- some template parameters, the
// function name line, the `body` parameter and the declarations of `full_tile`
// and `tensor_tile_exec_t` are not present here.
158 template<
typename STORAGE,
161 camp::idx_t... IDX_SEQ,
162 camp::idx_t... DIM_SEQ>
// The tile to be covered; the two idx_seq arguments are unnamed and exist only
// so IDX_SEQ... and DIM_SEQ... can be deduced.
164 TILE_TYPE
const& orig_tile,
166 camp::idx_seq<IDX_SEQ...>
const&,
167 camp::idx_seq<DIM_SEQ...>
const&)
// Per-dimension initializers: begin copied from orig_tile, and one
// STORAGE::s_dim_elem(i) entry per dimension index.
// NOTE(review): presumably these initialize full_tile's begin and size -- confirm.
173 {orig_tile.m_begin[IDX_SEQ]...},
174 {STORAGE::s_dim_elem(IDX_SEQ)...},
// Sequence type taken from the storage's layout.
186 using layout_order =
typename STORAGE::layout_type::seq_t;
// Hand the original tile, the working tile and the body to the tile executor.
190 tensor_tile_exec_t::exec(orig_tile, full_tile,
body);
// Forward declaration of the primary StaticTensorTileExec template. It is
// parameterized on a storage type and two sequence types; the specializations
// that follow match camp::idx_seq<...> for both DIM_SEQ and IDX_SEQ.
// (The leading integers are line numbers from the rendered listing.)
194 template<
typename STORAGE,
typename DIM_SEQ,
typename IDX_SEQ>
195 struct StaticTensorTileExec;
// StaticTensorTileExec specialization for a non-empty dimension list
// <DIM0, DIM_REST...> and a non-empty index list <IDX, IDX_REST...>
// (its static_assert message tags it as "DOWN").
// NOTE(review): fragment of a rendered listing -- the struct head, several alias
// names (e.g. for DownExec / NextExec and the two integral_constant aliases),
// the exec signature and the declarations of next_tile / part_tile are not
// present here.
201 template<
typename STORAGE,
203 camp::idx_t... DIM_REST,
205 camp::idx_t... IDX_REST>
207 camp::idx_seq<DIM0, DIM_REST...>,
208 camp::idx_seq<IDX, IDX_REST...>>
// Full dimension list and full index list of this specialization.
211 using DimList = camp::idx_seq<DIM0, DIM_REST...>;
213 using IdxList = camp::idx_seq<IDX, IDX_REST...>;
// Template arguments of an executor that keeps the same dimension list but
// drops the leading index (alias name not visible in this fragment).
218 camp::idx_seq<DIM0, DIM_REST...>,
219 camp::idx_seq<IDX_REST...>>;
// Step taken along DIM0.
221 static auto const step_size = STORAGE::s_dim_elem(DIM0);
223 template<
typename OTILE,
typename TTYPE,
typename BODY>
// Begin/size of the original tile and begin of the current tile along DIM0,
// all read from the tile types as constant expressions.
229 auto constexpr orig_begin = OTILE::begin_type::value_at(DIM0);
230 auto constexpr orig_size = OTILE::size_type::value_at(DIM0);
232 auto constexpr tile_begin = TTYPE::begin_type::value_at(DIM0);
// Constant holding the begin index one step further along DIM0.
235 camp::integral_constant<
typename TTYPE::index_type,
236 tile_begin + STORAGE::s_dim_elem(DIM0)>;
// Constant holding the extent left between tile_begin and the end of the
// original tile along DIM0.
238 camp::integral_constant<
typename TTYPE::index_type,
239 (orig_begin + orig_size) - tile_begin>;
247 using PartTile =
typename TailTile::Partial;
// Compile-time bound: the end of the current step may exceed the end of the
// original extent by at most one step, which is equivalent to
// tile_begin <= orig_begin + orig_size.
250 static_assert((tile_begin + STORAGE::s_dim_elem(DIM0)) <=
251 (orig_begin + orig_size + STORAGE::s_dim_elem(DIM0)),
252 "OOB StaticTensorTileExec DOWN");
// Case 1: a whole step fits inside the original extent -- run DownExec on the
// current tile, then continue with NextExec on next_tile.
254 if ((tile_begin + STORAGE::s_dim_elem(DIM0)) <= (orig_begin + orig_size))
256 DownExec::static_exec(otile,
tile,
body);
258 NextExec::exec(otile, next_tile,
body);
// Case 2: only a leftover narrower than one step remains -- run DownExec once
// on part_tile; no further NextExec call is visible in this branch.
260 else if (tile_begin < (orig_begin + orig_size))
263 DownExec::static_exec(otile, part_tile,
body);
// StaticTensorTileExec specialization matched on a non-empty index list
// <IDX, IDX_REST...> (its static_assert message tags it as "ACROSS").
// NOTE(review): fragment of a rendered listing -- the struct head, the dimension
// sequence argument, alias names, the exec signature, the statements inside
// both branches other than the NextExec call, and the declaration of next_tile
// are not present here.
268 template<
typename STORAGE,
271 camp::idx_t... IDX_REST>
274 camp::idx_seq<IDX, IDX_REST...>>
// Tail of an alias whose executor drops the leading index (alias name not visible).
278 camp::idx_seq<IDX_REST...>>;
280 template<
typename OTILE,
typename TTYPE,
typename BODY>
// Begin/size of the original tile and begin of the current tile along DIM0,
// all read from the tile types as constant expressions.
285 auto constexpr orig_begin = OTILE::begin_type::value_at(DIM0);
286 auto constexpr orig_size = OTILE::size_type::value_at(DIM0);
288 auto constexpr tile_begin = TTYPE::begin_type::value_at(DIM0);
// Constant holding the begin index one step further along DIM0.
291 camp::integral_constant<
typename TTYPE::index_type,
292 tile_begin + STORAGE::s_dim_elem(DIM0)>;
// Constant holding the extent left between tile_begin and the end of the
// original tile along DIM0.
294 camp::integral_constant<
typename TTYPE::index_type,
295 (orig_begin + orig_size) - tile_begin>;
303 using PartTile =
typename TailTile::Partial;
// Compile-time bound, equivalent to tile_begin <= orig_begin + orig_size.
306 static_assert((tile_begin + STORAGE::s_dim_elem(DIM0)) <=
307 (orig_begin + orig_size + STORAGE::s_dim_elem(DIM0)),
308 "OOB StaticTensorTileExec ACROSS");
// Case 1: a whole step fits inside the original extent; the visible part of
// this branch continues with NextExec on next_tile.
310 if ((tile_begin + STORAGE::s_dim_elem(DIM0)) <= (orig_begin + orig_size))
314 NextExec::exec(otile, next_tile,
body);
// Case 2: only a leftover narrower than one step remains (branch body not
// visible in this fragment).
316 else if (tile_begin < (orig_begin + orig_size))
324 template<
typename STORAGE, camp::idx_t... DIM_REST>
326 camp::idx_seq<DIM_REST...>,
330 template<
typename OTILE,
typename TTYPE,
typename BODY>
// Compile-time (static tile) helper that expands the dimension index pack and
// starts tiling via static_exec.
// NOTE(review): fragment of a rendered listing -- some template parameters
// (including INDEX_TYPE and InputType), the function name line, the leading
// parameters and the declarations of orig_tile / full_tile / body /
// tensor_tile_exec_t are not present here.
337 template<
typename STORAGE,
343 camp::idx_t... IDX_SEQ,
344 camp::idx_t... DIM_SEQ>
// Unnamed tag arguments; they exist only so IDX_SEQ... and DIM_SEQ... can be deduced.
348 camp::idx_seq<IDX_SEQ...>
const&,
349 camp::idx_seq<DIM_SEQ...>
const&)
// Compile-time begin values of the input tile type.
354 using InputBegin =
typename InputType::begin_type;
// Two integer sequences over INDEX_TYPE: the input's begin value for each
// dimension index, and STORAGE::s_dim_elem(i) for each dimension index.
// NOTE(review): presumably the begin and size of the working tile type -- confirm.
358 camp::int_seq<INDEX_TYPE, InputBegin::value_at(IDX_SEQ)...>,
359 camp::int_seq<INDEX_TYPE, STORAGE::s_dim_elem(IDX_SEQ)...>>;
// Sequence type taken from the storage's layout.
365 using layout_order =
typename STORAGE::layout_type::seq_t;
// Hand the original tile, the working tile and the body to the static executor.
369 tensor_tile_exec_t::static_exec(orig_tile, full_tile,
body);
// Entry point: forwards `tile` and `body` to tensorTileExec_expanded<STORAGE>.
// NOTE(review): fragment of a rendered listing -- the function signature line
// and braces are not present here; the leading integers are listing line numbers.
372 template<
typename STORAGE,
typename TILE_TYPE,
typename BODY>
// Layout type of the storage; a default-constructed instance is passed below
// as the last argument.
376 using layout_type =
typename STORAGE::layout_type;
// The third argument is an index sequence with STORAGE::s_num_dims entries, so
// the helper can expand one index per dimension.
377 tensorTileExec_expanded<STORAGE>(
378 tile,
body, camp::make_idx_seq_t<STORAGE::s_num_dims> {}, layout_type {});
RAJA header file defining SIMD/SIMT register operations.
Header file for common RAJA internal macro definitions.
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
RAJA_INLINE RAJA_HOST_DEVICE void tensorTileExec(TILE_TYPE const &tile, BODY &&body)
Definition: TensorTileExec.hpp:373
TensorTileSize
Definition: TensorRef.hpp:234
@ TENSOR_FULL
Definition: TensorRef.hpp:236
RAJA_INLINE constexpr RAJA_HOST_DEVICE TensorTile< INDEX_TYPE, TENSOR_FULL, NUM_DIMS > & make_tensor_tile_full(TensorTile< INDEX_TYPE, RTENSOR_SIZE, NUM_DIMS > &tile)
Definition: TensorRef.hpp:721
RAJA_INLINE constexpr RAJA_HOST_DEVICE TensorTile< INDEX_TYPE, TENSOR_PARTIAL, NUM_DIMS > & make_tensor_tile_partial(TensorTile< INDEX_TYPE, RTENSOR_SIZE, NUM_DIMS > &tile)
Definition: TensorRef.hpp:733
RAJA_INLINE RAJA_HOST_DEVICE void tensorTileExec_expanded(TILE_TYPE const &orig_tile, BODY &&body, camp::idx_seq< IDX_SEQ... > const &, camp::idx_seq< DIM_SEQ... > const &)
Definition: TensorTileExec.hpp:163
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch_core.hpp:589
auto & body
Definition: launch.hpp:177
RAJA header file defining SIMD/SIMT register operations.
Definition: TensorRef.hpp:377
Definition: TensorRef.hpp:399
RAJA_HOST_DEVICE static RAJA_INLINE void exec(OTILE const &, TTYPE const &, BODY &&)
Definition: TensorTileExec.hpp:331
RAJA_HOST_DEVICE static RAJA_INLINE void exec(OTILE const &otile, TTYPE const &tile, BODY &&body)
Definition: TensorTileExec.hpp:281
camp::idx_seq< DIM_REST... > DimTail
Definition: TensorTileExec.hpp:212
camp::idx_seq< DIM0, DIM_REST... > DimList
Definition: TensorTileExec.hpp:211
RAJA_HOST_DEVICE static RAJA_INLINE void exec(OTILE const &otile, TTYPE const &tile, BODY &&body)
Definition: TensorTileExec.hpp:224
camp::idx_seq< IDX, IDX_REST... > IdxList
Definition: TensorTileExec.hpp:213
camp::idx_seq< IDX_REST... > IdxTail
Definition: TensorTileExec.hpp:214
Definition: TensorTileExec.hpp:39
Definition: TensorRef.hpp:309
RAJA_HOST_DEVICE static RAJA_INLINE void exec(OTILE const &otile, TTYPE &tile, BODY &&body)
Definition: TensorTileExec.hpp:54
RAJA_HOST_DEVICE static RAJA_INLINE void static_exec(OTILE const &otile, TTYPE const &tile, BODY &&body)
Definition: TensorTileExec.hpp:100
RAJA_HOST_DEVICE static RAJA_INLINE void static_exec(OTILE const &, TTYPE const &tile, BODY &&body)
Definition: TensorTileExec.hpp:148
RAJA_HOST_DEVICE static RAJA_INLINE void exec(OTILE &, TTYPE const &tile, BODY &&body)
Definition: TensorTileExec.hpp:138
Definition: TensorTileExec.hpp:42