24 #ifndef RAJA_forall_sycl_HPP
25 #define RAJA_forall_sycl_HPP
27 #include "RAJA/config.hpp"
29 #if defined(RAJA_ENABLE_SYCL)
// Builds a one-dimensional ::sycl::range sized for `len` iterations that are
// processed in groups of `block_size`.
// NOTE(review): this listing omits lines (the braces and the return statement
// are not shown); presumably `gridSize` is the value returned - confirm
// against the full header.
70 ::sycl::range<1> getGridDim(
size_t len,
size_t block_size)
// Round `len` up to the next multiple of `block_size`: ceiling-divide, then
// scale back up. A `block_size` of 0 would divide by zero here.
72 size_t size = {block_size * ((len + block_size - 1) / block_size)};
73 ::sycl::range<1> gridSize(size);
// forall implementation for the sycl_exec<BlockSize, Async> policy.
// NOTE(review): this listing has gaps - some template parameters, function
// parameters, braces, else-branches and statements are not shown - so the
// comments below describe only the lines that are visible.
88 template<
typename Iterable,
93 RAJA_INLINE concepts::enable_if_t<
94 resources::EventProxy<resources::Sycl>,
97 sycl_exec<BlockSize, Async>
const& pol,
100 ForallParam f_params)
// Local type aliases derived from `iter`, `pol` and the loop body type.
103 using Iterator = camp::decay<decltype(std::begin(
iter))>;
105 camp::decay<decltype(std::distance(std::begin(
iter), std::end(
iter)))>;
106 using EXEC_POL = camp::decay<decltype(pol)>;
107 using LOOP_BODY = camp::decay<LoopBody>;
// Compile-time switches used by the `if constexpr` branches below. The
// initializer of is_parampack_empty is not visible in this listing.
// NOTE(review): the trait is applied to LoopBody, not the decayed LOOP_BODY;
// std::is_trivially_copyable is false for reference types, so if LoopBody can
// be deduced as a reference this is always false - confirm how it is deduced.
111 constexpr
bool is_parampack_empty =
113 constexpr
bool is_lbody_trivially_copyable =
114 std::is_trivially_copyable<LoopBody>::value;
// Iteration bounds of `iter` and the number of elements between them.
119 Iterator begin = std::begin(
iter);
120 Iterator end = std::end(
iter);
121 IndexType len = std::distance(begin, end);
// Empty range or non-positive block size: return an event proxy for the
// resource (no launch is visible on this path).
124 if (len <= 0 || BlockSize <= 0)
126 return resources::EventProxy<resources::Sycl>(sycl_res);
// Work-group size and global size for the nd_range launch further down.
132 sycl_dim_t blockSize {BlockSize};
133 sycl_dim_t gridSize = impl::getGridDim(
static_cast<size_t>(len), BlockSize);
135 ::sycl::queue* q = sycl_res.get_queue();
// Pointers for device-side copies of the loop body and the begin iterator;
// both start out null and are assigned by the malloc_device calls below.
136 LOOP_BODY* lbody =
nullptr;
137 Iterator* d_begin =
nullptr;
// The body of this branch is not visible in this listing.
139 if constexpr (!is_parampack_empty)
// Body is not trivially copyable: allocate device memory for the body and for
// the begin iterator, byte-copy each one over, and wait on each copy.
143 if constexpr (!is_lbody_trivially_copyable)
150 lbody = (LOOP_BODY*)::sycl::malloc_device(
sizeof(LoopBody), *q);
151 q->memcpy(lbody, &loop_body,
sizeof(LOOP_BODY)).wait();
153 d_begin = (Iterator*)::sycl::malloc_device(
sizeof(Iterator), *q);
154 q->memcpy(d_begin, &begin,
sizeof(Iterator)).wait();
// Non-empty parameter pack: run the loop as a SYCL reduction over ForallParam.
159 if constexpr (!is_parampack_empty)
// Binary combiner handed to ::sycl::reduction; its body is not visible here.
161 auto combiner = [](ForallParam
x, ForallParam
y) {
// Shared allocation holding one ForallParam, passed to ::sycl::reduction
// together with `f_params` and the combiner.
166 ForallParam* res = ::sycl::malloc_shared<ForallParam>(1, *q);
168 auto reduction = ::sycl::reduction(res, f_params, combiner);
// Launch over exactly `len` work-items (a plain range, not the rounded-up
// nd_range); the kernel receives the reducer as `red`.
170 q->submit([&](::sycl::handler& h) {
171 h.parallel_for(::sycl::range<1>(len), reduction,
172 [=](::sycl::item<1> it,
auto& red) {
175 IndexType ii = it.get_id(0);
178 if constexpr (is_lbody_trivially_copyable)
// Release the shared reduction storage.
193 ::sycl::free(res, *q);
// nd_range launch using gridSize / blockSize; each work-item takes its global
// id as the loop index. NOTE(review): presumably this is the branch taken when
// the parameter pack is empty - the enclosing else is not visible; confirm.
// NOTE(review): no `ii < len` guard is visible in this listing although
// gridSize comes from getGridDim - confirm the full header bounds-checks `ii`.
199 q->submit([&](::sycl::handler& h) {
200 h.parallel_for(::sycl::nd_range<1> {gridSize, blockSize},
201 [=](::sycl::nd_item<1> it) {
202 IndexType ii = it.get_global_id(0);
// Trivially copyable body: call the by-value captured body on begin[ii].
// The other visible call goes through the device copies lbody / d_begin
// (presumably the else branch - not visible here).
205 if constexpr (is_lbody_trivially_copyable)
207 loop_body(begin[ii]);
211 (*lbody)((*d_begin)[ii]);
// Free the device copies allocated for a non-trivially-copyable body.
225 if constexpr (!is_lbody_trivially_copyable)
227 ::sycl::free(lbody, *q);
228 ::sycl::free(d_begin, *q);
232 return resources::EventProxy<resources::Sycl>(sycl_res);
// forall implementation for a TypedIndexSet under
// ExecPolicy<seq_segit, sycl_exec<BlockSize, Async>>.
// NOTE(review): this listing has gaps - some template parameters, the
// declaration of `r`, braces and the tail of the segmentCall argument list are
// not shown - so the comments below describe only the visible lines.
253 template<
typename LoopBody,
256 typename... SegmentTypes>
257 RAJA_INLINE resources::EventProxy<resources::Sycl>
forall_impl(
259 ExecPolicy<
seq_segit, sycl_exec<BlockSize, Async>>,
260 const TypedIndexSet<SegmentTypes...>& iset,
261 LoopBody&& loop_body)
// Walk the segments of the index set in index order.
263 int num_seg = iset.getNumSegments();
264 for (
int isi = 0; isi < num_seg; ++isi)
// Every segment is dispatched with sycl_exec<BlockSize, true>, i.e. with the
// second policy argument fixed to `true` regardless of this overload's Async.
266 iset.segmentCall(r, isi, detail::CallForall(), sycl_exec<BlockSize, true>(),
// NOTE(review): the use of `q` is not visible in this listing; presumably it
// is used to synchronize after the segment loop when Async is false - confirm.
272 ::sycl::queue* q = r.get_queue();
276 return resources::EventProxy<resources::Sycl>(r);
RAJA header file defining index set classes.
Header file defining prototypes for routines used to manage memory for SYCL reductions and other operations.
Header file for common RAJA internal macro definitions.
value_type::device_call &[i_loop] iter
Definition: WorkRunner.hpp:216
constexpr RAJA_HOST_DEVICE auto invoke_body(Params &&params, Fn &&f, Ts &&... extra)
Definition: forall.hpp:598
seq_exec seq_segit
Definition: policy.hpp:83
RAJA_INLINE concepts::enable_if_t< resources::EventProxy< resources::Host >, expt::type_traits::is_ForallParamPack< ForallParam >, expt::type_traits::is_ForallParamPack_empty< ForallParam > > forall_impl(resources::Host host_res, const simd_exec &, Iterable &&iter, Func &&body, ForallParam)
Definition: forall.hpp:81
Definition: AlignedRangeIndexSetBuilders.cpp:35
Header file containing RAJA index set and segment iteration template methods that take an execution policy as a template parameter.
Header file for RAJA resource definitions.
static constexpr void parampack_resolve(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:304
static constexpr void parampack_init(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:269
static RAJA_HOST_DEVICE constexpr void parampack_combine(EXEC_POL const &pol, ForallParamPack< Params... > &f_params, Args &&... args)
Definition: forall.hpp:286
Definition: TypeTraits.hpp:67
Definition: TypeTraits.hpp:59
Header file containing RAJA SYCL policy definitions.
RAJA header file for handling different SYCL header include paths.
Header file for RAJA type definitions.