20 #ifndef RAJA_cuda_WorkGroup_Dispatcher_HPP
21 #define RAJA_cuda_WorkGroup_Dispatcher_HPP
23 #include "RAJA/config.hpp"
25 #include "camp/resource.hpp"
45 template<
typename Factory>
55 static size_t cached_nbytes = 0;
56 static void* ptr =
nullptr;
57 if (nbytes > cached_nbytes)
60 CAMP_CUDA_API_INVOKE_AND_CHECK(cudaFreeHost, ptr);
61 CAMP_CUDA_API_INVOKE_AND_CHECK(cudaMallocHost, &ptr, nbytes);
62 cached_nbytes = nbytes;
71 static std::mutex s_mutex;
78 template<
typename Factory>
81 using value_type =
typename std::decay_t<Factory>::value_type;
84 auto res = ::camp::resources::Cuda::get_default();
87 reinterpret_cast<const void*
>(&get_value_global<std::decay_t<Factory>>);
88 void*
args[] = {(
void*)&ptr, (
void*)&factory};
89 CAMP_CUDA_API_INVOKE_AND_CHECK(cudaLaunchKernel, func, 1, 1,
args, 0,
91 CAMP_CUDA_API_INVOKE_AND_CHECK(cudaStreamSynchronize, res.get_stream());
98 template<
typename Factory>
101 static auto value =
get_value(std::forward<Factory>(factory));
111 typename Dispatcher_T,
113 size_t BLOCKS_PER_SM,
116 cuda_work_explicit<BLOCK_SIZE, BLOCKS_PER_SM, Async>
const&)
118 static Dispatcher_T dispatcher {
119 Dispatcher_T::template makeDispatcher<T>([](
auto&& factory) {
Header file containing RAJA CUDA policy definitions.
auto get_value(Factory &&factory)
Definition: Dispatcher.hpp:79
std::mutex & get_value_mutex()
Definition: Dispatcher.hpp:69
void * get_cached_value_ptr(size_t nbytes)
Definition: Dispatcher.hpp:53
auto get_cached_value(Factory &&factory)
Definition: Dispatcher.hpp:99
__global__ void get_value_global(typename Factory::value_type *ptr, Factory factory)
Definition: Dispatcher.hpp:46
Args args
Definition: WorkRunner.hpp:212
const Dispatcher_T * get_Dispatcher(cuda_work_explicit< BLOCK_SIZE, BLOCKS_PER_SM, Async > const &)
Definition: Dispatcher.hpp:115
Definition: AlignedRangeIndexSetBuilders.cpp:35
Header file providing RAJA Dispatcher for workgroup.