doxygen/html/policy_2cuda_2kernel_2For_8hpp_source.html

 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

 // Copyright (c) Lawrence Livermore National Security, LLC and other

 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT

 // files for dates and other details. No copyright assignment is required

 // to contribute to RAJA.

 //

 // SPDX-License-Identifier: (BSD-3-Clause)

 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//


 #ifndef RAJA_policy_cuda_kernel_For_HPP

 #define RAJA_policy_cuda_kernel_For_HPP


 #include "RAJA/config.hpp"


 #include "RAJA/policy/cuda/kernel/internal.hpp"


 namespace RAJA

 {


 namespace internal

 {


 /*
  * CudaStatementExecutor specialization for statement::For with a
  * cuda_indexer policy that uses iteration_mapping::DirectUnchecked.
  *
  * The index returned by IndexMapper is stored as the offset of segment
  * ArgumentId without being compared against the segment length.
  * The kernel_sync_requirement is a free template parameter, so this
  * specialization applies to every sync requirement.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename IndexMapper,
          kernel_sync_requirement sync,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<
     Data,
     statement::For<
         ArgumentId,
         RAJA::policy::cuda::
             cuda_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>,
         EnclosedStmts...>,
     Types>
 {
   // Difference type of the iterator of segment ArgumentId
   using diff_t = segment_diff_type<ArgumentId, Data>;

   // Statements nested inside this For
   using stmt_list_t = StatementList<EnclosedStmts...>;

   // Types with the entry for segment ArgumentId taken from Data
   using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

   // Executor that runs the nested statements
   using enclosed_stmts_t =
       CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

   // Launch dimension calculator for this For's execution policy
   using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
       RAJA::policy::cuda::
           cuda_indexer<iteration_mapping::DirectUnchecked, sync, IndexMapper>>;

   /*
    * Stores the mapped index as the offset of segment ArgumentId and runs
    * the nested statements. thread_active is forwarded unchanged because
    * this mapping performs no bounds check.
    */
   static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
   {
     const diff_t idx = IndexMapper::template index<diff_t>();

     data.template assign_offset<ArgumentId>(idx);

     enclosed_stmts_t::exec(data, thread_active);
   }

   /*
    * Combines the dimensions this policy requests for the segment length
    * with the dimensions requested by the nested statements.
    */
   static inline LaunchDims calculateDimensions(Data const& data)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);

     LaunchDims own_dims = DimensionCalculator::get_dimensions(seg_len);

     LaunchDims nested_dims = enclosed_stmts_t::calculateDimensions(data);

     return combine(own_dims, nested_dims);
   }
 };


 /*
  * CudaStatementExecutor specialization for statement::For with a
  * cuda_indexer policy that uses iteration_mapping::Direct.
  *
  * The index returned by IndexMapper is stored as the offset of segment
  * ArgumentId. Threads whose index is not below the segment length still
  * call into the nested statements, but with thread_active set to false.
  * The kernel_sync_requirement is a free template parameter, so this
  * specialization applies to every sync requirement.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename IndexMapper,
          kernel_sync_requirement sync,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<
     Data,
     statement::For<ArgumentId,
                    RAJA::policy::cuda::cuda_indexer<iteration_mapping::Direct,
                                                     sync,
                                                     IndexMapper>,
                    EnclosedStmts...>,
     Types>
 {
   // Difference type of the iterator of segment ArgumentId
   using diff_t = segment_diff_type<ArgumentId, Data>;

   // Statements nested inside this For
   using stmt_list_t = StatementList<EnclosedStmts...>;

   // Types with the entry for segment ArgumentId taken from Data
   using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

   // Executor that runs the nested statements
   using enclosed_stmts_t =
       CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

   // Launch dimension calculator for this For's execution policy
   using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
       RAJA::policy::cuda::
           cuda_indexer<iteration_mapping::Direct, sync, IndexMapper>>;

   /*
    * Stores the mapped index as the offset of segment ArgumentId and runs
    * the nested statements. The nested executor is always entered; the
    * thread is passed on as active only when it was active on entry and
    * its index is below the segment length.
    */
   static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);
     const diff_t idx     = IndexMapper::template index<diff_t>();

     data.template assign_offset<ArgumentId>(idx);

     enclosed_stmts_t::exec(data, thread_active && (idx < seg_len));
   }

   /*
    * Combines the dimensions this policy requests for the segment length
    * with the dimensions requested by the nested statements.
    */
   static inline LaunchDims calculateDimensions(Data const& data)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);

     LaunchDims own_dims = DimensionCalculator::get_dimensions(seg_len);

     LaunchDims nested_dims = enclosed_stmts_t::calculateDimensions(data);

     return combine(own_dims, nested_dims);
   }
 };


 /*
  * CudaStatementExecutor specialization for statement::For with a
  * cuda_indexer policy that uses
  * iteration_mapping::StridedLoop<named_usage::unspecified> together with
  * kernel_sync_requirement::sync.
  *
  * Walks the segment in chunks of IndexMapper::size() and stores each
  * visited index as the offset of segment ArgumentId. The trip count
  * depends only on the segment length and the stride, not on the thread's
  * starting index; iterations past the end of the segment are executed
  * with thread_active set to false.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename IndexMapper,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<
     Data,
     statement::For<ArgumentId,
                    RAJA::policy::cuda::cuda_indexer<
                        iteration_mapping::StridedLoop<named_usage::unspecified>,
                        kernel_sync_requirement::sync,
                        IndexMapper>,
                    EnclosedStmts...>,
     Types>
 {
   // Difference type of the iterator of segment ArgumentId
   using diff_t = segment_diff_type<ArgumentId, Data>;

   // Statements nested inside this For
   using stmt_list_t = StatementList<EnclosedStmts...>;

   // Types with the entry for segment ArgumentId taken from Data
   using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

   // Executor that runs the nested statements
   using enclosed_stmts_t =
       CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

   // Launch dimension calculator for this For's execution policy
   using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
       RAJA::policy::cuda::cuda_indexer<
           iteration_mapping::StridedLoop<named_usage::unspecified>,
           kernel_sync_requirement::sync,
           IndexMapper>>;

   /*
    * Runs the nested statements once per chunk of the segment. In each
    * chunk the thread's index is the chunk start plus its mapped index;
    * the thread is passed on as active only when it was active on entry
    * and that index is below the segment length.
    */
   static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);
     const diff_t first   = IndexMapper::template index<diff_t>();
     const diff_t stride  = IndexMapper::template size<diff_t>();

     // The loop condition uses the chunk start, not the thread's own index
     for (diff_t chunk_begin = 0; chunk_begin < seg_len; chunk_begin += stride)
     {
       const diff_t idx = chunk_begin + first;

       data.template assign_offset<ArgumentId>(idx);

       enclosed_stmts_t::exec(data, thread_active && (idx < seg_len));
     }
   }

   /*
    * Combines the dimensions this policy requests for the segment length
    * with the dimensions requested by the nested statements.
    */
   static inline LaunchDims calculateDimensions(Data const& data)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);

     LaunchDims own_dims = DimensionCalculator::get_dimensions(seg_len);

     LaunchDims nested_dims = enclosed_stmts_t::calculateDimensions(data);

     return combine(own_dims, nested_dims);
   }
 };


 /*
  * CudaStatementExecutor specialization for statement::For with a
  * cuda_indexer policy that uses
  * iteration_mapping::StridedLoop<named_usage::unspecified> together with
  * kernel_sync_requirement::none.
  *
  * Each thread starts at its mapped index and advances by
  * IndexMapper::size() while the index is below the segment length, so
  * different threads may execute different numbers of iterations.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename IndexMapper,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<
     Data,
     statement::For<ArgumentId,
                    RAJA::policy::cuda::cuda_indexer<
                        iteration_mapping::StridedLoop<named_usage::unspecified>,
                        kernel_sync_requirement::none,
                        IndexMapper>,
                    EnclosedStmts...>,
     Types>
 {
   // Difference type of the iterator of segment ArgumentId
   using diff_t = segment_diff_type<ArgumentId, Data>;

   // Statements nested inside this For
   using stmt_list_t = StatementList<EnclosedStmts...>;

   // Types with the entry for segment ArgumentId taken from Data
   using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

   // Executor that runs the nested statements
   using enclosed_stmts_t =
       CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

   // Launch dimension calculator for this For's execution policy
   using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
       RAJA::policy::cuda::cuda_indexer<
           iteration_mapping::StridedLoop<named_usage::unspecified>,
           kernel_sync_requirement::none,
           IndexMapper>>;

   /*
    * Runs the nested statements for every index this thread owns.
    * thread_active is forwarded unchanged because the loop condition
    * already keeps the index below the segment length.
    */
   static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);
     const diff_t first   = IndexMapper::template index<diff_t>();
     const diff_t stride  = IndexMapper::template size<diff_t>();

     for (diff_t idx = first; idx < seg_len; idx += stride)
     {
       data.template assign_offset<ArgumentId>(idx);

       enclosed_stmts_t::exec(data, thread_active);
     }
   }

   /*
    * Combines the dimensions this policy requests for the segment length
    * with the dimensions requested by the nested statements.
    */
   static inline LaunchDims calculateDimensions(Data const& data)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);

     LaunchDims own_dims = DimensionCalculator::get_dimensions(seg_len);

     LaunchDims nested_dims = enclosed_stmts_t::calculateDimensions(data);

     return combine(own_dims, nested_dims);
   }
 };


 /*
  * CudaStatementExecutor specialization for statement::For with seq_exec
  * inside a CudaKernel.
  *
  * Adds no members of its own: it inherits everything from the
  * StridedLoop / kernel_sync_requirement::none executor instantiated with
  * cuda::IndexGlobal<named_dim::x, named_usage::ignored,
  * named_usage::ignored> as the index mapper.
  * NOTE(review): with both usages ignored the mapper presumably yields
  * index 0 and size 1, so each thread walks the whole segment itself --
  * confirm against the IndexGlobal definition.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<
     Data,
     statement::For<ArgumentId, seq_exec, EnclosedStmts...>,
     Types>
     : CudaStatementExecutor<
           Data,
           statement::For<
               ArgumentId,
               RAJA::policy::cuda::cuda_indexer<
                   iteration_mapping::StridedLoop<named_usage::unspecified>,
                   kernel_sync_requirement::none,
                   cuda::IndexGlobal<named_dim::x,
                                     named_usage::ignored,
                                     named_usage::ignored>>,
               EnclosedStmts...>,
           Types>
 {
 };


 /*
  * CudaStatementExecutor specialization for statement::For with
  * RAJA::cuda_warp_masked_direct<Mask>.
  *
  * The loop index is threadIdx.x passed through Mask::maskValue and is
  * stored as the offset of segment ArgumentId. Threads whose index is not
  * below the segment length call the nested statements with thread_active
  * set to false. The mask may produce at most WARP_SIZE values.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename Mask,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<Data,
                              statement::For<ArgumentId,
                                             RAJA::cuda_warp_masked_direct<Mask>,
                                             EnclosedStmts...>,
                              Types>
 {
   // Mask applied to threadIdx.x to obtain the loop index
   using mask_t = Mask;

   // Difference type of the iterator of segment ArgumentId
   using diff_t = segment_diff_type<ArgumentId, Data>;

   // Statements nested inside this For
   using stmt_list_t = StatementList<EnclosedStmts...>;

   // Types with the entry for segment ArgumentId taken from Data
   using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

   // Executor that runs the nested statements
   using enclosed_stmts_t =
       CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

   static_assert(mask_t::max_masked_size <=
                     RAJA::policy::cuda::device_constants.WARP_SIZE,
                 "BitMask is too large for CUDA warp size");

   /*
    * Stores the masked threadIdx.x as the offset of segment ArgumentId and
    * runs the nested statements. The thread is passed on as active only
    * when it was active on entry and its index is below the segment
    * length.
    */
   static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);
     const diff_t idx = mask_t::maskValue(static_cast<diff_t>(threadIdx.x));

     data.template assign_offset<ArgumentId>(idx);

     enclosed_stmts_t::exec(data, thread_active && (idx < seg_len));
   }

   /*
    * Starts from the dimensions requested by the nested statements and
    * then sets the x thread entry of both dims and min_dims to WARP_SIZE.
    * The request is independent of the segment length.
    */
   static inline LaunchDims calculateDimensions(Data const& data)
   {
     LaunchDims result = enclosed_stmts_t::calculateDimensions(data);

     // One full warp in x, regardless of the segment length
     const diff_t warp_threads = RAJA::policy::cuda::device_constants.WARP_SIZE;

     // Requested thread count in x
     set_cuda_dim<named_dim::x>(result.dims.threads, warp_threads);

     // Minimum thread count in x: the direct mapping needs the whole warp
     set_cuda_dim<named_dim::x>(result.min_dims.threads, warp_threads);

     return result;
   }
 };


 /*
  * CudaStatementExecutor specialization for statement::For with
  * RAJA::cuda_warp_masked_loop<Mask>.
  *
  * Walks the segment in chunks of Mask::max_masked_size. Within a chunk
  * the thread's index is the chunk start plus threadIdx.x passed through
  * Mask::maskValue; it is stored as the offset of segment ArgumentId.
  * Iterations past the end of the segment are executed with thread_active
  * set to false. The mask may produce at most WARP_SIZE values.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename Mask,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<Data,
                              statement::For<ArgumentId,
                                             RAJA::cuda_warp_masked_loop<Mask>,
                                             EnclosedStmts...>,
                              Types>
 {
   // Mask applied to threadIdx.x to obtain the starting index
   using mask_t = Mask;

   // Difference type of the iterator of segment ArgumentId
   using diff_t = segment_diff_type<ArgumentId, Data>;

   // Statements nested inside this For
   using stmt_list_t = StatementList<EnclosedStmts...>;

   // Types with the entry for segment ArgumentId taken from Data
   using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

   // Executor that runs the nested statements
   using enclosed_stmts_t =
       CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

   // Launch dimension calculator of the cuda_warp_loop policy
   using DimensionCalculator =
       RAJA::internal::KernelDimensionCalculator<cuda_warp_loop>;

   static_assert(mask_t::max_masked_size <=
                     RAJA::policy::cuda::device_constants.WARP_SIZE,
                 "BitMask is too large for CUDA warp size");

   /*
    * Runs the nested statements once per chunk of the segment. The trip
    * count depends only on the segment length and the mask size; the
    * thread is passed on as active only when it was active on entry and
    * its index for the current chunk is below the segment length.
    */
   static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);
     const diff_t first = mask_t::maskValue(static_cast<diff_t>(threadIdx.x));
     const diff_t stride = static_cast<diff_t>(mask_t::max_masked_size);

     // The loop condition uses the chunk start, not the thread's own index
     for (diff_t chunk_begin = 0; chunk_begin < seg_len; chunk_begin += stride)
     {
       const diff_t idx = chunk_begin + first;

       data.template assign_offset<ArgumentId>(idx);

       enclosed_stmts_t::exec(data, thread_active && (idx < seg_len));
     }
   }

   /*
    * Combines the dimensions the cuda_warp_loop calculator requests for
    * the segment length with the dimensions requested by the nested
    * statements.
    */
   static inline LaunchDims calculateDimensions(Data const& data)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);

     LaunchDims own_dims = DimensionCalculator::get_dimensions(seg_len);

     LaunchDims nested_dims = enclosed_stmts_t::calculateDimensions(data);

     return combine(own_dims, nested_dims);
   }
 };


 /*
  * CudaStatementExecutor specialization for statement::For with
  * RAJA::cuda_thread_masked_direct<Mask>.
  *
  * The loop index is threadIdx.x passed through Mask::maskValue and is
  * stored as the offset of segment ArgumentId. Threads whose index is not
  * below the segment length call the nested statements with thread_active
  * set to false.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename Mask,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<
     Data,
     statement::For<ArgumentId,
                    RAJA::cuda_thread_masked_direct<Mask>,
                    EnclosedStmts...>,
     Types>
 {
   // Mask applied to threadIdx.x to obtain the loop index
   using mask_t = Mask;

   // Difference type of the iterator of segment ArgumentId
   using diff_t = segment_diff_type<ArgumentId, Data>;

   // Statements nested inside this For
   using stmt_list_t = StatementList<EnclosedStmts...>;

   // Types with the entry for segment ArgumentId taken from Data
   using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

   // Executor that runs the nested statements
   using enclosed_stmts_t =
       CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

   // Launch dimension calculator of cuda_thread_size_x_direct, sized by
   // the mask's max_input_size
   using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
       cuda_thread_size_x_direct<mask_t::max_input_size>>;

   /*
    * Stores the masked threadIdx.x as the offset of segment ArgumentId and
    * runs the nested statements. The thread is passed on as active only
    * when it was active on entry and its index is below the segment
    * length.
    */
   static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);
     const diff_t idx = mask_t::maskValue(static_cast<diff_t>(threadIdx.x));

     data.template assign_offset<ArgumentId>(idx);

     enclosed_stmts_t::exec(data, thread_active && (idx < seg_len));
   }

   /*
    * Combines the dimensions this policy requests for the segment length
    * with the dimensions requested by the nested statements.
    */
   static inline LaunchDims calculateDimensions(Data const& data)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);

     LaunchDims own_dims = DimensionCalculator::get_dimensions(seg_len);

     LaunchDims nested_dims = enclosed_stmts_t::calculateDimensions(data);

     return combine(own_dims, nested_dims);
   }
 };


 /*
  * CudaStatementExecutor specialization for statement::For with
  * RAJA::cuda_thread_masked_loop<Mask>.
  *
  * Walks the segment in chunks of Mask::max_masked_size. Within a chunk
  * the thread's index is the chunk start plus threadIdx.x passed through
  * Mask::maskValue; it is stored as the offset of segment ArgumentId.
  * Iterations past the end of the segment are executed with thread_active
  * set to false.
  */
 template<typename Data,
          camp::idx_t ArgumentId,
          typename Mask,
          typename... EnclosedStmts,
          typename Types>
 struct CudaStatementExecutor<Data,
                              statement::For<ArgumentId,
                                             RAJA::cuda_thread_masked_loop<Mask>,
                                             EnclosedStmts...>,
                              Types>
 {
   // Mask applied to threadIdx.x to obtain the starting index
   using mask_t = Mask;

   // Difference type of the iterator of segment ArgumentId
   using diff_t = segment_diff_type<ArgumentId, Data>;

   // Statements nested inside this For
   using stmt_list_t = StatementList<EnclosedStmts...>;

   // Types with the entry for segment ArgumentId taken from Data
   using NewTypes = setSegmentTypeFromData<Types, ArgumentId, Data>;

   // Executor that runs the nested statements
   using enclosed_stmts_t =
       CudaStatementListExecutor<Data, stmt_list_t, NewTypes>;

   // Launch dimension calculator of cuda_thread_size_x_loop, sized by the
   // mask's max_input_size
   using DimensionCalculator = RAJA::internal::KernelDimensionCalculator<
       cuda_thread_size_x_loop<mask_t::max_input_size>>;

   /*
    * Runs the nested statements once per chunk of the segment. The trip
    * count depends only on the segment length and the mask size; the
    * thread is passed on as active only when it was active on entry and
    * its index for the current chunk is below the segment length.
    */
   static inline RAJA_DEVICE void exec(Data& data, bool thread_active)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);
     const diff_t first = mask_t::maskValue(static_cast<diff_t>(threadIdx.x));
     const diff_t stride = static_cast<diff_t>(mask_t::max_masked_size);

     // The loop condition uses the chunk start, not the thread's own index
     for (diff_t chunk_begin = 0; chunk_begin < seg_len; chunk_begin += stride)
     {
       const diff_t idx = chunk_begin + first;

       data.template assign_offset<ArgumentId>(idx);

       enclosed_stmts_t::exec(data, thread_active && (idx < seg_len));
     }
   }

   /*
    * Combines the dimensions this policy requests for the segment length
    * with the dimensions requested by the nested statements.
    */
   static inline LaunchDims calculateDimensions(Data const& data)
   {
     const diff_t seg_len = segment_length<ArgumentId>(data);

     LaunchDims own_dims = DimensionCalculator::get_dimensions(seg_len);

     LaunchDims nested_dims = enclosed_stmts_t::calculateDimensions(data);

     return combine(own_dims, nested_dims);
   }
 };


 }  // namespace internal

 }  // end namespace RAJA


 #endif /* RAJA_policy_cuda_kernel_For_HPP */

RAJA_DEVICE
#define RAJA_DEVICE
Definition: macros.hpp:66

RAJA::internal::setSegmentTypeFromData
setSegmentType< Types, Segment, camp::at_v< typename camp::decay< Data >::index_types_t, Segment > > setSegmentTypeFromData
Definition: LoopTypes.hpp:95

RAJA::internal::StatementList
camp::list< Stmts... > StatementList
Definition: StatementList.hpp:41

RAJA::internal::segment_diff_type
typename std::iterator_traits< typename camp::at_v< typename Data::segment_tuple_t::TList, ArgumentId >::iterator >::difference_type segment_diff_type
Definition: LoopData.hpp:184

RAJA
Definition: AlignedRangeIndexSetBuilders.cpp:35

RAJA::Launch::sync
@ sync

RAJA::kernel_sync_requirement
kernel_sync_requirement
Definition: types.hpp:63

RAJA::kernel_sync_requirement::none
@ none

RAJA::kernel_sync_requirement::sync
@ sync

internal.hpp
RAJA header file containing constructs used to run kernel traversals on GPU with CUDA.

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_loop< Mask >, EnclosedStmts... >, Types >::NewTypes
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:412

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_loop< Mask >, EnclosedStmts... >, Types >::calculateDimensions
static LaunchDims calculateDimensions(Data const &data)
Definition: For.hpp:452

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_loop< Mask >, EnclosedStmts... >, Types >::enclosed_stmts_t
CudaStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:415

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_loop< Mask >, EnclosedStmts... >, Types >::exec
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: For.hpp:428

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_loop< Mask >, EnclosedStmts... >, Types >::DimensionCalculator
RAJA::internal::KernelDimensionCalculator< cuda_warp_loop > DimensionCalculator
Definition: For.hpp:422

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_loop< Mask >, EnclosedStmts... >, Types >::mask_t
Mask mask_t
Definition: For.hpp:417

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_loop< Mask >, EnclosedStmts... >, Types >::diff_t
segment_diff_type< ArgumentId, Data > diff_t
Definition: For.hpp:419

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_loop< Mask >, EnclosedStmts... >, Types >::stmt_list_t
StatementList< EnclosedStmts... > stmt_list_t
Definition: For.hpp:409

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_loop< Mask >, EnclosedStmts... >, Types >::NewTypes
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:542

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_loop< Mask >, EnclosedStmts... >, Types >::DimensionCalculator
RAJA::internal::KernelDimensionCalculator< cuda_thread_size_x_loop< mask_t::max_input_size > > DimensionCalculator
Definition: For.hpp:552

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_loop< Mask >, EnclosedStmts... >, Types >::diff_t
segment_diff_type< ArgumentId, Data > diff_t
Definition: For.hpp:549

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_loop< Mask >, EnclosedStmts... >, Types >::mask_t
Mask mask_t
Definition: For.hpp:547

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_loop< Mask >, EnclosedStmts... >, Types >::exec
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: For.hpp:554

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_loop< Mask >, EnclosedStmts... >, Types >::calculateDimensions
static LaunchDims calculateDimensions(Data const &data)
Definition: For.hpp:578

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_loop< Mask >, EnclosedStmts... >, Types >::stmt_list_t
StatementList< EnclosedStmts... > stmt_list_t
Definition: For.hpp:539

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_loop< Mask >, EnclosedStmts... >, Types >::enclosed_stmts_t
CudaStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:545

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::Direct, sync, IndexMapper >, EnclosedStmts... >, Types >::diff_t
segment_diff_type< ArgumentId, Data > diff_t
Definition: For.hpp:123

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::Direct, sync, IndexMapper >, EnclosedStmts... >, Types >::enclosed_stmts_t
CudaStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:121

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::Direct, sync, IndexMapper >, EnclosedStmts... >, Types >::stmt_list_t
StatementList< EnclosedStmts... > stmt_list_t
Definition: For.hpp:115

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::Direct, sync, IndexMapper >, EnclosedStmts... >, Types >::DimensionCalculator
RAJA::internal::KernelDimensionCalculator< RAJA::policy::cuda::cuda_indexer< iteration_mapping::Direct, sync, IndexMapper > > DimensionCalculator
Definition: For.hpp:127

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::Direct, sync, IndexMapper >, EnclosedStmts... >, Types >::calculateDimensions
static LaunchDims calculateDimensions(Data const &data)
Definition: For.hpp:145

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::Direct, sync, IndexMapper >, EnclosedStmts... >, Types >::NewTypes
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:118

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::Direct, sync, IndexMapper >, EnclosedStmts... >, Types >::exec
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: For.hpp:129

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_direct< Mask >, EnclosedStmts... >, Types >::NewTypes
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:347

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_direct< Mask >, EnclosedStmts... >, Types >::calculateDimensions
static LaunchDims calculateDimensions(Data const &data)
Definition: For.hpp:373

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_direct< Mask >, EnclosedStmts... >, Types >::mask_t
Mask mask_t
Definition: For.hpp:352

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_direct< Mask >, EnclosedStmts... >, Types >::stmt_list_t
StatementList< EnclosedStmts... > stmt_list_t
Definition: For.hpp:344

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_direct< Mask >, EnclosedStmts... >, Types >::diff_t
segment_diff_type< ArgumentId, Data > diff_t
Definition: For.hpp:354

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_direct< Mask >, EnclosedStmts... >, Types >::exec
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: For.hpp:360

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_warp_masked_direct< Mask >, EnclosedStmts... >, Types >::enclosed_stmts_t
CudaStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:350

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_direct< Mask >, EnclosedStmts... >, Types >::diff_t
segment_diff_type< ArgumentId, Data > diff_t
Definition: For.hpp:492

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_direct< Mask >, EnclosedStmts... >, Types >::exec
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: For.hpp:497

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_direct< Mask >, EnclosedStmts... >, Types >::calculateDimensions
static LaunchDims calculateDimensions(Data const &data)
Definition: For.hpp:510

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_direct< Mask >, EnclosedStmts... >, Types >::stmt_list_t
StatementList< EnclosedStmts... > stmt_list_t
Definition: For.hpp:482

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_direct< Mask >, EnclosedStmts... >, Types >::enclosed_stmts_t
CudaStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:488

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_direct< Mask >, EnclosedStmts... >, Types >::DimensionCalculator
RAJA::internal::KernelDimensionCalculator< cuda_thread_size_x_direct< mask_t::max_input_size > > DimensionCalculator
Definition: For.hpp:495

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_direct< Mask >, EnclosedStmts... >, Types >::NewTypes
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:485

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::cuda_thread_masked_direct< Mask >, EnclosedStmts... >, Types >::mask_t
Mask mask_t
Definition: For.hpp:490

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper >, EnclosedStmts... >, Types >::diff_t
segment_diff_type< ArgumentId, Data > diff_t
Definition: For.hpp:262

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper >, EnclosedStmts... >, Types >::stmt_list_t
StatementList< EnclosedStmts... > stmt_list_t
Definition: For.hpp:254

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper >, EnclosedStmts... >, Types >::enclosed_stmts_t
CudaStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:260

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper >, EnclosedStmts... >, Types >::calculateDimensions
static LaunchDims calculateDimensions(Data const &data)
Definition: For.hpp:290

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper >, EnclosedStmts... >, Types >::exec
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: For.hpp:270

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper >, EnclosedStmts... >, Types >::NewTypes
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:257

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper >, EnclosedStmts... >, Types >::DimensionCalculator
RAJA::internal::KernelDimensionCalculator< RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::none, IndexMapper > > DimensionCalculator
Definition: For.hpp:268

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper >, EnclosedStmts... >, Types >::calculateDimensions
static LaunchDims calculateDimensions(Data const &data)
Definition: For.hpp:220

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper >, EnclosedStmts... >, Types >::exec
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: For.hpp:195

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper >, EnclosedStmts... >, Types >::stmt_list_t
StatementList< EnclosedStmts... > stmt_list_t
Definition: For.hpp:179

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper >, EnclosedStmts... >, Types >::enclosed_stmts_t
CudaStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:185

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper >, EnclosedStmts... >, Types >::diff_t
segment_diff_type< ArgumentId, Data > diff_t
Definition: For.hpp:187

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper >, EnclosedStmts... >, Types >::DimensionCalculator
RAJA::internal::KernelDimensionCalculator< RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper > > DimensionCalculator
Definition: For.hpp:193

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::StridedLoop< named_usage::unspecified >, kernel_sync_requirement::sync, IndexMapper >, EnclosedStmts... >, Types >::NewTypes
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:182

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper >, EnclosedStmts... >, Types >::DimensionCalculator
RAJA::internal::KernelDimensionCalculator< RAJA::policy::cuda::cuda_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper > > DimensionCalculator
Definition: For.hpp:68

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper >, EnclosedStmts... >, Types >::NewTypes
setSegmentTypeFromData< Types, ArgumentId, Data > NewTypes
Definition: For.hpp:59

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper >, EnclosedStmts... >, Types >::calculateDimensions
static LaunchDims calculateDimensions(Data const &data)
Definition: For.hpp:81

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper >, EnclosedStmts... >, Types >::stmt_list_t
StatementList< EnclosedStmts... > stmt_list_t
Definition: For.hpp:56

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper >, EnclosedStmts... >, Types >::exec
static RAJA_DEVICE void exec(Data &data, bool thread_active)
Definition: For.hpp:70

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper >, EnclosedStmts... >, Types >::enclosed_stmts_t
CudaStatementListExecutor< Data, stmt_list_t, NewTypes > enclosed_stmts_t
Definition: For.hpp:62

RAJA::internal::CudaStatementExecutor< Data, statement::For< ArgumentId, RAJA::policy::cuda::cuda_indexer< iteration_mapping::DirectUnchecked, sync, IndexMapper >, EnclosedStmts... >, Types >::diff_t
segment_diff_type< ArgumentId, Data > diff_t
Definition: For.hpp:64

RAJA::iteration_mapping::StridedLoop
Definition: types.hpp:209