RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
RegisterBase.hpp
Go to the documentation of this file.
1 
11 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 // Copyright (c) Lawrence Livermore National Security, LLC and other
13 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
14 // files for dates and other details. No copyright assignment is required
15 // to contribute to RAJA.
16 //
17 // SPDX-License-Identifier: (BSD-3-Clause)
18 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
19 
20 #ifndef RAJA_pattern_tensor_RegisterBase_HPP
21 #define RAJA_pattern_tensor_RegisterBase_HPP
22 
23 #include "RAJA/config.hpp"
24 
25 #include "RAJA/util/macros.hpp"
26 
27 #include "camp/camp.hpp"
30 #include "RAJA/util/BitMask.hpp"
31 
33 
34 namespace RAJA
35 {
36 namespace expt
37 {
38 template<typename T, typename REGISTER_POLICY>
39 class Register;
40 }
41 
42 namespace internal
43 {
44 namespace expt
45 {
47 {};
48 
49 /*
50  * Overload for: arithmetic + TensorRegister
51 
52  */
53 template<
54  typename LEFT,
55  typename RIGHT,
56  typename std::enable_if<std::is_arithmetic<LEFT>::value, bool>::type = true,
57  typename std::enable_if<std::is_base_of<RegisterConcreteBase, RIGHT>::value,
58  bool>::type = true>
59 RAJA_INLINE RAJA_HOST_DEVICE RIGHT operator+(LEFT const& lhs, RIGHT const& rhs)
60 {
61  return RIGHT(lhs).add(rhs);
62 }
63 
64 /*
65  * Overload for: arithmetic - TensorRegister
66 
67  */
68 template<
69  typename LEFT,
70  typename RIGHT,
71  typename std::enable_if<std::is_arithmetic<LEFT>::value, bool>::type = true,
72  typename std::enable_if<std::is_base_of<RegisterConcreteBase, RIGHT>::value,
73  bool>::type = true>
74 RAJA_INLINE RAJA_HOST_DEVICE RIGHT operator-(LEFT const& lhs, RIGHT const& rhs)
75 {
76  return RIGHT(lhs).subtract(rhs);
77 }
78 
79 /*
80  * Overload for: arithmetic * TensorRegister
81 
82  */
83 template<
84  typename LEFT,
85  typename RIGHT,
86  typename std::enable_if<std::is_arithmetic<LEFT>::value, bool>::type = true,
87  typename std::enable_if<std::is_base_of<RegisterConcreteBase, RIGHT>::value,
88  bool>::type = true>
89 RAJA_INLINE RAJA_HOST_DEVICE RIGHT operator*(LEFT const& lhs, RIGHT const& rhs)
90 {
91  return rhs.scale(lhs);
92 }
93 
94 /*
95  * Overload for: arithmetic / TensorRegister
96 
97  */
98 template<
99  typename LEFT,
100  typename RIGHT,
101  typename std::enable_if<std::is_arithmetic<LEFT>::value, bool>::type = true,
102  typename std::enable_if<std::is_base_of<RegisterConcreteBase, RIGHT>::value,
103  bool>::type = true>
104 RAJA_INLINE RAJA_HOST_DEVICE RIGHT operator/(LEFT const& lhs, RIGHT const& rhs)
105 {
106  return RIGHT(lhs).divide(rhs);
107 }
108 
109 
116 template<typename Derived>
118 
119 template<typename T, typename REGISTER_POLICY>
120 class RegisterBase<RAJA::expt::Register<T, REGISTER_POLICY>>
121  : public RegisterConcreteBase
122 {
123 public:
125  using element_type = camp::decay<T>;
126 
127  using index_type = camp::idx_t;
128 
133 
134 private:
136 
137  RAJA_INLINE
138  self_type* getThis() { return static_cast<self_type*>(this); }
139 
141 
142  RAJA_INLINE
143  constexpr self_type const* getThis() const
144  {
145  return static_cast<self_type const*>(this);
146  }
147 
148 public:
150 
151  RAJA_INLINE
152  static constexpr bool is_root() { return true; }
153 
155 
156  RAJA_INLINE
157  constexpr RegisterBase() {}
158 
160 
161  RAJA_INLINE
163 
165 
166  RAJA_INLINE
167  constexpr RegisterBase(RegisterBase const&) {}
168 
169  RAJA_INLINE
170 
172  constexpr RegisterBase(self_type const&) {}
173 
179 
180  RAJA_INLINE
181  static self_type s_broadcast_n(element_type const& value, camp::idx_t N)
182  {
183  self_type x;
184  for (camp::idx_t i = 0; i < N; ++i)
185  {
186  x.set(value, i);
187  }
188  return x;
189  }
190 
196 
197  RAJA_INLINE
199  {
200  self_type x;
201  x.broadcast(getThis()->get(i));
202  return x;
203  }
204 
214  template<typename T2>
216  element_type const* ptr,
218  {
219 #ifdef RAJA_ENABLE_VECTOR_STATS
220  RAJA::tensor_stats::num_vector_load_strided_n++;
221 #endif
222  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
223  {
224  getThis()->set(ptr[offsets.get(i)], i);
225  }
226  return *getThis();
227  }
228 
238  template<typename T2>
240  element_type const* ptr,
242  camp::idx_t N)
243  {
244 #ifdef RAJA_ENABLE_VECTOR_STATS
245  RAJA::tensor_stats::num_vector_load_strided_n++;
246 #endif
247  for (camp::idx_t i = 0; i < N; ++i)
248  {
249  getThis()->set(ptr[offsets.get(i)], i);
250  }
251  return *getThis();
252  }
253 
265 
266  RAJA_INLINE
268  camp::idx_t segbits,
269  camp::idx_t stride_inner,
270  camp::idx_t stride_outer)
271  {
272  getThis()->gather(ptr, self_type::s_segmented_offsets(segbits, stride_inner,
273  stride_outer));
274  return *getThis();
275  }
276 
285 
286  RAJA_INLINE
288  camp::idx_t segbits,
289  camp::idx_t stride_inner,
290  camp::idx_t stride_outer,
291  camp::idx_t num_inner,
292  camp::idx_t num_outer)
293  {
294 
295  camp::idx_t num_segments = self_type::s_num_elem >> segbits;
296  camp::idx_t seg_size = 1 << segbits;
297 
298  camp::idx_t lane = 0;
299  for (camp::idx_t seg = 0; seg < num_segments; ++seg)
300  {
301  for (camp::idx_t i = 0; i < seg_size; ++i)
302  {
303 
304  if (seg >= num_outer || i >= num_inner)
305  {
306  getThis()->set(element_type(0), lane);
307  }
308  else
309  {
310 
311  camp::idx_t offset = seg * stride_outer + i * stride_inner;
312 
313  element_type value = ptr[offset];
314 
315  getThis()->set(value, lane);
316  }
317 
318  lane++;
319  }
320  }
321 
322  return *getThis();
323  }
324 
334  template<typename T2>
335  RAJA_HOST_DEVICE RAJA_INLINE self_type const& scatter(
336  element_type* ptr,
337  RAJA::expt::Register<T2, REGISTER_POLICY> const& offsets) const
338  {
339 #ifdef RAJA_ENABLE_VECTOR_STATS
340  RAJA::tensor_stats::num_vector_load_strided_n++;
341 #endif
342  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
343  {
344  ptr[offsets.get(i)] = getThis()->get(i);
345  }
346  return *getThis();
347  }
348 
358  template<typename T2>
360  element_type* ptr,
362  camp::idx_t N) const
363  {
364 #ifdef RAJA_ENABLE_VECTOR_STATS
365  RAJA::tensor_stats::num_vector_load_strided_n++;
366 #endif
367  for (camp::idx_t i = 0; i < N; ++i)
368  {
369  ptr[offsets.get(i)] = getThis()->get(i);
370  }
371  return *getThis();
372  }
373 
385 
386  RAJA_INLINE
388  camp::idx_t segbits,
389  camp::idx_t stride_inner,
390  camp::idx_t stride_outer) const
391  {
392  getThis()->scatter(ptr, self_type::s_segmented_offsets(
393  segbits, stride_inner, stride_outer));
394  return *getThis();
395  }
396 
405 
406  RAJA_INLINE
408  camp::idx_t segbits,
409  camp::idx_t stride_inner,
410  camp::idx_t stride_outer,
411  camp::idx_t num_inner,
412  camp::idx_t num_outer) const
413  {
414 
415  camp::idx_t num_segments = self_type::s_num_elem >> segbits;
416  camp::idx_t seg_size = 1 << segbits;
417 
418  camp::idx_t lane = 0;
419  for (camp::idx_t seg = 0; seg < num_segments; ++seg)
420  {
421  for (camp::idx_t i = 0; i < seg_size; ++i)
422  {
423 
424  if (!(seg >= num_outer || i >= num_inner))
425  {
426 
427  camp::idx_t offset = seg * stride_outer + i * stride_inner;
428 
429  ptr[offset] = getThis()->get(lane);
430  }
431 
432  lane++;
433  }
434  }
435 
436  return *getThis();
437  }
438 
445 
446  RAJA_INLINE
448  {
449  getThis()->broadcast(value);
450  return *getThis();
451  }
452 
458  template<typename T2>
461  {
462  getThis()->broadcast(value.get(0));
463  return *getThis();
464  }
465 
473 
474  RAJA_INLINE
476  {
477  getThis()->copy(x);
478  return *getThis();
479  }
480 
488 
489  RAJA_INLINE
490  self_type operator+(self_type const& x) const { return getThis()->add(x); }
491 
499 
500  RAJA_INLINE
502  {
503  *getThis() = getThis()->add(x);
504  return *getThis();
505  }
506 
516 
517  RAJA_INLINE
518  self_type operator+(element_type const& x) const { return getThis()->add(x); }
519 
530 
531  RAJA_INLINE
533  {
534  *getThis() = getThis()->add(x);
535  return *getThis();
536  }
537 
544 
545  RAJA_INLINE
546  self_type operator-() const { return self_type(0).subtract(*getThis()); }
547 
555 
556  RAJA_INLINE
557  self_type operator-(self_type const& x) const
558  {
559  return getThis()->subtract(x);
560  }
561 
569 
570  RAJA_INLINE
572  {
573  *getThis() = getThis()->subtract(x);
574  return *getThis();
575  }
576 
584 
585  RAJA_INLINE
587  {
588  return getThis()->subtract(x);
589  }
590 
598 
599  RAJA_INLINE
601  {
602  *getThis() = getThis()->subtract(x);
603  return *getThis();
604  }
605 
611  template<typename RHS>
612  RAJA_HOST_DEVICE RAJA_INLINE self_type operator*(RHS const& rhs) const
613  {
614  return getThis()->multiply(rhs);
615  }
616 
622  template<typename RHS>
623  RAJA_HOST_DEVICE RAJA_INLINE self_type& operator*=(RHS const& rhs)
624  {
625  *getThis() = getThis()->multiply(rhs);
626  return *getThis();
627  }
628 
635  RAJA_INLINE
636 
638  self_type operator/(self_type const& x) const { return getThis()->divide(x); }
639 
647 
648  RAJA_INLINE
650  {
651  *getThis() = getThis()->divide(x);
652  return *getThis();
653  }
654 
661  RAJA_INLINE
662 
665  {
666  return getThis()->divide(x);
667  }
668 
676 
677  RAJA_INLINE
679  {
680  *getThis() = getThis()->divide(x);
681  return *getThis();
682  }
683 
692 
693  RAJA_INLINE
694  self_type divide_n(self_type const& b, camp::idx_t n) const
695  {
696  self_type q(*getThis());
697  for (camp::idx_t i = 0; i < n; ++i)
698  {
699  q.set(getThis()->get(i) / b.get(i), i);
700  }
701  return q;
702  }
703 
712 
713  RAJA_INLINE
714  self_type divide_n(element_type const& b, camp::idx_t n) const
715  {
716  self_type q(*getThis());
717  for (camp::idx_t i = 0; i < n; ++i)
718  {
719  q.set(getThis()->get(i) / b, i);
720  }
721  return q;
722  }
723 
730  RAJA_INLINE
731 
733  element_type dot(self_type const& x) const
734  {
735  return getThis()->multiply(x).sum();
736  }
737 
748  RAJA_INLINE
749 
751  self_type multiply_add(self_type const& b, self_type const& c) const
752  {
753  return (self_type(*getThis()) * self_type(b)) + self_type(c);
754  }
755 
766  RAJA_INLINE
767 
769  self_type multiply_subtract(self_type const& b, self_type const& c) const
770  {
771  return getThis()->multiply_add(b, -c);
772  }
773 
778  RAJA_INLINE
779 
782  {
783  return getThis()->multiply(self_type(c));
784  }
785 
790  RAJA_INLINE
791 
793  element_type min_n(camp::idx_t N) const { return getThis()->min(N); }
794 
799  RAJA_INLINE
800 
802  element_type max_n(camp::idx_t N) const { return getThis()->max(N); }
803 
819  RAJA_INLINE
820 
822  self_type transpose_shuffle_left(camp::idx_t lvl, self_type const& y) const
823  {
824  auto const& x = *getThis();
825 
826  self_type z;
827 
828  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
829  {
830 
831  // extract value x or y
832  camp::idx_t xy_select = (i >> lvl) & 0x1;
833 
834 
835  z.set(xy_select == 0 ? x.get(i) : y.get(i - (1 << lvl)), i);
836  }
837 
838  return z;
839  }
840 
856  RAJA_INLINE
857 
859  self_type transpose_shuffle_right(int lvl, self_type const& y) const
860  {
861  auto const& x = *getThis();
862 
863  self_type z;
864 
865  camp::idx_t i0 = 1 << lvl;
866 
867  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
868  {
869 
870  // extract value x or y
871  camp::idx_t xy_select = (i >> lvl) & 0x1;
872 
873  z.set(xy_select == 0 ? x.get(i0 + i) : y.get(i0 + i - (1 << lvl)), i);
874  }
875 
876  return z;
877  }
878 
886  RAJA_INLINE
887  static int_vector_type s_segmented_offsets(camp::idx_t segbits,
888  camp::idx_t stride_inner,
889  camp::idx_t stride_outer)
890  {
891  int_vector_type result;
892 
893  camp::idx_t num_segments = self_type::s_num_elem >> segbits;
894  camp::idx_t seg_size = 1 << segbits;
895 
896  camp::idx_t lane = 0;
897  for (camp::idx_t seg = 0; seg < num_segments; ++seg)
898  {
899  for (camp::idx_t i = 0; i < seg_size; ++i)
900  {
901  result.set(seg * stride_outer + i * stride_inner, lane);
902  lane++;
903  }
904  }
905 
906  return result;
907  }
908 
942  RAJA_INLINE
943  self_type segmented_sum_inner(camp::idx_t segbits,
944  camp::idx_t output_segment) const
945  {
946  self_type result(0);
947 
948  // default implementation is dumb, just sum each value into
949  // appropriate segment lane
950  int output_offset = output_segment * self_type::s_num_elem >> segbits;
951 
952  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
953  {
954  auto value =
955  getThis()->get(i) + result.get((i >> segbits) + output_offset);
956  result.set(value, (i >> segbits) + output_offset);
957  }
958 
959  return result;
960  }
961 
998  RAJA_INLINE
999  self_type segmented_sum_outer(camp::idx_t segbits,
1000  camp::idx_t output_segment) const
1001  {
1002  self_type result(0);
1003 
1004  // default implementation is dumb, just sum each value into
1005  // appropriate segment lane
1006  int output_offset = output_segment * (1 << segbits);
1007 
1008  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
1009  {
1010  camp::idx_t output_i = output_offset + (i & ((1 << segbits) - 1));
1011  auto value = getThis()->get(i) + result.get(output_i);
1012  result.set(value, output_i);
1013  }
1014 
1015  return result;
1016  }
1017 
1018  RAJA_INLINE
1020  camp::idx_t segbits,
1021  camp::idx_t num_inner,
1022  camp::idx_t num_outer) const
1023  {
1024  self_type result;
1025 
1026  camp::idx_t num_segments = self_type::s_num_elem >> segbits;
1027  camp::idx_t seg_size = 1 << segbits;
1028 
1029  camp::idx_t lane = 0;
1030  for (camp::idx_t seg = 0; seg < num_segments; ++seg)
1031  {
1032  for (camp::idx_t i = 0; i < seg_size; ++i)
1033  {
1034 
1035  if (seg >= num_outer || i >= num_inner)
1036  {
1037  result.set(element_type(0), lane);
1038  }
1039  else
1040  {
1041 
1042  element_type div = getThis()->get(lane) / den.get(lane);
1043 
1044  result.set(div, lane);
1045  }
1046 
1047  lane++;
1048  }
1049  }
1050 
1051  return result;
1052  }
1053 
1085  RAJA_INLINE
1086 
1088  self_type segmented_dot(camp::idx_t segbits,
1089  camp::idx_t output_segment,
1090  self_type const& x) const
1091  {
1092  return getThis()->multiply(x).segmented_sum_inner(segbits, output_segment);
1093  }
1094 
1142  RAJA_INLINE
1144  camp::idx_t input_segment) const
1145  {
1146  self_type result;
1147 
1148  camp::idx_t mask = (1 << segbits) - 1;
1149  camp::idx_t offset = input_segment << segbits;
1150 
1151  // default implementation is dumb, just copy each lane of the selected
1152  // input segment into the matching lane of every output segment
1153  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
1154  {
1155 
1156  auto off = (i & mask) + offset;
1157 
1158  result.set(getThis()->get(off), i);
1159  }
1160 
1161  return result;
1162  }
1163 
1200  RAJA_INLINE
1202  camp::idx_t input_segment) const
1203  {
1204  self_type result;
1205 
1206  camp::idx_t offset = input_segment * (self_type::s_num_elem >> segbits);
1207 
1208  // default implementation is dumb, just copy the selected input lane
1209  // into every lane of its output segment
1210  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
1211  {
1212 
1213  auto off = (i >> segbits) + offset;
1214 
1215  result.set(getThis()->get(off), i);
1216  }
1217 
1218  return result;
1219  }
1220 
1226  RAJA_INLINE
1227  std::string to_string() const
1228  {
1229  std::string s = "Register(" + std::to_string(self_type::s_num_elem) + ")[ ";
1230 
1231  //
1232  for (camp::idx_t i = 0; i < self_type::s_num_elem; ++i)
1233  {
1234  s += std::to_string(getThis()->get(i)) + " ";
1235  }
1236 
1237  s += " ]\n";
1238 
1239  return s;
1240  }
1241 };
1242 
1243 
1244 } // namespace expt
1245 } // namespace internal
1246 } // namespace RAJA
1247 
1248 
1249 #endif
RAJA header file defining a bit masking operator.
RAJA header file defining SIMD/SIMT register operations.
RAJA header file defining SIMD/SIMT register operations.
Header file containing RAJA simd policy definitions.
Definition: RegisterBase.hpp:39
RAJA_INLINE RAJA_HOST_DEVICE self_type transpose_shuffle_right(int lvl, self_type const &y) const
Definition: RegisterBase.hpp:859
RAJA_HOST_DEVICE RAJA_INLINE self_type const & segmented_store_nm(element_type *ptr, camp::idx_t segbits, camp::idx_t stride_inner, camp::idx_t stride_outer, camp::idx_t num_inner, camp::idx_t num_outer) const
Generic segmented store operation used for storing sub-matrices into larger arrays where we store parti...
Definition: RegisterBase.hpp:407
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE element_type dot(self_type const &x) const
Dot product of two registers.
Definition: RegisterBase.hpp:733
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE static RAJA_INLINE self_type s_broadcast_n(element_type const &value, camp::idx_t N)
Broadcast scalar value to first N register elements.
Definition: RegisterBase.hpp:181
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE element_type min_n(camp::idx_t N) const
Definition: RegisterBase.hpp:793
RAJA_HOST_DEVICE RAJA_INLINE self_type & operator*=(RHS const &rhs)
Multiply a register with this register.
Definition: RegisterBase.hpp:623
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator-=(self_type const &x)
Subtract a register from this register.
Definition: RegisterBase.hpp:571
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type const & segmented_store(element_type *ptr, camp::idx_t segbits, camp::idx_t stride_inner, camp::idx_t stride_outer) const
Generic segmented store operation used for storing sub-matrices into larger arrays.
Definition: RegisterBase.hpp:387
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator=(RAJA::expt::Register< T2, RAJA::expt::scalar_register > const &value)
Set entire register to a single scalar value.
Definition: RegisterBase.hpp:459
RAJA_INLINE self_type segmented_broadcast_inner(camp::idx_t segbits, camp::idx_t input_segment) const
Definition: RegisterBase.hpp:1143
RAJA_INLINE self_type segmented_broadcast_outer(camp::idx_t segbits, camp::idx_t input_segment) const
Definition: RegisterBase.hpp:1201
RAJA_HOST_DEVICE RAJA_INLINE self_type const & scatter_n(element_type *ptr, RAJA::expt::Register< T2, REGISTER_POLICY > const &offsets, camp::idx_t N) const
Generic scatter operation for n-length subvector.
Definition: RegisterBase.hpp:359
RAJA_HOST_DEVICE RAJA_INLINE self_type & gather(element_type const *ptr, RAJA::expt::Register< T2, REGISTER_POLICY > offsets)
Generic gather operation for full vector.
Definition: RegisterBase.hpp:215
RAJA_HOST_DEVICE static constexpr RAJA_INLINE bool is_root()
Definition: RegisterBase.hpp:152
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator+=(element_type x)
Add a scalar to this register.
Definition: RegisterBase.hpp:532
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE self_type multiply_subtract(self_type const &b, self_type const &c) const
Fused multiply subtract: fms(b, c) = (*this)*b-c.
Definition: RegisterBase.hpp:769
RAJA_INLINE constexpr RAJA_HOST_DEVICE RegisterBase(self_type const &)
Definition: RegisterBase.hpp:172
typename RegisterTraits< REGISTER_POLICY, T >::int_element_type int_element_type
Definition: RegisterBase.hpp:130
RAJA_INLINE RAJA_HOST_DEVICE self_type transpose_shuffle_left(camp::idx_t lvl, self_type const &y) const
Definition: RegisterBase.hpp:822
RAJA_HOST_DEVICE RAJA_INLINE self_type operator*(RHS const &rhs) const
Multiply two registers, element wise.
Definition: RegisterBase.hpp:612
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type operator+(element_type const &x) const
Add scalar to this register.
Definition: RegisterBase.hpp:518
RAJA_HOST_DEVICE RAJA_INLINE ~RegisterBase()
Definition: RegisterBase.hpp:162
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type divide_n(self_type const &b, camp::idx_t n) const
Divide n elements of this register by another register.
Definition: RegisterBase.hpp:694
RAJA_HOST_DEVICE RAJA_INLINE self_type & gather_n(element_type const *ptr, RAJA::expt::Register< T2, REGISTER_POLICY > const &offsets, camp::idx_t N)
Generic gather operation for n-length subvector.
Definition: RegisterBase.hpp:239
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator-=(element_type const &x)
Subtract a scalar from this register.
Definition: RegisterBase.hpp:600
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE self_type segmented_dot(camp::idx_t segbits, camp::idx_t output_segment, self_type const &x) const
Definition: RegisterBase.hpp:1088
RAJA_HOST_DEVICE constexpr RAJA_INLINE RegisterBase()
Definition: RegisterBase.hpp:157
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE element_type max_n(camp::idx_t N) const
Definition: RegisterBase.hpp:802
RAJA_HOST_DEVICE constexpr RAJA_INLINE RegisterBase(RegisterBase const &)
Definition: RegisterBase.hpp:167
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE self_type operator/(element_type const &x) const
Divide by a scalar, element wise.
Definition: RegisterBase.hpp:664
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator/=(self_type const &x)
Divide this register by another register.
Definition: RegisterBase.hpp:649
RAJA_INLINE self_type segmented_sum_inner(camp::idx_t segbits, camp::idx_t output_segment) const
Definition: RegisterBase.hpp:943
RAJA_INLINE self_type segmented_divide_nm(self_type den, camp::idx_t segbits, camp::idx_t num_inner, camp::idx_t num_outer) const
Definition: RegisterBase.hpp:1019
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE self_type scale(element_type c) const
Definition: RegisterBase.hpp:781
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE self_type operator/(self_type const &x) const
Divide two registers, element wise.
Definition: RegisterBase.hpp:638
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type operator-() const
Negate the value of this register.
Definition: RegisterBase.hpp:546
RAJA_INLINE std::string to_string() const
Converts the register to a string.
Definition: RegisterBase.hpp:1227
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type get_and_broadcast(int i) const
Extracts a scalar value and broadcasts to a new register.
Definition: RegisterBase.hpp:198
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator=(element_type value)
Set entire register to a single scalar value.
Definition: RegisterBase.hpp:447
static RAJA_INLINE int_vector_type s_segmented_offsets(camp::idx_t segbits, camp::idx_t stride_inner, camp::idx_t stride_outer)
Definition: RegisterBase.hpp:887
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & segmented_load(element_type const *ptr, camp::idx_t segbits, camp::idx_t stride_inner, camp::idx_t stride_outer)
Generic segmented load operation used for loading sub-matrices from larger arrays.
Definition: RegisterBase.hpp:267
RAJA_HOST_DEVICE RAJA_INLINE self_type const & scatter(element_type *ptr, RAJA::expt::Register< T2, REGISTER_POLICY > const &offsets) const
Generic scatter operation for full vector.
Definition: RegisterBase.hpp:335
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE self_type multiply_add(self_type const &b, self_type const &c) const
Fused multiply add: fma(b, c) = (*this)*b+c.
Definition: RegisterBase.hpp:751
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type operator-(element_type const &x) const
Subtract scalar from this register.
Definition: RegisterBase.hpp:586
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator/=(element_type const &x)
Divide this register by a scalar.
Definition: RegisterBase.hpp:678
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type operator-(self_type const &x) const
Subtract two registers.
Definition: RegisterBase.hpp:557
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator=(self_type const &x)
Assign one register to another.
Definition: RegisterBase.hpp:475
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type & operator+=(self_type const &x)
Add a register to this register.
Definition: RegisterBase.hpp:501
RAJA_INLINE self_type segmented_sum_outer(camp::idx_t segbits, camp::idx_t output_segment) const
Definition: RegisterBase.hpp:999
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type divide_n(element_type const &b, camp::idx_t n) const
Divide n elements of this register by a scalar.
Definition: RegisterBase.hpp:714
RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE self_type operator+(self_type const &x) const
Add two registers.
Definition: RegisterBase.hpp:490
RAJA_HOST_DEVICE RAJA_INLINE self_type & segmented_load_nm(element_type const *ptr, camp::idx_t segbits, camp::idx_t stride_inner, camp::idx_t stride_outer, camp::idx_t num_inner, camp::idx_t num_outer)
Generic segmented load operation used for loading sub-matrices from larger arrays where we load parti...
Definition: RegisterBase.hpp:287
Definition: RegisterBase.hpp:117
Definition: RegisterBase.hpp:47
Header file for common RAJA internal macro definitions.
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
#define RAJA_SUPPRESS_HD_WARN
Definition: macros.hpp:68
RAJA_INLINE RAJA_HOST_DEVICE RIGHT operator/(LEFT const &lhs, RIGHT const &rhs)
Definition: RegisterBase.hpp:104
RAJA_INLINE RAJA_HOST_DEVICE RIGHT operator+(LEFT const &lhs, RIGHT const &rhs)
Definition: RegisterBase.hpp:59
RAJA_INLINE RAJA_HOST_DEVICE RIGHT operator-(LEFT const &lhs, RIGHT const &rhs)
Definition: RegisterBase.hpp:74
RAJA_INLINE RAJA_HOST_DEVICE RIGHT operator*(LEFT const &lhs, RIGHT const &rhs)
Definition: RegisterBase.hpp:89
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE constexpr RAJA_INLINE RAJA::zip_tuple_element_t< I, zip_tuple< is_val, Ts... > > & get(zip_tuple< is_val, Ts... > &z) noexcept
Definition: zip_tuple.hpp:56