RAJA
RAJA provides a collection of platform portability abstractions for C++ HPC applications.
MultiplyOperator.hpp
Go to the documentation of this file.
1 
12 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
13 // Copyright (c) Lawrence Livermore National Security, LLC and other
14 // RAJA Project Developers. See top-level LICENSE and COPYRIGHT
15 // files for dates and other details. No copyright assignment is required
16 // to contribute to RAJA.
17 //
18 // SPDX-License-Identifier: (BSD-3-Clause)
19 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
20 
21 #ifndef RAJA_pattern_tensor_ET_MultiplyOperator_HPP
22 #define RAJA_pattern_tensor_ET_MultiplyOperator_HPP
23 
24 namespace RAJA
25 {
26 namespace internal
27 {
28 namespace expt
29 {
30 // forward
31 class TensorBlockConcreteBase;
32 
33 namespace ET
34 {
35 
36 
// Primary (unspecialized) multiply operator for a pair of expression-template
// operands. It evaluates both operands on the same tile and combines the two
// evaluated values directly (the AST label it prints is "Elemental").
// The third template parameter defaults to void; the specializations further
// down this file put a std::enable_if<...>::type in that slot.
44 template<typename LEFT_OPERAND_TYPE,
45  typename RIGHT_OPERAND_TYPE,
46  class ENABLE = void>
// NOTE(review): listing line 47 (the declaration that names this template) is
// missing from this rendering; presumably `struct MultiplyOperator`, going by
// the include guard -- confirm against the real header.
48 {
49 
 // Result type and dimensionality are taken from the LEFT operand.
50  using result_type = typename LEFT_OPERAND_TYPE::result_type;
51  static constexpr camp::idx_t s_num_dims = LEFT_OPERAND_TYPE::s_num_dims;
52 
53  RAJA_INLINE
54 
 // Prints "Elemental(<left dims>,<right dims>)" with printf.
56  static void print_ast()
57  {
58  printf("Elemental(%d,%d)", (int)s_num_dims,
59  (int)RIGHT_OPERAND_TYPE::s_num_dims);
60  }
61 
62  RAJA_INLINE
63 
 // Extent of the result along `dim`: dim 0 is read from left's dim 0, every
 // other value of `dim` is read from right's dim 1.
65  static int getDimSize(int dim,
66  LEFT_OPERAND_TYPE const& left,
67  RIGHT_OPERAND_TYPE const& right)
68  {
69  return dim == 0 ? left.getDimSize(0) : right.getDimSize(1);
70  }
71 
 // Returns left.eval(tile) * right.eval(tile); the return type is whatever
 // that expression yields.
75  template<typename TILE_TYPE>
76  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply(
77  TILE_TYPE const& tile,
78  LEFT_OPERAND_TYPE const& left,
79  RIGHT_OPERAND_TYPE const& right)
80  -> decltype(left.eval(tile) * right.eval(tile))
81  {
82  return left.eval(tile) * right.eval(tile);
83  }
84 
 // Forwards to the evaluated left operand's multiply_add(), passing the
 // evaluated right operand and the evaluated `add` operand.
88  template<typename TILE_TYPE, typename ADD_OPERAND_TYPE>
89  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply_add(
90  TILE_TYPE const& tile,
91  LEFT_OPERAND_TYPE const& left,
92  RIGHT_OPERAND_TYPE const& right,
93  ADD_OPERAND_TYPE const& add)
94  -> decltype(left.eval(tile).multiply_add(right.eval(tile),
95  add.eval(tile)))
96  {
97  return left.eval(tile).multiply_add(right.eval(tile), add.eval(tile));
98  }
99 
 // Forwards to the evaluated left operand's multiply_subtract(), passing the
 // evaluated right operand and the evaluated `subtract` operand.
103  template<typename TILE_TYPE, typename SUBTRACT_OPERAND_TYPE>
104  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply_subtract(
105  TILE_TYPE const& tile,
106  LEFT_OPERAND_TYPE const& left,
107  RIGHT_OPERAND_TYPE const& right,
108  SUBTRACT_OPERAND_TYPE const& subtract)
109  -> decltype(left.eval(tile).multiply_subtract(right.eval(tile),
110  subtract.eval(tile)))
111  {
112  return left.eval(tile).multiply_subtract(right.eval(tile),
113  subtract.eval(tile));
114  }
115 };
116 
// Specialization selected when the LEFT operand has s_num_dims == 0: the
// evaluated right operand is scaled by the evaluated left operand.
// NOTE(review): listing line 121 (the line naming the specialized template) is
// missing from this rendering.
// NOTE(review): the specialization for a 0-dim RIGHT operand is enabled
// independently of this one; if both operands are 0-dim both conditions hold,
// which looks ambiguous -- confirm that combination is never instantiated.
120 template<typename LEFT_OPERAND_TYPE, typename RIGHT_OPERAND_TYPE>
122  LEFT_OPERAND_TYPE,
123  RIGHT_OPERAND_TYPE,
124  typename std::enable_if<LEFT_OPERAND_TYPE::s_num_dims == 0>::type>
125 {
126 
 // Result type and dimensionality are taken from the RIGHT operand.
127  using result_type = typename RIGHT_OPERAND_TYPE::result_type;
128  static constexpr camp::idx_t s_num_dims = RIGHT_OPERAND_TYPE::s_num_dims;
129 
130  RAJA_INLINE
131 
 // Prints the node label "Scale" with printf.
133  static void print_ast() { printf("Scale"); }
134 
135  RAJA_INLINE
136 
 // The result has the right operand's extent along every dimension; the left
 // operand is ignored.
138  static int getDimSize(int dim,
139  LEFT_OPERAND_TYPE const&,
140  RIGHT_OPERAND_TYPE const& right)
141  {
142  return right.getDimSize(dim);
143  }
144 
 // Returns right.eval(tile).scale(left.eval(tile)).
148  template<typename TILE_TYPE>
149  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply(
150  TILE_TYPE const& tile,
151  LEFT_OPERAND_TYPE const& left,
152  RIGHT_OPERAND_TYPE const& right)
153  -> decltype(right.eval(tile).scale(left.eval(tile)))
154  {
155  return right.eval(tile).scale(left.eval(tile));
156  }
157 
 // Scales as in multiply(), then adds add.eval(tile) with operator+ (no fused
 // multiply_add member is called here).
161  template<typename TILE_TYPE, typename ADD_OPERAND_TYPE>
162  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply_add(
163  TILE_TYPE const& tile,
164  LEFT_OPERAND_TYPE const& left,
165  RIGHT_OPERAND_TYPE const& right,
166  ADD_OPERAND_TYPE const& add)
167  -> decltype(right.eval(tile).scale(left.eval(tile)) + add.eval(tile))
168  {
169  return right.eval(tile).scale(left.eval(tile)) + add.eval(tile);
170  }
171 
 // Scales as in multiply(), then subtracts subtract.eval(tile) with operator-.
175  template<typename TILE_TYPE, typename SUBTRACT_OPERAND_TYPE>
176  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply_subtract(
177  TILE_TYPE const& tile,
178  LEFT_OPERAND_TYPE const& left,
179  RIGHT_OPERAND_TYPE const& right,
180  SUBTRACT_OPERAND_TYPE const& subtract)
181  -> decltype(right.eval(tile).scale(left.eval(tile)) - subtract.eval(tile))
182  {
183  return right.eval(tile).scale(left.eval(tile)) - subtract.eval(tile);
184  }
185 };
186 
// Specialization selected when the RIGHT operand has s_num_dims == 0: the
// evaluated left operand is scaled by the evaluated right operand.
// NOTE(review): listing line 191 (the line naming the specialized template) is
// missing from this rendering.
// NOTE(review): the specialization for a 0-dim LEFT operand is enabled
// independently of this one; if both operands are 0-dim both conditions hold,
// which looks ambiguous -- confirm that combination is never instantiated.
190 template<typename LEFT_OPERAND_TYPE, typename RIGHT_OPERAND_TYPE>
192  LEFT_OPERAND_TYPE,
193  RIGHT_OPERAND_TYPE,
194  typename std::enable_if<RIGHT_OPERAND_TYPE::s_num_dims == 0>::type>
195 {
196 
 // Result type and dimensionality are taken from the LEFT operand.
197  using result_type = typename LEFT_OPERAND_TYPE::result_type;
198  static constexpr camp::idx_t s_num_dims = LEFT_OPERAND_TYPE::s_num_dims;
199 
200  RAJA_INLINE
201 
 // Prints the node label "Scale" with printf.
203  static void print_ast() { printf("Scale"); }
204 
205  RAJA_INLINE
206 
 // The result has the left operand's extent along every dimension; the right
 // operand is ignored.
208  static int getDimSize(int dim,
209  LEFT_OPERAND_TYPE const& left,
210  RIGHT_OPERAND_TYPE const&)
211  {
212  return left.getDimSize(dim);
213  }
214 
 // Returns left.eval(tile).scale(right.eval(tile)).
218  template<typename TILE_TYPE>
219  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply(
220  TILE_TYPE const& tile,
221  LEFT_OPERAND_TYPE const& left,
222  RIGHT_OPERAND_TYPE const& right)
223  -> decltype(left.eval(tile).scale(right.eval(tile)))
224  {
225  return left.eval(tile).scale(right.eval(tile));
226  }
227 
 // Scales as in multiply(), then adds add.eval(tile) with operator+.
231  template<typename TILE_TYPE, typename ADD_OPERAND_TYPE>
232  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply_add(
233  TILE_TYPE const& tile,
234  LEFT_OPERAND_TYPE const& left,
235  RIGHT_OPERAND_TYPE const& right,
236  ADD_OPERAND_TYPE const& add)
237  -> decltype(left.eval(tile).scale(right.eval(tile)) + add.eval(tile))
238  {
239  return left.eval(tile).scale(right.eval(tile)) + add.eval(tile);
240  }
241 
 // Scales as in multiply(), then subtracts subtract.eval(tile) with operator-.
245  template<typename TILE_TYPE, typename SUBTRACT_OPERAND_TYPE>
246  RAJA_INLINE RAJA_HOST_DEVICE static auto multiply_subtract(
247  TILE_TYPE const& tile,
248  LEFT_OPERAND_TYPE const& left,
249  RIGHT_OPERAND_TYPE const& right,
250  SUBTRACT_OPERAND_TYPE const& subtract)
251  -> decltype(left.eval(tile).scale(right.eval(tile)) - subtract.eval(tile))
252  {
253  return left.eval(tile).scale(right.eval(tile)) - subtract.eval(tile);
254  }
255 };
256 
269 template<typename LEFT_OPERAND_TYPE, typename RIGHT_OPERAND_TYPE>
271  LEFT_OPERAND_TYPE,
272  RIGHT_OPERAND_TYPE,
273  typename std::enable_if<LEFT_OPERAND_TYPE::s_num_dims == 2 &&
274  RIGHT_OPERAND_TYPE::s_num_dims == 1>::type>
275 {
276 
277  using left_type = LEFT_OPERAND_TYPE;
278  using right_type = RIGHT_OPERAND_TYPE;
279  using result_type =
280  typename LEFT_OPERAND_TYPE::result_type::column_vector_type;
281  static constexpr camp::idx_t s_num_dims = 1;
282 
283  RAJA_INLINE
284 
286  static void print_ast() { printf("Matrx*Vector"); }
287 
288  RAJA_INLINE
289 
291  static int getDimSize(int dim,
292  LEFT_OPERAND_TYPE const&,
293  RIGHT_OPERAND_TYPE const& right)
294  {
295  return dim == 0 ? right.getDimSize(0) : 0;
296  }
297 
301  template<typename TILE_TYPE>
303  TILE_TYPE const& tile,
304  LEFT_OPERAND_TYPE const& left,
305  RIGHT_OPERAND_TYPE const& right)
306  {
307 
308  // clear result
309  result_type result(0);
310 
311  // multiply left and right into result
312  multiply_into_result(result, tile, left, right);
313 
314  return result;
315  }
316 
317  template<typename TILE_TYPE, typename ADD_TYPE>
319  TILE_TYPE const& tile,
320  LEFT_OPERAND_TYPE const& left,
321  RIGHT_OPERAND_TYPE const& right,
322  ADD_TYPE const& add)
323  {
324 
325  // evaluate add into result
326  result_type result = add.eval(tile);
327 
328  // multiply left and right into result
329  multiply_into_result(result, tile, left, right);
330 
331  return result;
332  }
333 
334 private:
335  template<typename STORAGE, typename TILE_TYPE, typename INDEX = void>
336  struct MultiplyBridge;
337 
338  template<typename STORAGE, typename TILE_TYPE>
339  RAJA_INLINE RAJA_HOST_DEVICE static void multiply_into_result(
340  STORAGE& result,
341  TILE_TYPE const& tile,
342  LEFT_OPERAND_TYPE const& et_left,
343  RIGHT_OPERAND_TYPE const& et_right)
344  {
345  // using LHS_STORAGE = typename LEFT_OPERAND_TYPE::result_type;
346 
347  // get tile size from matrix type
348  auto tile_size = left_type::result_type::s_dim_elem(1);
349  auto k_size = et_left.getDimSize(1);
350  // TODO: check that left and right are compatible
351  // m_left.getDimSize(1) == m_right.getDimSize(0)
352  // how do we provide checking for this kind of error?
353 
354  // tile over row of left and column of right
355  auto left_tile =
356  LEFT_OPERAND_TYPE::result_type::s_get_default_tile().nonstatic();
357  left_tile.m_begin[0] = tile.m_begin[0];
358  left_tile.m_size[0] = tile.m_size[0];
359  left_tile.m_size[1] = tile_size;
360 
361  using RightType = typename TILE_TYPE::nonstatic_self_type;
362 
363  RightType right_tile = tile;
364  right_tile.m_size[0] = tile_size;
365 
366  // Do full tiles in k
367  decltype(k_size) k = 0;
368  for (; k + tile_size <= k_size; k += tile_size)
369  {
370 
371  // evaluate both sides of operator
372  left_tile.m_begin[1] = k;
373  auto left = et_left.eval(left_tile);
374 
375  right_tile.m_begin[0] = k;
376  auto right = et_right.eval(right_tile);
377 
378  // accumulate product
379  result = left.right_multiply_vector_accumulate(right, result);
380  }
381  // remainder tile in k
382  if (k < k_size)
383  {
384  auto& left_part_tile = make_tensor_tile_partial(left_tile);
385  left_part_tile.m_begin[1] = k;
386  left_part_tile.m_size[1] = k_size - k;
387  auto left = et_left.eval(left_part_tile);
388 
389  auto& right_part_tile = make_tensor_tile_partial(right_tile);
390  right_part_tile.m_begin[0] = k;
391  right_part_tile.m_size[0] = k_size - k;
392  auto right = et_right.eval(right_part_tile);
393 
394  // accumulate product of partial tile
395  result = left.right_multiply_vector_accumulate(right, result);
396  }
397  }
398 
399  template<typename T>
400  struct Diag
401  {
402  static_assert(!std::is_same<T, void>::value, "diag");
403  };
404 
405  template<typename I, TensorTileSize TTS, typename B, typename S>
406  struct Diag<StaticTensorTile<I, TTS, B, S>>
407  {
408  static_assert(std::is_same<I, void>::value, "diag");
409  };
410 
411  template<typename STORAGE, typename TILE_TYPE, typename INDEX>
412  struct MultiplyBridge
413  {
414 
415  Diag<TILE_TYPE> diag;
416 
417  RAJA_INLINE
418 
420  static void multiply_into_result(STORAGE& result,
421  TILE_TYPE const& tile,
422  LEFT_OPERAND_TYPE const& et_left,
423  RIGHT_OPERAND_TYPE const& et_right)
424  {
425  // using LHS_STORAGE = typename LEFT_OPERAND_TYPE::result_type;
426 
427  // get tile size from matrix type
428  auto tile_size = left_type::result_type::s_dim_elem(1);
429  auto k_size = et_left.getDimSize(1);
430  // TODO: check that left and right are compatible
431  // m_left.getDimSize(1) == m_right.getDimSize(0)
432  // how do we provide checking for this kind of error?
433 
434  // tile over row of left and column of right
435  auto left_tile =
436  LEFT_OPERAND_TYPE::result_type::s_get_default_tile().nonstatic();
437  left_tile.m_begin[0] = tile.m_begin[0];
438  left_tile.m_size[0] = tile.m_size[0];
439  left_tile.m_size[1] = tile_size;
440 
441  using RightType = typename TILE_TYPE::nonstatic_self_type;
442 
443  RightType right_tile = tile;
444  right_tile.m_size[0] = tile_size;
445 
446  // Do full tiles in k
447  decltype(k_size) k = 0;
448  for (; k + tile_size <= k_size; k += tile_size)
449  {
450 
451  // evaluate both sides of operator
452  left_tile.m_begin[1] = k;
453  auto left = et_left.eval(left_tile);
454 
455  right_tile.m_begin[0] = k;
456  auto right = et_right.eval(right_tile);
457 
458  // accumulate product
459  result = left.right_multiply_vector_accumulate(right, result);
460  }
461  // remainder tile in k
462  if (k < k_size)
463  {
464  auto& left_part_tile = make_tensor_tile_partial(left_tile);
465  left_part_tile.m_begin[1] = k;
466  left_part_tile.m_size[1] = k_size - k;
467  auto left = et_left.eval(left_part_tile);
468 
469  auto& right_part_tile = make_tensor_tile_partial(right_tile);
470  right_part_tile.m_begin[0] = k;
471  right_part_tile.m_size[0] = k_size - k;
472  auto right = et_right.eval(right_part_tile);
473 
474  // accumulate product of partial tile
475  result = left.right_multiply_vector_accumulate(right, result);
476  }
477  }
478  };
479 
480  template<size_t INDEX,
481  typename STORAGE,
482  typename INDEX_TYPE,
483  TensorTileSize TENSOR_SIZE,
484  INDEX_TYPE Begin0,
485  INDEX_TYPE... BeginTail,
486  INDEX_TYPE Size0,
487  INDEX_TYPE... SizeTail>
488  struct MultiplyBridge<
489  STORAGE,
490  StaticTensorTile<INDEX_TYPE,
491  TENSOR_SIZE,
492  camp::int_seq<INDEX_TYPE, Begin0, BeginTail...>,
493  camp::int_seq<INDEX_TYPE, Size0, SizeTail...>>,
494  camp::integral_constant<size_t, INDEX>>
495  {
496 
497  using TileType =
498  StaticTensorTile<INDEX_TYPE,
499  TENSOR_SIZE,
500  camp::int_seq<INDEX_TYPE, Begin0, BeginTail...>,
501  camp::int_seq<INDEX_TYPE, Size0, SizeTail...>>;
502 
503  RAJA_INLINE
504 
506  static void multiply_into_result(STORAGE& result,
507  TileType const& tile,
508  LEFT_OPERAND_TYPE const& et_left,
509  RIGHT_OPERAND_TYPE const& et_right)
510  {
511 
512  // get tile size from matrix type
513  const auto tile_size = left_type::result_type::s_dim_elem(1);
514  const auto k_size = et_left.getDimSize(1);
515 
516  auto const offset = INDEX * tile_size;
517 
518  if ((offset + tile_size) <= k_size)
519  {
520 
521  using LeftType =
522  StaticTensorTile<INDEX_TYPE, TENSOR_SIZE,
523  camp::int_seq<INDEX_TYPE, Begin0, offset>,
524  camp::int_seq<INDEX_TYPE, Size0, tile_size>>;
525  // evaluate both sides of operator
526  auto left = et_left.eval(LeftType());
527 
528  using RightType =
529  StaticTensorTile<INDEX_TYPE, TENSOR_SIZE,
530  camp::int_seq<INDEX_TYPE, offset>,
531  camp::int_seq<INDEX_TYPE, tile_size>>;
532 
533  auto right = et_right.eval(RightType());
534 
535  // accumulate product
536  auto temp = left.right_multiply_vector_accumulate(right, result);
537  MultiplyBridge<STORAGE, TileType,
538  camp::integral_constant<size_t, INDEX - 1>>::
539  multiply_into_result(result, tile, et_left, et_right);
540  result += temp;
541  }
542  else
543  {
544 
545  using LeftType =
546  StaticTensorTile<INDEX_TYPE, TENSOR_PARTIAL,
547  camp::int_seq<INDEX_TYPE, Begin0, offset>,
548  camp::int_seq<INDEX_TYPE, Size0, k_size - offset>>;
549  auto left = et_left.eval(LeftType());
550 
551  using RightType =
552  StaticTensorTile<INDEX_TYPE, TENSOR_PARTIAL,
553  camp::int_seq<INDEX_TYPE, offset>,
554  camp::int_seq<INDEX_TYPE, k_size - offset>>;
555  auto right = et_right.eval(RightType());
556 
557  // accumulate product of partial tile
558  result = left.right_multiply_vector_accumulate(right, result);
559  }
560  }
561  };
562 
563  template<typename STORAGE,
564  typename INDEX_TYPE,
565  TensorTileSize TENSOR_SIZE,
566  INDEX_TYPE Begin0,
567  INDEX_TYPE... BeginTail,
568  INDEX_TYPE Size0,
569  INDEX_TYPE... SizeTail>
570  struct MultiplyBridge<
571  STORAGE,
572  StaticTensorTile<INDEX_TYPE,
573  TENSOR_SIZE,
574  camp::int_seq<INDEX_TYPE, Begin0, BeginTail...>,
575  camp::int_seq<INDEX_TYPE, Size0, SizeTail...>>,
576  camp::integral_constant<size_t, 0>>
577  {
578 
579  using TileType =
580  StaticTensorTile<INDEX_TYPE,
581  TENSOR_SIZE,
582  camp::int_seq<INDEX_TYPE, Begin0, BeginTail...>,
583  camp::int_seq<INDEX_TYPE, Size0, SizeTail...>>;
584 
585  RAJA_INLINE
586 
588  static void multiply_into_result(STORAGE& result,
589  TileType const&,
590  LEFT_OPERAND_TYPE const& et_left,
591  RIGHT_OPERAND_TYPE const& et_right)
592  {
593 
594  // get tile size from matrix type
595  const auto tile_size = left_type::result_type::s_dim_elem(1);
596  const auto k_size = et_left.getDimSize(1);
597 
598  auto const offset = 0;
599 
600  if ((offset + tile_size) <= k_size)
601  {
602 
603  using LeftType =
604  StaticTensorTile<INDEX_TYPE, TENSOR_SIZE,
605  camp::int_seq<INDEX_TYPE, Begin0, offset>,
606  camp::int_seq<INDEX_TYPE, Size0, tile_size>>;
607  // evaluate both sides of operator
608  auto left = et_left.eval(LeftType());
609 
610  using RightType =
611  StaticTensorTile<INDEX_TYPE, TENSOR_SIZE,
612  camp::int_seq<INDEX_TYPE, offset>,
613  camp::int_seq<INDEX_TYPE, tile_size>>;
614 
615  auto right = et_right.eval(RightType());
616 
617  // accumulate product
618  auto temp = left.right_multiply_vector_accumulate(right, result);
619  result += temp;
620  }
621  else
622  {
623 
624  using LeftType =
625  StaticTensorTile<INDEX_TYPE, TENSOR_PARTIAL,
626  camp::int_seq<INDEX_TYPE, Begin0, offset>,
627  camp::int_seq<INDEX_TYPE, Size0, k_size - offset>>;
628  auto left = et_left.eval(LeftType());
629 
630  using RightType =
631  StaticTensorTile<INDEX_TYPE, TENSOR_PARTIAL,
632  camp::int_seq<INDEX_TYPE, offset>,
633  camp::int_seq<INDEX_TYPE, k_size - offset>>;
634  auto right = et_right.eval(RightType());
635 
636  // accumulate product of partial tile
637  result = left.right_multiply_vector_accumulate(right, result);
638  }
639  }
640  };
641 
642  template<typename STORAGE,
643  typename INDEX_TYPE,
644  TensorTileSize TENSOR_SIZE,
645  INDEX_TYPE Begin0,
646  INDEX_TYPE... BeginTail,
647  INDEX_TYPE Size0,
648  INDEX_TYPE... SizeTail>
649  struct MultiplyBridge<
650  STORAGE,
651  StaticTensorTile<INDEX_TYPE,
652  TENSOR_SIZE,
653  camp::int_seq<INDEX_TYPE, Begin0, BeginTail...>,
654  camp::int_seq<INDEX_TYPE, Size0, SizeTail...>>,
655  void>
656  {
657 
658  using TileType =
659  StaticTensorTile<INDEX_TYPE,
660  TENSOR_SIZE,
661  camp::int_seq<INDEX_TYPE, Begin0, BeginTail...>,
662  camp::int_seq<INDEX_TYPE, Size0, SizeTail...>>;
663 
664  RAJA_INLINE
665 
667  static void multiply_into_result(STORAGE& result,
668  TileType const& tile,
669  LEFT_OPERAND_TYPE const& et_left,
670  RIGHT_OPERAND_TYPE const& et_right)
671  {
672 
673  const auto tile_size = left_type::result_type::s_dim_elem(1);
674  const auto k_size = et_left.getDimSize(1);
675  const size_t iter_count =
676  (k_size / tile_size) + ((k_size % tile_size != 0) ? 1 : 0);
677 
678  MultiplyBridge<STORAGE, TileType,
679  camp::integral_constant<size_t, iter_count>>::
680  multiply_into_result(result, tile, et_left, et_right);
681  }
682  };
683 };
684 
685 
// Forward declaration only; the definition is not part of this listing. In the
// visible code the name appears only inside commented-out lines of the
// TensorBlock specialization further down.
686 template<typename LEFT_OPERAND_TYPE,
687  typename RIGHT_OPERAND_TYPE,
688  typename ADD_OPERAND_TYPE>
689 class TensorMultiplyAdd;
690 
703 template<typename LEFT_OPERAND_TYPE, typename RIGHT_OPERAND_TYPE>
705  LEFT_OPERAND_TYPE,
706  RIGHT_OPERAND_TYPE,
707  typename std::enable_if<LEFT_OPERAND_TYPE::s_num_dims == 1 &&
708  RIGHT_OPERAND_TYPE::s_num_dims == 2>::type>
709 {
710 
711  using left_type = LEFT_OPERAND_TYPE;
712  using right_type = RIGHT_OPERAND_TYPE;
713  using result_type = typename RIGHT_OPERAND_TYPE::result_type::row_vector_type;
714  static constexpr camp::idx_t s_num_dims = 1;
715 
716  RAJA_INLINE
717 
719  static void print_ast() { printf("Vector*Matrix"); }
720 
721  RAJA_INLINE
722 
724  static int getDimSize(int dim,
725  LEFT_OPERAND_TYPE const& left,
726  RIGHT_OPERAND_TYPE const&)
727  {
728  return dim == 0 ? left.getDimSize(0) : 0;
729  }
730 
734  template<typename TILE_TYPE>
736  TILE_TYPE const& tile,
737  LEFT_OPERAND_TYPE const& left,
738  RIGHT_OPERAND_TYPE const& right)
739  {
740  // clear result
741  result_type result(0);
742 
743  // multiply left and right into result
744  multiply_into_result(result, tile, left, right);
745 
746  return result;
747  }
748 
749  template<typename TILE_TYPE, typename ADD_TYPE>
751  TILE_TYPE const& tile,
752  LEFT_OPERAND_TYPE const& left,
753  RIGHT_OPERAND_TYPE const& right,
754  ADD_TYPE const& add)
755  {
756  // evaluate add into result
757  result_type result = add.eval(tile);
758 
759  // multiply left and right into result
760  multiply_into_result(result, tile, left, right);
761 
762  return result;
763  }
764 
765 private:
766  template<typename STORAGE, typename TILE_TYPE>
767  RAJA_INLINE RAJA_HOST_DEVICE static void multiply_into_result(
768  STORAGE& result,
769  TILE_TYPE const& tile,
770  LEFT_OPERAND_TYPE const& et_left,
771  RIGHT_OPERAND_TYPE const& et_right)
772  {
773  // get tile size from matrix type
774  auto tile_size = right_type::result_type::s_dim_elem(0);
775  auto k_size = et_right.getDimSize(0);
776 
777 
778  // TODO: check that left and right are compatible
779  // m_left.getDimSize(1) == m_right.getDimSize(0)
780  // how do we provide checking for this kind of error?
781 
782  // tile over row of left and column of right
783  auto right_tile =
784  RIGHT_OPERAND_TYPE::result_type::s_get_default_tile().nonstatic();
785  right_tile.m_begin[1] = tile.m_begin[0];
786  right_tile.m_size[1] = tile.m_size[0];
787  right_tile.m_size[0] = tile_size;
788 
789  TILE_TYPE left_tile = tile;
790  left_tile.m_size[0] = tile_size;
791 
792 
793  // Do full tiles in k
794  decltype(k_size) k = 0;
795  for (; k + tile_size <= k_size; k += tile_size)
796  {
797 
798  // evaluate both sides of operator
799  right_tile.m_begin[0] = k;
800  auto right = et_right.eval(right_tile);
801 
802  left_tile.m_begin[0] = k;
803  auto left = et_left.eval(left_tile);
804 
805  // accumulate product
806  result = right.left_multiply_vector_accumulate(left, result);
807  }
808  // remainder tile in k
809  if (k < k_size)
810  {
811  auto& right_part_tile = make_tensor_tile_partial(right_tile);
812  right_part_tile.m_begin[0] = k;
813  right_part_tile.m_size[0] = k_size - k;
814  auto right = et_right.eval(right_part_tile);
815 
816  auto& left_part_tile = make_tensor_tile_partial(left_tile);
817  left_part_tile.m_begin[0] = k;
818  left_part_tile.m_size[0] = k_size - k;
819  auto left = et_left.eval(left_part_tile);
820 
821  // compute product into x of partial tile
822  result = right.left_multiply_vector_accumulate(left, result);
823  }
824  }
825 };
826 
// Specialization selected when both operands are 2-dim (matrix * matrix). The
// result is a 2-dim value of the left tensor's product_type.
// NOTE(review): listing line 835 (the line naming the specialized template) is
// missing from this rendering.
834 template<typename LEFT_OPERAND_TYPE, typename RIGHT_OPERAND_TYPE>
836  LEFT_OPERAND_TYPE,
837  RIGHT_OPERAND_TYPE,
838  typename std::enable_if<LEFT_OPERAND_TYPE::s_num_dims == 2 &&
839  RIGHT_OPERAND_TYPE::s_num_dims == 2>::type>
840 {
841 
842  using left_type = LEFT_OPERAND_TYPE;
843  using right_type = RIGHT_OPERAND_TYPE;
844  using result_type = typename LEFT_OPERAND_TYPE::result_type::product_type;
845  static constexpr camp::idx_t s_num_dims = 2;
846 
847  RAJA_INLINE
848 
 // Prints the node label with printf (the label is spelled "Matrx*Matrix" in
 // the emitted string).
850  static void print_ast() { printf("Matrx*Matrix"); }
851 
852  RAJA_INLINE
853 
 // Result extent: dim 0 from left's dim 0, any other dim from right's dim 1.
855  static int getDimSize(int dim,
856  LEFT_OPERAND_TYPE const& left,
857  RIGHT_OPERAND_TYPE const& right)
858  {
859  return dim == 0 ? left.getDimSize(0) : right.getDimSize(1);
860  }
861 
 // Computes left * right for `tile` into a result_type that is first zeroed
 // with broadcast(0).
 // NOTE(review): listing line 866 (return type and function name) is missing
 // from this rendering; presumably `static result_type multiply(`.
865  template<typename TILE_TYPE>
867  TILE_TYPE const& tile,
868  LEFT_OPERAND_TYPE const& left,
869  RIGHT_OPERAND_TYPE const& right)
870  {
871 
872  /*
873  *
874  * For TensorRegister:
875  *
876  * Returns a register containing product of left and right operands
877  *
878  * For TensorBlock:
879  *
880  * Returns an ET TensorLiteral containing the left and right operands
881  *
882  * OR
883  *
884  * Returns an ET multiply
885  *
886  */
887  // create zeroed temporary
888  result_type result;
889  result.broadcast(0);
890 
891  // multiply left and right operands into temporary
892  multiply_into_result(result, tile, left, right);
893 
894  return result;
895  }
896 
 // Computes left * right + add for `tile`: the accumulator starts as
 // add.eval(tile) and the product is accumulated on top of it.
 // NOTE(review): listing line 898 (return type and function name) is missing
 // from this rendering; presumably `static result_type multiply_add(`.
897  template<typename TILE_TYPE, typename ADD_TYPE>
899  TILE_TYPE const& tile,
900  LEFT_OPERAND_TYPE const& left,
901  RIGHT_OPERAND_TYPE const& right,
902  ADD_TYPE const& add)
903  {
904 
905  // start accumulator with addition term
906  result_type result = add.eval(tile);
907 
908  multiply_into_result(result, tile, left, right);
909 
910  return result;
911  }
912 
913 private:
 // Accumulates left * right into `result` by walking the contraction index k
 // (left's dim 1 / right's dim 0) in steps of tile_size, offset by each
 // operand's getDimBegin() along that dimension: full tiles in the loop, then
 // one partial tile for any remainder. Both operand tiles start as copies of
 // the result tile, so left keeps the result's dim 0 and right keeps its
 // dim 1.
914  template<typename STORAGE, typename TILE_TYPE>
915  RAJA_INLINE RAJA_HOST_DEVICE static void multiply_into_result(
916  STORAGE& result,
917  TILE_TYPE const& tile,
918  LEFT_OPERAND_TYPE const& et_left,
919  RIGHT_OPERAND_TYPE const& et_right)
920  {
921  // get tile size from matrix type
922  using right_tensor_type = typename right_type::result_type;
923  auto tile_size = right_tensor_type::s_dim_elem(0);
924  auto k_size = et_left.getDimSize(1);
925 
926  // TODO: check that left and right are compatible
927  // m_left.getDimSize(1) == m_right.getDimSize(0)
928  // how do we provide checking for this kind of error?
929 
930  // tile over row of left and column of right
931  TILE_TYPE left_tile = tile;
932  left_tile.m_size[1] = tile_size;
933  auto left_begin = et_left.getDimBegin(1);
934 
935  TILE_TYPE right_tile = tile;
936  right_tile.m_size[0] = tile_size;
937  auto right_begin = et_right.getDimBegin(0);
938 
939 
940  // Do full tiles in k
941  decltype(k_size) k = 0;
942  for (; k + tile_size <= k_size; k += tile_size)
943  {
944 
945  // evaluate both sides of operator
946  left_tile.m_begin[1] = k + left_begin;
947  auto left = et_left.eval(left_tile);
948 
949  right_tile.m_begin[0] = k + right_begin;
950  auto right = et_right.eval(right_tile);
951 
952  // accumulate product
953  left.matrix_multiply_accumulate(result, right);
954  }
955  // remainder tile in k
956  if (k < k_size)
957  {
958 
959  auto& left_part_tile = make_tensor_tile_partial(left_tile);
960  left_part_tile.m_begin[1] = k + left_begin;
961  left_part_tile.m_size[1] = k_size - k;
962  auto left = et_left.eval(left_part_tile);
963 
964  auto& right_part_tile = make_tensor_tile_partial(right_tile);
965  right_part_tile.m_begin[0] = k + right_begin;
966  right_part_tile.m_size[0] = k_size - k;
967  auto right = et_right.eval(right_part_tile);
968 
969  // accumulate product
970  left.matrix_multiply_accumulate(result, right);
971  }
972  }
973 };
974 
// Expression-template node that wraps an operand together with a tile.
// getDimSize()/getDimBegin() report the stored tile's m_size/m_begin, while
// eval() forwards the caller's tile to the wrapped operand unchanged.
// NOTE(review): listing line 976 (the `class RestrictExtents` declaration) is
// missing from this rendering; the name is taken from the constructor below.
975 template<typename OPERAND_TYPE, typename TILE_TYPE>
977  : public TensorExpressionBase<RestrictExtents<OPERAND_TYPE, TILE_TYPE>>
978 {
979 public:
981  using operand_type = OPERAND_TYPE;
982  using result_type = typename OPERAND_TYPE::result_type;
983  using index_type = typename TILE_TYPE::index_type;
984  using tile_type = TILE_TYPE;
985  static constexpr camp::idx_t s_num_dims = OPERAND_TYPE::s_num_dims;
986 
987 private:
 // Both members are stored by value (copied in the constructor).
988  operand_type m_operand;
989  tile_type m_tile;
990 
991 public:
992  RAJA_INLINE
993 
 // Stores copies of `operand` and `tile`.
995  RestrictExtents(operand_type const& operand, tile_type const& tile)
996  : m_operand {operand},
997  m_tile {tile}
998  {}
999 
1000  RAJA_INLINE
1001 
 // Size of the stored tile along `dim`.
 // NOTE(review): `dim` is index_type here but camp::idx_t in getDimBegin().
1003  constexpr index_type getDimSize(index_type dim) const
1004  {
1005  return m_tile.m_size[dim];
1006  }
1007 
1008  RAJA_INLINE
1009 
 // Begin index of the stored tile along `dim`.
1011  constexpr index_type getDimBegin(camp::idx_t dim) const
1012  {
1013  return m_tile.m_begin[dim];
1014  }
1015 
 // Evaluates the wrapped operand on the tile passed in; the stored m_tile is
 // not consulted here.
1016  template<typename TILE_TYPE2>
1017  RAJA_INLINE RAJA_HOST_DEVICE auto eval(TILE_TYPE2 const& tile) const
1018  -> decltype(m_operand.eval(tile))
1019  {
1020  return m_operand.eval(tile);
1021  }
1022 
1023  RAJA_INLINE
1024 
 // Prints "RestrictExtents(" + the operand's AST + ")" with printf.
1026  void print_ast() const
1027  {
1028  printf("RestrictExtents(");
1029  m_operand.print_ast();
1030  printf(")");
1031  }
1032 };
1033 
// Factory for RestrictExtents<OPERAND, TILE>: copies `tile` into a tile of the
// operand's own tile_type, then wraps `operand` with it.
// NOTE(review): listing line 1035 (return type, name and first parameter) is
// missing here; the symbol index at the end of this page lists it as
// `RestrictExtents<OPERAND, TILE> restrictExtents(OPERAND const& operand,
// TILE const& tile)`.
// NOTE(review): new_tile has type OPERAND::tile_type, but the
// RestrictExtents<OPERAND, TILE> constructor takes a TILE const&; this relies
// on the two being the same type or convertible -- confirm.
1034 template<typename OPERAND, typename TILE>
1036  TILE const& tile)
1037 {
1038  using tile_type = typename OPERAND::tile_type;
1039  tile_type new_tile;
1040  new_tile.copy(tile);
1041  return RestrictExtents<OPERAND, TILE>(operand, new_tile);
1042 }
1043 
// Specialization selected when both operands are 2-dim AND the right operand's
// tensor_type derives from TensorBlockConcreteBase. Its multiply()/
// multiply_add() return a block_literal rather than a result_type.
// NOTE(review): listing line 1053 (the line naming the specialized template)
// is missing from this rendering.
// NOTE(review): whenever this condition holds, the plain 2-dim x 2-dim
// specialization's condition holds as well; confirm how the two are kept from
// being ambiguous.
1052 template<typename LEFT_OPERAND_TYPE, typename RIGHT_OPERAND_TYPE>
1054  LEFT_OPERAND_TYPE,
1055  RIGHT_OPERAND_TYPE,
1056  typename std::enable_if<
1057  std::is_base_of<TensorBlockConcreteBase,
1058  typename RIGHT_OPERAND_TYPE::tensor_type>::value &&
1059  LEFT_OPERAND_TYPE::s_num_dims == 2 &&
1060  RIGHT_OPERAND_TYPE::s_num_dims == 2>::type>
1061 {
1062  using left_type = LEFT_OPERAND_TYPE;
1063  using right_type = RIGHT_OPERAND_TYPE;
1064  using result_type = typename LEFT_OPERAND_TYPE::result_type::product_type;
1065  static constexpr camp::idx_t s_num_dims = 2;
1066 
1067  // static_assert(LEFT_OPERAND_TYPE::s_num_dims == 1, "WHAOO");
1068  // static_assert(! std::is_base_of<TensorBlockConcreteBase, typename
1069  // RIGHT_OPERAND_TYPE::tensor_type>::value, "MATCH");
1070 
1071 
1072  // This tensor type is a TensorBlock of some kind
1073  using tensor_type = typename RIGHT_OPERAND_TYPE::tensor_type;
1074 
1075  // Get the storage type from the TensorBlock
1076  using storage_type = typename tensor_type::storage_type;
1077 
1078  // Create a BlockLiteral that uses the TensorBlock's indicated storage
1079  // and has an eval() that produces the TensorBlock's register type
 // NOTE(review): listing lines 1080-1081 (presumably the `block_literal`
 // alias used below) are missing from this rendering.
1082 
1083  RAJA_INLINE
1084 
 // Prints the node label with printf (the label is spelled "Matrx*Matrix" in
 // the emitted string).
1086  static void print_ast() { printf("Matrx*Matrix"); }
1087 
1088  RAJA_INLINE
1089 
 // Result extent: dim 0 from left's dim 0, any other dim from right's dim 1.
1091  static int getDimSize(int dim,
1092  LEFT_OPERAND_TYPE const& left,
1093  RIGHT_OPERAND_TYPE const& right)
1094  {
1095  return dim == 0 ? left.getDimSize(0) : right.getDimSize(1);
1096  }
1097 
 // Constructs a block_literal from `tile` and returns it.
 // NOTE(review): both operand parameters are unnamed and
 // multiply_into_result() is not called, so the visible code never computes
 // the product here -- confirm whether this path is intentionally unfinished.
 // NOTE(review): listing line 1102 (return type and function name) and lines
 // 1106-1108 are missing from this rendering.
1101  template<typename TILE_TYPE>
1103  TILE_TYPE const& tile,
1104  LEFT_OPERAND_TYPE const&,
1105  RIGHT_OPERAND_TYPE const&) //->
1109  {
1110 
1111  /*
1112  * First pass: just return a Multiply ET that evaluates the block
1113  * with underlying TensorRegisters
1114  *
1115  *
1116  * Second pass: we want to return a TensorLiteral ET node with the
1117  * matrix product already evaluated?
1118  *
1119  * What we really care about is improving the data reuse: so perhaps
1120  * returning a Multiply ET node with TensorLiteral nodes for each
1121  * of the operands
1122  *
1123  */
1124  // create a BlockLiteral
1125  block_literal result(tile);
1126 
1127  // evaluate the block-wise product into result
1128 
1129  // return TensorMultiply<decltype(left.eval(tile)),
1130  // decltype(right.eval(tile))>(left.eval(tile), right.eval(tile));
1131 
1132  // return the BlockLiteral ET
1133  return result;
1134  }
1135 
 // Builds a block_literal for `tile`, loads it through a TensorLoadStore over
 // the literal's ref, assigns add.eval(tile) to that evaluated value, then
 // accumulates left * right into it and returns the block_literal.
 // NOTE(review): listing line 1137 (return type and function name) is missing
 // here; the symbol index at the end of this page lists it as
 // `static block_literal multiply_add(...)`.
 // NOTE(review): whether writes to `result_et` reach the storage behind
 // `result` depends on what TensorLoadStore::eval returns -- confirm.
1136  template<typename TILE_TYPE, typename ADD_TYPE>
1138  TILE_TYPE const& tile,
1139  LEFT_OPERAND_TYPE const& left,
1140  RIGHT_OPERAND_TYPE const& right,
1141  ADD_TYPE const&
1142  add) //->
1143  // decltype(TensorMultiplyAdd<decltype(left.eval(tile)),
1144  // decltype(right.eval(tile)),
1145  // decltype(add.eval(tile))>(left.eval(tile),
1146  // right.eval(tile), add.eval(tile)))
1147  {
1148  /*
1149  * First pass: we want to return a BlockLiteral ET node with the
1150  * matrix product already evaluated. We do this by creating
1151  * a LoadStore node wrapping the BlockLiteral, and evaluating it as
1152  * a sub-expression.
1153  *
1154  * What we really care about is improving the data reuse: so perhaps
1155  * returning a Multiply ET node with TensorLiteral nodes for each
1156  * of the operands
1157  *
1158  */
1159 
1160  // create a BlockLiteral
1161  using block_tile_type = typename block_literal::tile_type;
1162  block_tile_type block_tile;
1163  block_tile.copy(tile);
1164  block_literal result(block_tile);
1165 
1166  using ref_type = typename block_literal::ref_type;
1167  using load_store_type = TensorLoadStore<tensor_type, ref_type>;
1168 
1169  // initialize the result with our addition term
1170  auto result_et = load_store_type(result.get_ref()).eval(tile);
1171  result_et = add.eval(tile);
1172 
1173  // return TensorMultiplyAdd<decltype(left.eval(tile)),
1174  // decltype(right.eval(tile)), decltype(add.eval(tile))>(left.eval(tile),
1175  // right.eval(tile), add.eval(tile));
1176 
1177  // multiply_into_result(result_et, tile, restrictExtents(left,
1178  // tile), restrictExtents(right, tile));
1179  multiply_into_result(result_et, tile, left, right);
1180 
1181  // return the BlockLiteral ET
1182  return result;
1183  }
1184 
1185 private:
 // Accumulates left * right into `result` by walking the contraction index k
 // (left's dim 1 / right's dim 0) in steps of tile_size, offset by each
 // operand's getDimBegin(). Unlike the plain matrix*matrix specialization,
 // each step wraps the evaluated operands with restrictExtents() and adds
 // their product to `result` with operator+=.
1186  template<typename STORAGE, typename TILE_TYPE>
1187  RAJA_INLINE RAJA_HOST_DEVICE static void multiply_into_result(
1188  STORAGE& result,
1189  TILE_TYPE const& tile,
1190  LEFT_OPERAND_TYPE const& et_left,
1191  RIGHT_OPERAND_TYPE const& et_right)
1192  {
1193 
1194  // get tile size from matrix type
1195  auto tile_size = result_type::s_dim_elem(1);
1196  auto k_size = et_left.getDimSize(1);
1197 
1198  // TODO: check that left and right are compatible
1199  // m_left.getDimSize(1) == m_right.getDimSize(0)
1200  // how do we provide checking for this kind of error?
1201 
1202  // tile over row of left and column of right
1203  TILE_TYPE left_tile = tile;
1204  left_tile.m_size[1] = tile_size;
1205  auto left_begin = et_left.getDimBegin(1);
1206 
1207  TILE_TYPE right_tile = tile;
1208  right_tile.m_size[0] = tile_size;
1209  auto right_begin = et_right.getDimBegin(0);
1210 
1211 
1212  // Do full tiles in k
1213  decltype(k_size) k = 0;
1214  for (; k + tile_size <= k_size; k += tile_size)
1215  {
1216 
1217 
1218  // evaluate both sides of operator
1219  left_tile.m_begin[1] = k + left_begin;
1220  auto left = et_left.eval(left_tile);
1221 
1222  right_tile.m_begin[0] = k + right_begin;
1223  auto right = et_right.eval(right_tile);
1224 
1225  // accumulate product
1226  // left.matrix_multiply_accumulate(result, right);
1227  result +=
1228  restrictExtents(left, left_tile) * restrictExtents(right, right_tile);
1229  }
1230  // remainder tile in k
1231  if (k < k_size)
1232  {
1233 
1234  auto& left_part_tile = make_tensor_tile_partial(left_tile);
1235  left_part_tile.m_begin[1] = k + left_begin;
1236  left_part_tile.m_size[1] = k_size - k;
1237  auto left = et_left.eval(left_part_tile);
1238 
1239  auto& right_part_tile = make_tensor_tile_partial(right_tile);
1240  right_part_tile.m_begin[0] = k + right_begin;
1241  right_part_tile.m_size[0] = k_size - k;
1242  auto right = et_right.eval(right_part_tile);
1243 
1244  // accumulate product
1245  // left.matrix_multiply_accumulate(result, right);
1246  result += restrictExtents(left, left_part_tile) *
1247  restrictExtents(right, right_part_tile);
1248  }
1249  }
1250 };
1251 
1252 
1253 } // namespace ET
1254 
1255 } // namespace expt
1256 } // namespace internal
1257 
1258 } // namespace RAJA
1259 
1260 
1261 #endif
Definition: BlockLiteral.hpp:51
typename STORAGE_TYPE::ref_type ref_type
Definition: BlockLiteral.hpp:57
typename ref_type::tile_type tile_type
Definition: BlockLiteral.hpp:58
RAJA_INLINE RAJA_HOST_DEVICE ref_type get_ref()
Definition: BlockLiteral.hpp:102
Definition: MultiplyOperator.hpp:978
RAJA_INLINE constexpr RAJA_HOST_DEVICE index_type getDimBegin(camp::idx_t dim) const
Definition: MultiplyOperator.hpp:1011
typename OPERAND_TYPE::result_type result_type
Definition: MultiplyOperator.hpp:982
typename TILE_TYPE::index_type index_type
Definition: MultiplyOperator.hpp:983
RAJA_INLINE RAJA_HOST_DEVICE RestrictExtents(operand_type const &operand, tile_type const &tile)
Definition: MultiplyOperator.hpp:995
TILE_TYPE tile_type
Definition: MultiplyOperator.hpp:984
RAJA_INLINE RAJA_HOST_DEVICE void print_ast() const
Definition: MultiplyOperator.hpp:1026
RAJA_INLINE RAJA_HOST_DEVICE auto eval(TILE_TYPE2 const &tile) const -> decltype(m_operand.eval(tile))
Definition: MultiplyOperator.hpp:1017
OPERAND_TYPE operand_type
Definition: MultiplyOperator.hpp:981
static constexpr camp::idx_t s_num_dims
Definition: MultiplyOperator.hpp:985
RAJA_INLINE constexpr RAJA_HOST_DEVICE index_type getDimSize(index_type dim) const
Definition: MultiplyOperator.hpp:1003
Definition: ExpressionTemplateBase.hpp:72
Definition: TensorLoadStore.hpp:75
RAJA_SUPPRESS_HD_WARN RAJA_INLINE RAJA_HOST_DEVICE auto eval(TILE_TYPE const &tile) const -> decltype(tensor_type::s_load_ref(merge_ref_tile(m_ref, tile)))
Definition: TensorLoadStore.hpp:166
#define RAJA_HOST_DEVICE
Definition: macros.hpp:65
RestrictExtents< OPERAND, TILE > restrictExtents(OPERAND const &operand, TILE const &tile)
Definition: MultiplyOperator.hpp:1035
TensorTileSize
Definition: TensorRef.hpp:234
@ TENSOR_PARTIAL
Definition: TensorRef.hpp:235
RAJA_INLINE constexpr RAJA_HOST_DEVICE TensorTile< INDEX_TYPE, TENSOR_PARTIAL, NUM_DIMS > & make_tensor_tile_partial(TensorTile< INDEX_TYPE, RTENSOR_SIZE, NUM_DIMS > &tile)
Definition: TensorRef.hpp:733
Definition: AlignedRangeIndexSetBuilders.cpp:35
RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const &ctx, TILE_T tile_size, SEGMENT const &segment, BODY const &body)
Definition: launch_core.hpp:589
Definition: ListSegment.hpp:416
RAJA_INLINE static RAJA_HOST_DEVICE block_literal multiply_add(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, ADD_TYPE const &add)
Definition: MultiplyOperator.hpp:1137
RAJA_INLINE static RAJA_HOST_DEVICE result_type multiply_add(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, ADD_TYPE const &add)
Definition: MultiplyOperator.hpp:750
RAJA_INLINE static RAJA_HOST_DEVICE int getDimSize(int dim, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &)
Definition: MultiplyOperator.hpp:724
RAJA_INLINE static RAJA_HOST_DEVICE result_type multiply(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right)
Definition: MultiplyOperator.hpp:735
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right) -> decltype(left.eval(tile).scale(right.eval(tile)))
Definition: MultiplyOperator.hpp:219
RAJA_INLINE static RAJA_HOST_DEVICE int getDimSize(int dim, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &)
Definition: MultiplyOperator.hpp:208
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply_add(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, ADD_OPERAND_TYPE const &add) -> decltype(left.eval(tile).scale(right.eval(tile))+add.eval(tile))
Definition: MultiplyOperator.hpp:232
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply_subtract(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, SUBTRACT_OPERAND_TYPE const &subtract) -> decltype(left.eval(tile).scale(right.eval(tile)) - subtract.eval(tile))
Definition: MultiplyOperator.hpp:246
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right) -> decltype(right.eval(tile).scale(left.eval(tile)))
Definition: MultiplyOperator.hpp:149
RAJA_INLINE static RAJA_HOST_DEVICE int getDimSize(int dim, LEFT_OPERAND_TYPE const &, RIGHT_OPERAND_TYPE const &right)
Definition: MultiplyOperator.hpp:138
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply_add(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, ADD_OPERAND_TYPE const &add) -> decltype(right.eval(tile).scale(left.eval(tile))+add.eval(tile))
Definition: MultiplyOperator.hpp:162
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply_subtract(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, SUBTRACT_OPERAND_TYPE const &subtract) -> decltype(right.eval(tile).scale(left.eval(tile)) - subtract.eval(tile))
Definition: MultiplyOperator.hpp:176
RAJA_INLINE static RAJA_HOST_DEVICE int getDimSize(int dim, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right)
Definition: MultiplyOperator.hpp:855
RAJA_INLINE static RAJA_HOST_DEVICE result_type multiply_add(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, ADD_TYPE const &add)
Definition: MultiplyOperator.hpp:898
RAJA_INLINE static RAJA_HOST_DEVICE result_type multiply(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right)
Definition: MultiplyOperator.hpp:866
RAJA_INLINE static RAJA_HOST_DEVICE result_type multiply_add(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, ADD_TYPE const &add)
Definition: MultiplyOperator.hpp:318
RAJA_INLINE static RAJA_HOST_DEVICE result_type multiply(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right)
Definition: MultiplyOperator.hpp:302
RAJA_INLINE static RAJA_HOST_DEVICE int getDimSize(int dim, LEFT_OPERAND_TYPE const &, RIGHT_OPERAND_TYPE const &right)
Definition: MultiplyOperator.hpp:291
Definition: MultiplyOperator.hpp:48
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right) -> decltype(left.eval(tile) *right.eval(tile))
Definition: MultiplyOperator.hpp:76
static constexpr camp::idx_t s_num_dims
Definition: MultiplyOperator.hpp:51
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply_subtract(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, SUBTRACT_OPERAND_TYPE const &subtract) -> decltype(left.eval(tile).multiply_subtract(right.eval(tile), subtract.eval(tile)))
Definition: MultiplyOperator.hpp:104
typename LEFT_OPERAND_TYPE::result_type result_type
Definition: MultiplyOperator.hpp:50
RAJA_INLINE static RAJA_HOST_DEVICE void print_ast()
Definition: MultiplyOperator.hpp:56
RAJA_INLINE static RAJA_HOST_DEVICE auto multiply_add(TILE_TYPE const &tile, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right, ADD_OPERAND_TYPE const &add) -> decltype(left.eval(tile).multiply_add(right.eval(tile), add.eval(tile)))
Definition: MultiplyOperator.hpp:89
RAJA_INLINE static RAJA_HOST_DEVICE int getDimSize(int dim, LEFT_OPERAND_TYPE const &left, RIGHT_OPERAND_TYPE const &right)
Definition: MultiplyOperator.hpp:65