TensorEvaluator.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

// Generic evaluator
template<typename Derived, typename Device>
struct TensorEvaluator
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef Derived XprType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  // NumDimensions is -1 for variable dim tensors
  static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                                   internal::traits<Derived>::NumDimensions : 0;
  static constexpr int Layout = Derived::Layout;

  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<std::remove_const_t<Scalar>>::value,
    PreferBlockAccess = false,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(const_cast<TensorPointerType>(m.data()))),
        m_dims(m.dimensions()),
        m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

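  // Returns true when the caller still has to pull coefficients through
  // coeff()/packet(), and false when the result has already been written
  // directly into `dest` (possible here because raw data can simply be memcpy'd).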
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && dest) {
      m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  // Return a packet starting at `index` where `umask` specifies which elements
  // have to be loaded. The type/size of the mask depends on PacketReturnType,
  // e.g. for Packet16f, `umask` is of type uint16_t and if a bit is 1, the
  // corresponding float element will be loaded, otherwise 0 will be loaded.
  // The function is templatized to enable SFINAE.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }
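  // Illustrative sketch (not part of the original source): with AVX512, where
  // PacketReturnType is Packet16f and mask_t is uint16_t, loading only the even
  // lanes of a packet could look like this (`eval` is a hypothetical evaluator
  // instance):
  //
  //   uint16_t umask = 0x5555;  // bits 0,2,4,...: load those floats, zero the rest
  //   Packet16f p = eval.template partialPacket<Packet16f>(index, umask);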

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) const
  {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
  coeffRef(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }
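  // For a float tensor this reports sizeof(float) == 4 bytes loaded per
  // coefficient, zero bytes stored and zero compute cycles: reading raw data is
  // pure memory traffic. The last two arguments tell the cost model whether the
  // loop is vectorized and how many coefficients each packet covers.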

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    eigen_assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }

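  // writeBlock is the write-side counterpart of block(): it assigns the
  // coefficients described by `desc` from an evaluated block expression back
  // into this evaluator's buffer. Only RawAccess evaluators like this one can
  // serve as a block-assignment target.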
  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    eigen_assert(m_data != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
                                            Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(m_dims), m_data,
                                  desc.offset()),
        block.expr());
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
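
// Illustrative sketch (not part of the original source) of how an evaluator is
// typically driven, e.g. by the executor machinery in TensorExecutor.h:
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<decltype(t), Eigen::DefaultDevice> eval(t, device);
//   if (eval.evalSubExprsIfNeeded(nullptr)) {
//     float sum = 0.f;
//     for (Eigen::Index i = 0; i < eval.dimensions().TotalSize(); ++i) {
//       sum += eval.coeff(i);  // linear coefficient access
//     }
//   }
//   eval.cleanup();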

namespace internal {
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}
// Use the texture cache on CUDA devices whenever possible
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
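// Hypothetical example (not in the original source): additional scalar types
// with an __ldg overload, such as int, could be routed through the texture
// cache with the same pattern:
//
//   template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
//   int loadConstant(const int* address) {
//     return __ldg(address);
//   }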
#endif

} // namespace internal

// Default evaluator for rvalues
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef const Derived XprType;
  typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
  typedef StorageMemory<const Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  // NumDimensions is -1 for variable dim tensors
  static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                                   internal::traits<Derived>::NumDimensions : 0;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static constexpr int Layout = Derived::Layout;

  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<ScalarNoConst>::value,
    PreferBlockAccess = false,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
  TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && data) {
      m_device.memcpy((void*)(m_device.get(data)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return internal::loadConstant(m_data + index);
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }

  // Return a packet starting at `index` where `umask` specifies which elements
  // have to be loaded. The type/size of the mask depends on PacketReturnType,
  // e.g. for Packet16f, `umask` is of type uint16_t and if a bit is 1, the
  // corresponding float element will be loaded, otherwise 0 will be loaded.
  // The function is templatized to enable SFINAE.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
                                                                                 : m_dims.IndexOfRowMajor(coords);
    return internal::loadConstant(m_data + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    eigen_assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};


// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = true,
    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
    #ifdef EIGEN_USE_SYCL
    && (PacketType<CoeffReturnType, Device>::size > 1)
    #endif
    ,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    done(true);
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_wrapper(m_functor, index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
  const internal::nullary_wrapper<CoeffReturnType, NullaryOp> m_wrapper;
};


// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<UnaryOp>::PacketAccess),
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef std::remove_const_t<Scalar> ScalarNoConst;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_argImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.getResourceRequirements().addCostPerCoeff(
        {0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};


// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
  enum {
    IsAligned         = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
                        int(TensorEvaluator<RightArgType, Device>::IsAligned),
    PacketAccess      = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
                        int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<BinaryOp>::PacketAccess),
    BlockAccess       = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
                        int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
                        int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_leftImpl(op.lhsExpression(), device),
      m_rightImpl(op.rhsExpression(), device)
  {
    EIGEN_STATIC_ASSERT(
        (static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
             static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) ||
         internal::traits<XprType>::NumDimensions <= 1),
        YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int NumDims = internal::array_size<
      typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
      LeftTensorBlock;
  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
      RightTensorBlock;

  typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
                                           RightTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const
  {
    // TODO: use right impl instead if right impl dimensions are known at compile time.
    return m_leftImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    // TODO(ezhulenev): Evaluate the two expressions in parallel?
    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
                                            [done](bool) { done(true); });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index),
                              m_rightImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return internal::TensorBlockResourceRequirements::merge(
               m_leftImpl.getResourceRequirements(),
               m_rightImpl.getResourceRequirements())
        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    desc.DropDestinationBuffer();
    return TensorBlock(m_leftImpl.block(desc, scratch),
                       m_rightImpl.block(desc, scratch), m_functor);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
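
// Illustrative sketch (not part of the original source): a coefficient-wise
// expression such as `A + B` builds a TensorCwiseBinaryOp, and assigning it
// drives this specialization, which applies the functor lazily per coefficient
// (or per packet) without materializing intermediates:
//
//   Eigen::Tensor<float, 1> A(8), B(8), C(8);
//   A.setConstant(1.f);
//   B.setConstant(2.f);
//   C = A + B;  // evaluated via TensorEvaluator<const TensorCwiseBinaryOp<...>, Device>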

// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  static constexpr int Layout = TensorEvaluator<Arg1Type, Device>::Layout;
  enum {
    IsAligned         = TensorEvaluator<Arg1Type, Device>::IsAligned &
                        TensorEvaluator<Arg2Type, Device>::IsAligned &
                        TensorEvaluator<Arg3Type, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg2Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg3Type, Device>::PacketAccess &&
                        internal::functor_traits<TernaryOp>::PacketAccess,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_arg1Impl(op.arg1Expression(), device),
      m_arg2Impl(op.arg2Expression(), device),
      m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT(
        (static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) ==
             static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) ||
         internal::traits<XprType>::NumDimensions <= 1),
        YOU_MADE_A_PROGRAMMING_MISTAKE);

    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) &&
                 dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const
  {
    // TODO: use arg2 or arg3 dimensions if they are known at compile time.
    return m_arg1Impl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};


// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  using TernarySelectOp = internal::scalar_boolean_select_op<typename internal::traits<ThenArgType>::Scalar,
                                                             typename internal::traits<ElseArgType>::Scalar,
                                                             typename internal::traits<IfArgType>::Scalar>;
  static constexpr bool TernaryPacketAccess =
      TensorEvaluator<ThenArgType, Device>::PacketAccess &&
      TensorEvaluator<ElseArgType, Device>::PacketAccess &&
      TensorEvaluator<IfArgType, Device>::PacketAccess &&
      internal::functor_traits<TernarySelectOp>::PacketAccess;

  static constexpr int Layout = TensorEvaluator<IfArgType, Device>::Layout;
  enum {
    IsAligned         = TensorEvaluator<ThenArgType, Device>::IsAligned &
                        TensorEvaluator<ElseArgType, Device>::IsAligned,
    PacketAccess      = (TensorEvaluator<ThenArgType, Device>::PacketAccess &&
                         TensorEvaluator<ElseArgType, Device>::PacketAccess &&
                         PacketType<Scalar, Device>::HasBlend) || TernaryPacketAccess,
    BlockAccess       = TensorEvaluator<IfArgType, Device>::BlockAccess &&
                        TensorEvaluator<ThenArgType, Device>::BlockAccess &&
                        TensorEvaluator<ElseArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
    : m_condImpl(op.ifExpression(), device),
      m_thenImpl(op.thenExpression(), device),
      m_elseImpl(op.elseExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
      IfArgTensorBlock;
  typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
      ThenArgTensorBlock;
  typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
      ElseArgTensorBlock;

  struct TensorSelectOpBlockFactory {
    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    struct XprType {
      typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
    };

    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
        const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
      return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
    }
  };

  typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
                                           IfArgTensorBlock, ThenArgTensorBlock,
                                           ElseArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use then or else impl instead if they happen to be known at compile time.
    return m_condImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
        m_elseImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
      });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }

  template<int LoadMode, bool UseTernary = TernaryPacketAccess,
           std::enable_if_t<!UseTernary, bool> = true>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    internal::Selector<PacketSize> select;
    EIGEN_UNROLL_LOOP
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index+i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));
  }
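  // Note on the blend path above: Selector<PacketSize> holds one boolean per
  // packet lane, and pblend picks, lane by lane, from the `then` packet where
  // the selector is true and from the `else` packet otherwise. For PacketSize
  // == 4 and a condition evaluating to {1, 0, 0, 1}, the result is
  // {then[0], else[1], else[2], then[3]}.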

  template <int LoadMode, bool UseTernary = TernaryPacketAccess,
            std::enable_if_t<UseTernary, bool> = true>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return TernarySelectOp().template packetOp<PacketReturnType>(m_thenImpl.template packet<LoadMode>(index),
                                                                 m_elseImpl.template packet<LoadMode>(index),
                                                                 m_condImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
               .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    auto then_req = m_thenImpl.getResourceRequirements();
    auto else_req = m_elseImpl.getResourceRequirements();

    auto merged_req =
        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
    merged_req.cost_per_coeff =
        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);

    return internal::TensorBlockResourceRequirements::merge(
        m_condImpl.getResourceRequirements(), merged_req);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // It's unsafe to pass the destination buffer to the underlying expressions,
    // because the output might be aliased with one of the inputs.
    desc.DropDestinationBuffer();

    return TensorBlock(
        m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
        m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_condImpl.bind(cgh);
    m_thenImpl.bind(cgh);
    m_elseImpl.bind(cgh);
  }
#endif
 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};

} // end namespace Eigen

#if defined(EIGEN_USE_SYCL) && defined(SYCL_COMPILER_IS_DPCPP)
template <typename Derived, typename Device>
struct cl::sycl::is_device_copyable<
    Eigen::TensorEvaluator<Derived, Device>,
    std::enable_if_t<!std::is_trivially_copyable<
        Eigen::TensorEvaluator<Derived, Device>>::value>> : std::true_type {};
#endif

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H