10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
29 template<
typename Derived,
typename Device>
32 typedef typename Derived::Index
Index;
33 typedef typename Derived::Scalar
Scalar;
44 static constexpr
int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
45 internal::traits<Derived>::NumDimensions : 0;
46 static constexpr
int Layout = Derived::Layout;
51 BlockAccess = internal::is_arithmetic<std::remove_const_t<Scalar>>::value,
52 PreferBlockAccess =
false,
79 if (!
NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && dest) {
86 #ifdef EIGEN_USE_THREADS
87 template <
typename EvalSubExprsCallback>
88 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
110 return internal::ploadt<PacketReturnType, LoadMode>(
m_data + index);
119 std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
120 partialPacket(
Index index,
typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask)
const
122 return internal::ploadu<PacketReturnTypeT>(
m_data + index, umask);
128 return internal::pstoret<Scalar, PacketReturnType, StoreMode>(
m_data + index,
x);
157 return internal::TensorBlockResourceRequirements::any();
162 bool =
false)
const {
164 return TensorBlock::materialize(
m_data,
m_dims, desc, scratch);
167 template<
typename TensorBlock>
173 typedef internal::TensorBlockAssignment<
Scalar,
NumCoords, TensorBlockExpr,
177 TensorBlockAssign::Run(
178 TensorBlockAssign::target(desc.dimensions(),
198 #if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
201 return __ldg(address);
205 return __ldg(address);
216 template<
typename Derived,
typename Device>
219 typedef typename Derived::Index
Index;
232 static constexpr
int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
233 internal::traits<Derived>::NumDimensions : 0;
235 static constexpr
int Layout = Derived::Layout;
240 BlockAccess = internal::is_arithmetic<ScalarNoConst>::value,
241 PreferBlockAccess =
false,
263 if (!
NumTraits<std::remove_const_t<Scalar>>::RequireInitialization &&
data) {
270 #ifdef EIGEN_USE_THREADS
271 template <
typename EvalSubExprsCallback>
272 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
289 return internal::ploadt_ro<PacketReturnType, LoadMode>(
m_data + index);
298 std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
299 partialPacket(
Index index,
typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask)
const
301 return internal::ploadu<PacketReturnTypeT>(
m_data + index, umask);
307 :
m_dims.IndexOfRowMajor(coords);
318 return internal::TensorBlockResourceRequirements::any();
323 bool =
false)
const {
325 return TensorBlock::materialize(
m_data,
m_dims, desc, scratch);
341 template<
typename NullaryOp,
typename ArgType,
typename Device>
348 : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
363 PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
364 #ifdef EIGEN_USE_SYCL
369 PreferBlockAccess =
false,
382 #ifdef EIGEN_USE_THREADS
383 template <
typename EvalSubExprsCallback>
384 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
394 return m_wrapper(m_functor, index);
397 template<
int LoadMode>
400 return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
414 const internal::nullary_wrapper<CoeffReturnType,NullaryOp>
m_wrapper;
421 template<
typename UnaryOp,
typename ArgType,
typename Device>
430 int(internal::functor_traits<UnaryOp>::PacketAccess),
440 m_functor(op.functor()),
441 m_argImpl(op.nestedExpression(), device)
453 static constexpr
int NumDims = internal::array_size<Dimensions>::value;
462 typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
469 m_argImpl.evalSubExprsIfNeeded(NULL);
473 #ifdef EIGEN_USE_THREADS
474 template <
typename EvalSubExprsCallback>
475 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
477 m_argImpl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
487 return m_functor(m_argImpl.coeff(index));
490 template<
int LoadMode>
493 return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
497 const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
498 return m_argImpl.costPerCoeff(vectorized) +
504 static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
505 return m_argImpl.getResourceRequirements().addCostPerCoeff(
511 bool =
false)
const {
512 return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
526 template<
typename BinaryOp,
typename LeftArgType,
typename RightArgType,
typename Device>
537 int(internal::functor_traits<BinaryOp>::PacketAccess),
549 m_functor(op.functor()),
550 m_leftImpl(op.lhsExpression(), device),
551 m_rightImpl(op.rhsExpression(), device)
566 static constexpr
int NumDims = internal::array_size<
586 return m_leftImpl.dimensions();
590 m_leftImpl.evalSubExprsIfNeeded(NULL);
591 m_rightImpl.evalSubExprsIfNeeded(NULL);
595 #ifdef EIGEN_USE_THREADS
596 template <
typename EvalSubExprsCallback>
597 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
600 m_leftImpl.evalSubExprsIfNeededAsync(
nullptr, [
this, done](
bool) {
601 m_rightImpl.evalSubExprsIfNeededAsync(
nullptr,
602 [done](
bool) { done(
true); });
608 m_leftImpl.cleanup();
609 m_rightImpl.cleanup();
614 return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
616 template<
int LoadMode>
619 return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
624 const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
625 return m_leftImpl.costPerCoeff(vectorized) +
626 m_rightImpl.costPerCoeff(vectorized) +
632 static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
633 return internal::TensorBlockResourceRequirements::merge(
634 m_leftImpl.getResourceRequirements(),
635 m_rightImpl.getResourceRequirements())
636 .addCostPerCoeff({0, 0, functor_cost /
PacketSize});
641 bool =
false)
const {
642 desc.DropDestinationBuffer();
643 return TensorBlock(m_leftImpl.block(desc, scratch),
644 m_rightImpl.block(desc, scratch), m_functor);
658 template<
typename TernaryOp,
typename Arg1Type,
typename Arg2Type,
typename Arg3Type,
typename Device>
669 internal::functor_traits<TernaryOp>::PacketAccess,
680 : m_functor(op.functor()),
681 m_arg1Impl(op.arg1Expression(), device),
682 m_arg2Impl(op.arg2Expression(), device),
683 m_arg3Impl(op.arg3Expression(), device)
688 typename internal::traits<Arg2Type>::StorageKind>::value),
689 STORAGE_KIND_MUST_MATCH)
691 typename internal::traits<Arg3Type>::StorageKind>::value),
692 STORAGE_KIND_MUST_MATCH)
694 typename internal::traits<Arg2Type>::Index>::value),
695 STORAGE_INDEX_MUST_MATCH)
697 typename internal::traits<Arg3Type>::Index>::value),
698 STORAGE_INDEX_MUST_MATCH)
719 return m_arg1Impl.dimensions();
723 m_arg1Impl.evalSubExprsIfNeeded(NULL);
724 m_arg2Impl.evalSubExprsIfNeeded(NULL);
725 m_arg3Impl.evalSubExprsIfNeeded(NULL);
729 m_arg1Impl.cleanup();
730 m_arg2Impl.cleanup();
731 m_arg3Impl.cleanup();
736 return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
738 template<
int LoadMode>
741 return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
742 m_arg2Impl.template packet<LoadMode>(index),
743 m_arg3Impl.template packet<LoadMode>(index));
748 const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
749 return m_arg1Impl.costPerCoeff(vectorized) +
750 m_arg2Impl.costPerCoeff(vectorized) +
751 m_arg3Impl.costPerCoeff(vectorized) +
767 template<
typename IfArgType,
typename ThenArgType,
typename ElseArgType,
typename Device>
773 using TernarySelectOp = internal::scalar_boolean_select_op<typename internal::traits<ThenArgType>::Scalar,
774 typename internal::traits<ElseArgType>::Scalar,
775 typename internal::traits<IfArgType>::Scalar>;
776 static constexpr
bool TernaryPacketAccess =
799 : m_condImpl(op.ifExpression(), device),
800 m_thenImpl(op.thenExpression(), device),
801 m_elseImpl(op.elseExpression(), device)
817 static constexpr
int NumDims = internal::array_size<Dimensions>::value;
830 struct TensorSelectOpBlockFactory {
831 template <
typename IfArgXprType,
typename ThenArgXprType,
typename ElseArgXprType>
836 template <
typename IfArgXprType,
typename ThenArgXprType,
typename ElseArgXprType>
838 const IfArgXprType& if_expr,
const ThenArgXprType& then_expr,
const ElseArgXprType& else_expr)
const {
843 typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
844 IfArgTensorBlock, ThenArgTensorBlock,
852 return m_condImpl.dimensions();
856 m_condImpl.evalSubExprsIfNeeded(NULL);
857 m_thenImpl.evalSubExprsIfNeeded(NULL);
858 m_elseImpl.evalSubExprsIfNeeded(NULL);
#ifdef EIGEN_USE_THREADS
  // Asynchronous counterpart of evalSubExprsIfNeeded() for the select
  // evaluator: prepares the condition, "then" and "else" sub-expressions one
  // after another and calls `done(true)` once the last of them has completed.
  //
  // Every stage goes through the child's evalSubExprsIfNeededAsync(), which is
  // the entry point that accepts a completion callback; the synchronous
  // evalSubExprsIfNeeded() is called with a single pointer argument elsewhere
  // in this evaluator, so it cannot be used to chain callbacks. The nesting
  // follows the same pattern as the binary-op evaluator's async chain.
  //
  // Params:
  //   (unnamed EvaluatorPointerType) - destination pointer; unused here, the
  //       sub-expressions are always handed nullptr.
  //   done - callback invoked with `true` after the final stage; it is copied
  //       into each nested lambda.
  // The bool received by each intermediate callback is ignored.
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
        m_elseImpl.evalSubExprsIfNeededAsync(nullptr,
                                             [done](bool) { done(true); });
      });
    });
  }
#endif  // EIGEN_USE_THREADS
875 m_condImpl.cleanup();
876 m_thenImpl.cleanup();
877 m_elseImpl.cleanup();
882 return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
885 template<
int LoadMode,
bool UseTernary = TernaryPacketAccess,
886 std::enable_if_t<!UseTernary, bool> =
true>
889 internal::Selector<PacketSize> select;
892 select.select[
i] = m_condImpl.coeff(index+
i);
895 m_thenImpl.template packet<LoadMode>(index),
896 m_elseImpl.template packet<LoadMode>(index));
900 template <
int LoadMode,
bool UseTernary = TernaryPacketAccess,
901 std::enable_if_t<UseTernary, bool> =
true>
903 return TernarySelectOp().template packetOp<PacketReturnType>(m_thenImpl.template packet<LoadMode>(index),
904 m_elseImpl.template packet<LoadMode>(index),
905 m_condImpl.template packet<LoadMode>(index));
910 return m_condImpl.costPerCoeff(vectorized) +
911 m_thenImpl.costPerCoeff(vectorized)
912 .
cwiseMax(m_elseImpl.costPerCoeff(vectorized));
917 auto then_req = m_thenImpl.getResourceRequirements();
918 auto else_req = m_elseImpl.getResourceRequirements();
921 internal::TensorBlockResourceRequirements::merge(then_req, else_req);
922 merged_req.cost_per_coeff =
923 then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);
925 return internal::TensorBlockResourceRequirements::merge(
926 m_condImpl.getResourceRequirements(), merged_req);
931 bool =
false)
const {
934 desc.DropDestinationBuffer();
937 m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
938 m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
943 #ifdef EIGEN_USE_SYCL
946 m_condImpl.bind(cgh);
947 m_thenImpl.bind(cgh);
948 m_elseImpl.bind(cgh);
959 #if defined(EIGEN_USE_SYCL) && defined(SYCL_COMPILER_IS_DPCPP)
960 template <
typename Derived,
typename Device>
961 struct cl::sycl::is_device_copyable<
962 Eigen::TensorEvaluator<Derived, Device>,
963 std::enable_if_t<!std::is_trivially_copyable<
964 Eigen::TensorEvaluator<Derived, Device>>::value>> : std::true_type {};
#define EIGEN_ALWAYS_INLINE
#define EIGEN_UNROLL_LOOP
#define EIGEN_DEVICE_FUNC
#define EIGEN_STATIC_ASSERT(X, MSG)
Eigen::internal::traits< TensorCwiseBinaryOp >::Index Index
Eigen::internal::traits< TensorCwiseBinaryOp >::Scalar Scalar
Eigen::internal::traits< TensorCwiseNullaryOp >::Scalar Scalar
Eigen::internal::traits< TensorCwiseNullaryOp >::Index Index
Eigen::internal::traits< TensorCwiseTernaryOp >::Index Index
Eigen::internal::traits< TensorCwiseTernaryOp >::Scalar Scalar
Eigen::internal::traits< TensorCwiseUnaryOp >::Scalar Scalar
Eigen::internal::traits< TensorCwiseUnaryOp >::Index Index
A tensor expression mapping an existing array of data.
TensorOpCost cwiseMax(const TensorOpCost &rhs) const
Eigen::internal::traits< TensorSelectOp >::Index Index
Eigen::internal::traits< TensorSelectOp >::Scalar Scalar
EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
EIGEN_ALWAYS_INLINE T loadConstant(const T *address)
Packet16c pblend(const Selector< 16 > &ifPacket, const Packet16c &thenPacket, const Packet16c &elsePacket)
TensorContractionSycl.h: provides various tensor contraction kernels for the SYCL backend
EIGEN_ALWAYS_INLINE bool dimensions_match(Dims1 dims1, Dims2 dims2)
internal::packet_traits< Scalar >::type type
EvaluatorPointerType m_data
std::enable_if_t< internal::unpacket_traits< PacketReturnTypeT >::masked_load_available, PacketReturnTypeT > partialPacket(Index index, typename internal::unpacket_traits< PacketReturnTypeT >::mask_t umask) const
TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
bool evalSubExprsIfNeeded(EvaluatorPointerType data)
internal::TensorBlockDescriptor< NumCoords, Index > TensorBlockDesc
const Device EIGEN_DEVICE_REF m_device
CoeffReturnType coeff(Index index) const
Storage::Type EvaluatorPointerType
Derived::Dimensions Dimensions
PacketType< CoeffReturnType, Device >::type PacketReturnType
TensorEvaluator(const Derived &m, const Device &device)
StorageMemory< const Scalar, Device > Storage
internal::traits< Derived >::template MakePointer< const Scalar >::Type TensorPointerType
const Dimensions & dimensions() const
internal::TensorBlockResourceRequirements getResourceRequirements() const
internal::TensorMaterializedBlock< ScalarNoConst, NumCoords, Layout, Index > TensorBlock
PacketReturnType packet(Index index) const
TensorOpCost costPerCoeff(bool vectorized) const
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
Derived::Scalar CoeffReturnType
std::remove_const_t< Scalar > ScalarNoConst
EvaluatorPointerType data() const
CoeffReturnType coeff(const array< DenseIndex, NumCoords > &coords) const
EvaluatorPointerType data() const
PacketReturnType packet(Index index) const
internal::TensorCwiseBinaryBlock< BinaryOp, LeftTensorBlock, RightTensorBlock > TensorBlock
const Dimensions & dimensions() const
Storage::Type EvaluatorPointerType
TensorEvaluator< LeftArgType, Device >::Dimensions Dimensions
TensorOpCost costPerCoeff(bool vectorized) const
TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
TensorCwiseBinaryOp< BinaryOp, LeftArgType, RightArgType > XprType
const Device EIGEN_DEVICE_REF m_device
CoeffReturnType coeff(Index index) const
PacketType< CoeffReturnType, Device >::type PacketReturnType
TensorEvaluator(const XprType &op, const Device &device)
internal::TensorBlockResourceRequirements getResourceRequirements() const
TensorEvaluator< RightArgType, Device > m_rightImpl
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
TensorEvaluator< const LeftArgType, Device >::TensorBlock LeftTensorBlock
TensorEvaluator< LeftArgType, Device > m_leftImpl
TensorEvaluator< const RightArgType, Device >::TensorBlock RightTensorBlock
internal::traits< XprType >::Scalar CoeffReturnType
StorageMemory< CoeffReturnType, Device > Storage
bool evalSubExprsIfNeeded(EvaluatorPointerType)
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
internal::traits< XprType >::Scalar CoeffReturnType
CoeffReturnType coeff(Index index) const
internal::TensorBlockNotImplemented TensorBlock
Storage::Type EvaluatorPointerType
TensorCwiseNullaryOp< NullaryOp, ArgType > XprType
TensorEvaluator< ArgType, Device > m_argImpl
TensorEvaluator(const XprType &op, const Device &device)
TensorEvaluator< ArgType, Device >::Dimensions Dimensions
const internal::nullary_wrapper< CoeffReturnType, NullaryOp > m_wrapper
PacketType< CoeffReturnType, Device >::type PacketReturnType
bool evalSubExprsIfNeeded(EvaluatorPointerType)
TensorOpCost costPerCoeff(bool vectorized) const
StorageMemory< CoeffReturnType, Device > Storage
const Dimensions & dimensions() const
PacketReturnType packet(Index index) const
const NullaryOp m_functor
EvaluatorPointerType data() const
TensorEvaluator< Arg1Type, Device > m_arg1Impl
internal::traits< XprType >::Scalar CoeffReturnType
TensorEvaluator(const XprType &op, const Device &device)
Storage::Type EvaluatorPointerType
PacketReturnType packet(Index index) const
bool evalSubExprsIfNeeded(EvaluatorPointerType)
const Dimensions & dimensions() const
const TernaryOp m_functor
TensorCwiseTernaryOp< TernaryOp, Arg1Type, Arg2Type, Arg3Type > XprType
CoeffReturnType coeff(Index index) const
StorageMemory< CoeffReturnType, Device > Storage
TensorEvaluator< Arg2Type, Device > m_arg2Impl
TensorOpCost costPerCoeff(bool vectorized) const
TensorEvaluator< Arg1Type, Device >::Dimensions Dimensions
PacketType< CoeffReturnType, Device >::type PacketReturnType
EvaluatorPointerType data() const
internal::TensorBlockNotImplemented TensorBlock
TensorEvaluator< Arg3Type, Device > m_arg3Impl
TensorEvaluator< ArgType, Device > m_argImpl
TensorEvaluator< const ArgType, Device >::TensorBlock ArgTensorBlock
internal::TensorBlockResourceRequirements getResourceRequirements() const
PacketReturnType packet(Index index) const
const Device EIGEN_DEVICE_REF m_device
TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
TensorOpCost costPerCoeff(bool vectorized) const
CoeffReturnType coeff(Index index) const
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
internal::TensorCwiseUnaryBlock< UnaryOp, ArgTensorBlock > TensorBlock
internal::traits< XprType >::Scalar CoeffReturnType
EvaluatorPointerType data() const
TensorCwiseUnaryOp< UnaryOp, ArgType > XprType
StorageMemory< CoeffReturnType, Device > Storage
TensorEvaluator(const XprType &op, const Device &device)
std::remove_const_t< Scalar > ScalarNoConst
Storage::Type EvaluatorPointerType
const Dimensions & dimensions() const
PacketType< CoeffReturnType, Device >::type PacketReturnType
TensorEvaluator< ArgType, Device >::Dimensions Dimensions
bool evalSubExprsIfNeeded(EvaluatorPointerType)
TensorSelectOp< const IfArgXprType, const ThenArgXprType, const ElseArgXprType > type
XprType< IfArgXprType, ThenArgXprType, ElseArgXprType >::type expr(const IfArgXprType &if_expr, const ThenArgXprType &then_expr, const ElseArgXprType &else_expr) const
TensorEvaluator< ElseArgType, Device > m_elseImpl
internal::TensorBlockResourceRequirements getResourceRequirements() const
bool evalSubExprsIfNeeded(EvaluatorPointerType)
const Dimensions & dimensions() const
Storage::Type EvaluatorPointerType
TensorEvaluator(const XprType &op, const Device &device)
TensorEvaluator< ThenArgType, Device > m_thenImpl
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
TensorEvaluator< const ElseArgType, Device >::TensorBlock ElseArgTensorBlock
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
TensorEvaluator< const IfArgType, Device >::TensorBlock IfArgTensorBlock
internal::scalar_boolean_select_op< typename internal::traits< ThenArgType >::Scalar, typename internal::traits< ElseArgType >::Scalar, typename internal::traits< IfArgType >::Scalar > TernarySelectOp
StorageMemory< CoeffReturnType, Device > Storage
TensorSelectOp< IfArgType, ThenArgType, ElseArgType > XprType
PacketType< CoeffReturnType, Device >::type PacketReturnType
internal::traits< XprType >::Scalar CoeffReturnType
TensorEvaluator< IfArgType, Device > m_condImpl
TensorOpCost costPerCoeff(bool vectorized) const
CoeffReturnType coeff(Index index) const
PacketReturnType packet(Index index) const
internal::TensorTernaryExprBlock< TensorSelectOpBlockFactory, IfArgTensorBlock, ThenArgTensorBlock, ElseArgTensorBlock > TensorBlock
EvaluatorPointerType data() const
TensorEvaluator< IfArgType, Device >::Dimensions Dimensions
TensorEvaluator< const ThenArgType, Device >::TensorBlock ThenArgTensorBlock
A cost model used to limit the number of threads used for evaluating a tensor expression.
const Dimensions & dimensions() const
PacketReturnType packet(Index index) const
static constexpr int Layout
const Device EIGEN_DEVICE_REF m_device
TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
void writePacket(Index index, const PacketReturnType &x) const
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
CoeffReturnType coeff(Index index) const
Storage::Type EvaluatorPointerType
CoeffReturnType & coeffRef(Index index) const
internal::TensorBlockDescriptor< NumCoords, Index > TensorBlockDesc
TensorEvaluator(const Derived &m, const Device &device)
static constexpr int PacketSize
internal::TensorBlockResourceRequirements getResourceRequirements() const
std::enable_if_t< internal::unpacket_traits< PacketReturnTypeT >::masked_load_available, PacketReturnTypeT > partialPacket(Index index, typename internal::unpacket_traits< PacketReturnTypeT >::mask_t umask) const
EvaluatorPointerType data() const
Derived::Scalar CoeffReturnType
void writeBlock(const TensorBlockDesc &desc, const TensorBlock &block)
internal::traits< Derived >::template MakePointer< Scalar >::Type TensorPointerType
static constexpr int NumCoords
internal::TensorMaterializedBlock< ScalarNoConst, NumCoords, Layout, Index > TensorBlock
StorageMemory< Scalar, Device > Storage
bool evalSubExprsIfNeeded(EvaluatorPointerType dest)
std::remove_const_t< Scalar > ScalarNoConst
PacketType< CoeffReturnType, Device >::type PacketReturnType
Derived::Dimensions Dimensions
EvaluatorPointerType m_data
TensorOpCost costPerCoeff(bool vectorized) const
CoeffReturnType coeff(const array< DenseIndex, NumCoords > &coords) const
CoeffReturnType & coeffRef(const array< DenseIndex, NumCoords > &coords) const