#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H

namespace Eigen {
namespace internal {
template<typename XprType>
struct traits<TensorForcedEvalOp<XprType> >
{
  // Type promotion to handle the case where the types of the lhs and the rhs are different.
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;

  enum { Flags = 0 };
};
template<typename XprType>
struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense>
{
  typedef const TensorForcedEvalOp<XprType>& type;
};
template<typename XprType>
struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type>
{
  typedef TensorForcedEvalOp<XprType> type;
};

}  // end namespace internal
template<typename XprType>
class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr)
        : m_xpr(expr) {}

    EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const {
      return m_xpr;
    }

  protected:
    typename XprType::Nested m_xpr;
};
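// Usage sketch: TensorForcedEvalOp is normally created through TensorBase::eval(), which
// forces the wrapped sub-expression to be materialized into a temporary buffer before the
// enclosing expression is evaluated. The tensor sizes and the broadcast below are
// illustrative only.
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 2> a(64, 64), b(64, 64);
//   a.setRandom();
//   b.setRandom();
//
//   // Without eval(), (a + b) would be recomputed for every coefficient the broadcast
//   // reads; with eval(), it is computed once into a temporary and re-read from there.
//   Eigen::array<Eigen::Index, 2> bcast = {2, 2};
//   Eigen::Tensor<float, 2> c = (a + b).eval().broadcast(bcast);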
namespace internal {

template <typename Device, typename CoeffReturnType>
struct non_integral_type_placement_new {
  template <typename StorageType>
  EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer) {
    // Non-arithmetic coefficient types must be constructed in the raw buffer before use;
    // arithmetic types are left uninitialized, since their default construction is a no-op.
    if (!internal::is_arithmetic<CoeffReturnType>::value) {
      for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType();
    }
  }
};

// SYCL device code does not support placement new, so the buffer is left as allocated.
template <typename CoeffReturnType>
struct non_integral_type_placement_new<Eigen::SyclDevice, CoeffReturnType> {
  template <typename StorageType>
  EIGEN_STRONG_INLINE void operator()(Index, StorageType) {}
};

}  // end namespace internal
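// Minimal standalone sketch of the placement-new pattern used above, in plain C++
// (std::string stands in for any non-arithmetic CoeffReturnType; the function name is
// illustrative):
//
//   #include <new>
//   #include <string>
//
//   // Construct n default-initialized objects in raw, already-allocated storage.
//   void construct_in_place(std::string* raw, long n) {
//     for (long i = 0; i < n; ++i) new (raw + i) std::string();
//   }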
template<typename ArgType_, typename Device>
struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
{
  typedef const internal::remove_all_t<ArgType_> ArgType;
  typedef TensorForcedEvalOp<ArgType> XprType;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef typename XprType::Index Index;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned         = true,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<CoeffReturnType>::value,
    PreferBlockAccess = false,
    RawAccess         = true
  };

  static constexpr int NumDims = internal::traits<ArgType>::NumDimensions;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index> TensorBlock;

  TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_op(op.expression()),
        m_device(device), m_buffer(NULL) {}

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    const Index numValues = internal::array_prod(m_impl.dimensions());
    m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(
        numValues * sizeof(CoeffReturnType)));

    // Non-trivial coefficient types must be constructed in the freshly allocated buffer.
    internal::non_integral_type_placement_new<Device, CoeffReturnType>()(numValues, m_buffer);

    typedef TensorEvalToOp<const std::remove_const_t<ArgType> > EvalTo;
    EvalTo evalToTmp(m_device.get(m_buffer), m_op);

    internal::TensorExecutor<
        const EvalTo, std::remove_const_t<Device>,
        /*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
        /*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::run(evalToTmp, m_device);

    return true;
  }
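  // At user level, the synchronous path above is equivalent to evaluating the wrapped
  // expression into a freshly allocated tensor (illustrative sketch; `expr`, `rows`,
  // `cols`, and the device `dev` are placeholders):
  //
  //   Eigen::Tensor<float, 2> tmp(rows, cols);   // plays the role of m_buffer
  //   tmp.device(dev) = expr;                    // TensorEvalToOp + TensorExecutor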
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    const Index numValues = internal::array_prod(m_impl.dimensions());
    m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(
        numValues * sizeof(CoeffReturnType)));
    typedef TensorEvalToOp<const std::remove_const_t<ArgType> > EvalTo;
    EvalTo evalToTmp(m_device.get(m_buffer), m_op);

    // Signal completion through the caller-provided callback once the executor is done.
    auto on_done = std::bind([](EvalSubExprsCallback done_) { done_(true); },
                             std::move(done));
    internal::TensorAsyncExecutor<
        const EvalTo, std::remove_const_t<Device>, decltype(on_done),
        internal::IsVectorizable<Device, const ArgType>::value,
        internal::IsTileable<Device, const ArgType>::value>::
        runAsync(evalToTmp, m_device, std::move(on_done));
  }
#endif
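  // Hedged sketch of driving this asynchronous path from user code, assuming the
  // callback-taking device() overload available with EIGEN_USE_THREADS (`expr`, `rows`,
  // and `cols` are placeholders):
  //
  //   Eigen::ThreadPool pool(4);
  //   Eigen::ThreadPoolDevice dev(&pool, 4);
  //   Eigen::Tensor<float, 2> out(rows, cols);
  //   Eigen::Barrier done(1);
  //   out.device(dev, [&done]() { done.Notify(); }) = expr.eval();
  //   done.Wait();  // block until the async evaluation has completed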
  EIGEN_STRONG_INLINE void cleanup() {
    m_device.deallocate_temp(m_buffer);
    m_buffer = NULL;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_buffer[index];
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
  }

  EIGEN_DEVICE_FUNC internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch, bool /*root_of_expr_ast*/ = false) const {
    eigen_assert(m_buffer != NULL);
    return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_buffer; }

 private:
  TensorEvaluator<ArgType, Device> m_impl;
  const ArgType m_op;
  const Device EIGEN_DEVICE_REF m_device;
  EvaluatorPointerType m_buffer;
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H