#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
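namespace Eigen {

/** \class TensorInflation
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor inflation class.
  *
  * Inflation spreads the coefficients of a tensor apart, inserting
  * stride-1 zeros between consecutive coefficients along each dimension,
  * so a dimension of size n becomes (n - 1) * stride + 1.
  *
  * A minimal usage sketch (the tensor shape and stride values below are
  * illustrative, not part of this file):
  * \code
  * Eigen::Tensor<float, 2> input(3, 4);
  * input.setRandom();
  * Eigen::array<Eigen::Index, 2> strides{{2, 3}};
  * // inflated has dimensions ((3-1)*2+1, (4-1)*3+1) = (5, 10); entries at
  * // positions that are not multiples of the strides are zero.
  * Eigen::Tensor<float, 2> inflated = input.inflate(strides);
  * \endcode
  */
namespace internal {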
template<typename Strides, typename XprType>
struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename Strides, typename XprType>
struct eval<TensorInflationOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorInflationOp<Strides, XprType>& type;
};
template<typename Strides, typename XprType>
struct nested<TensorInflationOp<Strides, XprType>, 1, typename eval<TensorInflationOp<Strides, XprType> >::type>
{
  typedef TensorInflationOp<Strides, XprType> type;
};

}  // end namespace internal
template<typename Strides, typename XprType>
class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorInflationOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorInflationOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorInflationOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorInflationOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides)
        : m_xpr(expr), m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_strides;
};
// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
{
  typedef TensorInflationOp<Strides, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = false,
    PreferBlockAccess = false,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  typedef internal::TensorBlockNotImplemented TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_strides(op.strides())
  {
    m_dimensions = m_impl.dimensions();
    // Expand each dimension to the inflated size: (n - 1) * stride + 1.
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1;
    }

    // Remember the strides for fast division.
    for (int i = 0; i < NumDims; ++i) {
      m_fastStrides[i] = internal::TensorIntDivisor<Index>(m_strides[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }
    }
  }
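  // Illustrative example (not in the original source): for a ColMajor input of
  // dimensions (3, 4) inflated with strides (2, 3), the output dimensions are
  // (5, 10), m_inputStrides = {1, 3}, and m_outputStrides = {1, 5}. An output
  // coefficient maps back to an input coefficient only when each of its
  // per-dimension indices is a multiple of the corresponding stride.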
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
  // Computes the input index given the output index. Returns true if the output
  // index doesn't fall into a hole.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const
  {
    eigen_assert(index < dimensions().TotalSize());
    *inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
          return false;  // the index lands in a hole along dimension i
        }
        *inputIndex += idx / m_strides[i] * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (index != index / m_fastStrides[0] * m_strides[0]) {
        return false;
      }
      *inputIndex += index / m_strides[0];
      return true;
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
          return false;
        }
        *inputIndex += idx / m_strides[i] * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) {
        return false;
      }
      *inputIndex += index / m_strides[NumDims - 1];
      return true;
    }
  }
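  // Worked example (illustrative, not from the original source): for a 1-D
  // tensor of size 3 inflated with stride 3, the output has size 7. Output
  // index 6 passes the hole test (6 == (6/3)*3) and maps to input index
  // 6/3 = 2, while output index 5 fails it (5 != (5/3)*3 = 3), so coeff()
  // returns zero there.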
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    Index inputIndex = 0;
    if (getInputIndex(index, &inputIndex)) {
      return m_impl.coeff(inputIndex);
    } else {
      return Scalar(0);  // coefficients in holes are zero
    }
  }
  // TODO(yangke): optimize this function so that we can detect and produce
  // all-zero packets
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    // Load the packet coefficient by coefficient: any of them may be a hole.
    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    return internal::pload<PacketReturnType>(values);
  }
  // Cost model: each output coefficient requires recomputing a full input index.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double compute_cost = NumDims * (3 * TensorOpCost::DivCost<Index>() +
                                           3 * TensorOpCost::MulCost<Index>() +
                                           2 * TensorOpCost::AddCost<Index>());
    const double input_size = m_impl.dimensions().TotalSize();
    const double output_size = m_dimensions.TotalSize();
    if (output_size == 0)
      return TensorOpCost();
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0,
                        compute_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Strides m_strides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastStrides;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H