#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H

namespace Eigen {
namespace internal {
template<typename Strides, typename XprType>
struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename Strides, typename XprType>
struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingOp<Strides, XprType>EIGEN_DEVICE_REF type;
};
template<typename Strides, typename XprType>
struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
{
  typedef TensorStridingOp<Strides, XprType> type;
};

}  // end namespace internal
template<typename Strides, typename XprType>
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
  public:
    typedef TensorBase<TensorStridingOp<Strides, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC const Strides& strides() const { return m_dims; }

    EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_dims;
};
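// Usage sketch (illustrative, not part of this header): a TensorStridingOp is
// normally created via TensorBase::stride(), which keeps every strides[i]-th
// coefficient along dimension i. The tensor sizes and values below are
// hypothetical.
//
//   Eigen::Tensor<float, 2> input(6, 8);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 2}};
//   Eigen::Tensor<float, 2> output = input.stride(strides);
//   // output has dimensions ceil(6/2) x ceil(8/2) == 3 x 4, and
//   // output(i, j) == input(2 * i, 2 * j).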
// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  typedef internal::TensorBlockNotImplemented TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device)
  {
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] =
          Eigen::numext::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_inputStrides[i-1] *= op.strides()[i-1];
      }
      m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_inputStrides[i+1] *= op.strides()[i+1];
      }
      m_inputStrides[0] *= op.strides()[0];
    }
  }
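  // Worked example (hypothetical numbers): for a ColMajor 6x8 input with
  // strides {2, 2}, the output is 3x4. input_dims = {6, 8} gives raw input
  // strides {1, 6}; scaling each by its stride factor yields
  // m_inputStrides = {2, 12}, while m_outputStrides = {1, 3} describes the
  // dense 3x4 result. Output coefficient (i, j) therefore reads input
  // offset 2*i + 12*j, i.e. input(2*i, 2*j).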
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[0];
      inputIndices[1] += indices[1] * m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<Scalar> values[PacketSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }
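  // Note: the two-endpoint check above exploits the fact that if the first
  // and last coefficients of the packet are exactly PacketSize - 1 apart in
  // the input, the whole packet is contiguous and one vector load suffices;
  // otherwise the coefficients are gathered one by one via coeff().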
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
                                           TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>()) +
                          TensorOpCost::MulCost<Index>();
    if (vectorized) {
      compute_cost *= 2;  // packet() computes two indices instead of one.
    }
    const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
    return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
           // Computation is not vectorized per se, but it is done once per packet.
           TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }
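  // The cost formula charges one add, one mul and one div per dimension
  // walked in srcCoeff() (NumDims - 1 of them) plus the final multiply, and
  // only forwards vectorization to the input when the innermost input stride
  // is 1, i.e. when the inner dimension is not actually strided.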
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[NumDims-1];
    }
    return inputIndex;
  }

  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};
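// srcCoeff() above is a mixed-radix conversion from a dense output index to a
// strided input index. A minimal standalone sketch of the same arithmetic
// (plain C++, outside Eigen; the 2-D ColMajor sizes reuse the hypothetical
// 6x8-input / stride-{2,2} example from the constructor comment):
//
//   #include <array>
//   #include <cstdio>
//   int main() {
//     const std::array<long, 2> outputStrides = {1, 3};  // dense 3x4 output
//     const std::array<long, 2> inputStrides = {2, 12};  // 6x8 input, stride 2
//     for (long index = 0; index < 12; ++index) {
//       long rem = index, inputIndex = 0;
//       for (int i = 1; i > 0; --i) {                    // NumDims - 1 down to 1
//         const long idx = rem / outputStrides[i];
//         inputIndex += idx * inputStrides[i];
//         rem -= idx * outputStrides[i];
//       }
//       inputIndex += rem * inputStrides[0];
//       std::printf("output %ld -> input %ld\n", index, inputIndex);
//     }
//   }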
// Eval as lvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> Base;
  typedef TensorStridingOp<Strides, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[0];
      inputIndices[1] += indices[1] * this->m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
    } else {
      EIGEN_ALIGN_MAX Scalar values[PacketSize];
      internal::pstore<Scalar, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};
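// Usage sketch for the lvalue path (illustrative; tensor names and sizes are
// hypothetical): assigning through stride() writes back into the original
// tensor via the coeffRef()/writePacket() overloads above.
//
//   Eigen::Tensor<float, 2> dest(6, 8);
//   dest.setZero();
//   Eigen::Tensor<float, 2> src(3, 4);
//   src.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 2}};
//   dest.stride(strides) = src;  // dest(2*i, 2*j) == src(i, j)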
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H