TensorStriding.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorStridingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor striding class.
  *
  */
namespace internal {
template<typename Strides, typename XprType>
struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Strides, typename XprType>
struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingOp<Strides, XprType> EIGEN_DEVICE_REF type;
};

template<typename Strides, typename XprType>
struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
{
  typedef TensorStridingOp<Strides, XprType> type;
};

} // end namespace internal


template<typename Strides, typename XprType>
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
  public:
    typedef TensorBase<TensorStridingOp<Strides, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_dims;
};
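
// Usage sketch (illustrative): TensorStridingOp is normally created via
// TensorBase::stride(). Each output dimension i has size
// ceil(input_dim[i] / strides[i]), and output coefficient (i0, i1, ...)
// reads input coefficient (i0 * s0, i1 * s1, ...). For example:
//
//   Eigen::Tensor<float, 2> input(6, 9);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 3}};
//   Eigen::Tensor<float, 2> output = input.stride(strides);
//   // output is 3 x 3, and output(i, j) == input(2 * i, 3 * j).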


// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device)
  {
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] = Eigen::numext::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_inputStrides[i-1] *= op.strides()[i-1];
      }
      m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_inputStrides[i+1] *= op.strides()[i+1];
      }
      m_inputStrides[0] *= op.strides()[0];
    }
  }
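
  // Worked example (illustrative): for a ColMajor 2D input of dimensions
  // (6, 9) strided by (2, 3), the constructor above produces
  //   m_dimensions    = (3, 3)    // ceil(6/2), ceil(9/3)
  //   m_outputStrides = (1, 3)
  //   m_inputStrides  = (2, 18)   // plain strides (1, 6) scaled by (2, 3)
  // so output coefficient (i, j) at linear index i + 3*j maps to input
  // linear index 2*i + 18*j, i.e. input coefficient (2*i, 3*j).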


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[0];
      inputIndices[1] += indices[1] * m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }
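
  // Note (illustrative): the check above takes the fast path only when the
  // packet covers PacketSize consecutive input elements (e.g. an inner
  // stride of 1 with the packet contained in one inner-dimension run).
  // Otherwise the load is gathered coefficient by coefficient: with
  // PacketSize == 4 and an inner stride of 2, output indices 0..3 read
  // input elements 0, 2, 4, 6 through the scalar coeff() path.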

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
                                           TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>()) +
                          TensorOpCost::MulCost<Index>();
    if (vectorized) {
      compute_cost *= 2;  // packet() computes two indices
    }
    const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
    return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
        // Computation is not vectorized per se, but it is done once per packet.
        TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }
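
  // Example (illustrative): with NumDims == 2 the index arithmetic per
  // coefficient is one add, one mul and one div for the outer dimension,
  // plus one final mul; packet() performs it twice (once per packet end).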

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[NumDims-1];
    }
    return inputIndex;
  }
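
  // Trace (illustrative), continuing the ColMajor (6, 9)-by-(2, 3) example:
  // output index 7 == coefficient (1, 2); with m_outputStrides == (1, 3) and
  // m_inputStrides == (2, 18), idx = 7 / 3 = 2 gives inputIndex = 36, the
  // remainder 1 then adds 1 * 2, and the result 38 is input coefficient (2, 6).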

  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};

// Eval as lvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> Base;
  //  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  //  typedef DSizes<Index, NumDims> Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) const
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[0];
      inputIndices[1] += indices[1] * this->m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX Scalar values[PacketSize];
      internal::pstore<Scalar, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};
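
// Usage sketch (illustrative): the lvalue evaluator lets a strided view sit
// on the left-hand side of an assignment:
//
//   Eigen::Tensor<float, 2> dst(6, 9);
//   dst.setZero();
//   Eigen::Tensor<float, 2> src(3, 3);
//   src.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 3}};
//   dst.stride(strides) = src;  // writes src(i, j) into dst(2 * i, 3 * j)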


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H