TensorReverse.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
//                    Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
template<typename ReverseDimensions, typename XprType>
struct traits<TensorReverseOp<ReverseDimensions,
                              XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename ReverseDimensions, typename XprType>
struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReverseOp<ReverseDimensions, XprType>& type;
};

template<typename ReverseDimensions, typename XprType>
struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
              typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type>
{
  typedef TensorReverseOp<ReverseDimensions, XprType> type;
};

}  // end namespace internal

template<typename ReverseDimensions, typename XprType>
class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
                                          XprType>, WriteAccessors>
{
  public:
    typedef TensorBase<TensorReverseOp<ReverseDimensions, XprType>, WriteAccessors> Base;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind
                                                                    StorageKind;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
        const XprType& expr, const ReverseDimensions& reverse_dims)
        : m_xpr(expr), m_reverse_dims(reverse_dims) { }

    EIGEN_DEVICE_FUNC
    const ReverseDimensions& reverse() const { return m_reverse_dims; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)

  protected:
    typename XprType::Nested m_xpr;
    const ReverseDimensions m_reverse_dims;
};

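// Note (added for clarity): TensorReverseOp is the expression type produced
// by the Tensor API's reverse() method. It is declared with WriteAccessors,
// so it can be read as an rvalue (const evaluator below) or assigned to as an
// lvalue (non-const evaluator at the end of this file).
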
// Eval as rvalue
template<typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
{
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = NumDims > 0,
    PreferBlockAccess = true,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_reverse(op.reverse()),
        m_device(device)
  {
    // Reversing a scalar isn't supported yet. It would be a no-op anyway.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute strides
    m_dimensions = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

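  // Maps a linear index in the reversed output to the linear index of the
  // corresponding input coefficient: the coordinate along each reversed
  // dimension d is flipped to (m_dimensions[d] - 1 - coord). Worked example
  // (added for illustration): col-major 2x3 tensor, m_reverse = {true, false},
  // strides {1, 2}; output index 3 -> coords (1, 1) -> flipped to (0, 1)
  // -> input index 0*1 + 1*2 = 2.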
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(
      Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[0]) {
        inputIndex += (m_dimensions[0] - index - 1);
      } else {
        inputIndex += index;
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[NumDims-1]) {
        inputIndex += (m_dimensions[NumDims-1] - index - 1);
      } else {
        inputIndex += index;
      }
    }
    return inputIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
      Index index) const {
    return m_impl.coeff(reverseIndex(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // TODO(ndjaitly): write a better packing routine that uses
    // local structure.
    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType>
        values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    // Block evaluation reads underlying memory in reverse order, and default
    // cost model does not properly catch this in bytes stored/loaded.
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
               target_size)
        .addCostPerCoeff({0, 0, 24});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // TODO(ezhulenev): If underlying tensor expression supports and prefers
    // block evaluation we must use it. Currently we use coeff and packet
    // access into the underlying tensor expression.
    // static const bool useBlockAccessForArgType =
    //     TensorEvaluator<ArgType, Device>::BlockAccess &&
    //     TensorEvaluator<ArgType, Device>::PreferBlockAccess;

    static const bool isColMajor =
        static_cast<int>(Layout) == static_cast<int>(ColMajor);

    static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
    const bool inner_dim_reversed = m_reverse[inner_dim_idx];

    // Offset in the output block.
    Index block_offset = 0;

    // Offset in the input Tensor.
    Index input_offset = reverseIndex(desc.offset());

    // Initialize output block iterator state. Dimensions in this array are
    // always in inner_most -> outer_most order (col major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = isColMajor ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].count = 0;
      it[i].reverse = m_reverse[dim];

      it[i].block_stride =
          i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
      it[i].block_span = it[i].block_stride * (it[i].size - 1);

      it[i].input_stride = m_strides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      if (it[i].reverse) {
        it[i].input_stride = -1 * it[i].input_stride;
        it[i].input_span = -1 * it[i].input_span;
      }
    }

    // If multiple inner dimensions have the same reverse flag, check if we can
    // merge them into a single virtual inner dimension.
    int effective_inner_dim = 0;
    for (int i = 1; i < NumDims; ++i) {
      if (it[i].reverse != it[effective_inner_dim].reverse) break;
      if (it[i].block_stride != it[effective_inner_dim].size) break;
      if (it[i].block_stride != numext::abs(it[i].input_stride)) break;

      it[i].size = it[effective_inner_dim].size * it[i].size;

      it[i].block_stride = 1;
      it[i].input_stride = (inner_dim_reversed ? -1 : 1);

      it[i].block_span = it[i].block_stride * (it[i].size - 1);
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      effective_inner_dim = i;
    }

    eigen_assert(it[effective_inner_dim].block_stride == 1);
    eigen_assert(it[effective_inner_dim].input_stride ==
                 (inner_dim_reversed ? -1 : 1));

    const Index inner_dim_size = it[effective_inner_dim].size;

    // Prepare storage for the materialized reverse result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      // Copy inner-most dimension data from reversed location in input.
      Index dst = block_offset;
      Index src = input_offset;

      // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed
      // worse results in benchmarks than a simple coefficient loop.
      if (inner_dim_reversed) {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          --src;
        }
      } else {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          ++src;
        }
      }

      // For the 1d tensor we need to generate only one inner-most dimension.
      if ((NumDims - effective_inner_dim) == 1) break;

      // Update offset.
      for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          block_offset += it[i].block_stride;
          input_offset += it[i].input_stride;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        block_offset -= it[i].block_span;
        input_offset -= it[i].input_span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                     2 * TensorOpCost::MulCost<Index>() +
                                     TensorOpCost::DivCost<Index>());
    for (int i = 0; i < NumDims; ++i) {
      if (m_reverse[i]) {
        compute_cost += 2 * TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fastStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  ReverseDimensions m_reverse;
  const Device EIGEN_DEVICE_REF m_device;

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : size(0),
          count(0),
          reverse(false),
          block_stride(0),
          block_span(0),
          input_stride(0),
          input_span(0) {}

    Index size;
    Index count;
    bool reverse;
    Index block_stride;
    Index block_span;
    Index input_stride;
    Index input_span;
  };
};

// Eval as lvalue

template <typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                             Device> {
  typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                          Device> Base;
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return this->m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) const {
    return this->m_impl.coeffRef(this->reverseIndex(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) const {
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // This code is pilfered from TensorMorphing.h
    EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index+i) = values[i];
    }
  }
};


}  // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
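
Usage sketch (not part of the header above): the reverse expression is normally created through the Tensor API's reverse() method rather than by instantiating TensorReverseOp directly, and because the op has write access it can also appear on the left-hand side of an assignment. A minimal example, assuming the unsupported CXX11 Tensor module is available at the usual include path:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> t(2, 3);
  t.setValues({{1, 2, 3}, {4, 5, 6}});

  // Reverse along the first dimension only.
  Eigen::array<bool, 2> rev{{true, false}};

  // Rvalue use: evaluated through the const evaluator above (reverseIndex /
  // block paths) and materialized into a new tensor.
  Eigen::Tensor<float, 2> r = t.reverse(rev);
  std::cout << r(0, 0) << std::endl;  // prints 4, i.e. t(1, 0)

  // Lvalue use: writes through the non-const evaluator's coeffRef().
  Eigen::Tensor<float, 2> ones(2, 3);
  ones.setConstant(1.0f);
  t.reverse(rev) = ones;  // t is now filled with ones

  return 0;
}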