TensorMorphing.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
12 
13 #include "./InternalHeaderCheck.h"
14 
15 namespace Eigen {
16 
24 namespace internal {
25 template<typename NewDimensions, typename XprType>
26 struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
27 {
28  typedef typename XprType::Scalar Scalar;
29  typedef traits<XprType> XprTraits;
30  typedef typename XprTraits::StorageKind StorageKind;
31  typedef typename XprTraits::Index Index;
32  typedef typename XprType::Nested Nested;
33  typedef std::remove_reference_t<Nested> Nested_;
34  static constexpr int NumDimensions = array_size<NewDimensions>::value;
35  static constexpr int Layout = XprTraits::Layout;
36  typedef typename XprTraits::PointerType PointerType;
37 };
38 
39 template<typename NewDimensions, typename XprType>
40 struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
41 {
42  typedef const TensorReshapingOp<NewDimensions, XprType>EIGEN_DEVICE_REF type;
43 };
44 
45 template<typename NewDimensions, typename XprType>
46 struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
47 {
48  typedef TensorReshapingOp<NewDimensions, XprType> type;
49 };
50 
51 } // end namespace internal
52 
53 
54 
55 template<typename NewDimensions, typename XprType>
56 class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
57 {
58  public:
59  typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
60  typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
61  typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
62  typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
63  typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
64  typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
65 
66  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
67  : m_xpr(expr), m_dims(dims) {}
68 
69  EIGEN_DEVICE_FUNC
70  const NewDimensions& dimensions() const { return m_dims; }
71 
72  EIGEN_DEVICE_FUNC
73  const internal::remove_all_t<typename XprType::Nested>&
74  expression() const { return m_xpr; }
75 
76  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)
77 
78  protected:
79  typename XprType::Nested m_xpr;
80  const NewDimensions m_dims;
81 };
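For reference, a minimal usage sketch of TensorReshapingOp as produced by TensorBase::reshape(). This example is not part of the original file; the tensor sizes and variable names are illustrative, and it assumes the unsupported Eigen Tensor module header is available.

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<float, 2> input(4, 6);
    input.setRandom();

    // Reshape the 4x6 tensor into a rank-3 2x2x6 tensor. The total number of
    // coefficients must match, as asserted by the evaluator further below.
    Eigen::array<Eigen::Index, 3> new_dims{{2, 2, 6}};
    Eigen::Tensor<float, 3> reshaped = input.reshape(new_dims);

    return reshaped.dimension(0) == 2 ? 0 : 1;
  }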
82 
83 
84 // Eval as rvalue
85 template<typename NewDimensions, typename ArgType, typename Device>
86 struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
87 {
88  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
89  typedef NewDimensions Dimensions;
90 
91  typedef typename XprType::Index Index;
92  typedef typename XprType::Scalar Scalar;
93  typedef typename XprType::CoeffReturnType CoeffReturnType;
94  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
95  typedef StorageMemory<CoeffReturnType, Device> Storage;
96  typedef typename Storage::Type EvaluatorPointerType;
97  typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
98 
99  static constexpr int NumOutputDims = internal::array_size<Dimensions>::value;
100  static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
101 
102  enum ReshapingKind {
103  // We do not use layout information to determine reshaping kind.
104  // Depending on the layout `N` can be inner or outer dimension.
105  OneByN = 0, // expr.reshape(1, N)
106  NByOne = 1, // expr.reshape(N, 1)
107  Runtime = 2 // Reshape dimensions are dynamic (specified at runtime).
108  };
109 
110  // clang-format off
111  static const ReshapingKind kind =
112  (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/0, /*value=*/1)) ? OneByN
113  : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/1, /*value=*/1)) ? NByOne
114  : Runtime;
115  // clang-format on
116 
117  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
118  enum {
119  IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
120  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
121  // For trivial reshapes with raw access to underlying data we will provide
122  // zero overhead block access.
123  // TODO(ezhulenev): Consider adding block access without raw access?
124  BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess &&
125  NumInputDims > 0 && NumOutputDims > 0,
126  PreferBlockAccess = false,
127  CoordAccess = false, // to be implemented
128  RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
129  };
130 
131  typedef std::remove_const_t<Scalar> ScalarNoConst;
132 
133  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
134  typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
135  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
136 
137  typedef
138  typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims,
139  Layout, Index>
140  TensorBlock;
141  //===--------------------------------------------------------------------===//
142 
143  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
144  : m_impl(op.expression(), device), m_dimensions(op.dimensions())
145  {
146  // The total size of the reshaped tensor must be equal to the total size
147  // of the input tensor.
148  eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
149  }
150 
151  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
152 
153 #ifdef EIGEN_USE_THREADS
154  template <typename EvalSubExprsCallback>
155  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
156  EvaluatorPointerType data, EvalSubExprsCallback done) {
157  m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
158  }
159 #endif
160 
161  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
162  return m_impl.evalSubExprsIfNeeded(data);
163  }
164  EIGEN_STRONG_INLINE void cleanup() {
165  m_impl.cleanup();
166  }
167 
168  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
169  {
170  return m_impl.coeff(index);
171  }
172 
173  template<int LoadMode>
174  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
175  {
176  return m_impl.template packet<LoadMode>(index);
177  }
178 
179  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
180  return m_impl.costPerCoeff(vectorized);
181  }
182 
183  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
184  internal::TensorBlockResourceRequirements getResourceRequirements() const {
185  return internal::TensorBlockResourceRequirements::any();
186  }
187 
188  // required in block(OutputTensorBlock* output_block) const
189  // For C++03 compatibility this must be defined outside the method
190  struct BlockIteratorState {
195  };
196 
197  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
198  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
199  bool /*root_of_expr_ast*/ = false) const {
200  eigen_assert(m_impl.data() != NULL);
201  eigen_assert((kind == Runtime) ||
202  (kind == OneByN && desc.dimensions()[0] == 1) ||
203  (kind == NByOne && desc.dimensions()[1] == 1));
204 
205  if (kind == OneByN || kind == NByOne) {
206  // We can guarantee at compile time that block is just a contiguous slice
207  // of the underlying expression memory buffer.
208  return TensorBlock(internal::TensorBlockKind::kView,
209  m_impl.data() + desc.offset(), desc.dimensions());
210  } else {
211  // This will do additional runtime checks, and in the end it might be also
212  // a view, or it might be a block materialized in the temporary buffer.
213  return TensorBlock::materialize(m_impl.data(), m_dimensions, desc,
214  scratch);
215  }
216  }
217 
218  EIGEN_DEVICE_FUNC typename Storage::Type data() const {
219  return constCast(m_impl.data());
220  }
221 
222  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
223 
224  protected:
225  TensorEvaluator<ArgType, Device> m_impl;
226  NewDimensions m_dimensions;
227 };
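As a rough illustration of the OneByN / NByOne classification above: when the new dimensions are supplied as a fixed-size Eigen::Sizes<...>, internal::index_statically_eq can resolve the kind at compile time, whereas dimensions passed as a runtime Eigen::array fall back to Runtime. A hedged sketch, not part of the original file; tensor sizes are illustrative:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<float, 2> t(4, 4);
    t.setZero();

    // Fixed-size dimensions: the (1, N) shape is known at compile time, so the
    // evaluator can classify this reshape as OneByN and serve blocks as views.
    auto one_by_n = t.reshape(Eigen::Sizes<1, 16>());
    Eigen::Tensor<float, 0> total = one_by_n.sum();

    // Dynamic dimensions: the kind can only be determined at runtime.
    Eigen::array<Eigen::Index, 2> dyn{{16, 1}};
    Eigen::Tensor<float, 2> n_by_one = t.reshape(dyn);

    return (total() == 0.0f && n_by_one.dimension(1) == 1) ? 0 : 1;
  }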
228 
229 
230 // Eval as lvalue
231 template<typename NewDimensions, typename ArgType, typename Device>
232  struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
233  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
234 
235 {
236  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
237  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
238  typedef NewDimensions Dimensions;
239 
240  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
241  enum {
242  IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
243  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
244  BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
245  PreferBlockAccess = false,
246  CoordAccess = false, // to be implemented
247  RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
248  };
249 
250  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
251  : Base(op, device)
252  { }
253 
254  typedef typename XprType::Index Index;
255  typedef typename XprType::Scalar Scalar;
256  typedef typename XprType::CoeffReturnType CoeffReturnType;
257  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
258 
259  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
260  typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index>
261  TensorBlockDesc;
262  //===--------------------------------------------------------------------===//
263 
264  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const
265  {
266  return this->m_impl.coeffRef(index);
267  }
268 
269  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
270  void writePacket(Index index, const PacketReturnType& x) const
271  {
272  this->m_impl.template writePacket<StoreMode>(index, x);
273  }
274 
275  template <typename TensorBlock>
276  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
277  const TensorBlockDesc& desc, const TensorBlock& block) {
278  eigen_assert(this->m_impl.data() != NULL);
279 
280  typedef typename TensorBlock::XprType TensorBlockExpr;
281  typedef internal::TensorBlockAssignment<
282  Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index>
283  TensorBlockAssign;
284 
285  TensorBlockAssign::Run(
286  TensorBlockAssign::target(desc.dimensions(),
287  internal::strides<Layout>(this->dimensions()),
288  this->m_impl.data(), desc.offset()),
289  block.expr());
290  }
291 };
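Because TensorReshapingOp is declared with WriteAccessors and has the lvalue evaluator above, a reshape can also appear on the left-hand side of an assignment. A minimal sketch, not part of the original file; names and sizes are illustrative:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<float, 1> flat(12);
    Eigen::Tensor<float, 2> grid(3, 4);
    grid.setConstant(1.0f);

    // Write through the reshaped view of `flat`.
    Eigen::array<Eigen::Index, 2> dims{{3, 4}};
    flat.reshape(dims) = grid;

    return flat(0) == 1.0f ? 0 : 1;
  }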
292 
293 
301 namespace internal {
302 template<typename StartIndices, typename Sizes, typename XprType>
303 struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
304 {
305  typedef typename XprType::Scalar Scalar;
306  typedef traits<XprType> XprTraits;
307  typedef typename XprTraits::StorageKind StorageKind;
308  typedef typename XprTraits::Index Index;
309  typedef typename XprType::Nested Nested;
310  typedef std::remove_reference_t<Nested> Nested_;
311  static constexpr int NumDimensions = array_size<StartIndices>::value;
312  static constexpr int Layout = XprTraits::Layout;
313  typedef typename XprTraits::PointerType PointerType;
314 };
315 
316 template<typename StartIndices, typename Sizes, typename XprType>
317 struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
318 {
319  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>EIGEN_DEVICE_REF type;
320 };
321 
322 template<typename StartIndices, typename Sizes, typename XprType>
323 struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
324 {
325  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
326 };
327 
328 } // end namespace internal
329 
330 
331 
332 template<typename StartIndices, typename Sizes, typename XprType>
333 class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
334 {
335  public:
336  typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > Base;
337  typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
338  typedef typename XprType::CoeffReturnType CoeffReturnType;
339  typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
340  typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
341  typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;
342 
343  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
344  : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}
345 
346  EIGEN_DEVICE_FUNC
347  const StartIndices& startIndices() const { return m_indices; }
348  EIGEN_DEVICE_FUNC
349  const Sizes& sizes() const { return m_sizes; }
350 
351  EIGEN_DEVICE_FUNC
352  const internal::remove_all_t<typename XprType::Nested>&
353  expression() const { return m_xpr; }
354 
355  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)
356 
357  protected:
358  typename XprType::Nested m_xpr;
359  const StartIndices m_indices;
360  const Sizes m_sizes;
361 };
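A minimal usage sketch of TensorSlicingOp as produced by TensorBase::slice(); this is not part of the original file, and the offsets and extents below are illustrative only:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<int, 2> t(5, 7);
    t.setConstant(2);

    // Extract a 2x3 patch starting at row 1, column 4. startIndices + sizes
    // must stay within the input dimensions (see the assert in the evaluator).
    Eigen::array<Eigen::Index, 2> offsets{{1, 4}};
    Eigen::array<Eigen::Index, 2> extents{{2, 3}};
    Eigen::Tensor<int, 2> patch = t.slice(offsets, extents);

    return patch.dimension(1) == 3 ? 0 : 1;
  }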
362 
363 
364 namespace internal {
365 
366 // Fixme: figure out the exact threshold
367 template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
368  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
369  EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
370  const bool prefer_block_evaluation = BlockAccess && total > 32*1024;
371  return !prefer_block_evaluation && contiguous > threshold_;
372  }
373 
374  private:
375  Index threshold_;
376 };
377 
378 // It is very expensive to start the memcpy kernel on GPU: we therefore only
379 // use it for large copies.
380 #ifdef EIGEN_USE_GPU
381 template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess> {
382  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
383  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
384 };
385 #endif
386 
387 // It is very expensive to start the memcpy kernel on GPU: we therefore only
388 // use it for large copies.
389 #ifdef EIGEN_USE_SYCL
390 template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess> {
391  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
392  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
393 };
394 #endif
395 
396 } // namespace internal
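The heuristic above can be read as: prefer a bulk memcpy when the innermost contiguous run of the slice is longer than roughly twice the number of device threads, unless block evaluation is available and the slice is large (over 32K coefficients). A standalone sketch of that decision, for illustration only; the function name and parameters are ours, not part of Eigen:

  #include <cstddef>

  // Mirrors the CPU MemcpyTriggerForSlicing logic above.
  bool prefer_memcpy_for_slice(std::ptrdiff_t total, std::ptrdiff_t contiguous,
                               int num_threads, bool block_access) {
    const std::ptrdiff_t threshold = 2 * num_threads;
    const bool prefer_block_evaluation = block_access && total > 32 * 1024;
    return !prefer_block_evaluation && contiguous > threshold;
  }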
397 
398 // Eval as rvalue
399 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
400 struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
401 {
402  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
403  static constexpr int NumDims = internal::array_size<Sizes>::value;
404 
405  typedef typename XprType::Index Index;
406  typedef typename XprType::Scalar Scalar;
407  typedef typename XprType::CoeffReturnType CoeffReturnType;
408  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
409  typedef Sizes Dimensions;
410  typedef StorageMemory<CoeffReturnType, Device> Storage;
411  typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
412  typedef typename Storage::Type EvaluatorPointerType;
413 
414  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
415  enum {
416  // Alignment can't be guaranteed at compile time since it depends on the
417  // slice offsets and sizes.
418  IsAligned = false,
419  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
420  BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
421  // FIXME: Temporary workaround for bug in slicing of bool tensors.
422  !internal::is_same<std::remove_const_t<Scalar>, bool>::value,
423  PreferBlockAccess = true,
424  CoordAccess = false,
425  RawAccess = false
426  };
427 
428  typedef std::remove_const_t<Scalar> ScalarNoConst;
429 
430  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
431  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
432  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
433 
434  // Tensor slicing does not change the block type.
435  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
436  TensorBlock;
437  //===--------------------------------------------------------------------===//
438 
439  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
440  : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
441  {
442  m_is_identity = true;
443  for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
444  eigen_assert(m_impl.dimensions()[i] >=
445  op.sizes()[i] + op.startIndices()[i]);
446  if (m_impl.dimensions()[i] != op.sizes()[i] ||
447  op.startIndices()[i] != 0) {
448  m_is_identity = false;
449  }
450  }
451 
452  // No strides for scalars.
453  if (NumDims == 0) return;
454 
455  const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
456  const Sizes& output_dims = op.sizes();
457  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
458  m_inputStrides[0] = 1;
459  for (int i = 1; i < NumDims; ++i) {
460  m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
461  }
462 
463  // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
464  m_outputStrides[0] = 1;
465  for (int i = 1; i < NumDims; ++i) {
466  m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
467  m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
468  }
469  } else {
470  m_inputStrides[NumDims-1] = 1;
471  for (int i = NumDims - 2; i >= 0; --i) {
472  m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
473  }
474 
475  // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
476  m_outputStrides[NumDims-1] = 1;
477  for (int i = NumDims - 2; i >= 0; --i) {
478  m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
479  m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
480  }
481  }
482  }
483 
484  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
485 
486  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
487  m_impl.evalSubExprsIfNeeded(NULL);
488  if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization
489  && data && m_impl.data()) {
490  Index contiguous_values = 1;
491  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
492  for (int i = 0; i < NumDims; ++i) {
493  contiguous_values *= dimensions()[i];
494  if (dimensions()[i] != m_impl.dimensions()[i]) {
495  break;
496  }
497  }
498  } else {
499  for (int i = NumDims-1; i >= 0; --i) {
500  contiguous_values *= dimensions()[i];
501  if (dimensions()[i] != m_impl.dimensions()[i]) {
502  break;
503  }
504  }
505  }
506  // Use memcpy if it's going to be faster than using the regular evaluation.
507  const internal::MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
508  if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
509  EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
510  for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
511  Index offset = srcCoeff(i);
512  m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar));
513  }
514  return false;
515  }
516  }
517  return true;
518  }
519 
520 #ifdef EIGEN_USE_THREADS
521  template <typename EvalSubExprsCallback>
522  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
523  EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
524  m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
525  }
526 #endif // EIGEN_USE_THREADS
527 
528  EIGEN_STRONG_INLINE void cleanup() {
529  m_impl.cleanup();
530  }
531 
532  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
533  {
534  if (m_is_identity) {
535  return m_impl.coeff(index);
536  } else {
537  return m_impl.coeff(srcCoeff(index));
538  }
539  }
540 
541  template<int LoadMode>
542  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
543  {
544  const int packetSize = PacketType<CoeffReturnType, Device>::size;
545  EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
546  eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
547 
548  if (m_is_identity) {
549  return m_impl.template packet<LoadMode>(index);
550  }
551 
552  Index inputIndices[] = {0, 0};
553  Index indices[] = {index, index + packetSize - 1};
554  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
556  for (int i = NumDims - 1; i > 0; --i) {
557  const Index idx0 = indices[0] / m_fastOutputStrides[i];
558  const Index idx1 = indices[1] / m_fastOutputStrides[i];
559  inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
560  inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
561  indices[0] -= idx0 * m_outputStrides[i];
562  indices[1] -= idx1 * m_outputStrides[i];
563  }
564  inputIndices[0] += (indices[0] + m_offsets[0]);
565  inputIndices[1] += (indices[1] + m_offsets[0]);
566  } else {
568  for (int i = 0; i < NumDims - 1; ++i) {
569  const Index idx0 = indices[0] / m_fastOutputStrides[i];
570  const Index idx1 = indices[1] / m_fastOutputStrides[i];
571  inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
572  inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
573  indices[0] -= idx0 * m_outputStrides[i];
574  indices[1] -= idx1 * m_outputStrides[i];
575  }
576  inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
577  inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
578  }
579  if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
580  PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
581  return rslt;
582  }
583  else {
584  EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
585  values[0] = m_impl.coeff(inputIndices[0]);
586  values[packetSize-1] = m_impl.coeff(inputIndices[1]);
588  for (int i = 1; i < packetSize-1; ++i) {
589  values[i] = coeff(index+i);
590  }
591  PacketReturnType rslt = internal::pload<PacketReturnType>(values);
592  return rslt;
593  }
594  }
595 
596  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
597  return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
598  }
599 
600  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
601  internal::TensorBlockResourceRequirements getResourceRequirements() const {
602  const size_t target_size = m_device.lastLevelCacheSize();
603  return internal::TensorBlockResourceRequirements::merge(
604  internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
605  m_impl.getResourceRequirements());
606  }
607 
608  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
609  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
610  bool /*root_of_expr_ast*/ = false) const {
611  TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
612  TensorBlock block = m_impl.block(arg_desc, scratch);
613  if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
614  return block;
615  }
616 
617  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
618  typename Storage::Type result = constCast(m_impl.data());
619  if (result) {
620  Index offset = 0;
621  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
622  for (int i = 0; i < NumDims; ++i) {
623  if (m_dimensions[i] != m_impl.dimensions()[i]) {
624  offset += m_offsets[i] * m_inputStrides[i];
625  for (int j = i+1; j < NumDims; ++j) {
626  if (m_dimensions[j] > 1) {
627  return NULL;
628  }
629  offset += m_offsets[j] * m_inputStrides[j];
630  }
631  break;
632  }
633  }
634  } else {
635  for (int i = NumDims - 1; i >= 0; --i) {
636  if (m_dimensions[i] != m_impl.dimensions()[i]) {
637  offset += m_offsets[i] * m_inputStrides[i];
638  for (int j = i-1; j >= 0; --j) {
639  if (m_dimensions[j] > 1) {
640  return NULL;
641  }
642  offset += m_offsets[j] * m_inputStrides[j];
643  }
644  break;
645  }
646  }
647  }
648  return result + offset;
649  }
650  return NULL;
651  }
652 
653  protected:
654  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
655  {
656  Index inputIndex = 0;
657  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
659  for (int i = NumDims - 1; i > 0; --i) {
660  const Index idx = index / m_fastOutputStrides[i];
661  inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
662  index -= idx * m_outputStrides[i];
663  }
664  inputIndex += (index + m_offsets[0]);
665  } else {
667  for (int i = 0; i < NumDims - 1; ++i) {
668  const Index idx = index / m_fastOutputStrides[i];
669  inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
670  index -= idx * m_outputStrides[i];
671  }
672  inputIndex += (index + m_offsets[NumDims-1]);
673  }
674  return inputIndex;
675  }
676 
677  array<Index, NumDims> m_outputStrides;
678  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
679  array<Index, NumDims> m_inputStrides;
680  bool m_is_identity;
681  TensorEvaluator<ArgType, Device> m_impl;
682  const Device EIGEN_DEVICE_REF m_device;
683  Dimensions m_dimensions;
684  const StartIndices m_offsets;
685 };
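To make the index arithmetic in srcCoeff() above easier to follow, here is a simplified standalone sketch of the column-major remapping for a rank-2 slice: the linear output index is decomposed with the output strides, each coordinate is shifted by the slice offset, and the result is re-linearized with the input strides (the innermost input stride is 1 by construction). This is not part of the original file; the function name and the fixed rank are illustrative only.

  #include <array>
  #include <cstddef>

  std::ptrdiff_t slice_src_coeff(std::ptrdiff_t index,
                                 const std::array<std::ptrdiff_t, 2>& output_strides,
                                 const std::array<std::ptrdiff_t, 2>& input_strides,
                                 const std::array<std::ptrdiff_t, 2>& offsets) {
    std::ptrdiff_t input_index = 0;
    for (int i = 2 - 1; i > 0; --i) {
      const std::ptrdiff_t idx = index / output_strides[i];  // coordinate in dim i
      input_index += (idx + offsets[i]) * input_strides[i];  // shift by slice offset
      index -= idx * output_strides[i];                      // remainder for inner dims
    }
    input_index += index + offsets[0];  // innermost dimension has stride 1
    return input_index;
  }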
686 
687 
688 // Eval as lvalue
689 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
690 struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
691  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
692 {
693  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
694  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
695  static constexpr int NumDims = internal::array_size<Sizes>::value;
696 
697  typedef typename XprType::Index Index;
698  typedef typename XprType::Scalar Scalar;
699  typedef typename XprType::CoeffReturnType CoeffReturnType;
700  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
701  typedef Sizes Dimensions;
702 
703  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
704  enum {
705  IsAligned = false,
706  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
707  BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
708  PreferBlockAccess = true,
709  CoordAccess = false,
710  RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
711  };
712 
713  typedef std::remove_const_t<Scalar> ScalarNoConst;
714 
715  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
716  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
717  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
718  //===--------------------------------------------------------------------===//
719 
720  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
721  : Base(op, device)
722  { }
723 
724  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const
725  {
726  if (this->m_is_identity) {
727  return this->m_impl.coeffRef(index);
728  } else {
729  return this->m_impl.coeffRef(this->srcCoeff(index));
730  }
731  }
732 
733  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
734  void writePacket(Index index, const PacketReturnType& x) const
735  {
736  if (this->m_is_identity) {
737  this->m_impl.template writePacket<StoreMode>(index, x);
738  return;
739  }
740 
741  const int packetSize = PacketType<CoeffReturnType, Device>::size;
742  Index inputIndices[] = {0, 0};
743  Index indices[] = {index, index + packetSize - 1};
744  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
746  for (int i = NumDims - 1; i > 0; --i) {
747  const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
748  const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
749  inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
750  inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
751  indices[0] -= idx0 * this->m_outputStrides[i];
752  indices[1] -= idx1 * this->m_outputStrides[i];
753  }
754  inputIndices[0] += (indices[0] + this->m_offsets[0]);
755  inputIndices[1] += (indices[1] + this->m_offsets[0]);
756  } else {
758  for (int i = 0; i < NumDims - 1; ++i) {
759  const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
760  const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
761  inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
762  inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
763  indices[0] -= idx0 * this->m_outputStrides[i];
764  indices[1] -= idx1 * this->m_outputStrides[i];
765  }
766  inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
767  inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
768  }
769  if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
770  this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
771  }
772  else {
773  EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
774  internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
775  this->m_impl.coeffRef(inputIndices[0]) = values[0];
776  this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
778  for (int i = 1; i < packetSize-1; ++i) {
779  this->coeffRef(index+i) = values[i];
780  }
781  }
782  }
783 
784  template<typename TensorBlock>
785  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
786  const TensorBlockDesc& desc, const TensorBlock& block) {
787  TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
788  this->m_impl.writeBlock(arg_desc, block);
789  }
790 };
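The lvalue evaluator above makes the slice writable, so a sub-region of a tensor can be assigned directly. A minimal sketch, not part of the original file; sizes are illustrative:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<float, 2> canvas(6, 6);
    canvas.setZero();

    Eigen::Tensor<float, 2> patch(2, 2);
    patch.setConstant(3.0f);

    // Copy `patch` into the 2x2 region of `canvas` starting at (2, 2).
    Eigen::array<Eigen::Index, 2> offsets{{2, 2}};
    Eigen::array<Eigen::Index, 2> extents{{2, 2}};
    canvas.slice(offsets, extents) = patch;

    return canvas(2, 2) == 3.0f ? 0 : 1;
  }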
791 
792 namespace internal {
793 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
794 struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
795 {
796  typedef typename XprType::Scalar Scalar;
797  typedef traits<XprType> XprTraits;
798  typedef typename XprTraits::StorageKind StorageKind;
799  typedef typename XprTraits::Index Index;
800  typedef typename XprType::Nested Nested;
801  typedef std::remove_reference_t<Nested> Nested_;
802  static constexpr int NumDimensions = array_size<StartIndices>::value;
803  static constexpr int Layout = XprTraits::Layout;
804  typedef typename XprTraits::PointerType PointerType;
805 };
806 
807 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
808 struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
809 {
810  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>EIGEN_DEVICE_REF type;
811 };
812 
813 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
814 struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
815 {
816  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
817 };
818 
819 } // end namespace internal
820 
821 
822 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
823 class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
824 {
825  public:
826  typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > Base;
827  typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
828  typedef typename XprType::CoeffReturnType CoeffReturnType;
829  typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
830  typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
831  typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;
832 
833  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
834  const XprType& expr, const StartIndices& startIndices,
835  const StopIndices& stopIndices, const Strides& strides)
836  : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
837  m_strides(strides) {}
838 
839  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
840  const StartIndices& startIndices() const { return m_startIndices; }
841  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
842  const StartIndices& stopIndices() const { return m_stopIndices; }
843  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
844  const StartIndices& strides() const { return m_strides; }
845 
846  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
847  const internal::remove_all_t<typename XprType::Nested>&
848  expression() const { return m_xpr; }
849 
850  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)
851 
852  protected:
853  typename XprType::Nested m_xpr;
854  const StartIndices m_startIndices;
855  const StopIndices m_stopIndices;
856  const Strides m_strides;
857 };
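A minimal usage sketch of TensorStridingSlicingOp as produced by TensorBase::stridedSlice(); this is not part of the original file, and the start/stop/stride values are illustrative:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<int, 1> t(10);
    for (int i = 0; i < 10; ++i) t(i) = i;

    // Every second element from index 1 (inclusive) up to index 9 (exclusive):
    // {1, 3, 5, 7}.
    Eigen::array<Eigen::Index, 1> start{{1}};
    Eigen::array<Eigen::Index, 1> stop{{9}};
    Eigen::array<Eigen::Index, 1> step{{2}};
    Eigen::Tensor<int, 1> every_other = t.stridedSlice(start, stop, step);

    return every_other.dimension(0) == 4 ? 0 : 1;
  }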
858 
859 // Eval as rvalue
860 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
861 struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
862 {
863  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
864  static constexpr int NumDims = internal::array_size<Strides>::value;
865  typedef typename XprType::Index Index;
866  typedef typename XprType::Scalar Scalar;
867  typedef typename XprType::CoeffReturnType CoeffReturnType;
868  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
869  typedef StorageMemory<CoeffReturnType, Device> Storage;
870  typedef typename Storage::Type EvaluatorPointerType;
871  typedef Strides Dimensions;
872 
873  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
874  enum {
875  // Alignment can't be guaranteed at compile time since it depends on the
876  // slice offsets and sizes.
877  IsAligned = false,
878  PacketAccess = false,
879  BlockAccess = false,
881  RawAccess = false
882  };
883 
884  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
885  typedef internal::TensorBlockNotImplemented TensorBlock;
886  //===--------------------------------------------------------------------===//
887 
888  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
889  : m_impl(op.expression(), device),
890  m_device(device),
891  m_strides(op.strides())
892  {
893  // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
894  DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
895  for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
896  eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
897  if (m_strides[i] > 0) {
898  startIndicesClamped[i] =
899  clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
900  stopIndicesClamped[i] =
901  clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
902  } else {
903  /* implies m_strides[i] < 0 by assert */
904  startIndicesClamped[i] =
905  clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
906  stopIndicesClamped[i] =
907  clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
908  }
909  m_startIndices[i] = startIndicesClamped[i];
910  }
911 
912  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
913  const InputDimensions& input_dims = m_impl.dimensions();
914 
915  // compute output tensor shape
916  m_is_identity = true;
917  for (int i = 0; i < NumDims; i++) {
918  Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
919  if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
920  m_dimensions[i] = 0;
921  } else {
922  m_dimensions[i] =
923  (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
924  eigen_assert(m_dimensions[i] >= 0);
925  }
926  if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
927  m_is_identity = false;
928  }
929  }
930 
931  Strides output_dims = m_dimensions;
932 
933  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
934  m_inputStrides[0] = m_strides[0];
935  m_offsets[0] = startIndicesClamped[0];
936  Index previousDimProduct = 1;
937  for (int i = 1; i < NumDims; ++i) {
938  previousDimProduct *= input_dims[i-1];
939  m_inputStrides[i] = previousDimProduct * m_strides[i];
940  m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
941  }
942 
943  // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
944  m_outputStrides[0] = 1;
945  for (int i = 1; i < NumDims; ++i) {
946  m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
947  m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
948  }
949  } else {
950  m_inputStrides[NumDims-1] = m_strides[NumDims-1];
951  m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
952  Index previousDimProduct = 1;
953  for (int i = NumDims - 2; i >= 0; --i) {
954  previousDimProduct *= input_dims[i+1];
955  m_inputStrides[i] = previousDimProduct * m_strides[i];
956  m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
957  }
958 
959  m_outputStrides[NumDims-1] = 1;
960  for (int i = NumDims - 2; i >= 0; --i) {
961  m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
962  m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
963  }
964  }
965  }
966 
967  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
968 
969 
970  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
971  m_impl.evalSubExprsIfNeeded(NULL);
972  return true;
973  }
974 
975  EIGEN_STRONG_INLINE void cleanup() {
976  m_impl.cleanup();
977  }
978 
979  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
980  {
981  if (m_is_identity) {
982  return m_impl.coeff(index);
983  } else {
984  return m_impl.coeff(srcCoeff(index));
985  }
986  }
987 
988  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
989  return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
990  }
991 
992  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
993  return NULL;
994  }
995 
996  protected:
997  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
998  {
999  Index inputIndex = 0;
1000  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
1002  for (int i = NumDims - 1; i >= 0; --i) {
1003  const Index idx = index / m_fastOutputStrides[i];
1004  inputIndex += idx * m_inputStrides[i] + m_offsets[i];
1005  index -= idx * m_outputStrides[i];
1006  }
1007  } else {
1009  for (int i = 0; i < NumDims; ++i) {
1010  const Index idx = index / m_fastOutputStrides[i];
1011  inputIndex += idx * m_inputStrides[i] + m_offsets[i];
1012  index -= idx * m_outputStrides[i];
1013  }
1014  }
1015  return inputIndex;
1016  }
1017 
1018  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
1019 #ifndef SYCL_DEVICE_ONLY
1020  return numext::maxi(min, numext::mini(max,value));
1021 #else
1022  return cl::sycl::clamp(value, min, max);
1023 #endif
1024  }
1025 
1026  array<Index, NumDims> m_outputStrides;
1027  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
1028  array<Index, NumDims> m_inputStrides;
1029  bool m_is_identity;
1030  TensorEvaluator<ArgType, Device> m_impl;
1031  const Device EIGEN_DEVICE_REF m_device;
1032  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
1033  DSizes<Index, NumDims> m_dimensions;
1034  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
1035  const Strides m_strides;
1036 };
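Following the clamping and dimension logic in the constructor above, negative strides walk the input backwards and degenerate intervals collapse to zero-sized dimensions instead of triggering out-of-range reads. A hedged sketch, not part of the original file; values are illustrative:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<int, 1> t(5);
    for (int i = 0; i < 5; ++i) t(i) = i;

    // Negative stride: from index 4 down to (but excluding) index 0 -> {4, 3, 2, 1}.
    Eigen::array<Eigen::Index, 1> start{{4}};
    Eigen::array<Eigen::Index, 1> stop{{0}};
    Eigen::array<Eigen::Index, 1> step{{-1}};
    Eigen::Tensor<int, 1> reversed = t.stridedSlice(start, stop, step);

    // An empty interval (start == stop) yields a zero-sized dimension.
    Eigen::array<Eigen::Index, 1> same{{2}};
    Eigen::Tensor<int, 1> empty = t.stridedSlice(same, same, step);

    return (reversed.dimension(0) == 4 && empty.dimension(0) == 0) ? 0 : 1;
  }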
1037 
1038 // Eval as lvalue
1039 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
1040 struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
1041  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
1042 {
1043  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
1044  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
1045  static constexpr int NumDims = internal::array_size<Strides>::value;
1046  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
1047 
1048  enum {
1049  IsAligned = false,
1050  PacketAccess = false,
1051  BlockAccess = false,
1052  PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
1053  CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
1054  RawAccess = false
1055  };
1056 
1057  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
1058  typedef internal::TensorBlockNotImplemented TensorBlock;
1059  //===--------------------------------------------------------------------===//
1060 
1061  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
1062  : Base(op, device)
1063  { }
1064 
1065  typedef typename XprType::Index Index;
1066  typedef typename XprType::Scalar Scalar;
1067  typedef typename XprType::CoeffReturnType CoeffReturnType;
1068  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
1069  typedef Strides Dimensions;
1070 
1071  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const
1072  {
1073  if (this->m_is_identity) {
1074  return this->m_impl.coeffRef(index);
1075  } else {
1076  return this->m_impl.coeffRef(this->srcCoeff(index));
1077  }
1078  }
1079 };
1080 
1081 
1082 } // end namespace Eigen
1083 
1084 #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H