TensorConversion.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
12 
13 #include "./InternalHeaderCheck.h"
14 
15 namespace Eigen {
16 
24 namespace internal {
25 template<typename TargetType, typename XprType>
26 struct traits<TensorConversionOp<TargetType, XprType> >
27 {
28  // Type promotion to handle the case where the types of the lhs and the rhs are different.
29  typedef TargetType Scalar;
30  typedef typename traits<XprType>::StorageKind StorageKind;
31  typedef typename traits<XprType>::Index Index;
32  typedef typename XprType::Nested Nested;
33  typedef std::remove_reference_t<Nested> Nested_;
34  static constexpr int NumDimensions = traits<XprType>::NumDimensions;
35  static constexpr int Layout = traits<XprType>::Layout;
36  enum { Flags = 0 };
37  typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
38 };
39 
40 template<typename TargetType, typename XprType>
41 struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
42 {
43  typedef const TensorConversionOp<TargetType, XprType>& type;
44 };
45 
46 template<typename TargetType, typename XprType>
47 struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
48 {
49  typedef TensorConversionOp<TargetType, XprType> type;
50 };
51 
52 } // end namespace internal
53 
54 
/**
 * \internal
 * Converts packets of SrcPacket loaded from \c TensorEvaluator into packets
 * of TgtPacket. SrcCoeffRatio / TgtCoeffRatio describe how many source
 * packets map onto one target packet; only the specializations below are
 * defined. (The extraction had dropped the declaration itself.)
 */
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
struct PacketConverter;
58 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
59 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> {
60  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
62  : m_impl(impl) {}
63 
64  template<int LoadMode, typename Index>
65  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
66  return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
67  }
68 
69  private:
71 };
72 
73 
74 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
75 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
76  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
78  : m_impl(impl) {}
79 
80  template<int LoadMode, typename Index>
81  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
82  const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
83 
84  SrcPacket src1 = m_impl.template packet<LoadMode>(index);
85  SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
86  TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
87  return result;
88  }
89 
90  private:
92 };
93 
94 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
95 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
96  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
98  : m_impl(impl) {}
99 
100  template<int LoadMode, typename Index>
101  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
102  const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
103 
104  SrcPacket src1 = m_impl.template packet<LoadMode>(index);
105  SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
106  SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
107  SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
108  TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
109  return result;
110  }
111 
112  private:
114 };
115 
116 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
117 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> {
118  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
120  : m_impl(impl) {}
121 
122  template<int LoadMode, typename Index>
123  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
124  const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
125 
126  SrcPacket src1 = m_impl.template packet<LoadMode>(index);
127  SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize);
128  SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
129  SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
130  SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize);
131  SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize);
132  SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize);
133  SrcPacket src8 = m_impl.template packet<LoadMode>(index + 7 * SrcPacketSize);
134  TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8);
135  return result;
136  }
137 
138  private:
140 };
141 
142 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int TgtCoeffRatio>
143 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
144  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
146  : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
147 
148  template<int LoadMode, typename Index>
149  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
150  const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
151  // Only call m_impl.packet() when we have direct access to the underlying data. This
152  // ensures that we don't compute the subexpression twice. We may however load some
153  // coefficients twice, but in practice this doesn't negatively impact performance.
154  if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
155  // Force unaligned memory loads since we can't ensure alignment anymore
156  return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
157  } else {
158  const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
159  typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
160  typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
161  internal::scalar_cast_op<SrcType, TgtType> converter;
162  EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
164  for (int i = 0; i < TgtPacketSize; ++i) {
165  values[i] = converter(m_impl.coeff(index+i));
166  }
167  TgtPacket rslt = internal::pload<TgtPacket>(values);
168  return rslt;
169  }
170  }
171 
172  private:
175 };
176 
177 template<typename TargetType, typename XprType>
178 class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
179 {
180  public:
181  typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
182  typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
183  typedef typename internal::traits<TensorConversionOp>::Index Index;
184  typedef typename internal::nested<TensorConversionOp>::type Nested;
187 
188  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
189  : m_xpr(xpr) {}
190 
193  expression() const { return m_xpr; }
194 
195  protected:
196  typename XprType::Nested m_xpr;
197 };
198 
199 template <bool SameType, typename Eval, typename EvalPointerType> struct ConversionSubExprEval {
200  static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
201  impl.evalSubExprsIfNeeded(NULL);
202  return true;
203  }
204 };
205 
206 template <typename Eval, typename EvalPointerType> struct ConversionSubExprEval<true, Eval, EvalPointerType> {
207  static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) {
208  return impl.evalSubExprsIfNeeded(data);
209  }
210 };
211 
212 #ifdef EIGEN_USE_THREADS
213 template <bool SameType, typename Eval, typename EvalPointerType,
214  typename EvalSubExprsCallback>
215 struct ConversionSubExprEvalAsync {
216  static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
217  impl.evalSubExprsIfNeededAsync(nullptr, std::move(done));
218  }
219 };
220 
221 template <typename Eval, typename EvalPointerType,
222  typename EvalSubExprsCallback>
223 struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType,
224  EvalSubExprsCallback> {
225  static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
226  impl.evalSubExprsIfNeededAsync(data, std::move(done));
227  }
228 };
229 #endif
230 
231 namespace internal {
232 
233 template <typename SrcType, typename TargetType, bool IsSameT>
234 struct CoeffConv {
235  template <typename ArgType, typename Device>
236  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
237  internal::scalar_cast_op<SrcType, TargetType> converter;
238  return converter(impl.coeff(index));
239  }
240 };
241 
242 template <typename SrcType, typename TargetType>
243 struct CoeffConv<SrcType, TargetType, true> {
244  template <typename ArgType, typename Device>
245  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
246  return impl.coeff(index);
247  }
248 };
249 
250 template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize, bool IsSameT>
251 struct PacketConv {
252  typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
253  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
254 
255  static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
256 
257  template <typename ArgType, typename Device>
258  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
259  internal::scalar_cast_op<SrcType, TargetType> converter;
260  EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
262  for (int i = 0; i < PacketSize; ++i) {
263  values[i] = converter(impl.coeff(index+i));
264  }
265  TargetPacket rslt = internal::pload<TargetPacket>(values);
266  return rslt;
267  }
268 };
269 
270 template <typename SrcPacket, typename TargetPacket, int LoadMode, bool IsSameT>
271 struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
272  typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
273  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
274 
275  template <typename ArgType, typename Device>
276  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
277  const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
278  const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
279  PacketConverter<TensorEvaluator<ArgType, Device>, SrcPacket, TargetPacket,
280  SrcCoeffRatio, TgtCoeffRatio> converter(impl);
281  return converter.template packet<LoadMode>(index);
282  }
283 };
284 
285 template <typename SrcPacket, typename TargetPacket, int LoadMode>
286 struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
287  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
288  static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
289 
290  template <typename ArgType, typename Device>
291  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
292  EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
293  for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
294  return internal::pload<TargetPacket>(values);
295  }
296 };
297 
298 template <typename SrcPacket, typename TargetPacket, int LoadMode>
299 struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> {
300  template <typename ArgType, typename Device>
301  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
302  return impl.template packet<LoadMode>(index);
303  }
304 };
305 
306 } // namespace internal
307 
308 // Eval as rvalue
309 template<typename TargetType, typename ArgType, typename Device>
310 struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
311 {
313  typedef typename XprType::Index Index;
315  typedef TargetType Scalar;
316  typedef TargetType CoeffReturnType;
321  static constexpr bool IsSameType = internal::is_same<TargetType, SrcType>::value;
324 
325  enum {
326  IsAligned = false,
327  PacketAccess =
328  #ifndef EIGEN_USE_SYCL
329  true,
330  #else
332  internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
333  #endif
336  RawAccess = false
337  };
338 
340  static constexpr int NumDims = internal::array_size<Dimensions>::value;
341 
342  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
343  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
344  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
345 
348 
349  struct TensorConversionOpBlockFactory {
350  template <typename ArgXprType>
351  struct XprType {
353  };
354 
355  template <typename ArgXprType>
356  typename XprType<ArgXprType>::type expr(const ArgXprType& expr) const {
357  return typename XprType<ArgXprType>::type(expr);
358  }
359  };
360 
361  typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory,
362  ArgTensorBlock>
364  //===--------------------------------------------------------------------===//
365 
366  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
367  : m_impl(op.expression(), device)
368  {
369  }
370 
371  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
372 
374  {
376  }
377 
378 #ifdef EIGEN_USE_THREADS
379  template <typename EvalSubExprsCallback>
380  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
381  EvaluatorPointerType data, EvalSubExprsCallback done) {
382  ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>,
384  EvalSubExprsCallback>::run(m_impl, data, std::move(done));
385  }
386 #endif
387 
388  EIGEN_STRONG_INLINE void cleanup()
389  {
390  m_impl.cleanup();
391  }
392 
393  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
394  {
395  return internal::CoeffConv<SrcType, TargetType, IsSameType>::run(m_impl,index);
396  }
397 
398  template<int LoadMode>
399  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
400  packet(Index index) const {
401  // If we are not going to do the cast, we just need to check that base
402  // TensorEvaluator has packet access. Otherwise we also need to make sure,
403  // that we have an implementation of vectorized cast.
404  const bool Vectorizable =
405  IsSameType
408  int(internal::type_casting_traits<SrcType, TargetType>::VectorizedCast);
409 
410  return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
411  Vectorizable, IsSameType>::run(m_impl, index);
412  }
413 
414  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
415  costPerCoeff(bool vectorized) const {
416  const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
417  if (vectorized) {
418  const double SrcCoeffRatio =
419  internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
420  const double TgtCoeffRatio =
421  internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
422  return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
423  TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
424  } else {
425  return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
426  }
427  }
428 
429  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
430  internal::TensorBlockResourceRequirements getResourceRequirements() const {
431  return m_impl.getResourceRequirements();
432  }
433 
434  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
436  bool /*root_of_expr_ast*/ = false) const {
437  return TensorBlock(m_impl.block(desc, scratch),
438  TensorConversionOpBlockFactory());
439  }
440 
441  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
442 
444  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
445 
446  protected:
448 };
449 
450 } // end namespace Eigen
451 
452 #endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
int i
#define EIGEN_ALIGN_MAX
#define EIGEN_UNROLL_LOOP
#define EIGEN_DEVICE_FUNC
int data[]
The tensor base class.
Tensor conversion class. This class makes it possible to vectorize type casting operations when the n...
internal::traits< TensorConversionOp >::StorageKind StorageKind
TensorConversionOp(const XprType &xpr)
NumTraits< Scalar >::Real RealScalar
internal::traits< TensorConversionOp >::Index Index
internal::traits< TensorConversionOp >::Scalar Scalar
const internal::remove_all_t< typename XprType::Nested > & expression() const
internal::nested< TensorConversionOp >::type Nested
typename remove_all< T >::type remove_all_t
: TensorContractionSycl.h, provides various tensor contraction kernel for SYCL backend
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
static bool run(Eval &impl, EvalPointerType data)
static bool run(Eval &impl, EvalPointerType)
internal::packet_traits< Scalar >::type type
Definition: TensorMeta.h:55
TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
const TensorEvaluator< ArgType, Device > & impl() const
required by sycl in order to extract the sycl accessor
internal::TensorUnaryExprBlock< TensorConversionOpBlockFactory, ArgTensorBlock > TensorBlock
internal::remove_all_t< typename internal::traits< ArgType >::Scalar > SrcType
TensorEvaluator< const ArgType, Device >::TensorBlock ArgTensorBlock
A cost model used to limit the number of threads used for evaluating tensor expression.
static constexpr int Layout
Storage::Type EvaluatorPointerType
static constexpr int PacketSize
EvaluatorPointerType data() const
internal::TensorMaterializedBlock< ScalarNoConst, NumCoords, Layout, Index > TensorBlock
PacketType< CoeffReturnType, Device >::type PacketReturnType