BlasUtil.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_BLASUTIL_H
11 #define EIGEN_BLASUTIL_H
12 
13 // This file contains many lightweight helper classes used to
14 // implement and control fast level 2 and level 3 BLAS-like routines.
15 
16 #include "../InternalHeaderCheck.h"
17 
18 namespace Eigen {
19 
20 namespace internal {
21 
22 // forward declarations
23 template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
24 struct gebp_kernel;
25 
26 template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
27 struct gemm_pack_rhs;
28 
29 template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
30 struct gemm_pack_lhs;
31 
32 template<
33  typename Index,
34  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
35  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
36  int ResStorageOrder, int ResInnerStride>
37 struct general_matrix_matrix_product;
38 
39 template<typename Index,
40  typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
41  typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
42 struct general_matrix_vector_product;
43 
44 template<typename From,typename To> struct get_factor {
45  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
46 };
47 
48 template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
49  EIGEN_DEVICE_FUNC
50  static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
51 };
52 
53 
54 template<typename Scalar, typename Index>
55 class BlasVectorMapper {
56  public:
57  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
58 
59  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
60  return m_data[i];
61  }
62  template <typename Packet, int AlignmentType>
63  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
64  return ploadt<Packet, AlignmentType>(m_data + i);
65  }
66 
67  template <typename Packet>
68  EIGEN_DEVICE_FUNC bool aligned(Index i) const {
69  return (std::uintptr_t(m_data+i)%sizeof(Packet))==0;
70  }
71 
72  protected:
73  Scalar* m_data;
74 };
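
// [Editor's illustration -- not part of the original Eigen source. A minimal usage sketch
//  of BlasVectorMapper, assuming `data` points to at least one full packet's worth of
//  floats; the function name is hypothetical.]
inline float blas_vector_mapper_sketch(float* data)
{
  typedef packet_traits<float>::type PacketF;   // native float packet for this target
  BlasVectorMapper<float, Index> v(data);       // non-owning view of `data`
  PacketF p = v.load<PacketF, Unaligned>(0);    // packet load starting at index 0
  return v(0) + pfirst(p);                      // scalar access plus first packet lane (both read data[0])
}
// [End of editor's illustration.]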
75 
76 template<typename Scalar, typename Index, int AlignmentType, int Incr=1>
77 class BlasLinearMapper;
78 
79 template<typename Scalar, typename Index, int AlignmentType>
80 class BlasLinearMapper<Scalar,Index,AlignmentType>
81 {
82 public:
83  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1)
84  : m_data(data)
85  {
86  EIGEN_ONLY_USED_FOR_DEBUG(incr);
87  eigen_assert(incr==1);
88  }
89 
90  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(Index i) const {
91  internal::prefetch(&operator()(i));
92  }
93 
94  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
95  return m_data[i];
96  }
97 
98  template<typename PacketType>
99  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
100  return ploadt<PacketType, AlignmentType>(m_data + i);
101  }
102 
103  template<typename PacketType>
104  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index n, Index offset = 0) const {
105  return ploadt_partial<PacketType, AlignmentType>(m_data + i, n, offset);
106  }
107 
108  template<typename PacketType, int AlignmentT>
109  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType load(Index i) const {
110  return ploadt<PacketType, AlignmentT>(m_data + i);
111  }
112 
113  template<typename PacketType>
114  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
115  pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p);
116  }
117 
118  template<typename PacketType>
119  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, const PacketType &p, Index n, Index offset = 0) const {
120  pstoret_partial<Scalar, PacketType, AlignmentType>(m_data + i, p, n, offset);
121  }
122 
123 protected:
124  Scalar *m_data;
125 };
126 
127 // Lightweight helper class to access matrix coefficients.
128 template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>
129 class blas_data_mapper;
130 
131 // TMP to help PacketBlock store implementation.
132 // There's currently no known use case for PacketBlock load.
133 // The default implementation assumes ColMajor order.
134 // It always stores each packet sequentially, one `stride` apart.
135 template<typename Index, typename Scalar, typename Packet, int n, int idx, int StorageOrder>
136 struct PacketBlockManagement
137 {
138  PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, StorageOrder> pbm;
139  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
140  pbm.store(to, stride, i, j, block);
141  pstoreu<Scalar>(to + i + (j + idx)*stride, block.packet[idx]);
142  }
143 };
144 
145 // PacketBlockManagement specialization to take care of RowMajor order without ifs.
146 template<typename Index, typename Scalar, typename Packet, int n, int idx>
147 struct PacketBlockManagement<Index, Scalar, Packet, n, idx, RowMajor>
148 {
149  PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, RowMajor> pbm;
150  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
151  pbm.store(to, stride, i, j, block);
152  pstoreu<Scalar>(to + j + (i + idx)*stride, block.packet[idx]);
153  }
154 };
155 
156 template<typename Index, typename Scalar, typename Packet, int n, int StorageOrder>
157 struct PacketBlockManagement<Index, Scalar, Packet, n, -1, StorageOrder>
158 {
159  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
160  EIGEN_UNUSED_VARIABLE(to);
161  EIGEN_UNUSED_VARIABLE(stride);
162  EIGEN_UNUSED_VARIABLE(i);
163  EIGEN_UNUSED_VARIABLE(j);
164  EIGEN_UNUSED_VARIABLE(block);
165  }
166 };
167 
168 template<typename Index, typename Scalar, typename Packet, int n>
169 struct PacketBlockManagement<Index, Scalar, Packet, n, -1, RowMajor>
170 {
171  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
172  EIGEN_UNUSED_VARIABLE(to);
173  EIGEN_UNUSED_VARIABLE(stride);
174  EIGEN_UNUSED_VARIABLE(i);
175  EIGEN_UNUSED_VARIABLE(j);
176  EIGEN_UNUSED_VARIABLE(block);
177  }
178 };
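
// [Editor's illustration -- not part of the original Eigen source. The recursive helpers
//  above unroll, for a ColMajor destination, into one unaligned packet store per column.
//  For a PacketBlock<Packet,2> the equivalent loop would look roughly like this
//  (hypothetical function name):]
template<typename Scalar, typename Packet>
inline void packet_block_store_colmajor_sketch(Scalar* to, Index stride, Index i, Index j,
                                               const PacketBlock<Packet, 2>& block)
{
  for (int idx = 0; idx < 2; ++idx)
    // packet idx goes to column j+idx, starting at row i; columns are `stride` apart
    pstoreu<Scalar>(to + i + (j + idx) * stride, block.packet[idx]);
}
// [End of editor's illustration.]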
179 
180 template<typename Scalar, typename Index, int StorageOrder, int AlignmentType>
181 class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>
182 {
183 public:
184  typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
185  typedef BlasVectorMapper<Scalar, Index> VectorMapper;
186 
187  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)
188  : m_data(data), m_stride(stride)
189  {
190  EIGEN_ONLY_USED_FOR_DEBUG(incr);
191  eigen_assert(incr==1);
192  }
193 
194  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
195  getSubMapper(Index i, Index j) const {
196  return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
197  }
198 
199  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
200  return LinearMapper(&operator()(i, j));
201  }
202 
203  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
204  return VectorMapper(&operator()(i, j));
205  }
206 
207  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(Index i, Index j) const {
208  internal::prefetch(&operator()(i, j));
209  }
210 
211  EIGEN_DEVICE_FUNC
212  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
213  return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
214  }
215 
216  template<typename PacketType>
217  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
218  return ploadt<PacketType, AlignmentType>(&operator()(i, j));
219  }
220 
221  template<typename PacketType>
222  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index j, Index n, Index offset = 0) const {
223  return ploadt_partial<PacketType, AlignmentType>(&operator()(i, j), n, offset);
224  }
225 
226  template <typename PacketT, int AlignmentT>
227  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
228  return ploadt<PacketT, AlignmentT>(&operator()(i, j));
229  }
230 
231  template<typename PacketType>
232  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Index j, const PacketType &p) const {
233  pstoret<Scalar, PacketType, AlignmentType>(&operator()(i, j), p);
234  }
235 
236  template<typename PacketType>
237  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, Index j, const PacketType &p, Index n, Index offset = 0) const {
238  pstoret_partial<Scalar, PacketType, AlignmentType>(&operator()(i, j), p, n, offset);
239  }
240 
241  template<typename SubPacket>
242  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
243  pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
244  }
245 
246  template<typename SubPacket>
247  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
248  return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
249  }
250 
251  EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
252  EIGEN_DEVICE_FUNC const Index incr() const { return 1; }
253  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
254 
255  EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
256  if (std::uintptr_t(m_data)%sizeof(Scalar)) {
257  return -1;
258  }
259  return internal::first_default_aligned(m_data, size);
260  }
261 
262  template<typename SubPacket, int n>
263  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const {
264  PacketBlockManagement<Index, Scalar, SubPacket, n, n-1, StorageOrder> pbm;
265  pbm.store(m_data, m_stride, i, j, block);
266  }
267 protected:
268  Scalar* EIGEN_RESTRICT m_data;
269  const Index m_stride;
270 };
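
// [Editor's illustration -- not part of the original Eigen source. A minimal sketch of how
//  the Incr==1 mapper addresses a 2x3 column-major matrix held in a plain array; the
//  function name and buffer are hypothetical.]
inline float blas_data_mapper_sketch()
{
  float buf[6] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};                     // 2x3, columns contiguous
  blas_data_mapper<float, Index, ColMajor> A(buf, /*stride=*/2);     // stride = leading dimension
  float a12 = A(1, 2);                                               // buf[1 + 2*2] == 6
  blas_data_mapper<float, Index, ColMajor> B = A.getSubMapper(0, 1); // block starting at column 1
  return a12 + B(0, 0);                                              // B(0,0) == A(0,1) == 3
}
// [End of editor's illustration.]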
271 
272 // Implementation of non-natural increment (i.e. inner-stride != 1)
273 // The exposed API is not complete yet compared to the Incr==1 case
274 // because some features make less sense in this case.
275 template<typename Scalar, typename Index, int AlignmentType, int Incr>
276 class BlasLinearMapper
277 {
278 public:
279  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {}
280 
281  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(Index i) const {
282  internal::prefetch(&operator()(i));
283  }
284 
285  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
286  return m_data[i*m_incr.value()];
287  }
288 
289  template<typename PacketType>
290  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
291  return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value());
292  }
293 
294  template<typename PacketType>
295  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index n, Index /*offset*/ = 0) const {
296  return pgather_partial<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value(), n);
297  }
298 
299  template<typename PacketType>
300  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
301  pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());
302  }
303 
304  template<typename PacketType>
305  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, const PacketType &p, Index n, Index /*offset*/ = 0) const {
306  pscatter_partial<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value(), n);
307  }
308 
309 protected:
310  Scalar *m_data;
311  const internal::variable_if_dynamic<Index,Incr> m_incr;
312 };
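
// [Editor's illustration -- not part of the original Eigen source. With a non-unit
//  increment, element i lives at m_data[i*incr] and packet accesses go through
//  pgather/pscatter. A minimal sketch with hypothetical names:]
inline float blas_linear_mapper_incr_sketch()
{
  float buf[64];
  for (int k = 0; k < 64; ++k) buf[k] = float(k);
  // Logical vector v with v(i) == buf[2*i], i.e. an inner stride (increment) of 2.
  BlasLinearMapper<float, Index, Unaligned, Dynamic> v(buf, /*incr=*/2);
  typedef packet_traits<float>::type PacketF;
  PacketF p = v.loadPacket<PacketF>(0);   // gathers buf[0], buf[2], buf[4], ...
  return v(3) + pfirst(p);                // buf[6] + buf[0] == 6 + 0
}
// [End of editor's illustration.]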
313 
314 template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr>
315 class blas_data_mapper
316 {
317 public:
318  typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;
319 
320  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
321 
322  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper
323  getSubMapper(Index i, Index j) const {
324  return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());
325  }
326 
327  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
328  return LinearMapper(&operator()(i, j), m_incr.value());
329  }
330 
331  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(Index i, Index j) const {
332  internal::prefetch(&operator()(i, j));
333  }
334 
335  EIGEN_DEVICE_FUNC
336  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
337  return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];
338  }
339 
340  template<typename PacketType>
341  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
342  return pgather<Scalar,PacketType>(&operator()(i, j),m_incr.value());
343  }
344 
345  template<typename PacketType>
346  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index j, Index n, Index /*offset*/ = 0) const {
347  return pgather_partial<Scalar,PacketType>(&operator()(i, j),m_incr.value(),n);
348  }
349 
350  template <typename PacketT, int AlignmentT>
351  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
352  return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value());
353  }
354 
355  template<typename PacketType>
356  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Index j, const PacketType &p) const {
357  pscatter<Scalar, PacketType>(&operator()(i, j), p, m_incr.value());
358  }
359 
360  template<typename PacketType>
361  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, Index j, const PacketType &p, Index n, Index /*offset*/ = 0) const {
362  pscatter_partial<Scalar, PacketType>(&operator()(i, j), p, m_incr.value(), n);
363  }
364 
365  template<typename SubPacket>
366  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
367  pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
368  }
369 
370  template<typename SubPacket>
371  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
372  return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
373  }
374 
375  // storePacketBlock_helper defines a way to access values inside the PacketBlock; this is essentially required by the complex types.
376  template<typename SubPacket, typename Scalar_, int n, int idx>
377  struct storePacketBlock_helper
378  {
379  storePacketBlock_helper<SubPacket, Scalar_, n, idx-1> spbh;
380  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
381  spbh.store(sup, i,j,block);
382  sup->template storePacket<SubPacket>(i, j+idx, block.packet[idx]);
383  }
384  };
385 
386  template<typename SubPacket, int n, int idx>
387  struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx>
388  {
389  storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh;
390  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
391  spbh.store(sup,i,j,block);
392  sup->template storePacket<SubPacket>(i, j+idx, block.packet[idx]);
393  }
394  };
395 
396  template<typename SubPacket, int n, int idx>
397  struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx>
398  {
399  storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh;
400  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
401  spbh.store(sup,i,j,block);
402  for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
403  {
404  std::complex<double> *v = &sup->operator()(i+l, j+idx);
405  v->real(block.packet[idx].v[2*l+0]);
406  v->imag(block.packet[idx].v[2*l+1]);
407  }
408  }
409  };
410 
411  template<typename SubPacket, typename Scalar_, int n>
412  struct storePacketBlock_helper<SubPacket, Scalar_, n, -1>
413  {
414  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
415  }
416  };
417 
418  template<typename SubPacket, int n>
419  struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1>
420  {
421  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
422  }
423  };
424 
425  template<typename SubPacket, int n>
426  struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1>
427  {
428  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
429  }
430  };
431  // This function stores a PacketBlock to m_data; this approach is quite slow compared to Incr==1 and should be avoided when possible.
432  template<typename SubPacket, int n>
433  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const {
434  storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb;
435  spb.store(this, i,j,block);
436  }
437 
438  EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
439  EIGEN_DEVICE_FUNC const Index incr() const { return m_incr.value(); }
440  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
441 protected:
442  Scalar* EIGEN_RESTRICT m_data;
443  const Index m_stride;
444  const internal::variable_if_dynamic<Index,Incr> m_incr;
445 };
446 
447 // lightweight helper class to access matrix coefficients (const version)
448 template<typename Scalar, typename Index, int StorageOrder>
449 class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
450  public:
451  EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
452 
453  EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
454  return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
455  }
456 };
457 
458 
459 /* Helper class to analyze the factors of a Product expression.
460  * In particular, it allows one to strip away negation (operator-), scalar multiples,
461  * and conjugation. */
462 template<typename XprType> struct blas_traits
463 {
464  typedef typename traits<XprType>::Scalar Scalar;
465  typedef const XprType& ExtractType;
466  typedef XprType ExtractType_;
467  enum {
468  IsComplex = NumTraits<Scalar>::IsComplex,
469  IsTransposed = false,
470  NeedToConjugate = false,
471  HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit)
472  && ( bool(XprType::IsVectorAtCompileTime)
473  || int(inner_stride_at_compile_time<XprType>::ret) == 1)
474  ) ? 1 : 0,
475  HasScalarFactor = false
476  };
477  typedef std::conditional_t<bool(HasUsableDirectAccess),
478  ExtractType,
479  typename ExtractType_::PlainObject
480  > DirectLinearAccessType;
481  EIGEN_DEVICE_FUNC static inline ExtractType extract(const XprType& x) { return x; }
482  EIGEN_DEVICE_FUNC static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
483 };
484 
485 // pop conjugate
486 template<typename Scalar, typename NestedXpr>
487 struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
488  : blas_traits<NestedXpr>
489 {
490  typedef blas_traits<NestedXpr> Base;
491  typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
492  typedef typename Base::ExtractType ExtractType;
493 
494  enum {
495  IsComplex = NumTraits<Scalar>::IsComplex,
496  NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
497  };
498  EIGEN_DEVICE_FUNC static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
499  EIGEN_DEVICE_FUNC static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
500 };
501 
502 // pop scalar multiple
503 template<typename Scalar, typename NestedXpr, typename Plain>
504 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
505  : blas_traits<NestedXpr>
506 {
507  enum {
508  HasScalarFactor = true
509  };
510  typedef blas_traits<NestedXpr> Base;
511  typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
512  typedef typename Base::ExtractType ExtractType;
513  EIGEN_DEVICE_FUNC static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
514  EIGEN_DEVICE_FUNC static inline Scalar extractScalarFactor(const XprType& x)
515  { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
516 };
517 template<typename Scalar, typename NestedXpr, typename Plain>
518 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
519  : blas_traits<NestedXpr>
520 {
521  enum {
522  HasScalarFactor = true
523  };
524  typedef blas_traits<NestedXpr> Base;
525  typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
526  typedef typename Base::ExtractType ExtractType;
527  EIGEN_DEVICE_FUNC static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
528  EIGEN_DEVICE_FUNC static inline Scalar extractScalarFactor(const XprType& x)
529  { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
530 };
531 template<typename Scalar, typename Plain1, typename Plain2>
532 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
533  const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
534  : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
535 {};
536 
537 // pop opposite
538 template<typename Scalar, typename NestedXpr>
539 struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
540  : blas_traits<NestedXpr>
541 {
542  enum {
543  HasScalarFactor = true
544  };
545  typedef blas_traits<NestedXpr> Base;
546  typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
547  typedef typename Base::ExtractType ExtractType;
548  EIGEN_DEVICE_FUNC static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
549  EIGEN_DEVICE_FUNC static inline Scalar extractScalarFactor(const XprType& x)
550  { return - Base::extractScalarFactor(x.nestedExpression()); }
551 };
552 
553 // pop/push transpose
554 template<typename NestedXpr>
555 struct blas_traits<Transpose<NestedXpr> >
556  : blas_traits<NestedXpr>
557 {
558  typedef typename NestedXpr::Scalar Scalar;
559  typedef blas_traits<NestedXpr> Base;
560  typedef Transpose<NestedXpr> XprType;
561  typedef Transpose<const typename Base::ExtractType_> ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS
562  typedef Transpose<const typename Base::ExtractType_> ExtractType_;
563  typedef std::conditional_t<bool(Base::HasUsableDirectAccess),
564  ExtractType,
565  typename ExtractType::PlainObject
566  > DirectLinearAccessType;
567  enum {
568  IsTransposed = Base::IsTransposed ? 0 : 1
569  };
570  EIGEN_DEVICE_FUNC static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
571  EIGEN_DEVICE_FUNC static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
572 };
573 
574 template<typename T>
575 struct blas_traits<const T>
576  : blas_traits<T>
577 {};
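
// [Editor's illustration -- not part of the original Eigen source. blas_traits peels scalar
//  factors, negation, conjugation and transposition off an expression, so the product
//  kernels can work on the plain underlying matrix plus one scalar coefficient. The
//  function name is hypothetical and is meant to be instantiated with a dense matrix type
//  (e.g. MatrixXf) once Eigen/Core has been fully included.]
template<typename MatrixType>
inline typename MatrixType::Scalar blas_traits_sketch(const MatrixType& A)
{
  typedef typename MatrixType::Scalar S;
  typedef decltype(S(-3) * A.transpose()) Xpr;   // expression tree for (-3) * A^T
  Xpr xpr = S(-3) * A.transpose();
  typedef blas_traits<Xpr> Traits;
  // Traits::extract(xpr) refers to the transposed A without the scalar factor,
  // Traits::IsTransposed is 1, and the factor itself is recovered here:
  return Traits::extractScalarFactor(xpr);       // == S(-3)
}
// [End of editor's illustration.]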
578 
579 template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
580 struct extract_data_selector {
581  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m)
582  {
583  return blas_traits<T>::extract(m).data();
584  }
585 };
586 
587 template<typename T>
588 struct extract_data_selector<T,false> {
589  EIGEN_DEVICE_FUNC static typename T::Scalar* run(const T&) { return 0; }
590 };
591 
592 template<typename T>
593 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m)
594 {
595  return extract_data_selector<T>::run(m);
596 }
597 
598 /**
599  * \c combine_scalar_factors extracts and multiplies factors from GEMM and GEMV products.
600  * There is a specialization for booleans
601  */
602 template<typename ResScalar, typename Lhs, typename Rhs>
603 struct combine_scalar_factors_impl
604 {
605  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const Lhs& lhs, const Rhs& rhs)
606  {
607  return blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);
608  }
609  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)
610  {
611  return alpha * blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);
612  }
613 };
614 template<typename Lhs, typename Rhs>
615 struct combine_scalar_factors_impl<bool, Lhs, Rhs>
616 {
617  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const Lhs& lhs, const Rhs& rhs)
618  {
619  return blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);
620  }
621  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const bool& alpha, const Lhs& lhs, const Rhs& rhs)
622  {
623  return alpha && blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);
624  }
625 };
626 
627 template<typename ResScalar, typename Lhs, typename Rhs>
628 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)
629 {
630  return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(alpha, lhs, rhs);
631 }
632 template<typename ResScalar, typename Lhs, typename Rhs>
633 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const Lhs& lhs, const Rhs& rhs)
634 {
635  return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(lhs, rhs);
636 }
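
// [Editor's illustration -- not part of the original Eigen source. combine_scalar_factors
//  merges the factors extracted from both sides of a product (optionally times alpha) into
//  the single scalar handed to the GEMM/GEMV kernels. Hypothetical function name, meant to
//  be instantiated with plain dense matrices once Eigen/Core has been fully included.]
template<typename Lhs, typename Rhs>
inline typename Lhs::Scalar combine_scalar_factors_sketch(const Lhs& lhs, const Rhs& rhs)
{
  typedef typename Lhs::Scalar S;
  typedef decltype(S(2) * lhs) ScaledLhs;
  typedef decltype(S(3) * rhs) ScaledRhs;
  ScaledLhs a = S(2) * lhs;
  ScaledRhs b = S(3) * rhs;
  // If lhs and rhs are plain matrices, the kernels see lhs*rhs plus this combined factor (6):
  return combine_scalar_factors<S>(a, b);
}
// [End of editor's illustration.]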
637 
638 
639 } // end namespace internal
640 
641 } // end namespace Eigen
642 
643 #endif // EIGEN_BLASUTIL_H