ProductEvaluators.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6 // Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
7 //
8 // This Source Code Form is subject to the terms of the Mozilla
9 // Public License v. 2.0. If a copy of the MPL was not distributed
10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 
12 
13 #ifndef EIGEN_PRODUCTEVALUATORS_H
14 #define EIGEN_PRODUCTEVALUATORS_H
15 
16 #include "./InternalHeaderCheck.h"
17 
18 namespace Eigen {
19 
20 namespace internal {
21 
// Generic evaluator for any Product expression.
// It simply forwards to product_evaluator, which is specialized below on the
// product kind (inner, outer, gemv, gemm, coefficient-based, ...).
template<typename Lhs, typename Rhs, int Options>
struct evaluator<Product<Lhs, Rhs, Options> >
  : public product_evaluator<Product<Lhs, Rhs, Options> >
{
  typedef Product<Lhs, Rhs, Options> XprType;
  typedef product_evaluator<XprType> Base;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
39 
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
// TODO we should apply that rule only if that's really helpful
// Declaring the expression as potentially aliasing forces the assignment
// logic to evaluate it into a temporary when the destination might overlap
// one of the product operands.
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
                                               const Product<Lhs, Rhs, DefaultProduct> > >
{
  static const bool value = true;
};
// Evaluator for "scalar * (A * B)": the scalar constant is folded into the
// left factor so that a single product evaluation is performed instead of
// evaluating the product and then rescaling the result.
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
                               const Product<Lhs, Rhs, DefaultProduct> > >
  : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
{
  typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
                        const Product<Lhs, Rhs, DefaultProduct> > XprType;
  typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
    // Rebuild s*(A*B) as (s*A)*B and evaluate that instead.
    : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
  {}
};
64 
65 
// Evaluator for the diagonal of a (default) product: re-route to the lazy
// product so that only the diagonal coefficients are computed, avoiding the
// evaluation of the full product into a temporary.
template<typename Lhs, typename Rhs, int DiagIndex>
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
  : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
{
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
    // Rewrap the same operands into a LazyProduct-based Diagonal expression.
    : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
        Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
        xpr.index() ))
  {}
};
79 
80 
// Helper class to perform a matrix product with the destination at hand.
// Depending on the sizes of the factors, there are different evaluation strategies
// as controlled by internal::product_type.
// Specializations are expected to provide static evalTo/addTo/subTo (and
// optionally scaleAndAddTo) members operating on a destination expression.
template< typename Lhs, typename Rhs,
          typename LhsShape = typename evaluator_traits<Lhs>::Shape,
          typename RhsShape = typename evaluator_traits<Rhs>::Shape,
          int ProductType = internal::product_type<Lhs,Rhs>::value>
struct generic_product_impl;
89 
// A plain (default) product is always assumed to alias its destination,
// e.g., "A = A*B" must go through a temporary unless .noalias() is used.
template<typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
  static const bool value = true;
};
94 
// This is the default evaluator implementation for products:
// It creates a temporary and call generic_product_impl
template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
  : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
{
  typedef Product<Lhs, Rhs, Options> XprType;
  typedef typename XprType::PlainObject PlainObject;
  typedef evaluator<PlainObject> Base;
  enum {
    // The result lives in a temporary, so it must be evaluated before nesting.
    Flags = Base::Flags | EvalBeforeNestingBit
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit product_evaluator(const XprType& xpr)
    : m_result(xpr.rows(), xpr.cols())
  {
    // The Base subobject is constructed before m_result is sized; re-construct
    // it in place so that it points at the now properly sized temporary.
    internal::construct_at<Base>(this, m_result);

// FIXME shall we handle nested_eval here?,
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
// typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
//
// const LhsNested lhs(xpr.lhs());
// const RhsNested rhs(xpr.rhs());
//
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);

    // Perform the actual product into the temporary.
    generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
  }

protected:
  PlainObject m_result;  // temporary holding the evaluated product
};
132 
133 // The following three shortcuts are enabled only if the scalar types match exactly.
134 // TODO: we could enable them for different scalar types when the product is not vectorized.
135 
136 // Dense = Product
137 template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
138 struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
139  std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>>
140 {
141  typedef Product<Lhs,Rhs,Options> SrcXprType;
142  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
143  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
144  {
145  Index dstRows = src.rows();
146  Index dstCols = src.cols();
147  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
148  dst.resize(dstRows, dstCols);
149  // FIXME shall we handle nested_eval here?
150  generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
151  }
152 };
153 
// Dense += Product
// The destination is required to already have the correct size (no resize
// for compound assignment); the product is accumulated via addTo.
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
                  std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
  }
};
168 
// Dense -= Product
// Mirror of the += case above: sizes must already match, the product is
// subtracted from the destination via subTo.
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
                  std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
  }
};
183 
184 
// Dense ?= scalar * Product
// TODO we should apply that rule if that's really helpful
// for instance, this is not good for inner products
// The scalar factor is moved onto the left operand so that the product
// machinery sees a plain "(s*A) * B" expression.
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
                                            const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
{
  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
                        const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
                        const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
  {
    // Fold the constant into the lhs and re-dispatch with the original functor.
    call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
  }
};
201 
202 //----------------------------------------
203 // Catch "Dense ?= xpr + Product<>" expression to save one temporary
204 // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
205 
// "xpr + A*B" is assumed to alias so that the dedicated Assignment
// specializations generated below can split it into two passes.
template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
  static const bool value = true;
};
211 
// Same as above for "xpr - A*B".
template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
  static const bool value = true;
};
217 
// Common implementation for "dst ?= xpr BINOP A*B": first assign/accumulate
// the non-product operand with Func1, then accumulate the product with Func2.
// This saves the temporary that a single-pass evaluation would require.
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
struct assignment_from_xpr_op_product
{
  template<typename SrcXprType, typename InitialFunc>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
  {
    call_assignment_no_alias(dst, src.lhs(), Func1());   // dst ?= xpr
    call_assignment_no_alias(dst, src.rhs(), Func2());   // dst +=/-= A*B
  }
};
229 
// Generates the Assignment specialization catching "dst ASSIGN_OP (xpr BINOP A*B)".
// ASSIGN_OP2 is the accumulation functor applied to the product part, e.g.
// "dst -= xpr - A*B" turns into "dst -= xpr; dst += A*B".
#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
  template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
  struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
                                              const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
    : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
  {}

// All six combinations of {=,+=,-=} with {+,-}.
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_sum_op,add_assign_op);
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);

EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_difference_op,sub_assign_op);
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
244 
245 //----------------------------------------
246 
// Inner product (row-vector * col-vector): the 1x1 result is computed as the
// sum of the coefficient-wise product of lhs^T and rhs.
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
  }

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
  }

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
};
266 
267 
268 
272 // Column major result
273 template<typename Dst, typename Lhs, typename Rhs, typename Func>
274 void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
275 {
276  evaluator<Rhs> rhsEval(rhs);
277  ei_declare_local_nested_eval(Lhs,lhs,Rhs::SizeAtCompileTime,actual_lhs);
278  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
279  // FIXME not very good if rhs is real and lhs complex while alpha is real too
280  const Index cols = dst.cols();
281  for (Index j=0; j<cols; ++j)
282  func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
283 }
284 
285 // Row major result
286 template<typename Dst, typename Lhs, typename Rhs, typename Func>
287 void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
288 {
289  evaluator<Lhs> lhsEval(lhs);
290  ei_declare_local_nested_eval(Rhs,rhs,Lhs::SizeAtCompileTime,actual_rhs);
291  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
292  // FIXME not very good if lhs is real and rhs complex while alpha is real too
293  const Index rows = dst.rows();
294  for (Index i=0; i<rows; ++i)
295  func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
296 }
297 
// Outer product (col-vector * row-vector): dispatches to
// outer_product_selector_run above, with a small functor selecting the
// assignment kind (=, +=, -=, or scaled +=) applied per row/column.
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
{
  // Compile-time tag selecting the row-major vs column-major loop above.
  template<typename T> struct is_row_major : std::conditional_t<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type> {};
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
  struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
  struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
  struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
  // Scaled accumulation: dst += alpha * src.
  struct adds {
    Scalar m_scale;
    explicit adds(const Scalar& s) : m_scale(s) {}
    template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
      dst.const_cast_derived() += m_scale * src;
    }
  };

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
  }

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
  }

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
  }

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
  }

};
341 
342 
// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
// Derived classes only need to implement scaleAndAddTo(dst, lhs, rhs, alpha).
template<typename Lhs, typename Rhs, typename Derived>
struct generic_product_impl_base
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // dst = lhs * rhs  ==  zero-init then dst += 1 * lhs*rhs
  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }

  // dst += lhs * rhs
  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }

  // dst -= lhs * rhs  ==  dst += (-1) * lhs*rhs
  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }

  // Static dispatch to the derived class implementation.
  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }

};
366 
// Matrix-vector product (GEMV): forwards to the dense gemv selector, with the
// matrix operand on the left or on the right depending on which side is the
// vector.
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
{
  typedef typename nested_eval<Lhs,1>::type LhsNested;
  typedef typename nested_eval<Rhs,1>::type RhsNested;
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
  // If the lhs is the vector, the matrix (rhs) acts "on the left" of it.
  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
  // The matrix operand, i.e., the non-vector side.
  typedef internal::remove_all_t<std::conditional_t<int(Side)==OnTheRight,LhsNested,RhsNested>> MatrixType;

  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    // Fallback to inner product if both the lhs and rhs is a runtime vector.
    if (lhs.rows() == 1 && rhs.cols() == 1) {
      dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
      return;
    }
    LhsNested actual_lhs(lhs);
    RhsNested actual_rhs(rhs);
    internal::gemv_dense_selector<Side,
                                  (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
                                  bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
                                 >::run(actual_lhs, actual_rhs, dst, alpha);
  }
};
393 
// Coefficient-based product: evaluate via lazyProduct instead of a GEMM-style
// kernel. Used for small/fixed-size products and as a fallback from GEMM.
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // Same as: dst.noalias() = lhs.lazyProduct(rhs);
    // but easier on the compiler side
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
  }

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // dst.noalias() += lhs.lazyProduct(rhs);
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
  }

  template<typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // dst.noalias() -= lhs.lazyProduct(rhs);
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
  }

  // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
  // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
  //   dst {,+,-}= (s1*A)*(B*s2)
  // will be rewritten as:
  //   dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
  // There are at least four benefits of doing so:
  //  1 - huge performance gain for heap-allocated matrix types as it save costly allocations.
  //  2 - it is faster than simply by-passing the heap allocation through stack allocation.
  //  3 - it makes this fallback consistent with the heavy GEMM routine.
  //  4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
  //      (see https://stackoverflow.com/questions/54738495)
  // For small fixed sizes matrices, however, the gains are less obvious, it is sometimes x2 faster, but sometimes x3 slower,
  // and the behavior depends also a lot on the compiler... This is why this re-writing strategy is currently
  // enabled only when falling back from the main GEMM.
  template<typename Dst, typename Func>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func &func)
  {
    enum {
      HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
      ConjLhs = blas_traits<Lhs>::NeedToConjugate,
      ConjRhs = blas_traits<Rhs>::NeedToConjugate
    };
    // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
    // this is important for real*complex_mat
    Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);

    // Dispatch on whether a scalar factor was extracted (true_type overload
    // multiplies the lazy product by s, false_type overload ignores s).
    eval_dynamic_impl(dst,
                      blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
                      blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
                      func,
                      actualAlpha,
                      std::conditional_t<HasScalarFactor,true_type,false_type>());
  }

protected:

  // No scalar factor extracted: s is known to be 1 and is ignored.
  // NOTE(review): the Doxygen rendering this was recovered from hides two
  // lines of the original body here (likely an EIGEN_UNUSED_VARIABLE(s) or a
  // comment) — verify against the upstream source.
  template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type)
  {
    call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
  }

  // Scalar factor present: apply it to the lazy product.
  template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s, true_type)
  {
    call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
  }
};
474 
// This specialization enforces the use of a coefficient-based evaluation strategy
// (identical to CoeffBasedProductMode; the distinct tag only affects dispatch).
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
  : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
479 
480 // Case 2: Evaluate coeff by coeff
481 //
482 // This is mostly taken from CoeffBasedProduct.h
483 // The main difference is that we add an extra argument to the etor_product_*_impl::run() function
484 // for the inner dimension of the product, because evaluator object do not know their size.
485 
// Forward declarations of the per-coefficient and per-packet product kernels;
// specializations follow below (packet ones) and elsewhere.
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl;

template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl;
491 
// Lazy (coefficient-based) product evaluator: coefficients are computed on
// demand as dot products of an lhs row and an rhs column, without a temporary.
template<typename Lhs, typename Rhs, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
    : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
{
  typedef Product<Lhs, Rhs, LazyProduct> XprType;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit product_evaluator(const XprType& xpr)
    : m_lhs(xpr.lhs()),
      m_rhs(xpr.rhs()),
      m_lhsImpl(m_lhs),  // FIXME the creation of the evaluator objects should result in a no-op, but check that!
      m_rhsImpl(m_rhs),  // Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
                         // or perhaps declare them on the fly on the packet method... We have experiment to check what's best.
      m_innerDim(xpr.lhs().cols())
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
#if 0
    std::cerr << "LhsOuterStrideBytes=  " << LhsOuterStrideBytes << "\n";
    std::cerr << "RhsOuterStrideBytes=  " << RhsOuterStrideBytes << "\n";
    std::cerr << "LhsAlignment=         " << LhsAlignment << "\n";
    std::cerr << "RhsAlignment=         " << RhsAlignment << "\n";
    std::cerr << "CanVectorizeLhs=      " << CanVectorizeLhs << "\n";
    std::cerr << "CanVectorizeRhs=      " << CanVectorizeRhs << "\n";
    std::cerr << "CanVectorizeInner=    " << CanVectorizeInner << "\n";
    std::cerr << "EvalToRowMajor=       " << EvalToRowMajor << "\n";
    std::cerr << "Alignment=            " << Alignment << "\n";
    std::cerr << "Flags=                " << Flags << "\n";
#endif
  }

  // Everything below here is taken from CoeffBasedProduct.h

  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;

  typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
  typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;

  typedef evaluator<LhsNestedCleaned> LhsEtorType;
  typedef evaluator<RhsNestedCleaned> RhsEtorType;

  enum {
    RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
    ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
    // Contraction dimension (lhs cols == rhs rows); pick the most static one.
    InnerSize = min_size_prefer_fixed(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
    MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
  };

  typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
  typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;

  enum {

    LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
    RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
    // Cost of one result coefficient: InnerSize multiplies + reads, plus
    // InnerSize-1 additions; HugeCost if the inner dimension is dynamic.
    CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
                  : InnerSize == Dynamic ? HugeCost
                  : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

    Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,

    LhsFlags = LhsEtorType::Flags,
    RhsFlags = RhsEtorType::Flags,

    LhsRowMajor = LhsFlags & RowMajorBit,
    RhsRowMajor = RhsFlags & RowMajorBit,

    LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
    RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,

    // Here, we don't care about alignment larger than the usable packet size.
    LhsAlignment = plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
    RhsAlignment = plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),

    SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,

    // Packet access on the result: columns of the product can be vectorized if
    // rhs is row-major and packet-accessible; rows if lhs is col-major.
    CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),

    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                   : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
                   : (bool(RhsRowMajor) && !CanVectorizeLhs),

    Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          // TODO enable vectorization for mixed types
          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
          | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),

    LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
    RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),

    // Alignment can only be claimed if the outer stride preserves it.
    Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment))!=0 ? 0 : LhsAlignment)
              : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment))!=0 ? 0 : RhsAlignment)
              : 0,

    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
     */
    CanVectorizeInner =    SameType
                        && LhsRowMajor
                        && (!RhsRowMajor)
                        && (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
                        && (int(InnerSize) % packet_traits<Scalar>::size == 0)
  };

  // Result coefficient (i,j) = dot(lhs.row(i), rhs.col(j)).
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
  {
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
   * which is why we don't set the LinearAccessBit.
   * TODO: this seems possible when the result is a vector
   */
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const CoeffReturnType coeff(Index index) const
  {
    // Map the linear index onto the vector-shaped result.
    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

  // Packet of result coefficients: computed by the etor_product_packet_impl
  // kernel, unrolled over the inner dimension when cheap enough.
  template<int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const PacketType packet(Index row, Index col) const
  {
    PacketType res;
    typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
                                     Unroll ? int(InnerSize) : Dynamic,
                                     LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
    PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
    return res;
  }

  template<int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const PacketType packet(Index index) const
  {
    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
    return packet<LoadMode,PacketType>(row,col);
  }

protected:
  add_const_on_value_type_t<LhsNested> m_lhs;
  add_const_on_value_type_t<RhsNested> m_rhs;

  LhsEtorType m_lhsImpl;
  RhsEtorType m_rhsImpl;

  // TODO: Get rid of m_innerDim if known at compile time
  Index m_innerDim;
};
654 
// A DefaultProduct in lazy-coeff-based mode is evaluated exactly like the
// corresponding LazyProduct, except it must be evaluated before nesting.
template<typename Lhs, typename Rhs>
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
{
  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
  enum {
    Flags = Base::Flags | EvalBeforeNestingBit
  };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit product_evaluator(const XprType& xpr)
    // Rewrap the operands into a lazy product for the base evaluator.
    : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
  {}
};
670 
671 
// Unrolled packet kernel, row-major result: recursively accumulate
// lhs(row,k) * rhs.packet(k,col) for k = 0..UnrollingIndex-1.
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
  {
    etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
    res =  pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
  }
};
684 
// Column-major counterpart of the unrolled packet computation:
// here the lhs contributes a full packet and the rhs coefficient is broadcast.
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
  {
    etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
    res =  pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
  }
};
694 
// Recursion terminator for inner size 1 (row-major): a single multiply, no accumulation.
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
  {
    res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
  }
};
703 
// Recursion terminator for inner size 1 (column-major): a single multiply, no accumulation.
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
  {
    res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
  }
};
712 
// Degenerate case of an empty inner dimension (row-major): the product is zero.
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
  {
    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
  }
};
721 
// Degenerate case of an empty inner dimension (column-major): the product is zero.
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
  {
    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
  }
};
730 
// Runtime-sized inner dimension (row-major): plain fma loop over innerDim,
// starting from a zero accumulator. The accumulation order is part of the
// observable floating-point behavior, so it must not be reordered.
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
  {
    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
    for(Index i = 0; i < innerDim; ++i)
      res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
  }
};
741 
// Runtime-sized inner dimension (column-major): same fma loop as the row-major
// variant, with packet access on the lhs and broadcast access on the rhs.
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
  {
    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
    for(Index i = 0; i < innerDim; ++i)
      res =  pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
  }
};
752 
753 
754 
// Forward declaration of the triangular product kernel; the actual
// implementations live elsewhere (selected on mode, side and vector-ness).
template<int Mode, bool LhsIsTriangular,
         typename Lhs, bool LhsIsVector,
         typename Rhs, bool RhsIsVector>
struct triangular_product_impl;
761 
// triangular * dense: dispatch to the dedicated triangular kernel, stripping
// the triangular-view wrapper from the lhs (nestedExpression()).
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // Computes dst += alpha * lhs * rhs (evalTo/addTo/subTo come from the base).
  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
        ::run(dst, lhs.nestedExpression(), rhs, alpha);
  }
};
775 
// dense * triangular: mirror of the previous specialization, stripping the
// triangular-view wrapper from the rhs this time.
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // Computes dst += alpha * lhs * rhs via the triangular kernel.
  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
  }
};
788 
789 
790 
// Forward declaration of the selfadjoint product kernel; implementations are
// selected on the side carrying the selfadjoint view and on vector-ness.
template <typename Lhs, int LhsMode, bool LhsIsVector,
          typename Rhs, int RhsMode, bool RhsIsVector>
struct selfadjoint_product_impl;
796 
// selfadjoint * dense: dispatch to the selfadjoint kernel, stripping the
// selfadjoint-view wrapper from the lhs.
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // Computes dst += alpha * lhs * rhs (evalTo/addTo/subTo come from the base).
  template<typename Dest>
  static EIGEN_DEVICE_FUNC
  void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
  }
};
810 
// dense * selfadjoint: mirror of the previous specialization, stripping the
// selfadjoint-view wrapper from the rhs this time.
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // Computes dst += alpha * lhs * rhs via the selfadjoint kernel.
  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
  }
};
823 
824 
825 
829 template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
830 struct diagonal_product_evaluator_base
831  : evaluator_base<Derived>
832 {
833  typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
834 public:
835  enum {
836  CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
837 
838  MatrixFlags = evaluator<MatrixType>::Flags,
839  DiagFlags = evaluator<DiagonalType>::Flags,
840 
841  StorageOrder_ = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
842  : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
843  : MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
844  SameStorageOrder_ = StorageOrder_ == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
845 
846  ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft)
847  ||(int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)),
848  SameTypes_ = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
849  // FIXME currently we need same types, but in the future the next rule should be the one
850  //Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ && bool(int(DiagFlags)&PacketAccessBit))),
851  Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit)
852  && SameTypes_
853  && (SameStorageOrder_ || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
854  && (ScalarAccessOnDiag_ || (bool(int(DiagFlags)&PacketAccessBit))),
855  LinearAccessMask_ = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
856  Flags = ((HereditaryBits|LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0),
857  Alignment = evaluator<MatrixType>::Alignment,
858 
859  AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
860  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
861  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
862  };
863 
864  EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
865  : m_diagImpl(diag), m_matImpl(mat)
866  {
868  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
869  }
870 
871  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
872  {
873  if(AsScalarProduct)
874  return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
875  else
876  return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
877  }
878 
879 protected:
880  template<int LoadMode,typename PacketType>
881  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
882  {
883  return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
884  internal::pset1<PacketType>(m_diagImpl.coeff(id)));
885  }
886 
887  template<int LoadMode,typename PacketType>
888  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
889  {
890  enum {
891  InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
892  DiagonalPacketLoadMode = plain_enum_min(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
893  };
894  return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
895  m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
896  }
897 
898  evaluator<DiagonalType> m_diagImpl;
899  evaluator<MatrixType> m_matImpl;
900 };
901 
// diagonal * dense
// Evaluator for D * M: each row of M is scaled by the matching diagonal
// coefficient, hence the diagonal is indexed by the row.
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
  : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
{
  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
  using Base::m_diagImpl;
  using Base::m_matImpl;
  using Base::coeff;
  typedef typename Base::Scalar Scalar;

  typedef Product<Lhs, Rhs, ProductKind> XprType;
  typedef typename XprType::PlainObject PlainObject;
  typedef typename Lhs::DiagonalVectorType DiagonalType;


  enum { StorageOrder = Base::StorageOrder_ };

  // The base evaluates the dense rhs together with the lhs's diagonal vector.
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
    : Base(xpr.rhs(), xpr.lhs().diagonal())
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
  {
    return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
  }

#ifndef EIGEN_GPUCC
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
  {
    // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
    // See also similar calls below.
    // Row-major packets span several rows' worth of diagonal entries, hence
    // the broadcast (true_type) path is only valid for row-major storage.
    return this->template packet_impl<LoadMode,PacketType>(row,col, row,
                                 std::conditional_t<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>());
  }

  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
  {
    // Linear access: map the 1D index onto the inner dimension.
    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
  }
#endif
};
947 
// dense * diagonal
// Evaluator for M * D: each column of M is scaled by the matching diagonal
// coefficient, hence the diagonal is indexed by the column.
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
  : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
{
  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
  using Base::m_diagImpl;
  using Base::m_matImpl;
  using Base::coeff;
  typedef typename Base::Scalar Scalar;

  typedef Product<Lhs, Rhs, ProductKind> XprType;
  typedef typename XprType::PlainObject PlainObject;

  enum { StorageOrder = Base::StorageOrder_ };

  // The base evaluates the dense lhs together with the rhs's diagonal vector.
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
    : Base(xpr.lhs(), xpr.rhs().diagonal())
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
  {
    return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
  }

#ifndef EIGEN_GPUCC
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
  {
    // Column-major packets stay within one column, so a single broadcast
    // diagonal coefficient (true_type path) suffices in that case.
    return this->template packet_impl<LoadMode,PacketType>(row,col, col,
                                 std::conditional_t<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>());
  }

  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
  {
    // Linear access: map the 1D index onto the inner dimension.
    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
  }
#endif
};
989 
990 
// Forward declaration: applies a permutation matrix to an expression, either
// on the left or right side, optionally transposed (i.e. inverted).
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct permutation_matrix_product;
1001 
// Dense implementation of permutation * matrix (Side==OnTheLeft permutes
// rows, Side==OnTheRight permutes columns; Transposed applies the inverse).
template<typename ExpressionType, int Side, bool Transposed>
struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
{
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
  typedef remove_all_t<MatrixType> MatrixTypeCleaned;

  template<typename Dest, typename PermutationType>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
  {
    MatrixType mat(xpr);
    const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
    // FIXME we need an is_same for expression that is not sensitive to constness. For instance
    // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
    //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
    if(is_same_dense(dst, mat))
    {
      // apply the permutation inplace: decompose it into disjoint cycles and
      // follow each cycle swapping rows/columns, using a mask to remember
      // which positions were already visited.
      Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
      mask.fill(false);
      Index r = 0;
      while(r < perm.size())
      {
        // search for the next seed
        while(r<perm.size() && mask[r]) r++;
        if(r>=perm.size())
          break;
        // we got one, let's follow it until we are back to the seed
        Index k0 = r++;
        Index kPrev = k0;
        mask.coeffRef(k0) = true;
        for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
        {
                  Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
          .swap(  Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
                       (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));

          mask.coeffRef(k) = true;
          kPrev = k;
        }
      }
    }
    else
    {
      // out-of-place: copy each row/column of the source into its permuted
      // position (or from its permuted position, depending on side/transpose).
      for(Index i = 0; i < n; ++i)
      {
        Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
             (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)

        =

        Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
             (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
      }
    }
  }
};
1058 
// permutation * matrix: permute the rows of rhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
  }
};
1068 
// matrix * permutation: permute the columns of lhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
  }
};
1078 
// inverse(permutation) * matrix: apply the transposed (= inverse) permutation
// to the rows of rhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
  }
};
1088 
// matrix * inverse(permutation): apply the transposed (= inverse) permutation
// to the columns of lhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
  {
    permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
  }
};
1098 
1099 
1100 
1104 // FIXME could we unify Transpositions and Permutation into a single "shape"??
1105 
// Applies a sequence of transpositions to an expression. Unlike the
// permutation case this is always a sequence of row/column swaps; Transposed
// replays them in reverse order (which yields the inverse).
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct transposition_matrix_product
{
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
  typedef remove_all_t<MatrixType> MatrixTypeCleaned;

  template<typename Dest, typename TranspositionType>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
  {
    MatrixType mat(xpr);
    typedef typename TranspositionType::StorageIndex StorageIndex;
    const Index size = tr.size();
    StorageIndex j = 0;

    // Copy first unless the destination already aliases the source, then swap in place.
    if(!is_same_dense(dst,mat))
      dst = mat;

    // Forward order for the direct product, backward for the transposed one.
    for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
      if(Index(j=tr.coeff(k))!=k)
      {
        if(Side==OnTheLeft)        dst.row(k).swap(dst.row(j));
        else if(Side==OnTheRight)  dst.col(k).swap(dst.col(j));
      }
  }
};
1135 
// transpositions * matrix: swap the rows of rhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
  }
};
1145 
// matrix * transpositions: swap the columns of lhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
  }
};
1155 
1156 
// transpose(transpositions) * matrix: replay the swaps in reverse on the rows of rhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
  }
};
1166 
// matrix * transpose(transpositions): replay the swaps in reverse on the columns of lhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
  {
    transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
  }
};
1176 
1177 
// skew-symmetric * matrix: fall back to the dense product by viewing the
// skew-symmetric operand through its dense matrix type.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    generic_product_impl<typename Lhs::DenseMatrixType , Rhs, DenseShape, MatrixShape, ProductTag>::evalTo(dst, lhs, rhs);
  }
};
1190 
// matrix * skew-symmetric: fall back to the dense product on the rhs side.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, SkewSymmetricShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    generic_product_impl<Lhs, typename Rhs::DenseMatrixType, MatrixShape, DenseShape, ProductTag>::evalTo(dst, lhs, rhs);
  }
};
1200 
// skew-symmetric * skew-symmetric: fall back to the dense product on both sides.
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, SkewSymmetricShape, ProductTag>
{
  template<typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    generic_product_impl<typename Lhs::DenseMatrixType, typename Rhs::DenseMatrixType, DenseShape, DenseShape, ProductTag>::evalTo(dst, lhs, rhs);
  }
};
1210 
1211 } // end namespace internal
1212 
1213 } // end namespace Eigen
1214 
#endif // EIGEN_PRODUCTEVALUATORS_H
int n
RowXpr row(Index i)
This is the const version of row(). *‍/.
ColXpr col(Index i)
This is the const version of col().
IndexedView_or_Block operator()(const RowIndices &rowIndices, const ColIndices &colIndices)
#define eigen_internal_assert(x)
Definition: Macros.h:908
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:957
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:883
#define eigen_assert(x)
Definition: Macros.h:902
#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR, EXPR, OPNAME)
Definition: Macros.h:1207
#define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME)
Definition: Memory.h:853
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
#define EIGEN_UNROLLING_LIMIT
Definition: Settings.h:24
#define EIGEN_INTERNAL_CHECK_COST_VALUE(C)
Definition: StaticAssert.h:112
Matrix< float, 1, Dynamic > MatrixType
@ Aligned16
Definition: Constants.h:237
@ ColMajor
Definition: Constants.h:321
@ RowMajor
Definition: Constants.h:323
@ OnTheLeft
Definition: Constants.h:334
@ OnTheRight
Definition: Constants.h:336
const unsigned int ActualPacketAccessBit
Definition: Constants.h:107
const unsigned int PacketAccessBit
Definition: Constants.h:96
const unsigned int LinearAccessBit
Definition: Constants.h:132
const unsigned int EvalBeforeNestingBit
Definition: Constants.h:72
const unsigned int RowMajorBit
Definition: Constants.h:68
constexpr int plain_enum_min(A a, B b)
Definition: Meta.h:516
constexpr int plain_enum_max(A a, B b)
Definition: Meta.h:524
void call_restricted_packet_assignment_no_alias(Dst &dst, const Src &src, const Func &func)
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op, add_assign_op)
Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Packet pmul(const Packet &a, const Packet &b)
constexpr int min_size_prefer_fixed(A a, B b)
Definition: Meta.h:553
EIGEN_CONSTEXPR void call_assignment_no_alias(Dst &dst, const Src &src, const Func &func)
void outer_product_selector_run(Dst &dst, const Lhs &lhs, const Rhs &rhs, const Func &func, const false_type &)
typename remove_all< T >::type remove_all_t
Definition: Meta.h:119
bool is_same_dense(const T1 &mat1, const T2 &mat2, std::enable_if_t< possibly_same_dense< T1, T2 >::value > *=0)
Definition: XprHelper.h:745
bool is_exactly_one(const X &x)
Definition: Meta.h:482
: InteropHeaders
Definition: Core:139
@ GemvProduct
Definition: Constants.h:504
@ LazyProduct
Definition: Constants.h:504
@ InnerProduct
Definition: Constants.h:504
@ DefaultProduct
Definition: Constants.h:504
@ CoeffBasedProductMode
Definition: Constants.h:504
@ OuterProduct
Definition: Constants.h:504
@ LazyCoeffBasedProductMode
Definition: Constants.h:504
const unsigned int HereditaryBits
Definition: Constants.h:197
const int HugeCost
Definition: Constants.h:46
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
const int Dynamic
Definition: Constants.h:24
std::ptrdiff_t j