SparseDenseProduct.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_SPARSEDENSEPRODUCT_H
11 #define EIGEN_SPARSEDENSEPRODUCT_H
12 
13 #include "./InternalHeaderCheck.h"
14 
15 namespace Eigen {
16 
17 namespace internal {
18 
19 template <> struct product_promote_storage_type<Sparse,Dense, OuterProduct> { typedef Sparse ret; };
20 template <> struct product_promote_storage_type<Dense,Sparse, OuterProduct> { typedef Sparse ret; };
21 
22 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,
23  typename AlphaType,
24  int LhsStorageOrder = ((SparseLhsType::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor,
25  bool ColPerCol = ((DenseRhsType::Flags&RowMajorBit)==0) || DenseRhsType::ColsAtCompileTime==1>
26 struct sparse_time_dense_product_impl;
27 
28 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
29 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, true>
30 {
31  typedef internal::remove_all_t<SparseLhsType> Lhs;
32  typedef internal::remove_all_t<DenseRhsType> Rhs;
33  typedef internal::remove_all_t<DenseResType> Res;
34  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
35  typedef evaluator<Lhs> LhsEval;
36  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
37  {
38  LhsEval lhsEval(lhs);
39 
40  Index n = lhs.outerSize();
41 #ifdef EIGEN_HAS_OPENMP
43  Index threads = Eigen::nbThreads();
44 #endif
45 
46  for(Index c=0; c<rhs.cols(); ++c)
47  {
48 #ifdef EIGEN_HAS_OPENMP
49  // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
50  // It basically represents the minimal amount of work to be done to be worth it.
51  if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
52  {
53  #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
54  for(Index i=0; i<n; ++i)
55  processRow(lhsEval,rhs,res,alpha,i,c);
56  }
57  else
58 #endif
59  {
60  for(Index i=0; i<n; ++i)
61  processRow(lhsEval,rhs,res,alpha,i,c);
62  }
63  }
64  }
65 
66  static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha, Index i, Index col)
67  {
68  // Two accumulators, which breaks the dependency chain on the accumulator
69  // and allows more instruction-level parallelism in the following loop
70  typename Res::Scalar tmp_a(0);
71  typename Res::Scalar tmp_b(0);
72  for(LhsInnerIterator it(lhsEval,i); it ;++it) {
73  tmp_a += it.value() * rhs.coeff(it.index(), col);
74  ++it;
75  if(it) {
76  tmp_b += it.value() * rhs.coeff(it.index(), col);
77  }
78  }
79  res.coeffRef(i, col) += alpha * (tmp_a + tmp_b);
80  }
81 
82 };
83 
84 // FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
85 // -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
86 // template<typename T1, typename T2/*, int Options_, typename StrideType_*/>
87 // struct ScalarBinaryOpTraits<T1, Ref<T2/*, Options_, StrideType_*/> >
88 // {
89 // enum {
90 // Defined = 1
91 // };
92 // typedef typename CwiseUnaryOp<scalar_multiple2_op<T1, typename T2::Scalar>, T2>::PlainObject ReturnType;
93 // };
94 
95 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
96 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType, ColMajor, true>
97 {
98  typedef internal::remove_all_t<SparseLhsType> Lhs;
99  typedef internal::remove_all_t<DenseRhsType> Rhs;
100  typedef internal::remove_all_t<DenseResType> Res;
101  typedef evaluator<Lhs> LhsEval;
102  typedef typename LhsEval::InnerIterator LhsInnerIterator;
103  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
104  {
105  LhsEval lhsEval(lhs);
106  for(Index c=0; c<rhs.cols(); ++c)
107  {
108  for(Index j=0; j<lhs.outerSize(); ++j)
109  {
110 // typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
111  typename ScalarBinaryOpTraits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));
112  for(LhsInnerIterator it(lhsEval,j); it ;++it)
113  res.coeffRef(it.index(),c) += it.value() * rhs_j;
114  }
115  }
116  }
117 };
118 
119 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
120 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, false>
121 {
122  typedef internal::remove_all_t<SparseLhsType> Lhs;
123  typedef internal::remove_all_t<DenseRhsType> Rhs;
124  typedef internal::remove_all_t<DenseResType> Res;
125  typedef evaluator<Lhs> LhsEval;
126  typedef typename LhsEval::InnerIterator LhsInnerIterator;
127  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
128  {
129  Index n = lhs.rows();
130  LhsEval lhsEval(lhs);
131 
132 #ifdef EIGEN_HAS_OPENMP
134  Index threads = Eigen::nbThreads();
135  // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
136  // It basically represents the minimal amount of work to be done to be worth it.
137  if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000)
138  {
139  #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
140  for(Index i=0; i<n; ++i)
141  processRow(lhsEval,rhs,res,alpha,i);
142  }
143  else
144 #endif
145  {
146  for(Index i=0; i<n; ++i)
147  processRow(lhsEval, rhs, res, alpha, i);
148  }
149  }
150 
151  static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha, Index i)
152  {
153  typename Res::RowXpr res_i(res.row(i));
154  for(LhsInnerIterator it(lhsEval,i); it ;++it)
155  res_i += (alpha*it.value()) * rhs.row(it.index());
156  }
157 };
158 
159 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
160 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, ColMajor, false>
161 {
162  typedef internal::remove_all_t<SparseLhsType> Lhs;
163  typedef internal::remove_all_t<DenseRhsType> Rhs;
164  typedef internal::remove_all_t<DenseResType> Res;
165  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
166  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
167  {
168  evaluator<Lhs> lhsEval(lhs);
169  for(Index j=0; j<lhs.outerSize(); ++j)
170  {
171  typename Rhs::ConstRowXpr rhs_j(rhs.row(j));
172  for(LhsInnerIterator it(lhsEval,j); it ;++it)
173  res.row(it.index()) += (alpha*it.value()) * rhs_j;
174  }
175  }
176 };
177 
178 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,typename AlphaType>
179 inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
180 {
181  sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType>::run(lhs, rhs, res, alpha);
182 }
183 
184 } // end namespace internal
185 
186 namespace internal {
187 
188 template<typename Lhs, typename Rhs, int ProductType>
189 struct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
190  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SparseShape,DenseShape,ProductType> >
191 {
192  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
193 
194  template<typename Dest>
195  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
196  {
197  typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? 1 : Rhs::ColsAtCompileTime>::type LhsNested;
198  typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==0) ? 1 : Dynamic>::type RhsNested;
199  LhsNested lhsNested(lhs);
200  RhsNested rhsNested(rhs);
201  internal::sparse_time_dense_product(lhsNested, rhsNested, dst, alpha);
202  }
203 };
204 
205 template<typename Lhs, typename Rhs, int ProductType>
206 struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, DenseShape, ProductType>
207  : generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
208 {};
209 
210 template<typename Lhs, typename Rhs, int ProductType>
211 struct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
212  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SparseShape,ProductType> >
213 {
214  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
215 
216  template<typename Dst>
217  static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
218  {
219  typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? Dynamic : 1>::type LhsNested;
220  typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==RowMajorBit) ? 1 : Lhs::RowsAtCompileTime>::type RhsNested;
221  LhsNested lhsNested(lhs);
222  RhsNested rhsNested(rhs);
223 
224  // transpose everything
225  Transpose<Dst> dstT(dst);
226  internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
227  }
228 };
229 
230 template<typename Lhs, typename Rhs, int ProductType>
231 struct generic_product_impl<Lhs, Rhs, DenseShape, SparseTriangularShape, ProductType>
232  : generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
233 {};
234 
235 template<typename LhsT, typename RhsT, bool NeedToTranspose>
236 struct sparse_dense_outer_product_evaluator
237 {
238 protected:
239  typedef std::conditional_t<NeedToTranspose,RhsT,LhsT> Lhs1;
240  typedef std::conditional_t<NeedToTranspose,LhsT,RhsT> ActualRhs;
241  typedef Product<LhsT,RhsT,DefaultProduct> ProdXprType;
242 
243  // if the actual left-hand side is a dense vector,
244  // then build a sparse-view so that we can seamlessly iterate over it.
245  typedef std::conditional_t<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
246  Lhs1, SparseView<Lhs1> > ActualLhs;
247  typedef std::conditional_t<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
248  Lhs1 const&, SparseView<Lhs1> > LhsArg;
249 
250  typedef evaluator<ActualLhs> LhsEval;
251  typedef evaluator<ActualRhs> RhsEval;
252  typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator;
253  typedef typename ProdXprType::Scalar Scalar;
254 
255 public:
256  enum {
257  Flags = NeedToTranspose ? RowMajorBit : 0,
258  CoeffReadCost = HugeCost
259  };
260 
261  class InnerIterator : public LhsIterator
262  {
263  public:
264  InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer)
265  : LhsIterator(xprEval.m_lhsXprImpl, 0),
266  m_outer(outer),
267  m_empty(false),
268  m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind() ))
269  {}
270 
271  EIGEN_STRONG_INLINE Index outer() const { return m_outer; }
272  EIGEN_STRONG_INLINE Index row() const { return NeedToTranspose ? m_outer : LhsIterator::index(); }
273  EIGEN_STRONG_INLINE Index col() const { return NeedToTranspose ? LhsIterator::index() : m_outer; }
274 
275  EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; }
276  EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }
277 
278  protected:
279  Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const
280  {
281  return rhs.coeff(outer);
282  }
283 
284  Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse())
285  {
286  typename RhsEval::InnerIterator it(rhs, outer);
287  if (it && it.index()==0 && it.value()!=Scalar(0))
288  return it.value();
289  m_empty = true;
290  return Scalar(0);
291  }
292 
293  Index m_outer;
294  bool m_empty;
295  Scalar m_factor;
296  };
297 
298  sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs)
299  : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
300  {
301  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
302  }
303 
304  // transpose case
305  sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs)
306  : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
307  {
308  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
309  }
310 
311 protected:
312  const LhsArg m_lhs;
313  evaluator<ActualLhs> m_lhsXprImpl;
314  evaluator<ActualRhs> m_rhsXprImpl;
315 };
316 
317 // sparse * dense outer product
318 template<typename Lhs, typename Rhs>
319 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape>
320  : sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor>
321 {
322  typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> Base;
323 
324  typedef Product<Lhs, Rhs> XprType;
325  typedef typename XprType::PlainObject PlainObject;
326 
327  explicit product_evaluator(const XprType& xpr)
328  : Base(xpr.lhs(), xpr.rhs())
329  {}
330 
331 };
332 
333 template<typename Lhs, typename Rhs>
334 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape>
335  : sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor>
336 {
337  typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> Base;
338 
339  typedef Product<Lhs, Rhs> XprType;
340  typedef typename XprType::PlainObject PlainObject;
341 
342  explicit product_evaluator(const XprType& xpr)
343  : Base(xpr.lhs(), xpr.rhs())
344  {}
345 
346 };
347 
348 } // end namespace internal
349 
350 } // end namespace Eigen
351 
352 #endif // EIGEN_SPARSEDENSEPRODUCT_H
int n
RowXpr row(Index i)
This is the const version of row(). *‍/.
ColXpr col(Index i)
This is the const version of col().
Array33i c
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
#define EIGEN_INTERNAL_CHECK_COST_VALUE(C)
Definition: StaticAssert.h:112
@ ColMajor
Definition: Constants.h:321
@ RowMajor
Definition: Constants.h:323
const unsigned int RowMajorBit
Definition: Constants.h:68
void sparse_time_dense_product(const SparseLhsType &lhs, const DenseRhsType &rhs, DenseResType &res, const AlphaType &alpha)
: InteropHeaders
Definition: Core:139
@ DefaultProduct
Definition: Constants.h:504
@ OuterProduct
Definition: Constants.h:504
const int HugeCost
Definition: Constants.h:46
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
void initParallel()
Definition: Parallelizer.h:51
int nbThreads()
Definition: Parallelizer.h:61
const int Dynamic
Definition: Constants.h:24
std::ptrdiff_t j