SparseLU_panel_bmod.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
5 // Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 /*
12 
13  * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU
14 
15  * -- SuperLU routine (version 3.0) --
16  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
17  * and Lawrence Berkeley National Lab.
18  * October 15, 2003
19  *
20  * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
21  *
22  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
23  * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
24  *
25  * Permission is hereby granted to use or copy this program for any
26  * purpose, provided the above notices are retained on all copies.
27  * Permission to modify the code and to distribute modified code is
28  * granted, provided the above notices are retained, and a notice that
29  * the code was modified is included with the above copyright notice.
30  */
31 #ifndef SPARSELU_PANEL_BMOD_H
32 #define SPARSELU_PANEL_BMOD_H
33 
34 #include "./InternalHeaderCheck.h"
35 
36 namespace Eigen {
37 namespace internal {
38 
57 template <typename Scalar, typename StorageIndex>
58 void SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w, const Index jcol,
59  const Index nseg, ScalarVector& dense, ScalarVector& tempv,
60  IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
61 {
62 
63  Index ksub,jj,nextl_col;
64  Index fsupc, nsupc, nsupr, nrow;
65  Index krep, kfnz;
66  Index lptr; // points to the row subscripts of a supernode
67  Index luptr; // ...
68  Index segsize,no_zeros ;
69  // For each nonz supernode segment of U[*,j] in topological order
70  Index k = nseg - 1;
72 
73  for (ksub = 0; ksub < nseg; ksub++)
74  { // For each updating supernode
75  /* krep = representative of current k-th supernode
76  * fsupc = first supernodal column
77  * nsupc = number of columns in a supernode
78  * nsupr = number of rows in a supernode
79  */
80  krep = segrep(k); k--;
81  fsupc = glu.xsup(glu.supno(krep));
82  nsupc = krep - fsupc + 1;
83  nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
84  nrow = nsupr - nsupc;
85  lptr = glu.xlsub(fsupc);
86 
87  // loop over the panel columns to detect the actual number of columns and rows
88  Index u_rows = 0;
89  Index u_cols = 0;
90  for (jj = jcol; jj < jcol + w; jj++)
91  {
92  nextl_col = (jj-jcol) * m;
93  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
94 
95  kfnz = repfnz_col(krep);
96  if ( kfnz == emptyIdxLU )
97  continue; // skip any zero segment
98 
99  segsize = krep - kfnz + 1;
100  u_cols++;
101  u_rows = (std::max)(segsize,u_rows);
102  }
103 
104  if(nsupc >= 2)
105  {
106  Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
107  Map<ScalarMatrix, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
108 
109  // gather U
110  Index u_col = 0;
111  for (jj = jcol; jj < jcol + w; jj++)
112  {
113  nextl_col = (jj-jcol) * m;
114  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
115  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
116 
117  kfnz = repfnz_col(krep);
118  if ( kfnz == emptyIdxLU )
119  continue; // skip any zero segment
120 
121  segsize = krep - kfnz + 1;
122  luptr = glu.xlusup(fsupc);
123  no_zeros = kfnz - fsupc;
124 
125  Index isub = lptr + no_zeros;
126  Index off = u_rows-segsize;
127  for (Index i = 0; i < off; i++) U(i,u_col) = 0;
128  for (Index i = 0; i < segsize; i++)
129  {
130  Index irow = glu.lsub(isub);
131  U(i+off,u_col) = dense_col(irow);
132  ++isub;
133  }
134  u_col++;
135  }
136  // solve U = A^-1 U
137  luptr = glu.xlusup(fsupc);
138  Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
139  no_zeros = (krep - u_rows + 1) - fsupc;
140  luptr += lda * no_zeros + no_zeros;
141  MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
142  U = A.template triangularView<UnitLower>().solve(U);
143 
144  // update
145  luptr += u_rows;
146  MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
147  eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
148 
149  Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
150  Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
151  MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
152 
153  L.noalias() = B * U;
154 
155  // scatter U and L
156  u_col = 0;
157  for (jj = jcol; jj < jcol + w; jj++)
158  {
159  nextl_col = (jj-jcol) * m;
160  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
161  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
162 
163  kfnz = repfnz_col(krep);
164  if ( kfnz == emptyIdxLU )
165  continue; // skip any zero segment
166 
167  segsize = krep - kfnz + 1;
168  no_zeros = kfnz - fsupc;
169  Index isub = lptr + no_zeros;
170 
171  Index off = u_rows-segsize;
172  for (Index i = 0; i < segsize; i++)
173  {
174  Index irow = glu.lsub(isub++);
175  dense_col(irow) = U.coeff(i+off,u_col);
176  U.coeffRef(i+off,u_col) = 0;
177  }
178 
179  // Scatter l into SPA dense[]
180  for (Index i = 0; i < nrow; i++)
181  {
182  Index irow = glu.lsub(isub++);
183  dense_col(irow) -= L.coeff(i,u_col);
184  L.coeffRef(i,u_col) = 0;
185  }
186  u_col++;
187  }
188  }
189  else // level 2 only
190  {
191  // Sequence through each column in the panel
192  for (jj = jcol; jj < jcol + w; jj++)
193  {
194  nextl_col = (jj-jcol) * m;
195  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
196  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
197 
198  kfnz = repfnz_col(krep);
199  if ( kfnz == emptyIdxLU )
200  continue; // skip any zero segment
201 
202  segsize = krep - kfnz + 1;
203  luptr = glu.xlusup(fsupc);
204 
205  Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
206 
207  // Perform a trianglar solve and block update,
208  // then scatter the result of sup-col update to dense[]
209  no_zeros = kfnz - fsupc;
210  if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
211  else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
212  else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
213  else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
214  } // End for each column in the panel
215  }
216 
217  } // End for each updating supernode
218 } // end panel bmod
219 
220 } // end namespace internal
221 
222 } // end namespace Eigen
223 
224 #endif // SPARSELU_PANEL_BMOD_H
Matrix3f m
MatrixXcf A
MatrixXf B
MatrixXd L
Definition: LLT_example.cpp:6
#define eigen_assert(x)
Definition: Macros.h:902
RowVector3d w
bfloat16() max(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:690
static Index first_default_aligned(const DenseBase< Derived > &m)
: InteropHeaders
Definition: Core:139
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82