IDRSTABL.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2020 Chris Schoutrop <c.e.m.schoutrop@tue.nl>
5 // Copyright (C) 2020 Mischa Senders <m.j.senders@student.tue.nl>
6 // Copyright (C) 2020 Lex Kuijpers <l.kuijpers@student.tue.nl>
7 // Copyright (C) 2020 Jens Wehner <j.wehner@esciencecenter.nl>
8 // Copyright (C) 2020 Jan van Dijk <j.v.dijk@tue.nl>
9 // Copyright (C) 2020 Adithya Vijaykumar
10 //
11 // This Source Code Form is subject to the terms of the Mozilla
12 // Public License v. 2.0. If a copy of the MPL was not distributed
13 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
14 /*
15 
16 The IDR(S)Stab(L) method is a combination of IDR(S) and BiCGStab(L)
17 
18 This implementation of IDRSTABL is based on
19 1. Aihara, K., Abe, K., & Ishiwata, E. (2014). A variant of IDRstab with
20 reliable update strategies for solving sparse linear systems. Journal of
21 Computational and Applied Mathematics, 259, 244-258.
22  doi:10.1016/j.cam.2013.08.028
23  2. Aihara, K., Abe, K., & Ishiwata, E. (2015). Preconditioned
24 IDRSTABL Algorithms for Solving Nonsymmetric Linear Systems. International
25 Journal of Applied Mathematics, 45(3).
26  3. Saad, Y. (2003). Iterative Methods for Sparse Linear Systems:
27 Second Edition. Philadelphia, PA: SIAM.
28  4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family
29 of Simple and Fast Algorithms for Solving Large Nonsymmetric Systems of Linear
30 Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062.
31  doi:10.1137/070685804
32  5. Sonneveld, P. (2012). On the convergence behavior of IDR (s)
33 and related methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598.
34 
35  Right-preconditioning based on Ref. 3 is implemented here.
36 */
37 
38 #ifndef EIGEN_IDRSTABL_H
39 #define EIGEN_IDRSTABL_H
40 
41 namespace Eigen {
42 
43 namespace internal {
44 
45 template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
46 bool idrstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters,
47  typename Dest::RealScalar &tol_error, Index L, Index S) {
48  /*
49  Setup and type definitions.
50  */
51  using numext::abs;
52  using numext::sqrt;
53  typedef typename Dest::Scalar Scalar;
54  typedef typename Dest::RealScalar RealScalar;
55  typedef Matrix<Scalar, Dynamic, 1> VectorType;
56  typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> DenseMatrixType;
57 
58  const Index N = x.rows();
59 
60  Index k = 0; // Iteration counter
61  const Index maxIters = iters;
62 
63  const RealScalar rhs_norm = rhs.stableNorm();
64  const RealScalar tol = tol_error * rhs_norm;
65 
66  if (rhs_norm == 0) {
67  /*
68  If b==0, then the exact solution is x=0.
69  rhs_norm is needed for other calculations anyways, this exit is a freebie.
70  */
71  x.setZero();
72  tol_error = 0.0;
73  return true;
74  }
75  // Construct decomposition objects beforehand.
77 
78  if (S >= N || L >= N) {
79  /*
80  The matrix is very small, or the choice of L and S is very poor
81  in that case solving directly will be best.
82  */
83  lu_solver.compute(DenseMatrixType(mat));
84  x = lu_solver.solve(rhs);
85  tol_error = (rhs - mat * x).stableNorm() / rhs_norm;
86  return true;
87  }
88 
89  // Define maximum sizes to prevent any reallocation later on.
90  DenseMatrixType u(N, L + 1);
91  DenseMatrixType r(N, L + 1);
92 
93  DenseMatrixType V(N * (L + 1), S);
94 
95  VectorType alpha(S);
96  VectorType gamma(L);
97  VectorType update(N);
98 
99  /*
100  Main IDRSTABL algorithm
101  */
102  // Set up the initial residual
103  VectorType x0 = x;
104  r.col(0) = rhs - mat * x;
105  x.setZero(); // The final solution will be x0+x
106 
107  tol_error = r.col(0).stableNorm();
108 
109  // FOM = Full orthogonalisation method
110  DenseMatrixType h_FOM = DenseMatrixType::Zero(S, S - 1);
111 
112  // Construct an initial U matrix of size N x S
113  DenseMatrixType U(N * (L + 1), S);
114  for (Index col_index = 0; col_index < S; ++col_index) {
115  // Arnoldi-like process to generate a set of orthogonal vectors spanning
116  // {u,A*u,A*A*u,...,A^(S-1)*u}. This construction can be combined with the
117  // Full Orthogonalization Method (FOM) from Ref.3 to provide a possible
118  // early exit with no additional MV.
119  if (col_index != 0) {
120  /*
121  Modified Gram-Schmidt strategy:
122  */
123  VectorType w = mat * precond.solve(u.col(0));
124  for (Index i = 0; i < col_index; ++i) {
125  auto v = U.col(i).head(N);
126  h_FOM(i, col_index - 1) = v.dot(w);
127  w -= h_FOM(i, col_index - 1) * v;
128  }
129  u.col(0) = w;
130  h_FOM(col_index, col_index - 1) = u.col(0).stableNorm();
131 
132  if (abs(h_FOM(col_index, col_index - 1)) != RealScalar(0)) {
133  /*
134  This only happens if u is NOT exactly zero. In case it is exactly zero
135  it would imply that that this u has no component in the direction of the
136  current residual.
137 
138  By then setting u to zero it will not contribute any further (as it
139  should). Whereas attempting to normalize results in division by zero.
140 
141  Such cases occur if:
142  1. The basis of dimension <S is sufficient to exactly solve the linear
143  system. I.e. the current residual is in span{r,Ar,...A^{m-1}r}, where
144  (m-1)<=S.
145  2. Two vectors vectors generated from r, Ar,... are (numerically)
146  parallel.
147 
148  In case 1, the exact solution to the system can be obtained from the
149  "Full Orthogonalization Method" (Algorithm 6.4 in the book of Saad),
150  without any additional MV.
151 
152  Contrary to what one would suspect, the comparison with ==0.0 for
153  floating-point types is intended here. Any arbritary non-zero u is fine
154  to continue, however if u contains either NaN or Inf the algorithm will
155  break down.
156  */
157  u.col(0) /= h_FOM(col_index, col_index - 1);
158  }
159  } else {
160  u.col(0) = r.col(0);
161  u.col(0).normalize();
162  }
163 
164  U.col(col_index).head(N) = u.col(0);
165  }
166 
167  if (S > 1) {
168  // Check for early FOM exit.
169  Scalar beta = r.col(0).stableNorm();
170  VectorType e1 = VectorType::Zero(S - 1);
171  e1(0) = beta;
172  lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1));
173  VectorType y = lu_solver.solve(e1);
174  VectorType x2 = x + U.topLeftCorner(N, S - 1) * y;
175 
176  // Using proposition 6.7 in Saad, one MV can be saved to calculate the
177  // residual
178  RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.col(S - 1).head(N)).stableNorm();
179 
180  if (FOM_residual < tol) {
181  // Exit, the FOM algorithm was already accurate enough
182  iters = k;
183  // Convert back to the unpreconditioned solution
184  x = precond.solve(x2);
185  // x contains the updates to x0, add those back to obtain the solution
186  x += x0;
187  tol_error = FOM_residual / rhs_norm;
188  return true;
189  }
190  }
191 
192  /*
193  Select an initial (N x S) matrix R0.
194  1. Generate random R0, orthonormalize the result.
195  2. This results in R0, however to save memory and compute we only need the
196  adjoint of R0. This is given by the matrix R_T.\ Additionally, the matrix
197  (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the
198  anti-distributivity property of the adjoint. This results in AR_T, which is
199  constant if R_T does not have to be regenerated and can be precomputed.
200  Based on reference 4, this has zero probability in exact arithmetic.
201  */
202 
203  // Original IDRSTABL and Kensuke choose S random vectors:
204  const HouseholderQR<DenseMatrixType> qr(DenseMatrixType::Random(N, S));
205  DenseMatrixType R_T = (qr.householderQ() * DenseMatrixType::Identity(N, S)).adjoint();
206  DenseMatrixType AR_T = DenseMatrixType(R_T * mat);
207 
208  // Pre-allocate sigma.
209  DenseMatrixType sigma(S, S);
210 
211  bool reset_while = false; // Should the while loop be reset for some reason?
212 
213  while (k < maxIters) {
214  for (Index j = 1; j <= L; ++j) {
215  /*
216  The IDR Step
217  */
218  // Construction of the sigma-matrix, and the decomposition of sigma.
219  for (Index i = 0; i < S; ++i) {
220  sigma.col(i).noalias() = AR_T * precond.solve(U.block(N * (j - 1), i, N, 1));
221  }
222 
223  lu_solver.compute(sigma);
224  // Obtain the update coefficients alpha
225  if (j == 1) {
226  // alpha=inverse(sigma)*(R_T*r_0);
227  alpha.noalias() = lu_solver.solve(R_T * r.col(0));
228  } else {
229  // alpha=inverse(sigma)*(AR_T*r_{j-2})
230  alpha.noalias() = lu_solver.solve(AR_T * precond.solve(r.col(j - 2)));
231  }
232 
233  // Obtain new solution and residual from this update
234  update.noalias() = U.topRows(N) * alpha;
235  r.col(0) -= mat * precond.solve(update);
236  x += update;
237 
238  for (Index i = 1; i <= j - 2; ++i) {
239  // This only affects the case L>2
240  r.col(i) -= U.block(N * (i + 1), 0, N, S) * alpha;
241  }
242  if (j > 1) {
243  // r=[r;A*r_{j-2}]
244  r.col(j - 1).noalias() = mat * precond.solve(r.col(j - 2));
245  }
246  tol_error = r.col(0).stableNorm();
247 
248  if (tol_error < tol) {
249  // If at this point the algorithm has converged, exit.
250  reset_while = true;
251  break;
252  }
253 
254  bool break_normalization = false;
255  for (Index q = 1; q <= S; ++q) {
256  if (q == 1) {
257  // u = r;
258  u.leftCols(j + 1) = r.leftCols(j + 1);
259  } else {
260  // u=[u_1;u_2;...;u_j]
261  u.leftCols(j) = u.middleCols(1, j);
262  }
263 
264  // Obtain the update coefficients beta implicitly
265  // beta=lu_sigma.solve(AR_T * u.block(N * (j - 1), 0, N, 1)
266  u.reshaped().head(u.rows() * j) -= U.topRows(N * j) * lu_solver.solve(AR_T * precond.solve(u.col(j - 1)));
267 
268  // u=[u;Au_{j-1}]
269  u.col(j).noalias() = mat * precond.solve(u.col(j - 1));
270 
271  // Orthonormalize u_j to the columns of V_j(:,1:q-1)
272  if (q > 1) {
273  /*
274  Modified Gram-Schmidt-like procedure to make u orthogonal to the
275  columns of V from Ref. 1.
276 
277  The vector mu from Ref. 1 is obtained implicitly:
278  mu=V.block(N * j, 0, N, q - 1).adjoint() * u.block(N * j, 0, N, 1).
279  */
280  for (Index i = 0; i <= q - 2; ++i) {
281  auto v = V.col(i).segment(N * j, N);
282  Scalar h = v.squaredNorm();
283  h = v.dot(u.col(j)) / h;
284  u.reshaped().head(u.rows() * (j + 1)) -= h * V.block(0, i, N * (j + 1), 1);
285  }
286  }
287  // Normalize u and assign to a column of V
288  Scalar normalization_constant = u.col(j).stableNorm();
289  // If u is exactly zero, this will lead to a NaN. Small, non-zero u is
290  // fine.
291  if (normalization_constant == RealScalar(0.0)) {
292  break_normalization = true;
293  break;
294  } else {
295  u.leftCols(j + 1) /= normalization_constant;
296  }
297 
298  V.block(0, q - 1, N * (j + 1), 1).noalias() = u.reshaped().head(u.rows() * (j + 1));
299  }
300 
301  if (break_normalization == false) {
302  U = V;
303  }
304  }
305  if (reset_while) {
306  break;
307  }
308 
309  // r=[r;mat*r_{L-1}]
310  r.col(L).noalias() = mat * precond.solve(r.col(L - 1));
311 
312  /*
313  The polynomial step
314  */
315  ColPivHouseholderQR<DenseMatrixType> qr_solver(r.rightCols(L));
316  gamma.noalias() = qr_solver.solve(r.col(0));
317 
318  // Update solution and residual using the "minimized residual coefficients"
319  update.noalias() = r.leftCols(L) * gamma;
320  x += update;
321  r.col(0) -= mat * precond.solve(update);
322 
323  // Update iteration info
324  ++k;
325  tol_error = r.col(0).stableNorm();
326 
327  if (tol_error < tol) {
328  // Slightly early exit by moving the criterion before the update of U,
329  // after the main while loop the result of that calculation would not be
330  // needed.
331  break;
332  }
333 
334  /*
335  U=U0-sum(gamma_j*U_j)
336  Consider the first iteration. Then U only contains U0, so at the start of
337  the while-loop U should be U0. Therefore only the first N rows of U have to
338  be updated.
339  */
340  for (Index i = 1; i <= L; ++i) {
341  U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1);
342  }
343  }
344 
345  /*
346  Exit after the while loop terminated.
347  */
348  iters = k;
349  // Convert back to the unpreconditioned solution
350  x = precond.solve(x);
351  // x contains the updates to x0, add those back to obtain the solution
352  x += x0;
353  tol_error = tol_error / rhs_norm;
354  return true;
355 }
356 
357 } // namespace internal
358 
359 template <typename MatrixType_, typename Preconditioner_ = DiagonalPreconditioner<typename MatrixType_::Scalar>>
360 class IDRSTABL;
361 
362 namespace internal {
363 
364 template <typename MatrixType_, typename Preconditioner_>
365 struct traits<IDRSTABL<MatrixType_, Preconditioner_>> {
366  typedef MatrixType_ MatrixType;
367  typedef Preconditioner_ Preconditioner;
368 };
369 
370 } // namespace internal
371 
411 template <typename MatrixType_, typename Preconditioner_>
412 class IDRSTABL : public IterativeSolverBase<IDRSTABL<MatrixType_, Preconditioner_>> {
414  using Base::m_error;
415  using Base::m_info;
416  using Base::m_isInitialized;
417  using Base::m_iterations;
418  using Base::matrix;
421 
422  public:
423  typedef MatrixType_ MatrixType;
424  typedef typename MatrixType::Scalar Scalar;
425  typedef typename MatrixType::RealScalar RealScalar;
426  typedef Preconditioner_ Preconditioner;
427 
428  public:
430  IDRSTABL() : m_L(2), m_S(4) {}
431 
442  template <typename MatrixDerived>
443  explicit IDRSTABL(const EigenBase<MatrixDerived> &A) : Base(A.derived()), m_L(2), m_S(4) {}
444 
451  template <typename Rhs, typename Dest>
452  void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const {
456 
458  }
459 
462  void setL(Index L) {
463  eigen_assert(L >= 1 && "L needs to be positive");
464  m_L = L;
465  }
468  void setS(Index S) {
469  eigen_assert(S >= 1 && "S needs to be positive");
470  m_S = S;
471  }
472 };
473 
474 } // namespace Eigen
475 
476 #endif /* EIGEN_IDRSTABL_H */
Array< int, Dynamic, 1 > v
SparseMatrix< double > A(n, n)
int i
MatrixXcd V
HouseholderQR< MatrixXf > qr(A)
MatrixXd L
#define eigen_assert(x)
RowVector3d w
MatrixXf mat
Matrix< float, 1, Dynamic > MatrixType
const Solve< ColPivHouseholderQR, Rhs > solve(const MatrixBase< Rhs > &b) const
FullPivLU & compute(const EigenBase< InputType > &matrix)
const Solve< FullPivLU, Rhs > solve(const MatrixBase< Rhs > &b) const
The IDR(s)STAB(l) is a combination of IDR(s) and BiCGSTAB(l). It is a short-recurrences Krylov method...
Definition: IDRSTABL.h:412
RealScalar m_error
void setL(Index L)
Definition: IDRSTABL.h:462
Preconditioner_ Preconditioner
Definition: IDRSTABL.h:426
MatrixType::Scalar Scalar
Definition: IDRSTABL.h:424
const ActualMatrixType & matrix() const
void setS(Index S)
Definition: IDRSTABL.h:468
MatrixType_ MatrixType
Definition: IDRSTABL.h:423
ComputationInfo m_info
IDRSTABL(const EigenBase< MatrixDerived > &A)
Definition: IDRSTABL.h:443
IterativeSolverBase< IDRSTABL > Base
Definition: IDRSTABL.h:413
MatrixType::RealScalar RealScalar
Definition: IDRSTABL.h:425
void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const
Definition: IDRSTABL.h:452
Preconditioner m_preconditioner
const ActualMatrixType & matrix() const
NumericalIssue
bool idrstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters, typename Dest::RealScalar &tol_error, Index L, Index S)
Definition: IDRSTABL.h:46
const Scalar & y
EIGEN_ALWAYS_INLINE double sqrt(const double &x)
EIGEN_ALWAYS_INLINE std::enable_if_t< NumTraits< T >::IsSigned||NumTraits< T >::IsComplex, typename NumTraits< T >::Real > abs(const T &x)
: TensorContractionSycl.h, provides various tensor contraction kernel for SYCL backend
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_abs_op< typename Derived::Scalar >, const Derived > abs(const Eigen::ArrayBase< Derived > &x)
std::ptrdiff_t j