Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Experimental_RBILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
45 
46 #include <Tpetra_Experimental_BlockMultiVector.hpp>
47 #include<Ifpack2_OverlappingRowMatrix.hpp>
48 #include<Ifpack2_LocalFilter.hpp>
49 #include <Ifpack2_Experimental_RBILUK.hpp>
50 #include <Ifpack2_Utilities.hpp>
51 #include <Ifpack2_RILUK.hpp>
52 
53 //#define IFPACK2_RBILUK_INITIAL
54 #define IFPACK2_RBILUK_INITIAL_NOKK
55 
56 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
57 #include <KokkosBatched_Gemm_Decl.hpp>
58 #include <KokkosBatched_Gemm_Serial_Impl.hpp>
59 #include <KokkosBatched_Util.hpp>
60 #endif
61 
62 namespace Ifpack2 {
63 
64 namespace Experimental {
65 
66 template<class MatrixType>
68  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
69  A_(Matrix_in),
70  A_block_(Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(Matrix_in))
71 {}
72 
73 template<class MatrixType>
75  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
76  A_block_(Matrix_in)
77 {}
78 
79 
80 template<class MatrixType>
82 
83 
84 template<class MatrixType>
85 void
87 {
88  // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
89 
90  // It's legal for A to be null; in that case, you may not call
91  // initialize() until calling setMatrix() with a nonnull input.
92  // Regardless, setting the matrix invalidates any previous
93  // factorization.
94  if (A.getRawPtr () != A_block_.getRawPtr ())
95  {
96  this->isAllocated_ = false;
97  this->isInitialized_ = false;
98  this->isComputed_ = false;
99  this->Graph_ = Teuchos::null;
100  L_block_ = Teuchos::null;
101  U_block_ = Teuchos::null;
102  D_block_ = Teuchos::null;
103  A_block_ = A;
104  }
105 }
106 
107 
108 
109 template<class MatrixType>
110 const typename RBILUK<MatrixType>::block_crs_matrix_type&
112 {
114  L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
115  "is null. Please call initialize() (and preferably also compute()) "
116  "before calling this method. If the input matrix has not yet been set, "
117  "you must first call setMatrix() with a nonnull input matrix before you "
118  "may call initialize() or compute().");
119  return *L_block_;
120 }
121 
122 
123 template<class MatrixType>
124 const typename RBILUK<MatrixType>::block_crs_matrix_type&
126 {
128  D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
129  "(of diagonal entries) is null. Please call initialize() (and "
130  "preferably also compute()) before calling this method. If the input "
131  "matrix has not yet been set, you must first call setMatrix() with a "
132  "nonnull input matrix before you may call initialize() or compute().");
133  return *D_block_;
134 }
135 
136 
137 template<class MatrixType>
138 const typename RBILUK<MatrixType>::block_crs_matrix_type&
140 {
142  U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
143  "is null. Please call initialize() (and preferably also compute()) "
144  "before calling this method. If the input matrix has not yet been set, "
145  "you must first call setMatrix() with a nonnull input matrix before you "
146  "may call initialize() or compute().");
147  return *U_block_;
148 }
149 
150 template<class MatrixType>
152 {
153  using Teuchos::null;
154  using Teuchos::rcp;
155 
156  if (! this->isAllocated_) {
157  // Deallocate any existing storage. This avoids storing 2x
158  // memory, since RCP op= does not deallocate until after the
159  // assignment.
160  this->L_ = null;
161  this->U_ = null;
162  this->D_ = null;
163  L_block_ = null;
164  U_block_ = null;
165  D_block_ = null;
166 
167  // Allocate Matrix using ILUK graphs
168  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
169  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
170  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
171  blockSize_) );
172  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
173  U_block_->setAllToScalar (STM::zero ());
174  D_block_->setAllToScalar (STM::zero ());
175 
176  }
177  this->isAllocated_ = true;
178 }
179 
180 template<class MatrixType>
183  return A_block_;
184 }
185 
186 template<class MatrixType>
188 {
189  using Teuchos::RCP;
190  using Teuchos::rcp;
191  using Teuchos::rcp_dynamic_cast;
192  const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
193 
194  // FIXME (mfh 04 Nov 2015) Apparently it's OK for A_ to be null.
195  // That probably means that this preconditioner was created with a
196  // BlockCrsMatrix directly, so it doesn't need the LocalFilter.
197 
198  // TEUCHOS_TEST_FOR_EXCEPTION
199  // (A_.is_null (), std::runtime_error, prefix << "The matrix (A_, the "
200  // "RowMatrix) is null. Please call setMatrix() with a nonnull input "
201  // "before calling this method.");
202 
203  if (A_block_.is_null ()) {
204  // FIXME (mfh 04 Nov 2015) Why does the input have to be a
205  // LocalFilter? Why can't we just take a regular matrix, and
206  // apply a LocalFilter only if necessary, like other "local"
207  // Ifpack2 preconditioners already do?
208  RCP<const LocalFilter<row_matrix_type> > filteredA =
209  rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_);
211  (filteredA.is_null (), std::runtime_error, prefix <<
212  "Cannot cast to filtered matrix.");
213  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA =
214  rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
215  if (! overlappedA.is_null ()) {
216  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
217  } else {
218  //If there is no overlap, filteredA could be the block CRS matrix
219  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
220  }
221  }
222 
224  (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
225  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
226  "input before calling this method.");
228  (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
229  "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
230  "initialize() or compute() with this matrix until the matrix is fill "
231  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
232  "underlying graph is fill complete.");
233 
234  blockSize_ = A_block_->getBlockSize();
235 
236  Teuchos::Time timer ("RBILUK::initialize");
237  { // Start timing
238  Teuchos::TimeMonitor timeMon (timer);
239 
240  // Calling initialize() means that the user asserts that the graph
241  // of the sparse matrix may have changed. We must not just reuse
242  // the previous graph in that case.
243  //
244  // Regarding setting isAllocated_ to false: Eventually, we may want
245  // some kind of clever memory reuse strategy, but it's always
246  // correct just to blow everything away and start over.
247  this->isInitialized_ = false;
248  this->isAllocated_ = false;
249  this->isComputed_ = false;
250  this->Graph_ = Teuchos::null;
251 
252  typedef Tpetra::CrsGraph<local_ordinal_type,
254  node_type> crs_graph_type;
255 
256  RCP<const crs_graph_type> matrixCrsGraph = Teuchos::rcpFromRef(A_block_->getCrsGraph() );
257  this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type> (matrixCrsGraph,
258  this->LevelOfFill_, 0));
259 
260  this->Graph_->initialize ();
261  allocate_L_and_U_blocks ();
262 #ifdef IFPACK2_RBILUK_INITIAL
263  initAllValues (*A_block_);
264 #endif
265  } // Stop timing
266 
267  this->isInitialized_ = true;
268  this->numInitialize_ += 1;
269  this->initializeTime_ += timer.totalElapsedTime ();
270 }
271 
272 
273 template<class MatrixType>
274 void
276 initAllValues (const block_crs_matrix_type& A)
277 {
278  using Teuchos::RCP;
279  typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
280 
281  local_ordinal_type NumIn = 0, NumL = 0, NumU = 0;
282  bool DiagFound = false;
283  size_t NumNonzeroDiags = 0;
284  size_t MaxNumEntries = A.getNodeMaxNumRowEntries();
285  local_ordinal_type blockMatSize = blockSize_*blockSize_;
286 
287  // First check that the local row map ordering is the same as the local portion of the column map.
288  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
289  // implicitly assume that this is the case.
290  Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getNodeElementList();
291  Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getNodeElementList();
292  bool gidsAreConsistentlyOrdered=true;
293  global_ordinal_type indexOfInconsistentGID=0;
294  for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
295  if (rowGIDs[i] != colGIDs[i]) {
296  gidsAreConsistentlyOrdered=false;
297  indexOfInconsistentGID=i;
298  break;
299  }
300  }
301  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
302  "The ordering of the local GIDs in the row and column maps is not the same"
303  << std::endl << "at index " << indexOfInconsistentGID
304  << ". Consistency is required, as all calculations are done with"
305  << std::endl << "local indexing.");
306 
307  // Allocate temporary space for extracting the strictly
308  // lower and upper parts of the matrix A.
309  Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
310  Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
311  Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
312  Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
313 
314  Teuchos::Array<scalar_type> diagValues(blockMatSize);
315 
316  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
317  U_block_->setAllToScalar (STM::zero ());
318  D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
319 
320  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
321  // host, so sync to host first. The const_cast is unfortunate but
322  // is our only option to make this correct.
323  const_cast<block_crs_matrix_type&> (A).template sync<Kokkos::HostSpace> ();
324  L_block_->template sync<Kokkos::HostSpace> ();
325  U_block_->template sync<Kokkos::HostSpace> ();
326  D_block_->template sync<Kokkos::HostSpace> ();
327  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
328  L_block_->template modify<Kokkos::HostSpace> ();
329  U_block_->template modify<Kokkos::HostSpace> ();
330  D_block_->template modify<Kokkos::HostSpace> ();
331 
332  RCP<const map_type> rowMap = L_block_->getRowMap ();
333 
334  // First we copy the user's matrix into L and U, regardless of fill level.
335  // It is important to note that L and U are populated using local indices.
336  // This means that if the row map GIDs are not monotonically increasing
337  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
338  // matrix is not the one that you would get if you based L (U) on GIDs.
339  // This is ok, as the *order* of the GIDs in the rowmap is a better
340  // expression of the user's intent than the GIDs themselves.
341 
342  for (size_t myRow=0; myRow<A.getNodeNumRows(); ++myRow) {
343  local_ordinal_type local_row = myRow;
344 
345  //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
346  // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
347  const local_ordinal_type * InI = 0;
348  scalar_type * InV = 0;
349  A.getLocalRowView(local_row, InI, InV, NumIn);
350 
351  // Split into L and U (we don't assume that indices are ordered).
352 
353  NumL = 0;
354  NumU = 0;
355  DiagFound = false;
356 
357  for (local_ordinal_type j = 0; j < NumIn; ++j) {
358  const local_ordinal_type k = InI[j];
359  const local_ordinal_type blockOffset = blockMatSize*j;
360 
361  if (k == local_row) {
362  DiagFound = true;
363  // Store perturbed diagonal in Tpetra::Vector D_
364  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
365  diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
366  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
367  }
368  else if (k < 0) { // Out of range
370  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
371  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
372  "probably an artifact of the undocumented assumptions of the "
373  "original implementation (likely copied and pasted from Ifpack). "
374  "Nevertheless, the code I found here insisted on this being an error "
375  "state, so I will throw an exception here.");
376  }
377  else if (k < local_row) {
378  LI[NumL] = k;
379  const local_ordinal_type LBlockOffset = NumL*blockMatSize;
380  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
381  LV[LBlockOffset+jj] = InV[blockOffset+jj];
382  NumL++;
383  }
384  else if (Teuchos::as<size_t>(k) <= rowMap->getNodeNumElements()) {
385  UI[NumU] = k;
386  const local_ordinal_type UBlockOffset = NumU*blockMatSize;
387  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
388  UV[UBlockOffset+jj] = InV[blockOffset+jj];
389  NumU++;
390  }
391  }
392 
393  // Check in things for this row of L and U
394 
395  if (DiagFound) {
396  ++NumNonzeroDiags;
397  } else
398  {
399  for (local_ordinal_type jj = 0; jj < blockSize_; ++jj)
400  diagValues[jj*(blockSize_+1)] = this->Athresh_;
401  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
402  }
403 
404  if (NumL) {
405  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
406  }
407 
408  if (NumU) {
409  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
410  }
411  }
412 
413  // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
414  // ever gets a device implementation.
415  {
416  typedef typename block_crs_matrix_type::device_type device_type;
417  const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
418  L_block_->template sync<device_type> ();
419  U_block_->template sync<device_type> ();
420  D_block_->template sync<device_type> ();
421  }
422  this->isInitialized_ = true;
423 }
424 
425 namespace { // (anonymous)
426 
427 // For a given Kokkos::View type, possibly unmanaged, get the
428 // corresponding managed Kokkos::View type. This is handy for
429 // translating from little_block_type or little_vec_type (both
430 // possibly unmanaged) to their managed versions.
431 template<class LittleBlockType>
432 struct GetManagedView {
433  static_assert (Kokkos::Impl::is_view<LittleBlockType>::value,
434  "LittleBlockType must be a Kokkos::View.");
435  typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
436  typename LittleBlockType::array_layout,
437  typename LittleBlockType::device_type> managed_non_const_type;
438  static_assert (static_cast<int> (managed_non_const_type::rank) ==
439  static_cast<int> (LittleBlockType::rank),
440  "managed_non_const_type::rank != LittleBlock::rank. "
441  "Please report this bug to the Ifpack2 developers.");
442 };
443 
444 } // namespace (anonymous)
445 
446 template<class MatrixType>
448 {
449  typedef impl_scalar_type IST;
450  const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
451 
452  // initialize() checks this too, but it's easier for users if the
453  // error shows them the name of the method that they actually
454  // called, rather than the name of some internally called method.
456  (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
457  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
458  "input before calling this method.");
460  (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
461  "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
462  "initialize() or compute() with this matrix until the matrix is fill "
463  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
464  "underlying graph is fill complete.");
465 
466  if (! this->isInitialized ()) {
467  initialize (); // Don't count this in the compute() time
468  }
469 
470  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
471  // host, so sync to host first. The const_cast is unfortunate but
472  // is our only option to make this correct.
473  if (! A_block_.is_null ()) {
475  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
476  A_nc->template sync<Kokkos::HostSpace> ();
477  }
478  L_block_->template sync<Kokkos::HostSpace> ();
479  U_block_->template sync<Kokkos::HostSpace> ();
480  D_block_->template sync<Kokkos::HostSpace> ();
481  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
482  L_block_->template modify<Kokkos::HostSpace> ();
483  U_block_->template modify<Kokkos::HostSpace> ();
484  D_block_->template modify<Kokkos::HostSpace> ();
485 
486  Teuchos::Time timer ("RBILUK::compute");
487  { // Start timing
488  Teuchos::TimeMonitor timeMon (timer);
489  this->isComputed_ = false;
490 
491  // MinMachNum should be officially defined, for now pick something a little
492  // bigger than IEEE underflow value
493 
494 // const scalar_type MinDiagonalValue = STS::rmin ();
495 // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
496  initAllValues (*A_block_);
497 
498  size_t NumIn;
499  local_ordinal_type NumL, NumU, NumURead;
500 
501  // Get Maximum Row length
502  const size_t MaxNumEntries =
503  L_block_->getNodeMaxNumRowEntries () + U_block_->getNodeMaxNumRowEntries () + 1;
504 
505  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
506 
507  // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
508  // expressing these strides explicitly, in order to finish #177
509  // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
510  const local_ordinal_type rowStride = blockSize_;
511 
512  Teuchos::Array<int> ipiv_teuchos(blockSize_);
513  Kokkos::View<int*, Kokkos::HostSpace,
514  Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
515  Teuchos::Array<IST> work_teuchos(blockSize_);
516  Kokkos::View<IST*, Kokkos::HostSpace,
517  Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
518 
519  size_t num_cols = U_block_->getColMap()->getNodeNumElements();
520  Teuchos::Array<int> colflag(num_cols);
521 
522  typename GetManagedView<little_block_type>::managed_non_const_type diagModBlock ("diagModBlock", blockSize_, blockSize_);
523  typename GetManagedView<little_block_type>::managed_non_const_type matTmp ("matTmp", blockSize_, blockSize_);
524  typename GetManagedView<little_block_type>::managed_non_const_type multiplier ("multiplier", blockSize_, blockSize_);
525 
526 // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
527 
528  // Now start the factorization.
529 
530  // Need some integer workspace and pointers
531  local_ordinal_type NumUU;
532  for (size_t j = 0; j < num_cols; ++j) {
533  colflag[j] = -1;
534  }
535  Teuchos::Array<local_ordinal_type> InI(MaxNumEntries, 0);
536  Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
537 
538  const local_ordinal_type numLocalRows = L_block_->getNodeNumRows ();
539  for (local_ordinal_type local_row = 0; local_row < numLocalRows; ++local_row) {
540 
541  // Fill InV, InI with current row of L, D and U combined
542 
543  NumIn = MaxNumEntries;
544  const local_ordinal_type * colValsL;
545  scalar_type * valsL;
546 
547  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
548  for (local_ordinal_type j = 0; j < NumL; ++j)
549  {
550  const local_ordinal_type matOffset = blockMatSize*j;
551  little_block_type lmat((typename little_block_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
552  little_block_type lmatV((typename little_block_type::value_type*) &InV[matOffset],blockSize_,rowStride);
553  //lmatV.assign(lmat);
554  Tpetra::Experimental::COPY (lmat, lmatV);
555  InI[j] = colValsL[j];
556  }
557 
558  little_block_type dmat = D_block_->getLocalBlock(local_row, local_row);
559  little_block_type dmatV((typename little_block_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
560  //dmatV.assign(dmat);
561  Tpetra::Experimental::COPY (dmat, dmatV);
562  InI[NumL] = local_row;
563 
564  const local_ordinal_type * colValsU;
565  scalar_type * valsU;
566  U_block_->getLocalRowView(local_row, colValsU, valsU, NumURead);
567  NumU = 0;
568  for (local_ordinal_type j = 0; j < NumURead; ++j)
569  {
570  if (!(colValsU[j] < numLocalRows)) continue;
571  InI[NumL+1+j] = colValsU[j];
572  const local_ordinal_type matOffset = blockMatSize*(NumL+1+j);
573  little_block_type umat((typename little_block_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
574  little_block_type umatV((typename little_block_type::value_type*) &InV[matOffset], blockSize_, rowStride);
575  //umatV.assign(umat);
576  Tpetra::Experimental::COPY (umat, umatV);
577  NumU += 1;
578  }
579  NumIn = NumL+NumU+1;
580 
581  // Set column flags
582  for (size_t j = 0; j < NumIn; ++j) {
583  colflag[InI[j]] = j;
584  }
585 
586 #ifndef IFPACK2_RBILUK_INITIAL
587  for (local_ordinal_type i = 0; i < blockSize_; ++i)
588  for (local_ordinal_type j = 0; j < blockSize_; ++j){
589  {
590  diagModBlock(i,j) = 0;
591  }
592  }
593 #else
594  scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
595  Kokkos::deep_copy (diagModBlock, diagmod);
596 #endif
597 
598  for (local_ordinal_type jj = 0; jj < NumL; ++jj) {
599  local_ordinal_type j = InI[jj];
600  little_block_type currentVal((typename little_block_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride); // current_mults++;
601  //multiplier.assign(currentVal);
602  Tpetra::Experimental::COPY (currentVal, multiplier);
603 
604  const little_block_type dmatInverse = D_block_->getLocalBlock(j,j);
605  // alpha = 1, beta = 0
606 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
607  KokkosBatched::Experimental::SerialGemm
608  <KokkosBatched::Experimental::Trans::NoTranspose,
609  KokkosBatched::Experimental::Trans::NoTranspose,
610  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
611  (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
612 #else
613  Tpetra::Experimental::GEMM ("N", "N", STS::one (), currentVal, dmatInverse,
614  STS::zero (), matTmp);
615 #endif
616  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
617  //currentVal.assign(matTmp);
618  Tpetra::Experimental::COPY (matTmp, currentVal);
619 
620  const local_ordinal_type * UUI;
621  scalar_type * UUV;
622  U_block_->getLocalRowView(j, UUI, UUV, NumUU);
623 
624  if (this->RelaxValue_ == STM::zero ()) {
625  for (local_ordinal_type k = 0; k < NumUU; ++k) {
626  if (!(UUI[k] < numLocalRows)) continue;
627  const int kk = colflag[UUI[k]];
628  if (kk > -1) {
629  little_block_type kkval((typename little_block_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
630  little_block_type uumat((typename little_block_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
631 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
632  KokkosBatched::Experimental::SerialGemm
633  <KokkosBatched::Experimental::Trans::NoTranspose,
634  KokkosBatched::Experimental::Trans::NoTranspose,
635  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
636  ( magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
637 #else
638  Tpetra::Experimental::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
639  STM::one (), kkval);
640 #endif
641  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
642  }
643  }
644  }
645  else {
646  for (local_ordinal_type k = 0; k < NumUU; ++k) {
647  if (!(UUI[k] < numLocalRows)) continue;
648  const int kk = colflag[UUI[k]];
649  little_block_type uumat((typename little_block_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
650  if (kk > -1) {
651  little_block_type kkval((typename little_block_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
652 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
653  KokkosBatched::Experimental::SerialGemm
654  <KokkosBatched::Experimental::Trans::NoTranspose,
655  KokkosBatched::Experimental::Trans::NoTranspose,
656  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
657  (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
658 #else
659  Tpetra::Experimental::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
660  STM::one (), kkval);
661 #endif
662  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
663  }
664  else {
665 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
666  KokkosBatched::Experimental::SerialGemm
667  <KokkosBatched::Experimental::Trans::NoTranspose,
668  KokkosBatched::Experimental::Trans::NoTranspose,
669  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
670  (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
671 #else
672  Tpetra::Experimental::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
673  STM::one (), diagModBlock);
674 #endif
675  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
676  }
677  }
678  }
679  }
680  if (NumL) {
681  // Replace current row of L
682  L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
683  }
684 
685  // dmat.assign(dmatV);
686  Tpetra::Experimental::COPY (dmatV, dmat);
687 
688  if (this->RelaxValue_ != STM::zero ()) {
689  //dmat.update(this->RelaxValue_, diagModBlock);
690  Tpetra::Experimental::AXPY (this->RelaxValue_, diagModBlock, dmat);
691  }
692 
693 // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
694 // if (STS::real (DV[i]) < STM::zero ()) {
695 // DV[i] = -MinDiagonalValue;
696 // }
697 // else {
698 // DV[i] = MinDiagonalValue;
699 // }
700 // }
701 // else
702  {
703  int lapackInfo = 0;
704  for (int k = 0; k < blockSize_; ++k) {
705  ipiv[k] = 0;
706  }
707 
708  Tpetra::Experimental::GETF2 (dmat, ipiv, lapackInfo);
709  //lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
711  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
712  "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
713 
714  Tpetra::Experimental::GETRI (dmat, ipiv, work, lapackInfo);
715  //lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
717  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
718  "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
719  }
720 
721  for (local_ordinal_type j = 0; j < NumU; ++j) {
722  little_block_type currentVal((typename little_block_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride); // current_mults++;
723  // scale U by the diagonal inverse
724 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
725  KokkosBatched::Experimental::SerialGemm
726  <KokkosBatched::Experimental::Trans::NoTranspose,
727  KokkosBatched::Experimental::Trans::NoTranspose,
728  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
729  (STS::one (), dmat, currentVal, STS::zero (), matTmp);
730 #else
731  Tpetra::Experimental::GEMM ("N", "N", STS::one (), dmat, currentVal,
732  STS::zero (), matTmp);
733 #endif
734  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
735  //currentVal.assign(matTmp);
736  Tpetra::Experimental::COPY (matTmp, currentVal);
737  }
738 
739  if (NumU) {
740  // Replace current row of L and U
741  U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
742  }
743 
744 #ifndef IFPACK2_RBILUK_INITIAL
745  // Reset column flags
746  for (size_t j = 0; j < NumIn; ++j) {
747  colflag[InI[j]] = -1;
748  }
749 #else
750  // Reset column flags
751  for (size_t j = 0; j < num_cols; ++j) {
752  colflag[j] = -1;
753  }
754 #endif
755  }
756 
757  } // Stop timing
758 
759  // Sync everything back to device, for efficient solves.
760  {
761  typedef typename block_crs_matrix_type::device_type device_type;
762  if (! A_block_.is_null ()) {
764  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
765  A_nc->template sync<device_type> ();
766  }
767  L_block_->template sync<device_type> ();
768  U_block_->template sync<device_type> ();
769  D_block_->template sync<device_type> ();
770  }
771 
772  this->isComputed_ = true;
773  this->numCompute_ += 1;
774  this->computeTime_ += timer.totalElapsedTime ();
775 }
776 
777 
778 template<class MatrixType>
779 void
781 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
782  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
783  Teuchos::ETransp mode,
784  scalar_type alpha,
785  scalar_type beta) const
786 {
787  using Teuchos::RCP;
788  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
790  typedef Tpetra::MultiVector<scalar_type,
792 
794  A_block_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
795  "null. Please call setMatrix() with a nonnull input, then initialize() "
796  "and compute(), before calling this method.");
798  ! this->isComputed (), std::runtime_error,
799  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
800  "you must call compute() before calling this method.");
802  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
803  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
804  "X.getNumVectors() = " << X.getNumVectors ()
805  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
807  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
808  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
809  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
810  "fixed. There is a FIXME in this file about this very issue.");
811 
812  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
813 
814  const local_ordinal_type rowStride = blockSize_;
815 
816  BMV yBlock (Y, * (A_block_->getGraph ()->getDomainMap ()), blockSize_);
817  const BMV xBlock (X, * (A_block_->getColMap ()), blockSize_);
818 
819  Teuchos::Array<scalar_type> lclarray(blockSize_);
820  little_vec_type lclvec((typename little_vec_type::value_type*)&lclarray[0], blockSize_);
821  const scalar_type one = STM::one ();
822  const scalar_type zero = STM::zero ();
823 
824  Teuchos::Time timer ("RBILUK::apply");
825  { // Start timing
826  Teuchos::TimeMonitor timeMon (timer);
827  if (alpha == one && beta == zero) {
828  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
829  // Start by solving L C = X for C. C must have the same Map
830  // as D. We have to use a temp multivector, since our
831  // implementation of triangular solves does not allow its
832  // input and output to alias one another.
833  //
834  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
835  const local_ordinal_type numVectors = xBlock.getNumVectors();
836  BMV cBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
837  BMV rBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
838  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
839  {
840  for (size_t i = 0; i < D_block_->getNodeNumRows(); ++i)
841  {
842  local_ordinal_type local_row = i;
843  little_vec_type xval = xBlock.getLocalBlock(local_row,imv);
844  little_vec_type cval = cBlock.getLocalBlock(local_row,imv);
845  //cval.assign(xval);
846  Tpetra::Experimental::COPY (xval, cval);
847 
848  local_ordinal_type NumL;
849  const local_ordinal_type * colValsL;
850  scalar_type * valsL;
851 
852  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
853 
854  for (local_ordinal_type j = 0; j < NumL; ++j)
855  {
856  local_ordinal_type col = colValsL[j];
857  little_vec_type prevVal = cBlock.getLocalBlock(col, imv);
858 
859  const local_ordinal_type matOffset = blockMatSize*j;
860  little_block_type lij((typename little_block_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
861 
862  //cval.matvecUpdate(-one, lij, prevVal);
863  Tpetra::Experimental::GEMV (-one, lij, prevVal, cval);
864  }
865  }
866  }
867 
868  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
869  D_block_->applyBlock(cBlock, rBlock);
870 
871  // Solve U Y = R.
872  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
873  {
874  const local_ordinal_type numRows = D_block_->getNodeNumRows();
875  for (local_ordinal_type i = 0; i < numRows; ++i)
876  {
877  local_ordinal_type local_row = (numRows-1)-i;
878  little_vec_type rval = rBlock.getLocalBlock(local_row,imv);
879  little_vec_type yval = yBlock.getLocalBlock(local_row,imv);
880  //yval.assign(rval);
881  Tpetra::Experimental::COPY (rval, yval);
882 
883  local_ordinal_type NumU;
884  const local_ordinal_type * colValsU;
885  scalar_type * valsU;
886 
887  U_block_->getLocalRowView(local_row, colValsU, valsU, NumU);
888 
889  for (local_ordinal_type j = 0; j < NumU; ++j)
890  {
891  local_ordinal_type col = colValsU[NumU-1-j];
892  little_vec_type prevVal = yBlock.getLocalBlock(col, imv);
893 
894  const local_ordinal_type matOffset = blockMatSize*(NumU-1-j);
895  little_block_type uij((typename little_block_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
896 
897  //yval.matvecUpdate(-one, uij, prevVal);
898  Tpetra::Experimental::GEMV (-one, uij, prevVal, yval);
899  }
900  }
901  }
902  }
903  else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
905  true, std::runtime_error,
906  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm. ");
907  }
908  }
909  else { // alpha != 1 or beta != 0
910  if (alpha == zero) {
911  if (beta == zero) {
912  Y.putScalar (zero);
913  } else {
914  Y.scale (beta);
915  }
916  } else { // alpha != zero
917  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
918  apply (X, Y_tmp, mode);
919  Y.update (alpha, Y_tmp, beta);
920  }
921  }
922  } // Stop timing
923 
924  this->numApply_ += 1;
925  this->applyTime_ = timer.totalElapsedTime ();
926 }
927 
928 
929 template<class MatrixType>
931 {
932  std::ostringstream os;
933 
934  // Output is a valid YAML dictionary in flow style. If you don't
935  // like everything on a single line, you should call describe()
936  // instead.
937  os << "\"Ifpack2::Experimental::RBILUK\": {";
938  os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
939  << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
940 
941  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
942 
943  if (A_block_.is_null ()) {
944  os << "Matrix: null";
945  }
946  else {
947  os << "Global matrix dimensions: ["
948  << A_block_->getGlobalNumRows () << ", " << A_block_->getGlobalNumCols () << "]"
949  << ", Global nnz: " << A_block_->getGlobalNumEntries();
950  }
951 
952  os << "}";
953  return os.str ();
954 }
955 
956 } // namespace Experimental
957 
958 } // namespace Ifpack2
959 
960 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
961 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
962 // handled internally via dynamic cast.
963 
964 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
965  template class Ifpack2::Experimental::RBILUK< Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
966  template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
967 
968 #endif
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:125
Ifpack2 features that are experimental. Use at your own risk.
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:187
Teuchos::RCP< const block_crs_matrix_type > getBlockMatrix() const
Get the input matrix.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:182
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:145
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:81
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:136
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:151
size_type size() const
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:243
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:148
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:781
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:142
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:447
LinearOp zero(const VectorSpace &vs)
File for utility functions.
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:139
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:97
double totalElapsedTime(bool readCurrentTime=false) const
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:86
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:930
Sparse matrix (Tpetra::RowMatrix subclass) with ghost rows.
Definition: Ifpack2_OverlappingRowMatrix_decl.hpp:58
T * getRawPtr() const
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:111
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:157
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:128