IFPACK  Development
Ifpack_PointRelaxation.cpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Ifpack_ConfigDefs.h"
44 #include <iomanip>
45 #include <cmath>
46 #include "Epetra_Operator.h"
47 #include "Epetra_CrsMatrix.h"
48 #include "Epetra_VbrMatrix.h"
49 #include "Epetra_Comm.h"
50 #include "Epetra_Map.h"
51 #include "Epetra_MultiVector.h"
52 #include "Ifpack_Preconditioner.h"
53 #include "Ifpack_PointRelaxation.h"
54 #include "Ifpack_Utils.h"
55 #include "Ifpack_Condest.h"
56 
57 static const int IFPACK_JACOBI = 0;
58 static const int IFPACK_GS = 1;
59 static const int IFPACK_SGS = 2;
60 
61 //==============================================================================
64  IsInitialized_(false),
65  IsComputed_(false),
66  NumInitialize_(0),
67  NumCompute_(0),
68  NumApplyInverse_(0),
69  InitializeTime_(0.0),
70  ComputeTime_(0.0),
71  ApplyInverseTime_(0.0),
72  ComputeFlops_(0.0),
73  ApplyInverseFlops_(0.0),
74  NumSweeps_(1),
75  DampingFactor_(1.0),
76  UseTranspose_(false),
77  Condest_(-1.0),
78  /* ComputeCondest_(false), (unused; commented out to avoid build warnings) */
79  PrecType_(IFPACK_JACOBI),
80  MinDiagonalValue_(0.0),
81  NumMyRows_(0),
82  NumMyNonzeros_(0),
83  NumGlobalRows_(0),
84  NumGlobalNonzeros_(0),
85  Matrix_(Teuchos::rcp(Matrix_in,false)),
86  IsParallel_(false),
87  ZeroStartingSolution_(true),
88  DoBackwardGS_(false),
89  DoL1Method_(false),
90  L1Eta_(1.5),
91  NumLocalSmoothingIndices_(Matrix_in->NumMyRows()),
92  LocalSmoothingIndices_(0)
93 {
94 }
95 
96 //==============================================================================
97 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
98 {
99  using std::cout;
100  using std::endl;
101 
102  std::string PT;
103  if (PrecType_ == IFPACK_JACOBI)
104  PT = "Jacobi";
105  else if (PrecType_ == IFPACK_GS)
106  PT = "Gauss-Seidel";
107  else if (PrecType_ == IFPACK_SGS)
108  PT = "symmetric Gauss-Seidel";
109 
110  PT = List.get("relaxation: type", PT);
111 
112  if (PT == "Jacobi")
113  PrecType_ = IFPACK_JACOBI;
114  else if (PT == "Gauss-Seidel")
115  PrecType_ = IFPACK_GS;
116  else if (PT == "symmetric Gauss-Seidel")
117  PrecType_ = IFPACK_SGS;
118  else {
119  IFPACK_CHK_ERR(-2);
120  }
121 
122  NumSweeps_ = List.get("relaxation: sweeps",NumSweeps_);
123  DampingFactor_ = List.get("relaxation: damping factor",
124  DampingFactor_);
125  MinDiagonalValue_ = List.get("relaxation: min diagonal value",
126  MinDiagonalValue_);
127  ZeroStartingSolution_ = List.get("relaxation: zero starting solution",
128  ZeroStartingSolution_);
129 
130  DoBackwardGS_ = List.get("relaxation: backward mode",DoBackwardGS_);
131 
132  DoL1Method_ = List.get("relaxation: use l1",DoL1Method_);
133 
134  L1Eta_ = List.get("relaxation: l1 eta",L1Eta_);
135 
136 
137  // This (partial) reordering would require a very different implementation of Jacobi than we have no, so we're not
138  // going to do it.
139  NumLocalSmoothingIndices_= List.get("relaxation: number of local smoothing indices",NumLocalSmoothingIndices_);
140  LocalSmoothingIndices_ = List.get("relaxation: local smoothing indices",LocalSmoothingIndices_);
141  if(PrecType_ == IFPACK_JACOBI && LocalSmoothingIndices_) {
142  NumLocalSmoothingIndices_=NumMyRows_;
143  LocalSmoothingIndices_=0;
144  if(!Comm().MyPID()) cout<<"Ifpack_PointRelaxation: WARNING: Reordered/Local Smoothing not implemented for Jacobi. Defaulting to regular Jacobi"<<endl;
145  }
146 
147  SetLabel();
148 
149  return(0);
150 }
151 
152 //==============================================================================
154 {
155  return(Matrix_->Comm());
156 }
157 
158 //==============================================================================
160 {
161  return(Matrix_->OperatorDomainMap());
162 }
163 
164 //==============================================================================
166 {
167  return(Matrix_->OperatorRangeMap());
168 }
169 
170 //==============================================================================
172 {
173  IsInitialized_ = false;
174 
175  if (Matrix_ == Teuchos::null)
176  IFPACK_CHK_ERR(-2);
177 
178  if (Time_ == Teuchos::null)
179  Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
180 
181  if (Matrix().NumGlobalRows64() != Matrix().NumGlobalCols64())
182  IFPACK_CHK_ERR(-2); // only square matrices
183 
184  NumMyRows_ = Matrix_->NumMyRows();
185  NumMyNonzeros_ = Matrix_->NumMyNonzeros();
186  NumGlobalRows_ = Matrix_->NumGlobalRows64();
187  NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros64();
188 
189  if (Comm().NumProc() != 1)
190  IsParallel_ = true;
191  else
192  IsParallel_ = false;
193 
194  ++NumInitialize_;
195  InitializeTime_ += Time_->ElapsedTime();
196  IsInitialized_ = true;
197  return(0);
198 }
199 
200 //==============================================================================
202 {
203  int ierr = 0;
204  if (!IsInitialized())
205  IFPACK_CHK_ERR(Initialize());
206 
207  Time_->ResetStartTime();
208 
209  // reset values
210  IsComputed_ = false;
211  Condest_ = -1.0;
212 
213  if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
214  if (NumSweeps_ < 0)
215  IFPACK_CHK_ERR(-2); // at least one application
216 
217  // NOTE: RowMatrixRowMap doesn't work correctly for Epetra_VbrMatrix
218  const Epetra_VbrMatrix * VbrMat = dynamic_cast<const Epetra_VbrMatrix*>(&Matrix());
219  if(VbrMat) Diagonal_ = Teuchos::rcp( new Epetra_Vector(VbrMat->RowMap()) );
220  else Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
221 
222  if (Diagonal_ == Teuchos::null)
223  IFPACK_CHK_ERR(-5);
224 
225  IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
226 
227  // Setup for L1 Methods.
228  // Here we add half the value of the off-processor entries in the row,
229  // but only if diagonal isn't sufficiently large.
230  //
231  // Note: This is only done in the slower-but-more-general "RowMatrix" mode.
232  //
233  // This follows from Equation (6.5) in:
234  // Baker, Falgout, Kolev and Yang. Multigrid Smoothers for Ultraparallel Computing.
235  // SIAM J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864--2887.
236  if(DoL1Method_ && IsParallel_) {
237  int maxLength = Matrix().MaxNumEntries();
238  std::vector<int> Indices(maxLength);
239  std::vector<double> Values(maxLength);
240  int NumEntries;
241 
242  for (int i = 0 ; i < NumMyRows_ ; ++i) {
243  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, maxLength,NumEntries,
244  &Values[0], &Indices[0]));
245  double diagonal_boost=0.0;
246  for (int k = 0 ; k < NumEntries ; ++k)
247  if(Indices[k] > NumMyRows_)
248  diagonal_boost+=std::abs(Values[k]/2.0);
249  if ((*Diagonal_)[i] < L1Eta_*diagonal_boost)
250  (*Diagonal_)[i]+=diagonal_boost;
251  }
252  }
253 
254  // check diagonal elements, store the inverses, and verify that
255  // no zeros are around. If an element is zero, then by default
256  // its inverse is zero as well (that is, the row is ignored).
257  for (int i = 0 ; i < NumMyRows_ ; ++i) {
258  double& diag = (*Diagonal_)[i];
259  if (IFPACK_ABS(diag) < MinDiagonalValue_)
260  diag = MinDiagonalValue_;
261  if (diag != 0.0)
262  diag = 1.0 / diag;
263  }
264 #ifdef IFPACK_FLOPCOUNTERS
265  ComputeFlops_ += NumMyRows_;
266 #endif
267 
268 #if 0
269  // some methods require the inverse of the diagonal, compute it
270  // now and store it in `Diagonal_'
271  if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
272  Diagonal_->Reciprocal(*Diagonal_);
273  // update flops
274  ComputeFlops_ += NumMyRows_;
275  }
276 #endif
277 
278  // We need to import data from external processors. Here I create an
279  // Epetra_Import object because I cannot assume that Matrix_ has one.
280  // This is a bit of waste of resources (but the code is more robust).
281  // Note that I am doing some strange stuff to set the components of Y
282  // from Y2 (to save some time).
283  //
284  if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
285  Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
286  Matrix().RowMatrixRowMap()) );
287  if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
288  }
289 
290  ++NumCompute_;
291  ComputeTime_ += Time_->ElapsedTime();
292  IsComputed_ = true;
293 
294  IFPACK_CHK_ERR(ierr);
295  return(0);
296 }
297 
298 //==============================================================================
299 std::ostream& Ifpack_PointRelaxation::Print(std::ostream & os) const
300 {
301  using std::endl;
302 
303  double MyMinVal, MyMaxVal;
304  double MinVal, MaxVal;
305 
306  if (IsComputed_) {
307  Diagonal_->MinValue(&MyMinVal);
308  Diagonal_->MaxValue(&MyMaxVal);
309  Comm().MinAll(&MyMinVal,&MinVal,1);
310  Comm().MinAll(&MyMaxVal,&MaxVal,1);
311  }
312 
313  if (!Comm().MyPID()) {
314  os << endl;
315  os << "================================================================================" << endl;
316  os << "Ifpack_PointRelaxation" << endl;
317  os << "Sweeps = " << NumSweeps_ << endl;
318  os << "damping factor = " << DampingFactor_ << endl;
319  if (PrecType_ == IFPACK_JACOBI)
320  os << "Type = Jacobi" << endl;
321  else if (PrecType_ == IFPACK_GS)
322  os << "Type = Gauss-Seidel" << endl;
323  else if (PrecType_ == IFPACK_SGS)
324  os << "Type = symmetric Gauss-Seidel" << endl;
325  if (DoBackwardGS_)
326  os << "Using backward mode (GS only)" << endl;
327  if (ZeroStartingSolution_)
328  os << "Using zero starting solution" << endl;
329  else
330  os << "Using input starting solution" << endl;
331  os << "Condition number estimate = " << Condest() << endl;
332  os << "Global number of rows = " << Matrix_->NumGlobalRows64() << endl;
333  if (IsComputed_) {
334  os << "Minimum value on stored diagonal = " << MinVal << endl;
335  os << "Maximum value on stored diagonal = " << MaxVal << endl;
336  }
337  os << endl;
338  os << "Phase # calls Total Time (s) Total MFlops MFlops/s" << endl;
339  os << "----- ------- -------------- ------------ --------" << endl;
340  os << "Initialize() " << std::setw(5) << NumInitialize_
341  << " " << std::setw(15) << InitializeTime_
342  << " 0.0 0.0" << endl;
343  os << "Compute() " << std::setw(5) << NumCompute_
344  << " " << std::setw(15) << ComputeTime_
345  << " " << std::setw(15) << 1.0e-6 * ComputeFlops_;
346  if (ComputeTime_ != 0.0)
347  os << " " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
348  else
349  os << " " << std::setw(15) << 0.0 << endl;
350  os << "ApplyInverse() " << std::setw(5) << NumApplyInverse_
351  << " " << std::setw(15) << ApplyInverseTime_
352  << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
353  if (ApplyInverseTime_ != 0.0)
354  os << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
355  else
356  os << " " << std::setw(15) << 0.0 << endl;
357  os << "================================================================================" << endl;
358  os << endl;
359  }
360 
361  return(os);
362 }
363 
364 //==============================================================================
366 Condest(const Ifpack_CondestType CT,
367  const int MaxIters, const double Tol,
368  Epetra_RowMatrix* Matrix_in)
369 {
370  if (!IsComputed()) // cannot compute right now
371  return(-1.0);
372 
373  // always computes it. Call Condest() with no parameters to get
374  // the previous estimate.
375  Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
376 
377  return(Condest_);
378 }
379 
380 //==============================================================================
381 void Ifpack_PointRelaxation::SetLabel()
382 {
383  std::string PT;
384  if (PrecType_ == IFPACK_JACOBI)
385  PT = "Jacobi";
386  else if (PrecType_ == IFPACK_GS){
387  PT = "GS";
388  if(DoBackwardGS_)
389  PT = "Backward " + PT;
390  }
391  else if (PrecType_ == IFPACK_SGS)
392  PT = "SGS";
393 
394  if(NumLocalSmoothingIndices_!=NumMyRows_) PT="Local " + PT;
395  else if(LocalSmoothingIndices_) PT="Reordered " + PT;
396 
397  Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
398  + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
399 }
400 
401 //==============================================================================
402 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
403 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
404 // way the matrix-vector produce is performed).
405 //
406 // Another ML-related observation is that the starting solution (in Y)
407 // is NOT supposed to be zero. This may slow down the application of just
408 // one sweep of Jacobi.
409 //
412 {
413  if (!IsComputed())
414  IFPACK_CHK_ERR(-3);
415 
416  if (X.NumVectors() != Y.NumVectors())
417  IFPACK_CHK_ERR(-2);
418 
419  Time_->ResetStartTime();
420 
421  // AztecOO gives X and Y pointing to the same memory location,
422  // need to create an auxiliary vector, Xcopy
423  Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
424  if (X.Pointers()[0] == Y.Pointers()[0])
425  Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
426  else
427  Xcopy = Teuchos::rcp( &X, false );
428 
429  // Flops are updated in each of the following.
430  switch (PrecType_) {
431  case IFPACK_JACOBI:
432  IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
433  break;
434  case IFPACK_GS:
435  IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
436  break;
437  case IFPACK_SGS:
438  IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
439  break;
440  default:
441  IFPACK_CHK_ERR(-1); // something wrong
442  }
443 
444  ++NumApplyInverse_;
445  ApplyInverseTime_ += Time_->ElapsedTime();
446  return(0);
447 }
448 
449 //==============================================================================
450 // This preconditioner can be much slower than AztecOO and ML versions
451 // if the matrix-vector product requires all ExtractMyRowCopy()
452 // (as done through Ifpack_AdditiveSchwarz).
453 int Ifpack_PointRelaxation::
454 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
455 {
456  int NumVectors = LHS.NumVectors();
457 
458  int startIter = 0;
459  if (NumSweeps_ > 0 && ZeroStartingSolution_) {
460  // When we have a zero initial guess, we can skip the first
461  // matrix apply call and zero initialization
462  for (int v = 0; v < NumVectors; v++)
463  IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(RHS(v)), *Diagonal_, 0.0));
464 
465  startIter = 1;
466  }
467 
468  bool zeroOut = false;
469  Epetra_MultiVector A_times_LHS(LHS.Map(), NumVectors, zeroOut);
470  for (int j = startIter; j < NumSweeps_ ; j++) {
471  IFPACK_CHK_ERR(Apply(LHS, A_times_LHS));
472  IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
473  for (int v = 0 ; v < NumVectors ; ++v)
474  IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)),
475  *Diagonal_, 1.0));
476 
477  }
478 
479  // Flops:
480  // - matrix vector (2 * NumGlobalNonzeros_)
481  // - update (2 * NumGlobalRows_)
482  // - Multiply:
483  // - DampingFactor (NumGlobalRows_)
484  // - Diagonal (NumGlobalRows_)
485  // - A + B (NumGlobalRows_)
486  // - 1.0 (NumGlobalRows_)
487 #ifdef IFPACK_FLOPCOUNTERS
488  ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
489 #endif
490 
491  return(0);
492 }
493 
494 //==============================================================================
495 int Ifpack_PointRelaxation::
496 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
497 {
498  if (ZeroStartingSolution_)
499  Y.PutScalar(0.0);
500 
501  const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
502  // try to pick the best option; performances may be improved
503  // if several sweeps are used.
504  if (CrsMatrix != 0)
505  {
506  if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
507  return(ApplyInverseGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
508  else if (CrsMatrix->StorageOptimized())
509  return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
510  else
511  return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
512  }
513  else
514  return(ApplyInverseGS_RowMatrix(X, Y));
515 } //ApplyInverseGS()
516 
517 
518 
519 //==============================================================================
520 int Ifpack_PointRelaxation::
521 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
522 {
523  int NumVectors = X.NumVectors();
524 
525  int Length = Matrix().MaxNumEntries();
526  std::vector<int> Indices(Length);
527  std::vector<double> Values(Length);
528 
529  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
530  if (IsParallel_)
531  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
532  else
533  Y2 = Teuchos::rcp( &Y, false );
534 
535  // extract views (for nicer and faster code)
536  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
537  X.ExtractView(&x_ptr);
538  Y.ExtractView(&y_ptr);
539  Y2->ExtractView(&y2_ptr);
540  Diagonal_->ExtractView(&d_ptr);
541 
542  for (int j = 0; j < NumSweeps_ ; j++) {
543 
544  // data exchange is here, once per sweep
545  if (IsParallel_)
546  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
547 
548  // FIXME: do I really need this code below?
549  if (NumVectors == 1) {
550 
551  double* y0_ptr = y_ptr[0];
552  double* y20_ptr = y2_ptr[0];
553  double* x0_ptr = x_ptr[0];
554 
555  if(!DoBackwardGS_){
556  /* Forward Mode */
557  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
558  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
559 
560  int NumEntries;
561  int col;
562  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
563  &Values[0], &Indices[0]));
564 
565  double dtemp = 0.0;
566  for (int k = 0 ; k < NumEntries ; ++k) {
567 
568  col = Indices[k];
569  dtemp += Values[k] * y20_ptr[col];
570  }
571 
572  y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
573  }
574  }
575  else {
576  /* Backward Mode */
577  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
578  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
579 
580  int NumEntries;
581  // int col; // unused
582  // (void) col; // Forestall compiler warning for unused variable.
583  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
584  &Values[0], &Indices[0]));
585  double dtemp = 0.0;
586  for (int k = 0 ; k < NumEntries ; ++k) {
587  //col = Indices[k]; // unused
588  dtemp += Values[k] * y20_ptr[i];
589  }
590 
591  y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
592  }
593  }
594 
595  // using Export() sounded quite expensive
596  if (IsParallel_)
597  for (int i = 0 ; i < NumMyRows_ ; ++i)
598  y0_ptr[i] = y20_ptr[i];
599 
600  }
601  else {
602  if(!DoBackwardGS_){
603  /* Forward Mode */
604  for (int i = 0 ; i < NumMyRows_ ; ++i) {
605 
606  int NumEntries;
607  int col;
608  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
609  &Values[0], &Indices[0]));
610 
611  for (int m = 0 ; m < NumVectors ; ++m) {
612 
613  double dtemp = 0.0;
614  for (int k = 0 ; k < NumEntries ; ++k) {
615 
616  col = Indices[k];
617  dtemp += Values[k] * y2_ptr[m][col];
618  }
619 
620  y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
621  }
622  }
623  }
624  else {
625  /* Backward Mode */
626  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
627  int NumEntries;
628  int col;
629  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
630  &Values[0], &Indices[0]));
631 
632  for (int m = 0 ; m < NumVectors ; ++m) {
633 
634  double dtemp = 0.0;
635  for (int k = 0 ; k < NumEntries ; ++k) {
636 
637  col = Indices[k];
638  dtemp += Values[k] * y2_ptr[m][col];
639  }
640 
641  y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
642 
643  }
644  }
645  }
646 
647  // using Export() sounded quite expensive
648  if (IsParallel_)
649  for (int m = 0 ; m < NumVectors ; ++m)
650  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
651  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
652  y_ptr[m][i] = y2_ptr[m][i];
653  }
654  }
655  }
656 
657 #ifdef IFPACK_FLOPCOUNTERS
658  ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
659 #endif
660 
661  return(0);
662 } //ApplyInverseGS_RowMatrix()
663 
664 //==============================================================================
665 int Ifpack_PointRelaxation::
666 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
667 {
668  int NumVectors = X.NumVectors();
669 
670  int* Indices;
671  double* Values;
672 
673  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
674  if (IsParallel_) {
675  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
676  }
677  else
678  Y2 = Teuchos::rcp( &Y, false );
679 
680  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
681  X.ExtractView(&x_ptr);
682  Y.ExtractView(&y_ptr);
683  Y2->ExtractView(&y2_ptr);
684  Diagonal_->ExtractView(&d_ptr);
685 
686  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
687 
688  // only one data exchange per sweep
689  if (IsParallel_)
690  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
691 
692  if(!DoBackwardGS_){
693  /* Forward Mode */
694  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
695  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
696 
697  int NumEntries;
698  int col;
699  double diag = d_ptr[i];
700 
701  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
702 
703  for (int m = 0 ; m < NumVectors ; ++m) {
704 
705  double dtemp = 0.0;
706 
707  for (int k = 0; k < NumEntries; ++k) {
708 
709  col = Indices[k];
710  dtemp += Values[k] * y2_ptr[m][col];
711  }
712 
713  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
714  }
715  }
716  }
717  else {
718  /* Backward Mode */
719  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
720  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
721 
722  int NumEntries;
723  int col;
724  double diag = d_ptr[i];
725 
726  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
727 
728  for (int m = 0 ; m < NumVectors ; ++m) {
729 
730  double dtemp = 0.0;
731  for (int k = 0; k < NumEntries; ++k) {
732 
733  col = Indices[k];
734  dtemp += Values[k] * y2_ptr[m][col];
735  }
736 
737  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
738 
739  }
740  }
741  }
742 
743  if (IsParallel_)
744  for (int m = 0 ; m < NumVectors ; ++m)
745  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
746  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
747  y_ptr[m][i] = y2_ptr[m][i];
748  }
749  }
750 
751 #ifdef IFPACK_FLOPCOUNTERS
752  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
753 #endif
754  return(0);
755 } //ApplyInverseGS_CrsMatrix()
756 
757 //
758 //==============================================================================
759 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
760 
761 int Ifpack_PointRelaxation::
762 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
763 {
764  int* IndexOffset;
765  int* Indices;
766  double* Values;
767  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
768 
769  int NumVectors = X.NumVectors();
770 
771  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
772  if (IsParallel_) {
773  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
774  }
775  else
776  Y2 = Teuchos::rcp( &Y, false );
777 
778  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
779  X.ExtractView(&x_ptr);
780  Y.ExtractView(&y_ptr);
781  Y2->ExtractView(&y2_ptr);
782  Diagonal_->ExtractView(&d_ptr);
783 
784  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
785 
786  // only one data exchange per sweep
787  if (IsParallel_)
788  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
789 
790  if(!DoBackwardGS_){
791  /* Forward Mode */
792  for (int i = 0 ; i < NumMyRows_ ; ++i) {
793 
794  int col;
795  double diag = d_ptr[i];
796 
797  for (int m = 0 ; m < NumVectors ; ++m) {
798 
799  double dtemp = 0.0;
800 
801  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
802 
803  col = Indices[k];
804  dtemp += Values[k] * y2_ptr[m][col];
805  }
806 
807  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
808  }
809  }
810  }
811  else {
812  /* Backward Mode */
813  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
814 
815  int col;
816  double diag = d_ptr[i];
817 
818  for (int m = 0 ; m < NumVectors ; ++m) {
819 
820  double dtemp = 0.0;
821  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
822 
823  col = Indices[k];
824  dtemp += Values[k] * y2_ptr[m][col];
825  }
826 
827  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
828 
829  }
830  }
831  }
832 
833 
834  if (IsParallel_)
835  for (int m = 0 ; m < NumVectors ; ++m)
836  for (int i = 0 ; i < NumMyRows_ ; ++i)
837  y_ptr[m][i] = y2_ptr[m][i];
838  }
839 
840 #ifdef IFPACK_FLOPCOUNTERS
841  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
842 #endif
843  return(0);
844 } //ApplyInverseGS_FastCrsMatrix()
845 
846 
847 //
848 //==============================================================================
849 // ApplyInverseGS_LocalFastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage() + LocalSmoothingIndices
850 
851 int Ifpack_PointRelaxation::
852 ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
853 {
854  int* IndexOffset;
855  int* Indices;
856  double* Values;
857  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
858 
859  int NumVectors = X.NumVectors();
860 
861  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
862  if (IsParallel_) {
863  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
864  }
865  else
866  Y2 = Teuchos::rcp( &Y, false );
867 
868  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
869  X.ExtractView(&x_ptr);
870  Y.ExtractView(&y_ptr);
871  Y2->ExtractView(&y2_ptr);
872  Diagonal_->ExtractView(&d_ptr);
873 
874  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
875 
876  // only one data exchange per sweep
877  if (IsParallel_)
878  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
879 
880  if(!DoBackwardGS_){
881  /* Forward Mode */
882  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
883  int i=LocalSmoothingIndices_[ii];
884 
885  int col;
886  double diag = d_ptr[i];
887 
888  for (int m = 0 ; m < NumVectors ; ++m) {
889 
890  double dtemp = 0.0;
891 
892  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
893 
894  col = Indices[k];
895  dtemp += Values[k] * y2_ptr[m][col];
896  }
897 
898  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
899  }
900  }
901  }
902  else {
903  /* Backward Mode */
904  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
905  int i=LocalSmoothingIndices_[ii];
906 
907  int col;
908  double diag = d_ptr[i];
909 
910  for (int m = 0 ; m < NumVectors ; ++m) {
911 
912  double dtemp = 0.0;
913  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
914 
915  col = Indices[k];
916  dtemp += Values[k] * y2_ptr[m][col];
917  }
918 
919  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
920 
921  }
922  }
923  }
924 
925 
926  if (IsParallel_)
927  for (int m = 0 ; m < NumVectors ; ++m)
928  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
929  int i=LocalSmoothingIndices_[ii];
930  y_ptr[m][i] = y2_ptr[m][i];
931  }
932  }
933 
934 #ifdef IFPACK_FLOPCOUNTERS
935  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
936 #endif
937  return(0);
938 } //ApplyInverseGS_LocalFastCrsMatrix()
939 
940 
941 //==============================================================================
942 int Ifpack_PointRelaxation::
943 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
944 {
945  if (ZeroStartingSolution_)
946  Y.PutScalar(0.0);
947 
948  const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
949  // try to pick the best option; performances may be improved
950  // if several sweeps are used.
951  if (CrsMatrix != 0)
952  {
953  if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
954  return(ApplyInverseSGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
955  else if (CrsMatrix->StorageOptimized())
956  return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
957  else
958  return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
959  }
960  else
961  return(ApplyInverseSGS_RowMatrix(X, Y));
962 }
963 
964 //==============================================================================
965 int Ifpack_PointRelaxation::
966 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
967 {
968  int NumVectors = X.NumVectors();
969  int Length = Matrix().MaxNumEntries();
970  std::vector<int> Indices(Length);
971  std::vector<double> Values(Length);
972 
973  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
974  if (IsParallel_) {
975  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
976  }
977  else
978  Y2 = Teuchos::rcp( &Y, false );
979 
980  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
981  X.ExtractView(&x_ptr);
982  Y.ExtractView(&y_ptr);
983  Y2->ExtractView(&y2_ptr);
984  Diagonal_->ExtractView(&d_ptr);
985 
986  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
987 
988  // only one data exchange per sweep
989  if (IsParallel_)
990  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
991 
992  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
993  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
994 
995  int NumEntries;
996  int col;
997  double diag = d_ptr[i];
998 
999  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
1000  &Values[0], &Indices[0]));
1001 
1002  for (int m = 0 ; m < NumVectors ; ++m) {
1003 
1004  double dtemp = 0.0;
1005 
1006  for (int k = 0 ; k < NumEntries ; ++k) {
1007 
1008  col = Indices[k];
1009  dtemp += Values[k] * y2_ptr[m][col];
1010  }
1011 
1012  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1013  }
1014  }
1015  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1016  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1017 
1018  int NumEntries;
1019  int col;
1020  double diag = d_ptr[i];
1021 
1022  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
1023  &Values[0], &Indices[0]));
1024 
1025  for (int m = 0 ; m < NumVectors ; ++m) {
1026 
1027  double dtemp = 0.0;
1028  for (int k = 0 ; k < NumEntries ; ++k) {
1029 
1030  col = Indices[k];
1031  dtemp += Values[k] * y2_ptr[m][col];
1032  }
1033 
1034  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1035 
1036  }
1037  }
1038 
1039  if (IsParallel_)
1040  for (int m = 0 ; m < NumVectors ; ++m)
1041  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1042  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1043  y_ptr[m][i] = y2_ptr[m][i];
1044  }
1045  }
1046 
1047 #ifdef IFPACK_FLOPCOUNTERS
1048  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1049 #endif
1050  return(0);
1051 }
1052 
1053 //==============================================================================
1054 int Ifpack_PointRelaxation::
1055 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1056 {
1057  int NumVectors = X.NumVectors();
1058 
1059  int* Indices;
1060  double* Values;
1061 
1062  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1063  if (IsParallel_) {
1064  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1065  }
1066  else
1067  Y2 = Teuchos::rcp( &Y, false );
1068 
1069  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1070  X.ExtractView(&x_ptr);
1071  Y.ExtractView(&y_ptr);
1072  Y2->ExtractView(&y2_ptr);
1073  Diagonal_->ExtractView(&d_ptr);
1074 
1075  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1076 
1077  // only one data exchange per sweep
1078  if (IsParallel_)
1079  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1080 
1081  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1082  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1083 
1084  int NumEntries;
1085  int col;
1086  double diag = d_ptr[i];
1087 
1088  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1089 
1090  for (int m = 0 ; m < NumVectors ; ++m) {
1091 
1092  double dtemp = 0.0;
1093 
1094  for (int k = 0; k < NumEntries; ++k) {
1095 
1096  col = Indices[k];
1097  dtemp += Values[k] * y2_ptr[m][col];
1098  }
1099 
1100  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1101  }
1102  }
1103  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1104  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1105 
1106  int NumEntries;
1107  int col;
1108  double diag = d_ptr[i];
1109 
1110  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1111 
1112  for (int m = 0 ; m < NumVectors ; ++m) {
1113 
1114  double dtemp = 0.0;
1115  for (int k = 0; k < NumEntries; ++k) {
1116 
1117  col = Indices[k];
1118  dtemp += Values[k] * y2_ptr[m][col];
1119  }
1120 
1121  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1122 
1123  }
1124  }
1125 
1126  if (IsParallel_)
1127  for (int m = 0 ; m < NumVectors ; ++m)
1128  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1129  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1130  y_ptr[m][i] = y2_ptr[m][i];
1131  }
1132  }
1133 
1134 #ifdef IFPACK_FLOPCOUNTERS
1135  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1136 #endif
1137  return(0);
1138 }
1139 
1140 //==============================================================================
1141 // this requires Epetra_CrsMatrix + OptimizeStorage()
1142 int Ifpack_PointRelaxation::
1143 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1144 {
1145  int* IndexOffset;
1146  int* Indices;
1147  double* Values;
1148  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
1149 
1150  int NumVectors = X.NumVectors();
1151 
1152  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1153  if (IsParallel_) {
1154  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1155  }
1156  else
1157  Y2 = Teuchos::rcp( &Y, false );
1158 
1159  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1160  X.ExtractView(&x_ptr);
1161  Y.ExtractView(&y_ptr);
1162  Y2->ExtractView(&y2_ptr);
1163  Diagonal_->ExtractView(&d_ptr);
1164 
1165  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1166 
1167  // only one data exchange per sweep
1168  if (IsParallel_)
1169  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1170 
1171  for (int i = 0 ; i < NumMyRows_ ; ++i) {
1172 
1173  int col;
1174  double diag = d_ptr[i];
1175 
1176  // no need to extract row i
1177  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1178 
1179  for (int m = 0 ; m < NumVectors ; ++m) {
1180 
1181  double dtemp = 0.0;
1182 
1183  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1184 
1185  col = Indices[k];
1186  dtemp += Values[k] * y2_ptr[m][col];
1187  }
1188 
1189  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1190  }
1191  }
1192 
1193  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
1194 
1195  int col;
1196  double diag = d_ptr[i];
1197 
1198  // no need to extract row i
1199  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1200 
1201  for (int m = 0 ; m < NumVectors ; ++m) {
1202 
1203  double dtemp = 0.0;
1204  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1205 
1206  col = Indices[k];
1207  dtemp += Values[k] * y2_ptr[m][col];
1208  }
1209 
1210  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1211 
1212  }
1213  }
1214 
1215  if (IsParallel_)
1216  for (int m = 0 ; m < NumVectors ; ++m)
1217  for (int i = 0 ; i < NumMyRows_ ; ++i)
1218  y_ptr[m][i] = y2_ptr[m][i];
1219  }
1220 
1221 #ifdef IFPACK_FLOPCOUNTERS
1222  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1223 #endif
1224  return(0);
1225 }
1226 
1227 
1228 //==============================================================================
1229 // this requires Epetra_CrsMatrix + OptimizeStorage() + LocalSmoothingIndices_
1230 int Ifpack_PointRelaxation::
1231 ApplyInverseSGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1232 {
1233  int* IndexOffset;
1234  int* Indices;
1235  double* Values;
1236  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
1237 
1238  int NumVectors = X.NumVectors();
1239 
1240  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1241  if (IsParallel_) {
1242  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1243  }
1244  else
1245  Y2 = Teuchos::rcp( &Y, false );
1246 
1247  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1248  X.ExtractView(&x_ptr);
1249  Y.ExtractView(&y_ptr);
1250  Y2->ExtractView(&y2_ptr);
1251  Diagonal_->ExtractView(&d_ptr);
1252 
1253  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1254 
1255  // only one data exchange per sweep
1256  if (IsParallel_)
1257  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1258 
1259  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1260  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1261 
1262  int col;
1263  double diag = d_ptr[i];
1264 
1265  // no need to extract row i
1266  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1267 
1268  for (int m = 0 ; m < NumVectors ; ++m) {
1269 
1270  double dtemp = 0.0;
1271 
1272  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1273 
1274  col = Indices[k];
1275  dtemp += Values[k] * y2_ptr[m][col];
1276  }
1277 
1278  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1279  }
1280  }
1281 
1282  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1283  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1284 
1285  int col;
1286  double diag = d_ptr[i];
1287 
1288  // no need to extract row i
1289  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1290 
1291  for (int m = 0 ; m < NumVectors ; ++m) {
1292 
1293  double dtemp = 0.0;
1294  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1295 
1296  col = Indices[k];
1297  dtemp += Values[k] * y2_ptr[m][col];
1298  }
1299 
1300  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1301 
1302  }
1303  }
1304 
1305  if (IsParallel_)
1306  for (int m = 0 ; m < NumVectors ; ++m)
1307  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1308  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1309  y_ptr[m][i] = y2_ptr[m][i];
1310  }
1311  }
1312 
1313 #ifdef IFPACK_FLOPCOUNTERS
1314  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1315 #endif
1316  return(0);
1317 }
bool StorageOptimized() const
const Epetra_BlockMap & Map() const
const Epetra_BlockMap & RowMap() const
virtual const Epetra_Map & OperatorRangeMap() const
Returns the Epetra_Map object associated with the range of this operator.
int ExtractCrsDataPointers(int *&IndexOffset, int *&Indices, double *&Values_in) const
virtual double Condest() const
Returns the condition number estimate, or -1.0 if not computed.
int PutScalar(double ScalarConstant)
virtual int ApplyInverse(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Applies the preconditioner to X, returns the result in Y.
Ifpack_PointRelaxation(const Epetra_RowMatrix *Matrix)
Ifpack_PointRelaxation constructor with given Epetra_RowMatrix.
virtual int MinAll(double *PartialMins, double *GlobalMins, int Count) const=0
int ExtractMyRowView(int MyRow, int &NumEntries, double *&Values, int *&Indices) const
virtual int SetParameters(Teuchos::ParameterList &List)
Sets all the parameters for the preconditioner.
virtual int Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Applies the matrix to an Epetra_MultiVector.
virtual int MaxNumEntries() const=0
virtual const Epetra_RowMatrix & Matrix() const
Returns a pointer to the matrix to be preconditioned.
virtual const Epetra_Comm & Comm() const
Returns a pointer to the Epetra_Comm communicator associated with this operator.
int NumVectors() const
virtual bool IsInitialized() const
Returns true if the preconditioner has been successfully initialized, false otherwise.
virtual int Compute()
Computes the preconditioners.
virtual int Initialize()
Computes all it is necessary to initialize the preconditioner.
int Multiply(char TransA, char TransB, double ScalarAB, const Epetra_MultiVector &A, const Epetra_MultiVector &B, double ScalarThis)
virtual std::ostream & Print(std::ostream &os) const
Prints object to an output stream.
virtual const Epetra_Map & OperatorDomainMap() const
Returns the Epetra_Map object associated with the domain of this operator.
int ExtractView(double **A, int *MyLDA) const
virtual bool IsComputed() const
Returns true if the preconditioner has been successfully computed.
std::string Ifpack_toString(const int &x)
Converts an integer to std::string.
double ** Pointers() const