RTOp Package Browser (Single Doxygen Collection)  Version of the Day
RTOpPack_TOpLinearCombination_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // RTOp: Interfaces and Support Software for Vector Reduction Transformation
5 // Operations
6 // Copyright (2006) Sandia Corporation
7 //
8 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9 // license for use of this work by or on behalf of the U.S. Government.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov)
39 //
40 // ***********************************************************************
41 // @HEADER
42 
43 #ifndef RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
44 #define RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
45 
46 
47 #include "Teuchos_Workspace.hpp"
48 
49 
50 namespace RTOpPack {
51 
52 
53 template<class Scalar>
55  const ArrayView<const Scalar> &alpha_in,
56  const Scalar &beta_in
57  )
58  :beta_(beta_in)
59 {
60  if (alpha_in.size())
61  this->alpha(alpha_in);
62  this->setOpNameBase("TOpLinearCombination");
63 }
64 
65 
66 
67 template<class Scalar>
69  const ArrayView<const Scalar> &alpha_in )
70 {
71  TEUCHOS_TEST_FOR_EXCEPT( alpha_in.size() == 0 );
72  alpha_ = alpha_in;
73 }
74 
75 
76 template<class Scalar>
79 { return alpha_; }
80 
81 
82 template<class Scalar>
83 void TOpLinearCombination<Scalar>::beta( const Scalar& beta_in ) { beta_ = beta_in; }
84 
85 
86 template<class Scalar>
87 Scalar TOpLinearCombination<Scalar>::beta() const { return beta_; }
88 
89 
90 template<class Scalar>
91 int TOpLinearCombination<Scalar>::num_vecs() const { return alpha_.size(); }
92 
93 
94 // Overridden from RTOpT
95 
96 
97 template<class Scalar>
99  const ArrayView<const ConstSubVectorView<Scalar> > &sub_vecs,
100  const ArrayView<const SubVectorView<Scalar> > &targ_sub_vecs,
101  const Ptr<ReductTarget> &reduct_obj_inout
102  ) const
103 {
104 
105  using Teuchos::as;
106  using Teuchos::Workspace;
108  typedef typename Teuchos::ArrayRCP<Scalar>::iterator iter_t;
109  typedef typename Teuchos::ArrayRCP<const Scalar>::iterator const_iter_t;
111 
112 #ifdef TEUCHOS_DEBUG
113  validate_apply_op<Scalar>(*this, as<int>(alpha_.size()), 1, false,
114  sub_vecs, targ_sub_vecs, reduct_obj_inout.getConst());
115 #endif
116 
117  const int l_num_vecs = alpha_.size();
118 
119  // Get iterators to local data
120  const RTOpPack::index_type subDim = targ_sub_vecs[0].subDim();
121  iter_t z0_val = targ_sub_vecs[0].values().begin();
122  const ptrdiff_t z0_s = targ_sub_vecs[0].stride();
123  Workspace<const_iter_t> v_val(wss,l_num_vecs);
124  Workspace<ptrdiff_t> v_s(wss,l_num_vecs,false);
125  for( int k = 0; k < l_num_vecs; ++k ) {
126 #ifdef TEUCHOS_DEBUG
127  TEUCHOS_TEST_FOR_EXCEPT( sub_vecs[k].subDim() != subDim );
128  TEUCHOS_TEST_FOR_EXCEPT( sub_vecs[k].globalOffset() != targ_sub_vecs[0].globalOffset() );
129 #endif
130  v_val[k] = sub_vecs[k].values().begin();
131  v_s[k] = sub_vecs[k].stride();
132  }
133 
134  //
135  // Perform the operation and specialize the cases for l_num_vecs = 1 and 2
136  // in order to get good performance.
137  //
138  if( l_num_vecs == 1 ) {
139  //
140  // z0 = alpha*v0 + beta*z0
141  //
142  const Scalar l_alpha = alpha_[0], l_beta = beta_;
143  const_iter_t v0_val = v_val[0];
144  const ptrdiff_t v0_s = v_s[0];
145  if( l_beta==ST::zero() ) {
146  // z0 = alpha*v0
147  if( z0_s==1 && v0_s==1 ) {
148  for( int j = 0; j < subDim; ++j )
149  (*z0_val++) = l_alpha * (*v0_val++);
150  }
151  else {
152  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
153  (*z0_val) = l_alpha * (*v0_val);
154  }
155  }
156  else if( l_beta==ST::one() ) {
157  //
158  // z0 = alpha*v0 + z0
159  //
160  if( z0_s==1 && v0_s==1 ) {
161  for( int j = 0; j < subDim; ++j )
162  (*z0_val++) += l_alpha * (*v0_val++);
163  }
164  else {
165  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
166  (*z0_val) += l_alpha * (*v0_val);
167  }
168  }
169  else {
170  // z0 = alpha*v0 + beta*z0
171  if( z0_s==1 && v0_s==1 ) {
172  for( int j = 0; j < subDim; ++j, ++z0_val )
173  (*z0_val) = l_alpha * (*v0_val++) + l_beta*(*z0_val);
174  }
175  else {
176  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
177  (*z0_val) = l_alpha * (*v0_val) + l_beta*(*z0_val);
178  }
179  }
180  }
181  else if( l_num_vecs == 2 ) {
182  //
183  // z0 = alpha0*v0 + alpha1*v1 + beta*z0
184  //
185  const Scalar alpha0 = alpha_[0], alpha1=alpha_[1], l_beta = beta_;
186  const_iter_t v0_val = v_val[0];
187  const ptrdiff_t v0_s = v_s[0];
188  const_iter_t v1_val = v_val[1];
189  const ptrdiff_t v1_s = v_s[1];
190  if( l_beta==ST::zero() ) {
191  if( alpha0 == ST::one() ) {
192  if( alpha1 == ST::one() ) {
193  // z0 = v0 + v1
194  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
195  for( int j = 0; j < subDim; ++j )
196  (*z0_val++) = (*v0_val++) + (*v1_val++);
197  }
198  else {
199  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
200  (*z0_val) = (*v0_val) + (*v1_val);
201  }
202  }
203  else {
204  // z0 = v0 + alpha1*v1
205  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
206  for( int j = 0; j < subDim; ++j )
207  (*z0_val++) = (*v0_val++) + alpha1*(*v1_val++);
208  }
209  else {
210  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
211  (*z0_val) = (*v0_val) + alpha1*(*v1_val);
212  }
213  }
214  }
215  else {
216  if( alpha1 == ST::one() ) {
217  // z0 = alpha0*v0 + v1
218  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
219  for( int j = 0; j < subDim; ++j )
220  (*z0_val++) = alpha0*(*v0_val++) + (*v1_val++);
221  }
222  else {
223  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
224  (*z0_val) = alpha0*(*v0_val) + (*v1_val);
225  }
226  }
227  else {
228  // z0 = alpha0*v0 + alpha1*v1
229  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
230  for( int j = 0; j < subDim; ++j )
231  (*z0_val++) = alpha0*(*v0_val++) + alpha1*(*v1_val++);
232  }
233  else {
234  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
235  (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val);
236  }
237  }
238  }
239  }
240  else if( l_beta==ST::one() ) {
241  if( alpha0 == ST::one() ) {
242  if( alpha1 == ST::one() ) {
243  // z0 = v0 + v1 + z0
244  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
245  for( int j = 0; j < subDim; ++j, ++z0_val )
246  (*z0_val) += (*v0_val++) + (*v1_val++);
247  }
248  else {
249  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
250  (*z0_val) += (*v0_val) + (*v1_val);
251  }
252  }
253  else {
254  // z0 = v0 + alpha1*v1 + z0
255  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
256  for( int j = 0; j < subDim; ++j, ++z0_val )
257  (*z0_val) += (*v0_val++) + alpha1*(*v1_val++);
258  }
259  else {
260  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
261  (*z0_val) += (*v0_val) + alpha1*(*v1_val);
262  }
263  }
264  }
265  else {
266  if( alpha1 == ST::one() ) {
267  // z0 = alpha0*v0 + v1 + z0
268  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
269  for( int j = 0; j < subDim; ++j, ++z0_val )
270  (*z0_val) += alpha0*(*v0_val++) + (*v1_val++);
271  }
272  else {
273  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
274  (*z0_val) += alpha0*(*v0_val) + (*v1_val);
275  }
276  }
277  else {
278  // z0 = alpha0*v0 + alpha1*v1 + z0
279  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
280  for( int j = 0; j < subDim; ++j, ++z0_val )
281  (*z0_val) += alpha0*(*v0_val++) + alpha1*(*v1_val++);
282  }
283  else {
284  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
285  (*z0_val) += alpha0*(*v0_val) + alpha1*(*v1_val);
286  }
287  }
288  }
289  }
290  else {
291  if( alpha0 == ST::one() ) {
292  if( alpha1 == ST::one() ) {
293  // z0 = v0 + v1 + beta*z0
294  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
295  for( int j = 0; j < subDim; ++j, ++z0_val )
296  (*z0_val) = (*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
297  }
298  else {
299  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
300  (*z0_val) = (*v0_val) + (*v1_val) + l_beta*(*z0_val);
301  }
302  }
303  else {
304  // z0 = v0 + alpha1*v1 + beta*z0
305  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
306  for( int j = 0; j < subDim; ++j, ++z0_val )
307  (*z0_val) = (*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
308  }
309  else {
310  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
311  (*z0_val) = (*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
312  }
313  }
314  }
315  else {
316  if( alpha1 == ST::one() ) {
317  // z0 = alpha0*v0 + v1 + beta*z0
318  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
319  for( int j = 0; j < subDim; ++j, ++z0_val )
320  (*z0_val) = alpha0*(*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
321  }
322  else {
323  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
324  (*z0_val) = alpha0*(*v0_val) + (*v1_val) + l_beta*(*z0_val);
325  }
326  }
327  else {
328  // z0 = alpha0*v0 + alpha1*v1 + beta*z0
329  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
330  for( int j = 0; j < subDim; ++j, ++z0_val )
331  (*z0_val) = alpha0*(*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
332  }
333  else {
334  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
335  (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
336  }
337  }
338  }
339  }
340  }
341  else {
342  //
343  // Totally general implementation (but least efficient)
344  //
345  // z0 *= beta
346  if( beta_ == ST::zero() ) {
347  for( int j = 0; j < subDim; ++j, z0_val += z0_s )
348  (*z0_val) = ST::zero();
349  }
350  else if( beta_ != ST::one() ) {
351  for( int j = 0; j < subDim; ++j, z0_val += z0_s )
352  (*z0_val) *= beta_;
353  }
354  // z0 += sum( alpha[k]*v[k], k=0...l_num_vecs-1)
355  z0_val = targ_sub_vecs[0].values().begin();
356  for( int j = 0; j < subDim; ++j, z0_val += z0_s ) {
357  for( int k = 0; k < l_num_vecs; ++k ) {
358  const Scalar
359  &alpha_k = alpha_[k],
360  &v_k_val = *v_val[k];
361  (*z0_val) += alpha_k * v_k_val;
362  v_val[k] += v_s[k];
363  }
364  }
365  }
366 }
367 
368 
369 } // namespace RTOpPack
370 
371 
372 #endif // RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
void apply_op_impl(const ArrayView< const ConstSubVectorView< Scalar > > &sub_vecs, const ArrayView< const SubVectorView< Scalar > > &targ_sub_vecs, const Ptr< ReductTarget > &reduct_obj_inout) const
Class for a changeable sub-vector.
Teuchos_Ordinal index_type
size_type size() const
Class for a non-changeable sub-vector.
const ArrayView< const Scalar > alpha() const
Ptr< const T > getConst() const
TypeTo as(const TypeFrom &t)
void setOpNameBase(const std::string &op_name_base)
Just set the operator name.
TOpLinearCombination(const ArrayView< const Scalar > &alpha_in=Teuchos::null, const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero())
#define TEUCHOS_TEST_FOR_EXCEPT(throw_exception_test)
TEUCHOSCORE_LIB_DLL_EXPORT Teuchos::RCP< WorkspaceStore > get_default_workspace_store()