43 #ifndef RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP 44 #define RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP 47 #include "Teuchos_Workspace.hpp" 53 template<
class Scalar>
55 const ArrayView<const Scalar> &alpha_in,
61 this->
alpha(alpha_in);
67 template<
class Scalar>
69 const ArrayView<const Scalar> &alpha_in )
76 template<
class Scalar>
77 const ArrayView<const Scalar>
82 template<
class Scalar>
86 template<
class Scalar>
90 template<
class Scalar>
97 template<
class Scalar>
101 const Ptr<ReductTarget> &reduct_obj_inout
113 validate_apply_op<Scalar>(*
this, as<int>(alpha_.size()), 1,
false,
114 sub_vecs, targ_sub_vecs, reduct_obj_inout.getConst());
117 const int l_num_vecs = alpha_.size();
120 const RTOpPack::index_type subDim = targ_sub_vecs[0].subDim();
121 iter_t z0_val = targ_sub_vecs[0].values().begin();
122 const ptrdiff_t z0_s = targ_sub_vecs[0].stride();
123 Workspace<const_iter_t> v_val(wss,l_num_vecs);
124 Workspace<ptrdiff_t> v_s(wss,l_num_vecs,
false);
125 for(
int k = 0; k < l_num_vecs; ++k ) {
130 v_val[k] = sub_vecs[k].values().begin();
131 v_s[k] = sub_vecs[k].stride();
138 if( l_num_vecs == 1 ) {
142 const Scalar l_alpha = alpha_[0], l_beta = beta_;
143 const_iter_t v0_val = v_val[0];
144 const ptrdiff_t v0_s = v_s[0];
145 if( l_beta==ST::zero() ) {
147 if( z0_s==1 && v0_s==1 ) {
148 for(
int j = 0; j < subDim; ++j )
149 (*z0_val++) = l_alpha * (*v0_val++);
152 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
153 (*z0_val) = l_alpha * (*v0_val);
156 else if( l_beta==ST::one() ) {
160 if( z0_s==1 && v0_s==1 ) {
161 for(
int j = 0; j < subDim; ++j )
162 (*z0_val++) += l_alpha * (*v0_val++);
165 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
166 (*z0_val) += l_alpha * (*v0_val);
171 if( z0_s==1 && v0_s==1 ) {
172 for(
int j = 0; j < subDim; ++j, ++z0_val )
173 (*z0_val) = l_alpha * (*v0_val++) + l_beta*(*z0_val);
176 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
177 (*z0_val) = l_alpha * (*v0_val) + l_beta*(*z0_val);
181 else if( l_num_vecs == 2 ) {
185 const Scalar alpha0 = alpha_[0], alpha1=alpha_[1], l_beta = beta_;
186 const_iter_t v0_val = v_val[0];
187 const ptrdiff_t v0_s = v_s[0];
188 const_iter_t v1_val = v_val[1];
189 const ptrdiff_t v1_s = v_s[1];
190 if( l_beta==ST::zero() ) {
191 if( alpha0 == ST::one() ) {
192 if( alpha1 == ST::one() ) {
194 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
195 for(
int j = 0; j < subDim; ++j )
196 (*z0_val++) = (*v0_val++) + (*v1_val++);
199 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
200 (*z0_val) = (*v0_val) + (*v1_val);
205 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
206 for(
int j = 0; j < subDim; ++j )
207 (*z0_val++) = (*v0_val++) + alpha1*(*v1_val++);
210 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
211 (*z0_val) = (*v0_val) + alpha1*(*v1_val);
216 if( alpha1 == ST::one() ) {
218 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
219 for(
int j = 0; j < subDim; ++j )
220 (*z0_val++) = alpha0*(*v0_val++) + (*v1_val++);
223 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
224 (*z0_val) = alpha0*(*v0_val) + (*v1_val);
229 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
230 for(
int j = 0; j < subDim; ++j )
231 (*z0_val++) = alpha0*(*v0_val++) + alpha1*(*v1_val++);
234 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
235 (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val);
240 else if( l_beta==ST::one() ) {
241 if( alpha0 == ST::one() ) {
242 if( alpha1 == ST::one() ) {
244 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
245 for(
int j = 0; j < subDim; ++j, ++z0_val )
246 (*z0_val) += (*v0_val++) + (*v1_val++);
249 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
250 (*z0_val) += (*v0_val) + (*v1_val);
255 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
256 for(
int j = 0; j < subDim; ++j, ++z0_val )
257 (*z0_val) += (*v0_val++) + alpha1*(*v1_val++);
260 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
261 (*z0_val) += (*v0_val) + alpha1*(*v1_val);
266 if( alpha1 == ST::one() ) {
268 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
269 for(
int j = 0; j < subDim; ++j, ++z0_val )
270 (*z0_val) += alpha0*(*v0_val++) + (*v1_val++);
273 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
274 (*z0_val) += alpha0*(*v0_val) + (*v1_val);
279 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
280 for(
int j = 0; j < subDim; ++j, ++z0_val )
281 (*z0_val) += alpha0*(*v0_val++) + alpha1*(*v1_val++);
284 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
285 (*z0_val) += alpha0*(*v0_val) + alpha1*(*v1_val);
291 if( alpha0 == ST::one() ) {
292 if( alpha1 == ST::one() ) {
294 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
295 for(
int j = 0; j < subDim; ++j, ++z0_val )
296 (*z0_val) = (*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
299 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
300 (*z0_val) = (*v0_val) + (*v1_val) + l_beta*(*z0_val);
305 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
306 for(
int j = 0; j < subDim; ++j, ++z0_val )
307 (*z0_val) = (*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
310 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
311 (*z0_val) = (*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
316 if( alpha1 == ST::one() ) {
318 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
319 for(
int j = 0; j < subDim; ++j, ++z0_val )
320 (*z0_val) = alpha0*(*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
323 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
324 (*z0_val) = alpha0*(*v0_val) + (*v1_val) + l_beta*(*z0_val);
329 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
330 for(
int j = 0; j < subDim; ++j, ++z0_val )
331 (*z0_val) = alpha0*(*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
334 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
335 (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
346 if( beta_ == ST::zero() ) {
347 for(
int j = 0; j < subDim; ++j, z0_val += z0_s )
348 (*z0_val) = ST::zero();
350 else if( beta_ != ST::one() ) {
351 for(
int j = 0; j < subDim; ++j, z0_val += z0_s )
355 z0_val = targ_sub_vecs[0].values().begin();
356 for(
int j = 0; j < subDim; ++j, z0_val += z0_s ) {
357 for(
int k = 0; k < l_num_vecs; ++k ) {
359 &alpha_k = alpha_[k],
360 &v_k_val = *v_val[k];
361 (*z0_val) += alpha_k * v_k_val;
372 #endif // RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
void apply_op_impl(const ArrayView< const ConstSubVectorView< Scalar > > &sub_vecs, const ArrayView< const SubVectorView< Scalar > > &targ_sub_vecs, const Ptr< ReductTarget > &reduct_obj_inout) const
const ArrayView< const Scalar > alpha() const
void setOpNameBase(const std::string &op_name_base)
TypeTo as(const TypeFrom &t)
TOpLinearCombination(const ArrayView< const Scalar > &alpha_in=Teuchos::null, const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero())
#define TEUCHOS_TEST_FOR_EXCEPT(throw_exception_test)
TEUCHOSCORE_LIB_DLL_EXPORT Teuchos::RCP< WorkspaceStore > get_default_workspace_store()