Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
HostScaling.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "Kokkos_Core.hpp"

#include "Teuchos_CommandLineProcessor.hpp"
#include "Teuchos_StandardCatchMacros.hpp"

#include "TestStochastic.hpp"
52 
54 
// Algorithms

// Number of selectable SG mat-vec algorithms. It is passed to the "alg"
// command-line option together with the name table below, so it must equal
// the number of entries in that table.
const int num_sg_alg = 2;

// Human-readable algorithm names. They serve both as the accepted values of
// the "alg" command-line option and as the prefix of the result column
// titles, where the table is indexed by the selected SG_Alg value.
const char *sg_alg_names[] = {
  "Original Matrix-Free",
  "Product CRS"
};
60 
61 std::vector<double>
62 run_test(const size_t num_cpu, const size_t num_core_per_cpu,
63  const size_t num_threads_per_core,
64  const size_t p, const size_t d, const size_t nGrid, const size_t nIter,
65  const bool symmetric, SG_Alg sg_alg,
66  const std::vector<double>& perf1 = std::vector<double>())
67 {
68  typedef double Scalar;
69  typedef Kokkos::Threads Device;
70  const size_t team_count = num_cpu * num_core_per_cpu;
71  const size_t threads_per_team = num_threads_per_core;
72  Kokkos::InitArguments init_args;
73  init_args.num_threads = team_count*threads_per_team;
74  Kokkos::initialize( init_args );
75 
76  std::vector<int> var_degree( d , p );
77 
78  std::vector<double> perf;
79  if (sg_alg == PROD_CRS)
80  perf =
81  unit_test::test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(var_degree , nGrid , nIter , symmetric );
82  else if (sg_alg == ORIG_MAT_FREE)
83  perf =
84  unit_test::test_original_matrix_free_vec<Scalar,Device,Stokhos::DefaultMultiply>(
85  var_degree , nGrid , nIter , true , symmetric );
86 
87  Kokkos::finalize();
88 
89  double speed_up;
90  if (perf1.size() > 0)
91  speed_up = perf1[1] / perf[1];
92  else
93  speed_up = perf[1] / perf[1];
94  double efficiency = speed_up / team_count;
95 
96  std::cout << team_count << " , "
97  << nGrid << " , "
98  << d << " , "
99  << p << " , "
100  << perf[1] << " , "
101  << perf[2] << " , "
102  << speed_up << " , "
103  << 100.0 * efficiency << " , "
104  << std::endl;
105 
106  return perf;
107 }
108 
109 int main(int argc, char *argv[])
110 {
111  bool success = true;
112 
113  try {
114  // Setup command line options
115  Teuchos::CommandLineProcessor CLP;
116  int p = 3;
117  CLP.setOption("p", &p, "Polynomial order");
118  int d = 4;
119  CLP.setOption("d", &d, "Stochastic dimension");
120  int nGrid = 64;
121  CLP.setOption("n", &nGrid, "Number of spatial grid points in each dimension");
122  int nIter = 1;
123  CLP.setOption("niter", &nIter, "Number of iterations");
124  int n_thread_per_core = 1;
125  CLP.setOption("nthread", &n_thread_per_core, "Number of threads per core to use");
126  int n_hyperthreads = 2;
127  CLP.setOption("nht", &n_hyperthreads, "Number of hyperthreads per core available");
128  SG_Alg sg_alg = PROD_CRS;
129  CLP.setOption("alg", &sg_alg, num_sg_alg, sg_alg_values, sg_alg_names,
130  "SG Mat-Vec Algorithm");
131  bool symmetric = true;
132  CLP.setOption("symmetric", "asymmetric", &symmetric, "Use symmetric PDF");
133  CLP.parse( argc, argv );
134 
135  // Detect number of CPUs and number of cores
136  const size_t num_cpu = Kokkos::hwloc::get_available_numa_count();
137  const size_t num_core_per_cpu = Kokkos::hwloc::get_available_cores_per_numa();
138  const size_t core_capacity = Kokkos::hwloc::get_available_threads_per_core();
139  if (static_cast<size_t>(n_thread_per_core) > core_capacity )
140  n_thread_per_core = core_capacity;
141 
142  // Print header
143  std::cout << std::endl
144  << "\"#nCore\" , "
145  << "\"#nGrid\" , "
146  << "\"#Variable\" , "
147  << "\"PolyDegree\" , "
148  << "\"" << sg_alg_names[sg_alg] << " MXV Time\" , "
149  << "\"" << sg_alg_names[sg_alg] << " MXV GFLOPS\" , "
150  << "\"" << sg_alg_names[sg_alg] << " MXV Speedup\" , "
151  << "\"" << sg_alg_names[sg_alg] << " MXV Efficiency\" , "
152  << std::endl ;
153 
154  // Do a serial run to base speedup & efficiency from
155  const std::vector<double> perf1 =
156  run_test(1, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg);
157 
158  // First do 1 core per cpu
159  for (size_t n=2; n<=num_cpu; ++n) {
160  const std::vector<double> perf =
161  run_test(n, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
162  }
163 
164  // Now do all cpus, increasing number of cores
165  for (size_t n=2; n<=num_core_per_cpu; ++n) {
166  const std::vector<double> perf =
167  run_test(num_cpu, n, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
168  }
169 
170  // Now do all cpus, all cores, with nthreads/core
171  const std::vector<double> perf =
172  run_test(num_cpu, num_core_per_cpu, n_thread_per_core, p, d, nGrid,
173  nIter, symmetric, sg_alg, perf1);
174 
175 
176  }
177  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
178 
179  if (!success)
180  return -1;
181  return 0 ;
182 }
const int num_sg_alg
Definition: HostScaling.cpp:57
SG_Alg
Definition: HostScaling.cpp:56
const SG_Alg sg_alg_values[]
Definition: HostScaling.cpp:58
std::vector< double > run_test(const size_t num_cpu, const size_t num_core_per_cpu, const size_t num_threads_per_core, const size_t p, const size_t d, const size_t nGrid, const size_t nIter, const bool symmetric, SG_Alg sg_alg, const std::vector< double > &perf1=std::vector< double >())
Definition: HostScaling.cpp:62
pce_type Scalar
const char * sg_alg_names[]
Definition: HostScaling.cpp:59
int main(int argc, char *argv[])