Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
TestSpMv.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include <iostream>
43 
44 // Tests
45 #include "TestSpMv.hpp"
46 
47 // Devices
48 #include "Kokkos_Core.hpp"
49 
50 // Utilities
51 #include "Teuchos_CommandLineProcessor.hpp"
52 #include "Teuchos_StandardCatchMacros.hpp"
53 #ifdef KOKKOS_ENABLE_CUDA
54 #include "cuda_runtime_api.h"
55 #endif
56 
57 template <typename Storage>
58 void mainHost(int nGrid, int nIter, KokkosSparse::DeviceConfig dev_config);
59 template <typename Storage>
60 void mainCuda(int nGrid, int nIter, KokkosSparse::DeviceConfig dev_config);
61 
62 int main(int argc, char *argv[])
63 {
64  bool success = true;
65  bool verbose = false;
66  try {
67 
68  const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
69  const size_t num_cores_per_socket =
70  Kokkos::hwloc::get_available_cores_per_numa();
71  const size_t num_threads_per_core =
72  Kokkos::hwloc::get_available_threads_per_core();
73 
74  // Setup command line options
75  Teuchos::CommandLineProcessor CLP;
76  CLP.setDocString(
77  "This test performance of MP::Vector multiply routines.\n");
78  int nGrid = 32;
79  CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
80  int nIter = 10;
81  CLP.setOption("ni", &nIter, "Number of multiply iterations");
82  int num_cores = num_cores_per_socket * num_sockets;
83  CLP.setOption("cores", &num_cores,
84  "Number of CPU cores to use (defaults to all)");
85  int num_hyper_threads = num_threads_per_core;
86  CLP.setOption("hyperthreads", &num_hyper_threads,
87  "Number of hyper threads per core to use (defaults to all)");
88  int threads_per_vector = 1;
89  CLP.setOption("threads_per_vector", &threads_per_vector,
90  "Number of threads to use within each vector");
91 #ifdef KOKKOS_ENABLE_THREADS
92  bool threads = true;
93  CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
94 #endif
95 #ifdef KOKKOS_ENABLE_OPENMP
96  bool openmp = true;
97  CLP.setOption("openmp", "no-openmp", &openmp, "Enable OpenMP device");
98 #endif
99 #ifdef KOKKOS_ENABLE_CUDA
100  bool cuda = true;
101  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
102  int cuda_threads_per_vector = 16;
103  CLP.setOption("cuda_threads_per_vector", &cuda_threads_per_vector,
104  "Number of Cuda threads to use within each vector");
105  int cuda_block_size = 0;
106  CLP.setOption("cuda_block_size", &cuda_block_size,
107  "Cuda block size (0 implies the default choice)");
108  int num_cuda_blocks = 0;
109  CLP.setOption("num_cuda_blocks", &num_cuda_blocks,
110  "Number of Cuda blocks (0 implies the default choice)");
111  int device_id = 0;
112  CLP.setOption("device", &device_id, "CUDA device ID");
113 #endif
114  CLP.parse( argc, argv );
115 
116  typedef int Ordinal;
117  typedef double Scalar;
118 
119 #ifdef KOKKOS_ENABLE_THREADS
120  if (threads) {
121  typedef Kokkos::Threads Device;
123 
124  Kokkos::InitArguments init_args;
125  init_args.num_threads = num_cores*num_hyper_threads;
126  Kokkos::initialize( init_args );
127 
128  std::cout << std::endl
129  << "Threads performance with " << num_cores*num_hyper_threads
130  << " threads:" << std::endl;
131 
132  KokkosSparse::DeviceConfig dev_config(num_cores,
133  threads_per_vector,
134  num_hyper_threads / threads_per_vector);
135 
136  mainHost<Storage>(nGrid, nIter, dev_config);
137 
138  Kokkos::finalize();
139  }
140 #endif
141 
142 #ifdef KOKKOS_ENABLE_OPENMP
143  if (openmp) {
144  typedef Kokkos::OpenMP Device;
146 
147  Kokkos::InitArguments init_args;
148  init_args.num_threads = num_cores*num_hyper_threads;
149  Kokkos::initialize( init_args );
150 
151  std::cout << std::endl
152  << "OpenMP performance with " << num_cores*num_hyper_threads
153  << " threads:" << std::endl;
154 
155  KokkosSparse::DeviceConfig dev_config(num_cores,
156  threads_per_vector,
157  num_hyper_threads / threads_per_vector);
158 
159  mainHost<Storage>(nGrid, nIter, dev_config);
160 
161  Kokkos::finalize();
162  }
163 #endif
164 
165 #ifdef KOKKOS_ENABLE_CUDA
166  if (cuda) {
167  typedef Kokkos::Cuda Device;
169 
170  Kokkos::InitArguments init_args;
171  init_args.device_id = device_id;
172  Kokkos::initialize( init_args );
173 
174  cudaDeviceProp deviceProp;
175  cudaGetDeviceProperties(&deviceProp, device_id);
176  std::cout << std::endl
177  << "CUDA performance for device " << device_id << " ("
178  << deviceProp.name << "):"
179  << std::endl;
180 
181  KokkosSparse::DeviceConfig dev_config(
182  num_cuda_blocks,
183  cuda_threads_per_vector,
184  cuda_threads_per_vector == 0 ? 0 : cuda_block_size / cuda_threads_per_vector);
185 
186  mainCuda<Storage>(nGrid,nIter,dev_config);
187 
188  Kokkos::finalize();
189  }
190 #endif
191 
192  }
193  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
194 
195  if (success)
196  return 0;
197  return -1;
198 }
Stokhos::StandardStorage< int, double > Storage
Statically allocated storage class.
void mainCuda(int nGrid, int nIter, KokkosSparse::DeviceConfig dev_config)
void mainHost(int nGrid, int nIter, KokkosSparse::DeviceConfig dev_config)
int main(int argc, char *argv[])
Definition: TestSpMv.cpp:62
pce_type Scalar