42 #ifndef STOKHOS_CUDA_DEVICE_PROP_HPP 43 #define STOKHOS_CUDA_DEVICE_PROP_HPP 45 #include "Kokkos_Core.hpp" 47 #include "Teuchos_TestForException.hpp" 49 #include "cuda_runtime_api.h" 98 cudaGetDevice(&device_id);
102 cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
104 cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
110 TEUCHOS_TEST_FOR_EXCEPTION(
112 "Cuda compute capability >= 2 is required!");
194 TEUCHOS_TEST_FOR_EXCEPTION(
195 true, std::logic_error,
196 "DeviceProp not configured for compute capability " <<
201 template <
typename Kernel>
205 typedef void (*func_ptr_t)();
206 func_ptr_t func_ptr =
reinterpret_cast<func_ptr_t
>(kernel);
207 cudaFuncAttributes attrib;
208 cudaFuncGetAttributes(&attrib, func_ptr);
209 return attrib.numRegs;
216 template <
typename Kernel>
Kokkos::Cuda::size_type size_type
size_type max_threads_per_sm
size_type max_blocks_per_sm
size_type warp_granularity
size_type max_regs_per_sm
size_type max_shmem_per_block
size_type shared_memory_granularity
size_type compute_capability_major
Top-level namespace for Stokhos classes and functions.
size_type shared_memory_capacity
size_type max_threads_per_block
DeviceProp(int device_id=-1)
size_type max_warps_per_sm
size_type max_regs_per_block
size_type get_resident_warps_per_sm(Kernel kernel)
size_type get_kernel_registers(Kernel kernel)
size_type compute_capability_minor