#pragma once #include #include #include #include #include #include "micros.h" namespace colossalAI { namespace cuda { namespace utils { class NVGPUDevInfo { public: explicit NVGPUDevInfo(int device_num) : device_num_(device_num) { CUDA_CHECK(cudaGetDeviceProperties(&prop_, device_num)); } std::array GetMaxGridDims() const { std::array ret; ret[0] = prop_.maxGridSize[0]; ret[1] = prop_.maxGridSize[1]; ret[2] = prop_.maxGridSize[2]; return ret; } std::array GetMaxBlockDims() const { std::array ret; ret[0] = prop_.maxThreadsDim[0]; ret[1] = prop_.maxThreadsDim[1]; ret[2] = prop_.maxThreadsDim[2]; return ret; } std::array GetCapability() const { std::array ret; ret[0] = prop_.major; ret[1] = prop_.minor; return ret; } int GetMultiProcessorCount() const { return prop_.multiProcessorCount; } int GetMaxThreadsPerMultiProcessor() const { return prop_.maxThreadsPerMultiProcessor; } int GetMaxThreadsPerBlock() const { return prop_.maxThreadsPerBlock; } private: int device_num_; cudaDeviceProp prop_; }; } // namespace utils } // namespace cuda } // namespace colossalAI