diff --git a/extensions/csrc/common/dev_info_mgr.h b/extensions/csrc/common/dev_info_mgr.h
new file mode 100644
index 000000000..7570666ad
--- /dev/null
+++ b/extensions/csrc/common/dev_info_mgr.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <memory>
+
+#include "common/nvgpu_dev_info.h"
+#include "target.h"
+
+namespace colossalAI {
+namespace common {
+
+template <typename Ret>
+class DevInfoMgr final {
+ public:
+  static std::unique_ptr<Ret> GetDevInfo(int device_num) {
+    return std::make_unique<Ret>(device_num);
+  }
+};
+
+}  // namespace common
+}  // namespace colossalAI
diff --git a/extensions/csrc/common/target.h b/extensions/csrc/common/target.h
new file mode 100644
index 000000000..1c8a508e3
--- /dev/null
+++ b/extensions/csrc/common/target.h
@@ -0,0 +1,134 @@
+#pragma once
+
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+namespace colossalAI {
+namespace common {
+
+class Target {
+ public:
+  enum class OS : int {
+    Unk = -1,
+    Linux,
+    Windows,
+  };
+  enum class Arch : int {
+    Unk = -1,
+    X86,
+    Arm,
+    NVGPU,
+    AMDGPU,
+    Ascend,
+  };
+  enum class BitLen : int {
+    Unk = -1,
+    k32,
+    k64,
+  };
+
+  explicit Target(OS os, Arch arch, BitLen bitlen)
+      : os_(os), arch_(arch), bitlen_(bitlen) {}
+
+  bool defined() const {
+    return (os_ != OS::Unk) && (arch_ != Arch::Unk) && (bitlen_ != BitLen::Unk);
+  }
+
+  std::string str() const {
+    std::string s{"OS: "};
+    switch (os_) {
+      case OS::Unk:
+        s += "Unk";
+        break;
+      case OS::Linux:
+        s += "Linux";
+        break;
+      case OS::Windows:
+        s += "Windows";
+        break;
+      default:
+        throw std::invalid_argument("Invalid OS type!");
+    }
+    s += "\t";
+    s += "Arch: ";
+
+    switch (arch_) {
+      case Arch::Unk:
+        s += "Unk";
+        break;
+      case Arch::X86:
+        s += "X86";
+        break;
+      case Arch::Arm:
+        s += "Arm";
+        break;
+      case Arch::NVGPU:
+        s += "NVGPU";
+        break;
+      case Arch::AMDGPU:
+        s += "AMDGPU";
+        break;
+      case Arch::Ascend:
+        s += "Ascend";
+        break;
+      default:
+        throw std::invalid_argument("Invalid Arch type!");
+    }
+    s += "\t";
+    s += "BitLen: ";
+
+    switch (bitlen_) {
+      case BitLen::Unk:
+        s += "Unk";
+        break;
+      case BitLen::k32:
+        s += "k32";
+        break;
+      case BitLen::k64:
+        s += "k64";
+        break;
+      default:
+        throw std::invalid_argument("Invalid target bit length!");
+    }
+
+    return s;
+  }
+
+  OS os() const { return os_; }
+  Arch arch() const { return arch_; }
+  BitLen bitlen() const { return bitlen_; }
+
+  static Target DefaultX86Target();
+  static Target DefaultArmTarget();
+  static Target DefaultRocmTarget();
+  static Target DefaultAscendTarget();
+
+  static Target DefaultCUDATarget() {
+    return Target(OS::Linux, Arch::NVGPU, BitLen::k64);
+  }
+
+  friend std::ostream& operator<<(std::ostream& os, const Target& target);
+  friend bool operator==(const Target& lhs, const Target& rhs);
+  friend bool operator!=(const Target& lhs, const Target& rhs);
+
+ private:
+  OS os_{OS::Unk};
+  Arch arch_{Arch::Unk};
+  BitLen bitlen_{BitLen::Unk};
+};
+
+inline std::ostream& operator<<(std::ostream& os, const Target& target) {
+  os << target.str();
+  return os;
+}
+inline bool operator==(const Target& lhs, const Target& rhs) {
+  return (lhs.os_ == rhs.os_) && (lhs.arch_ == rhs.arch_) &&
+         (lhs.bitlen_ == rhs.bitlen_);
+}
+inline bool operator!=(const Target& lhs, const Target& rhs) {
+  return !(lhs == rhs);
+}
+
+}  // namespace common
+}  // namespace colossalAI
diff --git a/extensions/csrc/cuda/utils/gpu_launch_config.h b/extensions/csrc/cuda/utils/gpu_launch_config.h
new file mode 100644
index 000000000..c7481323a
--- /dev/null
+++ b/extensions/csrc/cuda/utils/gpu_launch_config.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <cstdint>
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+namespace colossalAI {
+namespace cuda {
+namespace utils {
+
+class GPULaunchConfig {
+ public:
+  GPULaunchConfig() {}
+  GPULaunchConfig(const dim3& block, const dim3& grid)
+      : block_(block), grid_(grid) {}
+  friend GPULaunchConfig GPUGetGPULaunchConfig1D(int64_t numel, int vec_size);
+
+ protected:
+  void set_block(const dim3& dim) { block_ = dim; }
+  void set_grid(const dim3& dim) { grid_ = dim; }
+
+ private:
+  dim3 block_{1, 1, 1};
+  dim3 grid_{1, 1, 1};
+};
+
+GPULaunchConfig GPUGetGPULaunchConfig1D(int64_t numel, int vec_size);
+
+// TODO(LiuYang): to be implemented
+GPULaunchConfig GPUGetGPULaunchConfig2D(int64_t numel, int vec_size);
+
+// TODO(LiuYang): to be implemented
+GPULaunchConfig GPUGetGPULaunchConfig3D(int64_t numel, int vec_size);
+
+}  // namespace utils
+}  // namespace cuda
+}  // namespace colossalAI
diff --git a/extensions/csrc/cuda/utils/micros.h b/extensions/csrc/cuda/utils/micros.h
new file mode 100644
index 000000000..9b410e3d8
--- /dev/null
+++ b/extensions/csrc/cuda/utils/micros.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#define CUDA_CHECK(func)                                             \
+  {                                                                  \
+    auto status = func;                                              \
+    if (status != cudaSuccess) {                                     \
+      LOG(FATAL) << "CUDA Error : " << cudaGetErrorString(status);   \
+    }                                                                \
+  }
diff --git a/extensions/csrc/cuda/utils/nvgpu_dev_info.cc b/extensions/csrc/cuda/utils/nvgpu_dev_info.cc
new file mode 100644
index 000000000..e52abebff
--- /dev/null
+++ b/extensions/csrc/cuda/utils/nvgpu_dev_info.cc
@@ -0,0 +1,46 @@
+#include "nvgpu_dev_info.h"
+
+#include <array>
+
+namespace colossalAI {
+namespace cuda {
+namespace utils {
+
+std::array<int, 3> NVGPUDevInfo::GetMaxGridDims() const {
+  std::array<int, 3> ret;
+  ret[0] = prop_.maxGridSize[0];
+  ret[1] = prop_.maxGridSize[1];
+  ret[2] = prop_.maxGridSize[2];
+  return ret;
+}
+
+std::array<int, 3> NVGPUDevInfo::GetMaxBlockDims() const {
+  std::array<int, 3> ret;
+  ret[0] = prop_.maxThreadsDim[0];
+  ret[1] = prop_.maxThreadsDim[1];
+  ret[2] = prop_.maxThreadsDim[2];
+  return ret;
+}
+
+std::array<int, 2> NVGPUDevInfo::GetCapability() const {
+  std::array<int, 2> ret;
+  ret[0] = prop_.major;
+  ret[1] = prop_.minor;
+  return ret;
+}
+
+int NVGPUDevInfo::GetMultiProcessorCount() const {
+  return prop_.multiProcessorCount;
+}
+
+int NVGPUDevInfo::GetMaxThreadsPerMultiProcessor() const {
+  return prop_.maxThreadsPerMultiProcessor;
+}
+
+int NVGPUDevInfo::GetMaxThreadsPerBlock() const {
+  return prop_.maxThreadsPerBlock;
+}
+
+}  // namespace utils
+}  // namespace cuda
+}  // namespace colossalAI
diff --git a/extensions/csrc/cuda/utils/nvgpu_dev_info.h b/extensions/csrc/cuda/utils/nvgpu_dev_info.h
new file mode 100644
index 000000000..c8c67c908
--- /dev/null
+++ b/extensions/csrc/cuda/utils/nvgpu_dev_info.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <array>
+
+#include "micros.h"
+#include "target.h"
+
+namespace colossalAI {
+namespace cuda {
+namespace utils {
+
+class NVGPUDevInfo {
+ public:
+  // Queries cudaDeviceProp for the given device once and caches it.
+  explicit NVGPUDevInfo(int device_num) : device_num_(device_num) {
+    CUDA_CHECK(cudaGetDeviceProperties(&prop_, device_num_));
+  }
+
+  std::array<int, 3> GetMaxGridDims() const;
+  std::array<int, 3> GetMaxBlockDims() const;
+  std::array<int, 2> GetCapability() const;
+  int GetMultiProcessorCount() const;
+  int GetMaxThreadsPerMultiProcessor() const;
+  int GetMaxThreadsPerBlock() const;
+
+ private:
+  int device_num_;
+  cudaDeviceProp prop_;
+};
+
+}  // namespace utils
+}  // namespace cuda
+}  // namespace colossalAI
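For context, a minimal usage sketch of the new API surface (not part of the patch): it assumes extensions/csrc and extensions/csrc/common are on the compiler's include path, that device 0 is a visible CUDA GPU, and that the translation unit is built with something like nvcc plus -lcudart. It simply prints the default CUDA Target and a few properties reported by NVGPUDevInfo.

#include <iostream>

#include "target.h"                     // extensions/csrc/common/target.h
#include "cuda/utils/nvgpu_dev_info.h"  // relative to extensions/csrc

int main() {
  using colossalAI::common::Target;
  using colossalAI::cuda::utils::NVGPUDevInfo;

  // Default CUDA target: Linux / NVGPU / 64-bit.
  Target target = Target::DefaultCUDATarget();
  std::cout << target << "\n";

  // Properties of device 0, queried once in the NVGPUDevInfo constructor.
  NVGPUDevInfo info(0);
  auto cap = info.GetCapability();
  std::cout << "SM count: " << info.GetMultiProcessorCount()
            << ", compute capability: " << cap[0] << "." << cap[1]
            << ", max threads per block: " << info.GetMaxThreadsPerBlock()
            << std::endl;
  return 0;
}

The same NVGPUDevInfo could also be obtained through DevInfoMgr<NVGPUDevInfo>::GetDevInfo(0) from common/dev_info_mgr.h, which returns a std::unique_ptr instead of a value.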