#include #include #include "cuda_util.h" /* GPU function guard */ std::string _cudaGetErrorString(cudaError_t error) { return cudaGetErrorString(error); } std::string _cudaGetErrorString(cublasStatus_t error) { switch (error) { case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS"; case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED"; case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED"; case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE"; case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH"; case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR"; case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED"; case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR"; case CUBLAS_STATUS_NOT_SUPPORTED: return "CUBLAS_STATUS_NOT_SUPPORTED"; case CUBLAS_STATUS_LICENSE_ERROR: return "CUBLAS_STATUS_LICENSE_ERROR"; } return "CUBLAS_UNKNOW"; } template void check_gpu_error(T result, char const *const func, const char *const file, int const line) { if (result) { throw std::runtime_error(std::string("[CUDA][ERROR] ") + +file + "(" + std::to_string(line) + "): " + (_cudaGetErrorString(result)) + "\n"); } } template void check_gpu_error(cudaError_t result, char const *const func, const char *const file, int const line); template void check_gpu_error(cublasStatus_t result, char const *const func, const char *const file, int const line); template void print_vec(const T *outv, std::string outn, int num_output_ele) { std::cout << outn << ": "; std::vector hout(num_output_ele, (T)0); cudaMemcpy(hout.data(), outv, num_output_ele * sizeof(T), cudaMemcpyDeviceToHost); for (int i = 0; i < num_output_ele; i++) { std::cout << hout[i] << ", "; } std::cout << std::endl; } template <> void print_vec<__half>(const __half *outv, std::string outn, int num_output_ele) { std::cout << outn << ": "; std::vector<__half> hout(num_output_ele, (__half)0.f); cudaMemcpy(hout.data(), outv, num_output_ele * sizeof(__half), cudaMemcpyDeviceToHost); for (int i = 0; i < num_output_ele; i++) { std::cout << __half2float(hout[i]) << ", "; } std::cout << std::endl; } template void print_vec(const float *outv, std::string outn, int num_output_ele); template void print_vec(const int *outv, std::string outn, int num_output_ele); template void print_vec<__half>(const __half *outv, std::string outn, int num_output_ele); template T *cuda_malloc(size_t ele_num) { size_t byte_size = ele_num * sizeof(T); T *pdata = nullptr; CHECK_GPU_ERROR(cudaMalloc((void **)&pdata, byte_size)); return pdata; } template float *cuda_malloc(size_t ele_num); template __half *cuda_malloc<__half>(size_t ele_num); template uint8_t *cuda_malloc(size_t ele_num); void cuda_free(void *pdata) { if (pdata != nullptr) { cudaFree(pdata); } } template struct _isnan { __device__ bool operator()(T a) const { return isnan(a); } }; template <> struct _isnan<__half> { __device__ bool operator()(const __half a) const { return __hisnan(a); } }; template struct _isinf { __device__ bool operator()(T a) const { return isinf(a); } }; template <> struct _isinf<__half> { __device__ bool operator()(const __half a) const { return __hisinf(a); } }; template void check_nan_inf(const T *data_ptr, int dsize, bool check_nan_inf, std::string file, int line, cudaStream_t stream) { // check_nan_inf = 0 for checking nan // check_nan_inf = 1 for checking inf bool res = false; std::string msg = file + "(" + std::to_string(line) + "): "; if (check_nan_inf) { msg += "nan."; res = thrust::transform_reduce(thrust::cuda::par.on(stream), data_ptr, data_ptr + dsize, _isnan(), false, thrust::logical_or()); } else { msg += "inf."; res = thrust::transform_reduce(thrust::cuda::par.on(stream), data_ptr, data_ptr + dsize, _isinf(), false, thrust::logical_or()); } if (res) { throw std::runtime_error(msg); } std::cout << msg << " [check pass]." << std::endl; } template void check_nan_inf(const float *data_ptr, int dsize, bool check_nan_inf, std::string file, int line, cudaStream_t stream); template void check_nan_inf<__half>(const __half *data_ptr, int dsize, bool check_nan_inf, std::string file, int line, cudaStream_t stream);