import subprocess

import click
import torch
from torch.utils.cpp_extension import CUDA_HOME
import colossalai


def to_click_output(val):
    # convert the installation check result into a human-readable symbol
    VAL_TO_SYMBOL = {True: u'\u2713', False: 'x', None: 'N/A'}

    if val in VAL_TO_SYMBOL:
        return VAL_TO_SYMBOL[val]
    else:
        return val


def check_installation():
"""
This function will check the installation of colossalai , specifically , the version compatibility of
colossalai , pytorch and cuda .
Example :
` ` ` text
` ` `
Returns : A table of installation information .
"""
    found_aot_cuda_ext = _check_aot_built_cuda_extension_installed()
    cuda_version = _check_cuda_version()
    torch_version, torch_cuda_version = _check_torch_version()
    colossalai_version, prebuilt_torch_version_required, prebuilt_cuda_version_required = _parse_colossalai_version()

    # if cuda_version is None, CUDA_HOME was not found,
    # thus we cannot compare the system CUDA version against the one required by PyTorch
    if not cuda_version:
        sys_torch_cuda_compatibility = None
    else:
        sys_torch_cuda_compatibility = _is_compatible([cuda_version, torch_cuda_version])

    # if cuda_version or prebuilt_cuda_version_required is None, that means either
    # CUDA_HOME is not found or AOT compilation is not enabled,
    # thus there is no need to compare the version compatibility at all
    if not cuda_version or not prebuilt_cuda_version_required:
        sys_colossalai_cuda_compatibility = None
    else:
        sys_colossalai_cuda_compatibility = _is_compatible([cuda_version, prebuilt_cuda_version_required])

    # if prebuilt_torch_version_required is None, AOT compilation is not enabled,
    # thus there is no need to compare the versions
    if prebuilt_torch_version_required is None:
        torch_compatibility = None
    else:
        torch_compatibility = _is_compatible([torch_version, prebuilt_torch_version_required])

    click.echo(f'#### Installation Report ####')
    click.echo(f'\n------------ Environment ------------')
    click.echo(f"Colossal-AI version: {to_click_output(colossalai_version)}")
    click.echo(f"PyTorch version: {to_click_output(torch_version)}")
    click.echo(f"System CUDA version: {to_click_output(cuda_version)}")
    click.echo(f"CUDA version required by PyTorch: {to_click_output(torch_cuda_version)}")
    click.echo("")
    click.echo(f"Note:")
    click.echo(f"1. The table above checks the versions of the libraries/tools in the current environment")
    click.echo(f"2. If the System CUDA version is N/A, you can set the CUDA_HOME environment variable to locate it")
    click.echo(
        f"3. If the CUDA version required by PyTorch is N/A, you probably did not install a CUDA-compatible PyTorch. This value is given by torch.version.cuda and you can go to https://pytorch.org/get-started/locally/ to download the correct version."
    )

    click.echo(f'\n------------ CUDA Extensions AOT Compilation ------------')
    click.echo(f"Found AOT CUDA Extension: {to_click_output(found_aot_cuda_ext)}")
    click.echo(f"PyTorch version used for AOT compilation: {to_click_output(prebuilt_torch_version_required)}")
    click.echo(f"CUDA version used for AOT compilation: {to_click_output(prebuilt_cuda_version_required)}")
    click.echo("")
    click.echo(f"Note:")
    click.echo(
        f"1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment variable CUDA_EXT=1 is set"
    )
    click.echo(f"2. If AOT compilation is not enabled, stay calm as the CUDA kernels can still be built during runtime")

    click.echo(f"\n------------ Compatibility ------------")
    click.echo(f'PyTorch version match: {to_click_output(torch_compatibility)}')
    click.echo(f"System and PyTorch CUDA version match: {to_click_output(sys_torch_cuda_compatibility)}")
    click.echo(f"System and Colossal-AI CUDA version match: {to_click_output(sys_colossalai_cuda_compatibility)}")
    click.echo(f"")
    click.echo(f"Note:")
    click.echo(f"1. The table above checks the version compatibility of the libraries/tools in the current environment")
    click.echo(
        f"- PyTorch version match: whether the PyTorch version in the current environment is compatible with the PyTorch version used for AOT compilation"
    )
    click.echo(
        f"- System and PyTorch CUDA version match: whether the CUDA version in the current environment is compatible with the CUDA version required by PyTorch"
    )
    click.echo(
        f"- System and Colossal-AI CUDA version match: whether the CUDA version in the current environment is compatible with the CUDA version used for AOT compilation"
    )
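
# Note: this report is typically generated through the Colossal-AI CLI (e.g. `colossalai check -i`);
# the command wiring lives outside this module.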


def _is_compatible(versions):
    """
    Compare the list of versions and return whether they are compatible.
    """
    if None in versions:
        return False

    # split each version into [major, minor, patch]
    versions = [version.split('.') for version in versions]

    for version in versions:
        if len(version) == 2:
            # x means unknown
            version.append('x')

    # only the major and minor versions are compared
    for idx, version_values in enumerate(zip(*versions)):
        equal = len(set(version_values)) == 1

        if idx in [0, 1] and not equal:
            return False
        elif idx == 1:
            return True
        else:
            continue
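
# Illustrative behaviour of _is_compatible (only the major and minor components must match):
#   _is_compatible(['11.6', '11.6'])      ->  True
#   _is_compatible(['11.6', '11.3'])      ->  False
#   _is_compatible(['1.13.1', '1.13.0'])  ->  True   (patch versions are ignored)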


def _parse_colossalai_version():
    """
    Get the Colossal-AI version information.

    Returns:
        colossalai_version: Colossal-AI version.
        torch_version_for_aot_build: PyTorch version used for AOT compilation of CUDA kernels.
        cuda_version_for_aot_build: CUDA version used for AOT compilation of CUDA kernels.
    """
    # the colossalai version can be in two formats
    # 1. X.X.X+torchX.XXcuXX.X (when colossalai is installed with CUDA extensions)
    # 2. X.X.X (when colossalai is not installed with CUDA extensions)
    # where X represents an integer.
    colossalai_version = colossalai.__version__.split('+')[0]

    try:
        torch_version_for_aot_build = colossalai.__version__.split('torch')[1].split('cu')[0]
        cuda_version_for_aot_build = colossalai.__version__.split('cu')[1]
    except:
        torch_version_for_aot_build = None
        cuda_version_for_aot_build = None

    return colossalai_version, torch_version_for_aot_build, cuda_version_for_aot_build
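
# Illustrative parsing (hypothetical version strings): assuming colossalai.__version__ is
# '0.2.5+torch1.13cu11.6', this returns ('0.2.5', '1.13', '11.6'); assuming it is a plain
# '0.2.5', the two AOT build versions are both None.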


def _check_aot_built_cuda_extension_installed():
    """
    According to `op_builder/README.md`, the CUDA extension can be built with either
    AOT (ahead-of-time) or JIT (just-in-time) compilation.
    AOT compilation will build CUDA extensions to `colossalai._C` during installation.
    JIT compilation will build CUDA extensions to `~/.cache/colossalai/torch_extensions` during runtime.
    """
    try:
        import colossalai._C.fused_optim
        found_aot_cuda_ext = True
    except ImportError:
        found_aot_cuda_ext = False

    return found_aot_cuda_ext


def _check_torch_version():
    """
    Get the PyTorch version information.

    Returns:
        torch_version: PyTorch version.
        torch_cuda_version: CUDA version required by PyTorch.
    """
    # get the torch version, which can be in one of two formats
    # - 1.13.1+cu113
    # - 1.13.1.devxxx
    torch_version = torch.__version__.split('+')[0]
    torch_version = '.'.join(torch_version.split('.')[:3])

    # get the cuda version in the pytorch build
    try:
        torch_cuda_major = torch.version.cuda.split(".")[0]
        torch_cuda_minor = torch.version.cuda.split(".")[1]
        torch_cuda_version = f'{torch_cuda_major}.{torch_cuda_minor}'
    except:
        torch_cuda_version = None

    return torch_version, torch_cuda_version
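
# Illustrative outputs (hypothetical builds): assuming torch.__version__ == '1.13.1+cu117' and
# torch.version.cuda == '11.7', this returns ('1.13.1', '11.7'); for a CPU-only build,
# torch.version.cuda is None and the returned CUDA version is therefore None.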


def _check_cuda_version():
    """
    Get the CUDA version information.

    Returns:
        cuda_version: CUDA version found on the system.
    """
    # get the cuda version by parsing the output of nvcc
    if CUDA_HOME is None:
        # CUDA_HOME is not found, so the system CUDA version cannot be determined
        cuda_version = None
    else:
        try:
            raw_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True)
            output = raw_output.split()
            release_idx = output.index("release") + 1
            release = output[release_idx].split(".")
            bare_metal_major = release[0]
            bare_metal_minor = release[1][0]
            cuda_version = f'{bare_metal_major}.{bare_metal_minor}'
        except:
            cuda_version = None

    return cuda_version
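
# Illustrative parsing of the `nvcc -V` output: assuming it ends with
# "Cuda compilation tools, release 11.6, V11.6.124", the token after "release" is "11.6,",
# so the reported system CUDA version is '11.6'.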