@ -7,30 +7,100 @@ from torch.utils.cpp_extension import CUDA_HOME
import colossalai
def to_click_output ( val ) :
# installation check output to understandable symbols for readability
VAL_TO_SYMBOL = { True : u ' \u2713 ' , False : ' x ' , None : ' N/A ' }
if val in VAL_TO_SYMBOL :
return VAL_TO_SYMBOL [ val ]
else :
return val
def check_installation ( ) :
cuda_ext_installed = _check_cuda_extension_installed ( )
cuda_version , torch_version , torch_cuda_version = _check_cuda_torch ( )
"""
This function will check the installation of colossalai , specifically , the version compatibility of
colossalai , pytorch and cuda .
Example :
` ` ` text
` ` `
Returns : A table of installation information .
"""
found_aot_cuda_ext = _check_aot_built_cuda_extension_installed ( )
cuda_version = _check_cuda_version ( )
torch_version , torch_cuda_version = _check_torch_version ( )
colossalai_verison , torch_version_required , cuda_version_required = _parse_colossalai_version ( )
cuda_compatibility = _get_compatibility_string ( [ cuda_version , torch_cuda_version , cuda_version_required ] )
torch_compatibility = _get_compatibility_string ( [ torch_version , torch_version_required ] )
click . echo ( f ' #### Installation Report #### \n ' )
click . echo ( f " Colossal-AI version: { colossalai_verison } " )
click . echo ( f ' ---------------------------- ' )
click . echo ( f " PyTorch Version: { torch_version } " )
click . echo ( f " PyTorch Version required by Colossal-AI: { torch_version_required } " )
click . echo ( f ' PyTorch version match: { torch_compatibility } ' )
click . echo ( f ' ---------------------------- ' )
click . echo ( f " System CUDA Version: { cuda_version } " )
click . echo ( f " CUDA Version required by PyTorch: { torch_cuda_version } " )
click . echo ( f " CUDA Version required by Colossal-AI: { cuda_version_required } " )
click . echo ( f " CUDA Version Match: { cuda_compatibility } " )
click . echo ( f ' ---------------------------- ' )
click . echo ( f " CUDA Extension: { cuda_ext_installed } " )
# if cuda_version is None, that means either
# CUDA_HOME is not found, thus cannot compare the version compatibility
if not cuda_version :
sys_torch_cuda_compatibility = None
else :
sys_torch_cuda_compatibility = _is_compatible ( [ cuda_version , torch_cuda_version ] )
# if cuda_version or cuda_version_required is None, that means either
# CUDA_HOME is not found or AOT compilation is not enabled
# thus, there is no need to compare the version compatibility at all
if not cuda_version or not cuda_version_required :
sys_colossalai_cuda_compatibility = None
else :
sys_colossalai_cuda_compatibility = _is_compatible ( [ cuda_version , cuda_version_required ] )
def _get_compatibility_string ( versions ) :
# if torch_version_required is None, that means AOT compilation is not enabled
# thus there is no need to compare the versions
if torch_version_required is None :
torch_compatibility = None
else :
torch_compatibility = _is_compatible ( [ torch_version , torch_version_required ] )
click . echo ( f ' #### Installation Report #### ' )
click . echo ( f ' \n ------------ Environment ------------ ' )
click . echo ( f " Colossal-AI version: { to_click_output ( colossalai_verison ) } " )
click . echo ( f " PyTorch version: { to_click_output ( torch_version ) } " )
click . echo ( f " CUDA version: { to_click_output ( cuda_version ) } " )
click . echo ( f " CUDA version required by PyTorch: { to_click_output ( torch_cuda_version ) } " )
click . echo ( " " )
click . echo ( f " Note: " )
click . echo ( f " 1. The table above checks the versions of the libraries/tools in the current environment " )
click . echo ( f " 2. If the CUDA version is N/A, you can set the CUDA_HOME environment variable to locate it " )
click . echo ( f ' \n ------------ CUDA Extensions AOT Compilation ------------ ' )
click . echo ( f " Found AOT CUDA Extension: { to_click_output ( found_aot_cuda_ext ) } " )
click . echo ( f " PyTorch version used for AOT compilation: { to_click_output ( torch_version_required ) } " )
click . echo ( f " CUDA version used for AOT compilation: { to_click_output ( cuda_version_required ) } " )
click . echo ( " " )
click . echo ( f " Note: " )
click . echo (
f " 1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment varialbe CUDA_EXT=1 is set "
)
click . echo ( f " 2. If AOT compilation is not enabled, stay calm as the CUDA kernels can still be built during runtime " )
click . echo ( f " \n ------------ Compatibility ------------ " )
click . echo ( f ' PyTorch version match: { to_click_output ( torch_compatibility ) } ' )
click . echo ( f " System and PyTorch CUDA version match: { to_click_output ( sys_torch_cuda_compatibility ) } " )
click . echo ( f " System and Colossal-AI CUDA version match: { to_click_output ( sys_colossalai_cuda_compatibility ) } " )
click . echo ( f " " )
click . echo ( f " Note: " )
click . echo ( f " 1. The table above checks the version compatibility of the libraries/tools in the current environment " )
click . echo (
f " - PyTorch version mistach: whether the PyTorch version in the current environment is compatible with the PyTorch version used for AOT compilation "
)
click . echo (
f " - System and PyTorch CUDA version match: whether the CUDA version in the current environment is compatible with the CUDA version required by PyTorch "
)
click . echo (
f " - System and Colossal-AI CUDA version match: whether the CUDA version in the current environment is compatible with the CUDA version used for AOT compilation "
)
def _is_compatible ( versions ) :
"""
Compare the list of versions and return whether they are compatible .
"""
if None in versions :
return False
# split version into [major, minor, patch]
versions = [ version . split ( ' . ' ) for version in versions ]
@ -44,37 +114,81 @@ def _get_compatibility_string(versions):
equal = len ( set ( version_values ) ) == 1
if idx in [ 0 , 1 ] and not equal :
# if the major/minor versions do not match
# return a cross
return ' x '
return False
elif idx == 1 :
# if the minor versions match
# return a tick
return u ' \u2713 '
return True
else :
continue
def _parse_colossalai_version ( ) :
"""
Get the Colossal - AI version information .
Returns :
colossalai_version : Colossal - AI version .
torch_version_for_aot_build : PyTorch version used for AOT compilation of CUDA kernels .
cuda_version_for_aot_build : CUDA version used for AOT compilation of CUDA kernels .
"""
# colossalai version can be in two formats
# 1. X.X.X+torchX.XXcuXX.X (when colossalai is installed with CUDA extensions)
# 2. X.X.X (when colossalai is not installed with CUDA extensions)
# where X represents an integer.
colossalai_verison = colossalai . __version__ . split ( ' + ' ) [ 0 ]
torch_version_required = colossalai . __version__ . split ( ' torch ' ) [ 1 ] . split ( ' cu ' ) [ 0 ]
cuda_version_required = colossalai . __version__ . split ( ' cu ' ) [ 1 ]
return colossalai_verison , torch_version_required , cuda_version_required
def _check_cuda_extension_installed ( ) :
try :
torch_version_for_aot_build = colossalai . __version__ . split ( ' torch ' ) [ 1 ] . split ( ' cu ' ) [ 0 ]
cuda_version_for_aot_build = colossalai . __version__ . split ( ' cu ' ) [ 1 ]
except :
torch_version_for_aot_build = None
cuda_version_for_aot_build = None
return colossalai_verison , torch_version_for_aot_build , cuda_version_for_aot_build
def _check_aot_built_cuda_extension_installed ( ) :
"""
According to ` op_builder / README . md ` , the CUDA extension can be built with either
AOT ( ahead - of - time ) or JIT ( just - in - time ) compilation .
AOT compilation will build CUDA extensions to ` colossalai . _C ` during installation .
JIT ( just - in - time ) compilation will build CUDA extensions to ` ~ / . cache / colossalai / torch_extensions ` during runtime .
"""
try :
import colossalai . _C . fused_optim
is_cuda_extension_installed = u ' \u2713 '
found_aot_cuda_ext = True
except ImportError :
is_cuda_extension_installed = ' x '
return is_cuda_extension_installed
found_aot_cuda_ext = False
return found_aot_cuda_ext
def _check_torch_version ( ) :
"""
Get the PyTorch version information .
Returns :
torch_version : PyTorch version .
torch_cuda_version : CUDA version required by PyTorch .
"""
# get torch version
torch_version = torch . __version__ . split ( ' + ' ) [ 0 ]
def _check_cuda_torch ( ) :
# get cuda version in pytorch build
torch_cuda_major = torch . version . cuda . split ( " . " ) [ 0 ]
torch_cuda_minor = torch . version . cuda . split ( " . " ) [ 1 ]
torch_cuda_version = f ' { torch_cuda_major } . { torch_cuda_minor } '
return torch_version , torch_cuda_version
def _check_cuda_version ( ) :
"""
Get the CUDA version information .
Returns :
cuda_version : CUDA version found on the system .
"""
# get cuda version
if CUDA_HOME is None :
cuda_version = ' N/A (CUDA_HOME is not set) '
cuda_version = CUDA_HOME
else :
raw_output = subprocess . check_output ( [ CUDA_HOME + " /bin/nvcc " , " -V " ] , universal_newlines = True )
output = raw_output . split ( )
@ -83,13 +197,4 @@ def _check_cuda_torch():
bare_metal_major = release [ 0 ]
bare_metal_minor = release [ 1 ] [ 0 ]
cuda_version = f ' { bare_metal_major } . { bare_metal_minor } '
# get torch version
torch_version = torch . __version__ . split ( ' + ' ) [ 0 ]
# get cuda version in pytorch build
torch_cuda_major = torch . version . cuda . split ( " . " ) [ 0 ]
torch_cuda_minor = torch . version . cuda . split ( " . " ) [ 1 ]
torch_cuda_version = f ' { torch_cuda_major } . { torch_cuda_minor } '
return cuda_version , torch_version , torch_cuda_version
return cuda_version