mirror of https://github.com/hpcaitech/ColossalAI
[cli] updated installation check cli for aot/jit build (#2395)
parent
2add870138
commit
ce08661eb1
|
@ -7,30 +7,100 @@ from torch.utils.cpp_extension import CUDA_HOME
|
|||
import colossalai
|
||||
|
||||
|
||||
def to_click_output(val):
    """Convert an installation-check value into a display symbol for readability.

    ``True`` becomes a check mark, ``False`` becomes ``'x'`` and ``None``
    becomes ``'N/A'``; any other value is passed through unchanged.
    """
    symbol_map = {True: u'\u2713', False: 'x', None: 'N/A'}
    if val not in symbol_map:
        return val
    return symbol_map[val]
def check_installation():
    """
    This function will check the installation of colossalai, specifically, the version compatibility of
    colossalai, pytorch and cuda.

    Example:
    ```text
    ```

    Returns: A table of installation information.
    """
    # collect the facts to report: AOT extension presence, system CUDA,
    # PyTorch (and its bundled CUDA), and the versions baked into colossalai
    found_aot_cuda_ext = _check_aot_built_cuda_extension_installed()
    cuda_version = _check_cuda_version()
    torch_version, torch_cuda_version = _check_torch_version()
    colossalai_version, torch_version_required, cuda_version_required = _parse_colossalai_version()

    # if cuda_version is None, that means either
    # CUDA_HOME is not found, thus cannot compare the version compatibility
    if not cuda_version:
        sys_torch_cuda_compatibility = None
    else:
        sys_torch_cuda_compatibility = _is_compatible([cuda_version, torch_cuda_version])

    # if cuda_version or cuda_version_required is None, that means either
    # CUDA_HOME is not found or AOT compilation is not enabled
    # thus, there is no need to compare the version compatibility at all
    if not cuda_version or not cuda_version_required:
        sys_colossalai_cuda_compatibility = None
    else:
        sys_colossalai_cuda_compatibility = _is_compatible([cuda_version, cuda_version_required])

    # if torch_version_required is None, that means AOT compilation is not enabled
    # thus there is no need to compare the versions
    if torch_version_required is None:
        torch_compatibility = None
    else:
        torch_compatibility = _is_compatible([torch_version, torch_version_required])

    click.echo('#### Installation Report ####')
    click.echo('\n------------ Environment ------------')
    click.echo(f"Colossal-AI version: {to_click_output(colossalai_version)}")
    click.echo(f"PyTorch version: {to_click_output(torch_version)}")
    click.echo(f"CUDA version: {to_click_output(cuda_version)}")
    click.echo(f"CUDA version required by PyTorch: {to_click_output(torch_cuda_version)}")
    click.echo("")
    click.echo("Note:")
    click.echo("1. The table above checks the versions of the libraries/tools in the current environment")
    click.echo("2. If the CUDA version is N/A, you can set the CUDA_HOME environment variable to locate it")

    click.echo('\n------------ CUDA Extensions AOT Compilation ------------')
    click.echo(f"Found AOT CUDA Extension: {to_click_output(found_aot_cuda_ext)}")
    click.echo(f"PyTorch version used for AOT compilation: {to_click_output(torch_version_required)}")
    click.echo(f"CUDA version used for AOT compilation: {to_click_output(cuda_version_required)}")
    click.echo("")
    click.echo("Note:")
    click.echo(
        "1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment variable CUDA_EXT=1 is set"
    )
    click.echo("2. If AOT compilation is not enabled, stay calm as the CUDA kernels can still be built during runtime")

    click.echo("\n------------ Compatibility ------------")
    click.echo(f'PyTorch version match: {to_click_output(torch_compatibility)}')
    click.echo(f"System and PyTorch CUDA version match: {to_click_output(sys_torch_cuda_compatibility)}")
    click.echo(f"System and Colossal-AI CUDA version match: {to_click_output(sys_colossalai_cuda_compatibility)}")
    click.echo("")
    click.echo("Note:")
    click.echo("1. The table above checks the version compatibility of the libraries/tools in the current environment")
    click.echo(
        "   - PyTorch version match: whether the PyTorch version in the current environment is compatible with the PyTorch version used for AOT compilation"
    )
    click.echo(
        "   - System and PyTorch CUDA version match: whether the CUDA version in the current environment is compatible with the CUDA version required by PyTorch"
    )
    click.echo(
        "   - System and Colossal-AI CUDA version match: whether the CUDA version in the current environment is compatible with the CUDA version used for AOT compilation"
    )
def _get_compatibility_string(versions):
|
||||
def _is_compatible(versions):
|
||||
"""
|
||||
Compare the list of versions and return whether they are compatible.
|
||||
"""
|
||||
if None in versions:
|
||||
return False
|
||||
|
||||
# split version into [major, minor, patch]
|
||||
versions = [version.split('.') for version in versions]
|
||||
|
@ -44,46 +114,60 @@ def _get_compatibility_string(versions):
|
|||
equal = len(set(version_values)) == 1
|
||||
|
||||
if idx in [0, 1] and not equal:
|
||||
# if the major/minor versions do not match
|
||||
# return a cross
|
||||
return 'x'
|
||||
return False
|
||||
elif idx == 1:
|
||||
# if the minor versions match
|
||||
# return a tick
|
||||
return u'\u2713'
|
||||
return True
|
||||
else:
|
||||
continue
|
||||
|
||||
|
||||
def _parse_colossalai_version():
    """
    Get the Colossal-AI version information.

    Returns:
        colossalai_version: Colossal-AI version.
        torch_version_for_aot_build: PyTorch version used for AOT compilation of CUDA kernels,
            or None when the version string carries no build metadata.
        cuda_version_for_aot_build: CUDA version used for AOT compilation of CUDA kernels,
            or None when the version string carries no build metadata.
    """
    # colossalai version can be in two formats
    # 1. X.X.X+torchX.XXcuXX.X (when colossalai is installed with CUDA extensions)
    # 2. X.X.X (when colossalai is not installed with CUDA extensions)
    # where X represents an integer.
    colossalai_version = colossalai.__version__.split('+')[0]
    try:
        torch_version_for_aot_build = colossalai.__version__.split('torch')[1].split('cu')[0]
        cuda_version_for_aot_build = colossalai.__version__.split('cu')[1]
    except IndexError:
        # plain X.X.X version: split() found no 'torch'/'cu' marker, so [1] is absent,
        # meaning AOT build metadata is not present
        torch_version_for_aot_build = None
        cuda_version_for_aot_build = None
    return colossalai_version, torch_version_for_aot_build, cuda_version_for_aot_build
||||
def _check_cuda_extension_installed():
|
||||
def _check_aot_built_cuda_extension_installed():
|
||||
"""
|
||||
According to `op_builder/README.md`, the CUDA extension can be built with either
|
||||
AOT (ahead-of-time) or JIT (just-in-time) compilation.
|
||||
AOT compilation will build CUDA extensions to `colossalai._C` during installation.
|
||||
JIT (just-in-time) compilation will build CUDA extensions to `~/.cache/colossalai/torch_extensions` during runtime.
|
||||
"""
|
||||
try:
|
||||
import colossalai._C.fused_optim
|
||||
is_cuda_extension_installed = u'\u2713'
|
||||
found_aot_cuda_ext = True
|
||||
except ImportError:
|
||||
is_cuda_extension_installed = 'x'
|
||||
return is_cuda_extension_installed
|
||||
found_aot_cuda_ext = False
|
||||
return found_aot_cuda_ext
|
||||
|
||||
|
||||
def _check_cuda_torch():
|
||||
# get cuda version
|
||||
if CUDA_HOME is None:
|
||||
cuda_version = 'N/A (CUDA_HOME is not set)'
|
||||
else:
|
||||
raw_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True)
|
||||
output = raw_output.split()
|
||||
release_idx = output.index("release") + 1
|
||||
release = output[release_idx].split(".")
|
||||
bare_metal_major = release[0]
|
||||
bare_metal_minor = release[1][0]
|
||||
cuda_version = f'{bare_metal_major}.{bare_metal_minor}'
|
||||
def _check_torch_version():
|
||||
"""
|
||||
Get the PyTorch version information.
|
||||
|
||||
Returns:
|
||||
torch_version: PyTorch version.
|
||||
torch_cuda_version: CUDA version required by PyTorch.
|
||||
"""
|
||||
# get torch version
|
||||
torch_version = torch.__version__.split('+')[0]
|
||||
|
||||
|
@ -92,4 +176,25 @@ def _check_cuda_torch():
|
|||
torch_cuda_minor = torch.version.cuda.split(".")[1]
|
||||
torch_cuda_version = f'{torch_cuda_major}.{torch_cuda_minor}'
|
||||
|
||||
return cuda_version, torch_version, torch_cuda_version
|
||||
return torch_version, torch_cuda_version
|
||||
|
||||
|
||||
def _check_cuda_version():
    """
    Get the CUDA version information.

    Returns:
        cuda_version: CUDA version found on the system, or ``None`` when
            CUDA_HOME is not set.
    """
    # without CUDA_HOME we cannot locate nvcc, so there is no version to report
    if CUDA_HOME is None:
        return None
    # run `nvcc -V` and pull the token right after "release" (e.g. "11.8,")
    raw_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True)
    tokens = raw_output.split()
    release_token = tokens[tokens.index("release") + 1]
    major, minor_part = release_token.split(".")[0], release_token.split(".")[1]
    # only the first character of the minor part is kept — presumably this
    # drops nvcc's trailing comma after "release X.Y," (TODO confirm)
    return f'{major}.{minor_part[0]}'
||||
|
|
Loading…
Reference in New Issue