import os
import re

from setuptools import find_packages, setup

from op_builder.utils import get_cuda_bare_metal_version

try:
    import torch
    from torch.utils.cpp_extension import CUDA_HOME, BuildExtension, CUDAExtension
    print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__))
    TORCH_MAJOR = int(torch.__version__.split('.')[0])
    TORCH_MINOR = int(torch.__version__.split('.')[1])

    if TORCH_MAJOR < 1 or (TORCH_MAJOR == 1 and TORCH_MINOR < 10):
        raise RuntimeError("Colossal-AI requires PyTorch 1.10 or newer.\n"
                           "The latest stable release can be obtained from https://pytorch.org/")

    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    CUDA_HOME = None
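
# TORCH_AVAILABLE and CUDA_HOME fall back to safe defaults above so that a
# pure-Python install can still proceed on machines without PyTorch or a CUDA toolkit.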

# ninja build does not work unless include_dirs are abs path
this_dir = os.path.dirname(os.path.abspath(__file__))

build_cuda_ext = False
ext_modules = []

if int(os.environ.get('CUDA_EXT', '0')) == 1:
    if not TORCH_AVAILABLE:
        raise ModuleNotFoundError(
            "PyTorch is not found while CUDA_EXT=1. You need to install PyTorch first in order to build CUDA extensions")
    if not CUDA_HOME:
        raise RuntimeError(
            "CUDA_HOME is not found while CUDA_EXT=1. You need to export the CUDA_HOME environment variable or install the CUDA Toolkit first in order to build CUDA extensions")
    build_cuda_ext = True
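
# A minimal usage sketch (inferred from the CUDA_EXT flag above):
#   CUDA_EXT=1 pip install .
# builds all CUDA extensions ahead of time, while a plain `pip install .`
# performs a Python-only install and skips them.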

def check_cuda_torch_binary_vs_bare_metal(cuda_dir):
    raw_output, bare_metal_major, bare_metal_minor = get_cuda_bare_metal_version(cuda_dir)
    torch_binary_major = torch.version.cuda.split(".")[0]
    torch_binary_minor = torch.version.cuda.split(".")[1]

    print("\nCompiling cuda extensions with")
    print(raw_output + "from " + cuda_dir + "/bin\n")

    if bare_metal_major != torch_binary_major:
        print(f'The detected CUDA version ({raw_output}) mismatches the version that was used to compile PyTorch '
              f'({torch.version.cuda}). CUDA extension will not be installed.')
        return False

    if bare_metal_minor != torch_binary_minor:
        print("\nWarning: CUDA extensions are being compiled with a version of CUDA that does "
              "not match the version used to compile PyTorch binaries. "
              f"PyTorch binaries were compiled with CUDA {torch.version.cuda}.\n"
              "In some cases, a minor-version mismatch will not cause later errors: "
              "https://github.com/NVIDIA/apex/pull/323#discussion_r287021798.")
    return True


def check_cuda_availability(cuda_dir):
    if not torch.cuda.is_available():
        # https://github.com/NVIDIA/apex/issues/486
        # Extension builds after https://github.com/pytorch/pytorch/pull/23408 attempt to query
        # torch.cuda.get_device_capability(), which will fail if you are compiling in an environment
        # without visible GPUs (e.g. during an nvidia-docker build command).
        print('\nWarning: Torch did not find available GPUs on this system.\n'
              'If your intention is to cross-compile, this is not an error.\n'
              'By default, Colossal-AI will cross-compile for Pascal (compute capabilities 6.0, 6.1, 6.2),\n'
              'Volta (compute capability 7.0), Turing (compute capability 7.5),\n'
              'and, if the CUDA version is >= 11.0, Ampere (compute capability 8.0).\n'
              'If you wish to cross-compile for a single specific architecture,\n'
              'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.\n')
        if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
            _, bare_metal_major, _ = get_cuda_bare_metal_version(cuda_dir)
            if int(bare_metal_major) == 11:
                os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0"
            else:
                os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
        return False

    if cuda_dir is None:
        print("nvcc was not found. CUDA extension will not be installed. If you're installing within a container from "
              "https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.")
        return False
    return True
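
# Example with an illustrative value: `export TORCH_CUDA_ARCH_LIST="8.0"` restricts
# cross-compilation to a single architecture (here Ampere, compute capability 8.0);
# the default list above is only applied when the variable is unset.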

def append_nvcc_threads(nvcc_extra_args):
    # nvcc supports multi-threaded compilation via --threads from CUDA 11.2 onwards
    _, bare_metal_major, bare_metal_minor = get_cuda_bare_metal_version(CUDA_HOME)
    if int(bare_metal_major) >= 11 and int(bare_metal_minor) >= 2:
        return nvcc_extra_args + ["--threads", "4"]
    return nvcc_extra_args

def fetch_requirements(path):
    with open(path, 'r') as fd:
        return [r.strip() for r in fd.readlines()]


def fetch_readme():
    with open('README.md', encoding='utf-8') as f:
        return f.read()

def get_version():
    setup_file_path = os.path.abspath(__file__)
    project_path = os.path.dirname(setup_file_path)
    version_txt_path = os.path.join(project_path, 'version.txt')
    version_py_path = os.path.join(project_path, 'colossalai/version.py')

    with open(version_txt_path) as f:
        version = f.read().strip()

    if build_cuda_ext:
        torch_version = '.'.join(torch.__version__.split('.')[:2])
        cuda_version = '.'.join(get_cuda_bare_metal_version(CUDA_HOME)[1:])
        version += f'+torch{torch_version}cu{cuda_version}'

    # write version into version.py
    with open(version_py_path, 'w') as f:
        f.write(f"__version__ = '{version}'\n")
    return version
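
# For example (illustrative values), with version.txt containing 0.2.0, torch 1.12
# and CUDA 11.3 on the build machine, get_version() returns '0.2.0+torch1.12cu11.3'.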

if build_cuda_ext:
    build_cuda_ext = check_cuda_availability(CUDA_HOME) and check_cuda_torch_binary_vs_bare_metal(CUDA_HOME)

if build_cuda_ext:
    # Set up macros for forward/backward compatibility hack around
    # https://github.com/pytorch/pytorch/commit/4404762d7dd955383acee92e6f06b48144a0742e
    # and
    # https://github.com/NVIDIA/apex/issues/456
    # https://github.com/pytorch/pytorch/commit/eb7b39e02f7d75c26d8a795ea8c7fd911334da7e#diff-4632522f237f1e4e728cb824300403ac
    version_dependent_macros = ['-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']

    def cuda_ext_helper(name, sources, extra_cuda_flags, extra_cxx_flags=[]):
        return CUDAExtension(
            name=name,
            sources=[os.path.join('colossalai/kernel/cuda_native/csrc', path) for path in sources],
            include_dirs=[os.path.join(this_dir, 'colossalai/kernel/cuda_native/csrc/kernels/include')],
            extra_compile_args={
                'cxx': ['-O3'] + version_dependent_macros + extra_cxx_flags,
                'nvcc': append_nvcc_threads(['-O3', '--use_fast_math'] + version_dependent_macros + extra_cuda_flags)
            })
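
    # Illustrative call (hypothetical file names): cuda_ext_helper('colossalai._C.foo',
    # ['foo.cpp', 'foo_cuda.cu'], extra_cuda_flags) compiles the listed files under
    # colossalai/kernel/cuda_native/csrc into one extension named 'colossalai._C.foo'.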

    ### fused optim kernels ###
    from op_builder import FusedOptimBuilder
    ext_modules.append(FusedOptimBuilder().builder('colossalai._C.fused_optim'))

    ### N-D parallel kernels ###
    cc_flag = []
    for arch in torch.cuda.get_arch_list():
        res = re.search(r'sm_(\d+)', arch)
        if res:
            arch_cap = res[1]
            if int(arch_cap) >= 60:
                cc_flag.extend(['-gencode', f'arch=compute_{arch_cap},code={arch}'])
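
    # For example, if torch.cuda.get_arch_list() contains sm_70 and sm_80, the loop
    # above yields ['-gencode', 'arch=compute_70,code=sm_70',
    #               '-gencode', 'arch=compute_80,code=sm_80'].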

    extra_cuda_flags = [
        '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '--expt-relaxed-constexpr',
        '--expt-extended-lambda'
    ]

    from op_builder import ScaledSoftmaxBuilder
    ext_modules.append(ScaledSoftmaxBuilder().builder('colossalai._C.scaled_upper_triang_masked_softmax'))

    ext_modules.append(
        cuda_ext_helper('colossalai._C.scaled_masked_softmax',
                        ['scaled_masked_softmax.cpp', 'scaled_masked_softmax_cuda.cu'], extra_cuda_flags + cc_flag))

    from op_builder import MOEBuilder
    ext_modules.append(MOEBuilder().builder('colossalai._C.moe'))

    extra_cuda_flags = ['-maxrregcount=50']

    ext_modules.append(
        cuda_ext_helper('colossalai._C.layer_norm', ['layer_norm_cuda.cpp', 'layer_norm_cuda_kernel.cu'],
                        extra_cuda_flags + cc_flag))

    ### MultiHeadAttn kernel ###
    from op_builder import MultiHeadAttnBuilder
    ext_modules.append(MultiHeadAttnBuilder().builder('colossalai._C.multihead_attention'))

    ### Gemini Adam kernel ###
    from op_builder import CPUAdamBuilder
    ext_modules.append(CPUAdamBuilder().builder('colossalai._C.cpu_optim'))
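
    # Each builder above produces an extension importable at runtime under the name
    # it was given, e.g. `from colossalai._C import fused_optim` (import path
    # inferred from the names passed to builder()).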

setup(name='colossalai',
      version=get_version(),
      packages=find_packages(exclude=(
          'benchmark',
          'docker',
          'tests',
          'docs',
          'examples',
          'scripts',
          'requirements',
          '*.egg-info',
      )),
      description='An integrated large-scale model training system with efficient parallelization techniques',
      long_description=fetch_readme(),
      long_description_content_type='text/markdown',
      license='Apache Software License 2.0',
      url='https://www.colossalai.org',
      project_urls={
          'Forum': 'https://github.com/hpcaitech/ColossalAI/discussions',
          'Bug Tracker': 'https://github.com/hpcaitech/ColossalAI/issues',
          'Examples': 'https://github.com/hpcaitech/ColossalAI-Examples',
          'Documentation': 'http://colossalai.readthedocs.io',
          'Github': 'https://github.com/hpcaitech/ColossalAI',
      },
      ext_modules=ext_modules,
      cmdclass={'build_ext': BuildExtension} if ext_modules else {},
      install_requires=fetch_requirements('requirements/requirements.txt'),
      entry_points='''
        [console_scripts]
        colossalai=colossalai.cli:cli
      ''',
      python_requires='>=3.6',
      classifiers=[
          'Programming Language :: Python :: 3',
          'License :: OSI Approved :: Apache Software License',
          'Environment :: GPU :: NVIDIA CUDA',
          'Topic :: Scientific/Engineering :: Artificial Intelligence',
          'Topic :: System :: Distributed Computing',
      ],
      package_data={'colossalai': ['_C/*.pyi']})