[builder] raise Error when CUDA_HOME is not set (#2213)

pull/2217/head
Jiarui Fang 2022-12-28 16:07:08 +08:00 committed by GitHub
parent 78a89d9b41
commit 7675792100
7 changed files with 19 additions and 14 deletions


@@ -30,6 +30,13 @@ class Builder(object):
         else:
             return os.path.join(Path(__file__).parent.parent.absolute(), code_path)
 
+    def get_cuda_include(self):
+        from torch.utils.cpp_extension import CUDA_HOME
+        if CUDA_HOME is None:
+            raise RuntimeError("CUDA_HOME is None, please set CUDA_HOME to compile C++/CUDA kernels in ColossalAI.")
+        cuda_include = os.path.join(CUDA_HOME, "include")
+        return cuda_include
+
     def strip_empty_entries(self, args):
         '''
         Drop any empty strings from the list of compile and link flags
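For context, a minimal sketch (not part of this commit) of the failure mode the new check addresses: torch leaves CUDA_HOME as None when it cannot find a CUDA toolkit, and the old builders only failed later inside os.path.join. The printed path is illustrative.

import os
from torch.utils.cpp_extension import CUDA_HOME  # None when no CUDA toolkit is detected

if CUDA_HOME is None:
    # Before this commit the builders called os.path.join(CUDA_HOME, "include")
    # directly, which fails with an opaque TypeError when CUDA_HOME is None.
    raise RuntimeError("CUDA_HOME is None, please set CUDA_HOME to compile C++/CUDA kernels in ColossalAI.")
print(os.path.join(CUDA_HOME, "include"))  # e.g. /usr/local/cuda/include (illustrative)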


@@ -27,9 +27,7 @@ class CPUAdamBuilder(Builder):
         ]
 
     def include_paths(self):
-        from torch.utils.cpp_extension import CUDA_HOME
-        cuda_include = os.path.join(CUDA_HOME, "include")
-        return [os.path.join(CPUAdamBuilder.BASE_DIR, "includes"), cuda_include]
+        return [os.path.join(CPUAdamBuilder.BASE_DIR, "includes"), self.get_cuda_include()]
 
     def strip_empty_entries(self, args):
         '''
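A hedged usage note: torch.utils.cpp_extension resolves CUDA_HOME from the CUDA_HOME environment variable (falling back to common install locations), so a quick pre-flight check before building the CPU Adam kernel could look like the sketch below; the toolkit path is illustrative.

import os

# Illustrative pre-flight check; adjust the path to your local CUDA toolkit.
os.environ.setdefault("CUDA_HOME", "/usr/local/cuda")
from torch.utils.cpp_extension import CUDA_HOME  # import after the env var is set

print("CUDA include dir:", None if CUDA_HOME is None else os.path.join(CUDA_HOME, "include"))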


@@ -31,10 +31,7 @@ class FusedOptimBuilder(Builder):
         ]
 
     def include_paths(self):
-        import torch
-        from torch.utils.cpp_extension import CUDA_HOME
-        cuda_include = os.path.join(CUDA_HOME, "include")
-        return [os.path.join(FusedOptimBuilder.BASE_DIR, "includes"), cuda_include]
+        return [os.path.join(FusedOptimBuilder.BASE_DIR, "includes"), self.get_cuda_include()]
 
     def builder(self, name):
         from torch.utils.cpp_extension import CUDAExtension
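Roughly how these include paths get consumed when building the fused optimizer extension; a simplified sketch, not the exact ColossalAI builder code, and the module and source file names are hypothetical.

from torch.utils.cpp_extension import CUDAExtension

# Hypothetical names; include_dirs receives exactly the list built by include_paths().
ext = CUDAExtension(
    name="colossal_fused_optim",
    sources=["csrc/fused_optim.cpp", "csrc/fused_optim_kernel.cu"],
    include_dirs=["kernel/cuda_native/csrc/includes", "/usr/local/cuda/include"],
    extra_compile_args={"cxx": ["-O3"], "nvcc": ["-O3", "--use_fast_math"]},
)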


@@ -31,10 +31,8 @@ class MultiHeadAttnBuilder(Builder):
         ]
 
     def include_paths(self):
-        from torch.utils.cpp_extension import CUDA_HOME
         ret = []
-        cuda_include = os.path.join(CUDA_HOME, "include")
-        ret = [os.path.join(self.base_dir, "includes"), cuda_include]
+        ret = [os.path.join(self.base_dir, "includes"), self.get_cuda_include()]
         ret.append(os.path.join(self.base_dir, "kernels", "include"))
         print("include_paths", ret)
         return ret
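For the multi-head attention kernel, the same list (base includes, the CUDA include dir, and kernels/include) can also feed a JIT build; a hedged sketch using torch's generic load helper with hypothetical paths, not the builder's actual call.

from torch.utils.cpp_extension import load

# Hypothetical sources/paths; extra_include_paths mirrors what include_paths() returns.
multihead_attn = load(
    name="multihead_attention",
    sources=["kernels/multihead_attention.cpp", "kernels/multihead_attention_kernel.cu"],
    extra_include_paths=["includes", "/usr/local/cuda/include", "kernels/include"],
    verbose=True,
)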


@@ -106,3 +106,8 @@ Touch the bar of model scale and batch size.
 | gpt2_20b | 8 | auto | 2 | 16 | 99.871 |
 | gpt2_20b | 8 | cpu | 2 | 64 | 125.170 |
 | gpt2_20b | 8 | const | 2 | 32 | 105.415 |
+
+| model | #GPU | policy | TP | batch per DP | Tflops |
+| ---------- | --------- |--------- |--------- |--------- |--------- |
+| gpt2_20b | 8 | cpu | 2 | 8 | 46.895 |
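The Tflops column is a per-GPU throughput estimate. Below is a hedged sketch of the kind of formula such GPT benchmarks typically use; the formula is an assumption, not necessarily the exact one in this example, and the sequence length and step time are made up.

def estimate_tflops(model_numel, batch_per_dp, seq_len, step_time_s):
    # forward + backward with activation checkpointing is commonly approximated
    # as ~8 * parameters * processed tokens floating-point operations
    return model_numel * batch_per_dp * seq_len * 8 / 1e12 / step_time_s

# e.g. a 20B-parameter model, batch-per-DP 8, seq_len 1024, 14 s per step -> ~93.6 TFLOPS
print(round(estimate_tflops(20e9, 8, 1024, 14.0), 1))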


@@ -2,12 +2,12 @@
 export DISTPAN="colossalai"
 
 # The following options only valid when DISTPAN="colossalai"
-export TPDEGREE=2
+export TPDEGREE=4
 export GPUNUM=8
 export PLACEMENT='cpu'
 export USE_SHARD_INIT=False
-export BATCH_SIZE=64
-export MODEL_TYPE="gpt2_20b"
+export BATCH_SIZE=32
+# export MODEL_TYPE="gpt2_24b"
 
 mkdir -p logs
 env OMP_NUM_THREADS=16 torchrun --standalone --nproc_per_node=${GPUNUM} train_gpt_demo.py --tp_degree=${TPDEGREE} --model_type=${MODEL_TYPE} --batch_size=${BATCH_SIZE} --placement ${PLACEMENT} --shardinit ${USE_SHARD_INIT} --distplan ${DISTPAN} 2>&1 | tee ./logs/${MODEL_TYPE}_${DISTPAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}.log
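A small sanity check of the layout these settings imply, assuming BATCH_SIZE is the per-data-parallel-rank batch (as in the README table above):

GPUNUM, TPDEGREE, BATCH_SIZE = 8, 4, 32

dp_degree = GPUNUM // TPDEGREE            # 2 data-parallel groups of 4-way tensor parallelism
global_batch = dp_degree * BATCH_SIZE     # 64 samples processed per optimizer step
print(f"DP degree: {dp_degree}, global batch size: {global_batch}")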


@@ -218,7 +218,7 @@ def main():
         model = gemini_zero_dpp(model, pg, args.placement)
 
         # build highly optimized cpu optimizer
-        optimizer = GeminiAdamOptimizer(model, lr=1e-3, initial_scale=2**5)
+        optimizer = GeminiAdamOptimizer(model, lr=1e-3, initial_scale=2**5, gpu_margin_mem_ratio=0.6)
         logger.info(get_mem_info(prefix='After init optim, '), ranks=[0])
     else:
         model = model_builder(args.model_type)(checkpoint=True).cuda()
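As I read it, gpu_margin_mem_ratio tells the Gemini hybrid optimizer what fraction of the GPU memory left over after the peak of non-model data it may use to keep optimizer states on GPU; it mainly takes effect with the 'auto' placement policy. A hedged sketch of how such an optimizer is then driven in the training step, with hypothetical model and loss names:

# Hypothetical forward pass; GeminiAdamOptimizer wraps ZeRO, so the loss is
# backpropagated through the optimizer rather than via loss.backward().
outputs = model(input_ids, attention_mask)
loss = criterion(outputs, input_ids)

optimizer.zero_grad()
optimizer.backward(loss)   # scaled backward handled by the Gemini/ZeRO wrapper
optimizer.step()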