mirror of https://github.com/hpcaitech/ColossalAI
ai, big-model, data-parallelism, deep-learning, distributed-computing, foundation-models, heterogeneous-training, hpc, inference, large-scale, model-parallelism, pipeline-parallelism
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
21 lines
739 B
21 lines
739 B
2 years ago
|
import subprocess
|
||
|
|
||
|
|
||
|
def get_cuda_bare_metal_version(cuda_dir):
    """Query the nvcc under *cuda_dir* and report the CUDA toolkit version.

    Runs ``<cuda_dir>/bin/nvcc -V`` and parses the token that follows the
    literal word ``release`` in its output (e.g. ``"11.3,"``).

    Args:
        cuda_dir: path to the CUDA installation root (e.g. ``/usr/local/cuda``).

    Returns:
        A 3-tuple ``(raw_output, major, minor)`` where *raw_output* is the
        full ``nvcc -V`` text and *major*/*minor* are version components as
        strings. Note: *minor* is the first character of the minor field
        only, matching the original parsing.

    Raises:
        subprocess.CalledProcessError: if nvcc exits with a non-zero status.
        ValueError: if ``release`` does not appear in the nvcc output.
    """
    nvcc_path = cuda_dir + "/bin/nvcc"
    raw_output = subprocess.check_output([nvcc_path, "-V"], universal_newlines=True)
    tokens = raw_output.split()
    # The version string immediately follows the token "release".
    version_token = tokens[tokens.index("release") + 1]
    major, _, minor_rest = version_token.partition(".")
    return raw_output, major, minor_rest[0]
|
||
|
|
||
|
|
||
|
def append_nvcc_threads(nvcc_extra_args):
    """Append ``--threads 4`` to nvcc arguments when the toolkit supports it.

    nvcc gained the ``--threads`` flag (parallel compilation of multiple
    gencode targets) in CUDA 11.2, so the flag is only added for
    CUDA >= 11.2.

    Args:
        nvcc_extra_args: list of extra nvcc command-line arguments.

    Returns:
        A new list with ``["--threads", "4"]`` appended when the detected
        CUDA version supports it; otherwise the original list unchanged.
    """
    from torch.utils.cpp_extension import CUDA_HOME

    _, bare_metal_major, bare_metal_minor = get_cuda_bare_metal_version(CUDA_HOME)
    # Compare as a (major, minor) tuple. The previous check
    # `major >= 11 and minor >= 2` wrongly excluded CUDA 12.0 and 12.1
    # (minor 0/1), which do support --threads.
    if (int(bare_metal_major), int(bare_metal_minor)) >= (11, 2):
        return nvcc_extra_args + ["--threads", "4"]
    return nvcc_extra_args
|