import subprocess def get_cuda_bare_metal_version(cuda_dir): raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True) output = raw_output.split() release_idx = output.index("release") + 1 release = output[release_idx].split(".") bare_metal_major = release[0] bare_metal_minor = release[1][0] return raw_output, bare_metal_major, bare_metal_minor def append_nvcc_threads(nvcc_extra_args): from torch.utils.cpp_extension import CUDA_HOME _, bare_metal_major, bare_metal_minor = get_cuda_bare_metal_version(CUDA_HOME) if int(bare_metal_major) >= 11 and int(bare_metal_minor) >= 2: return nvcc_extra_args + ["--threads", "4"] return nvcc_extra_args