# This code has been adapted from the DeepSpeed library. # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. import importlib import os import time from abc import ABC, abstractmethod from pathlib import Path from typing import List, Optional, Union from .utils import check_cuda_availability, check_system_pytorch_cuda_match, print_rank_0 class ExtensionBuilder(ABC): """ Builder is the base class to build extensions for PyTorch. Args: name (str): the name of the kernel to be built prebuilt_import_path (str): the path where the extension is installed during pip install """ ext_type: str = "cuda" def __init__(self, name: str, prebuilt_import_path: str): self.name = name self.prebuilt_import_path = prebuilt_import_path self.version_dependent_macros = ["-DVERSION_GE_1_1", "-DVERSION_GE_1_3", "-DVERSION_GE_1_5"] # we store the op as an attribute to avoid repeated building and loading self.cached_op_module = None assert prebuilt_import_path.startswith( "colossalai._C" ), f"The prebuilt_import_path should start with colossalai._C, but got {self.prebuilt_import_path}" def relative_to_abs_path(self, code_path: str) -> str: """ This function takes in a path relative to the colossalai root directory and return the absolute path. """ op_builder_module_path = Path(__file__).parent # if we install from source # the current file path will be op_builder/builder.py # if we install via pip install colossalai # the current file path will be colossalai/kernel/op_builder/builder.py # this is because that the op_builder inside colossalai is a symlink # this symlink will be replaced with actual files if we install via pypi # thus we cannot tell the colossalai root directory by checking whether the op_builder # is a symlink, we can only tell whether it is inside or outside colossalai if str(op_builder_module_path).endswith("colossalai/kernel/op_builder"): root_path = op_builder_module_path.parent.parent elif str(op_builder_module_path).endswith("colossalai/kernel/extensions"): root_path = op_builder_module_path.parent.parent else: root_path = op_builder_module_path.parent.joinpath("colossalai") code_abs_path = root_path.joinpath(code_path) return str(code_abs_path) def get_cuda_home_include(self): """ return include path inside the cuda home. """ from torch.utils.cpp_extension import CUDA_HOME if CUDA_HOME is None: raise RuntimeError("CUDA_HOME is None, please set CUDA_HOME to compile C++/CUDA kernels in ColossalAI.") cuda_include = os.path.join(CUDA_HOME, "include") return cuda_include def csrc_abs_path(self, path): return os.path.join(self.relative_to_abs_path("kernel/cuda_native/csrc"), path) # functions must be overrided begin @abstractmethod def sources_files(self) -> List[str]: """ This function should return a list of source files for extensions. """ raise NotImplementedError @abstractmethod def include_dirs(self) -> List[str]: """ This function should return a list of include files for extensions. """ @abstractmethod def cxx_flags(self) -> List[str]: """ This function should return a list of cxx compilation flags for extensions. """ @abstractmethod def nvcc_flags(self) -> List[str]: """ This function should return a list of nvcc compilation flags for extensions. """ # functions must be overrided over def strip_empty_entries(self, args): """ Drop any empty strings from the list of compile and link flags """ return [x for x in args if len(x) > 0] def import_op(self): """ This function will import the op module by its string name. """ return importlib.import_module(self.prebuilt_import_path) def check_runtime_build_environment(self): """ Check whether the system environment is ready for extension compilation. """ try: from torch.utils.cpp_extension import CUDA_HOME TORCH_AVAILABLE = True except ImportError: TORCH_AVAILABLE = False CUDA_HOME = None if not TORCH_AVAILABLE: raise ModuleNotFoundError( "PyTorch is not found. You need to install PyTorch first in order to build CUDA extensions" ) if CUDA_HOME is None: raise RuntimeError( "CUDA_HOME is not found. You need to export CUDA_HOME environment variable or install CUDA Toolkit first in order to build CUDA extensions" ) # make sure CUDA is available for compilation during cuda_available = check_cuda_availability() if not cuda_available: raise RuntimeError("CUDA is not available on your system as torch.cuda.is_available() returns False.") # make sure system CUDA and pytorch CUDA match, an error will raised inside the function if not check_system_pytorch_cuda_match(CUDA_HOME) def build(self, verbose: Optional[bool] = None): """ If the kernel is not built during pip install, it will build the kernel. If the kernel is built during runtime, it will be stored in `~/.cache/colossalai/torch_extensions/`. If the kernel is built during pip install, it can be accessed through `colossalai._C`. Warning: do not load this kernel repeatedly during model execution as it could slow down the training process. Args: verbose (bool, optional): show detailed info. Defaults to True. """ if verbose is None: verbose = os.environ.get("CAI_KERNEL_VERBOSE", "0") == "1" try: # if the kernel has been pre-built during installation # we just directly import it op_module = self.import_op() if verbose: print_rank_0( f"[extension] OP {self.prebuilt_import_path} has been compiled ahead of time, skip building." ) except ImportError: # check environment if self.ext_type == "cuda": self.check_runtime_build_environment() # time the kernel compilation start_build = time.time() # construct the build directory import torch from torch.utils.cpp_extension import load torch_version_major = torch.__version__.split(".")[0] torch_version_minor = torch.__version__.split(".")[1] torch_cuda_version = torch.version.cuda home_directory = os.path.expanduser("~") extension_directory = f".cache/colossalai/torch_extensions/torch{torch_version_major}.{torch_version_minor}_cu{torch_cuda_version}" build_directory = os.path.join(home_directory, extension_directory) Path(build_directory).mkdir(parents=True, exist_ok=True) if verbose: print_rank_0(f"[extension] Compiling or loading the JIT-built {self.name} kernel during runtime now") # load the kernel op_module = load( name=self.name, sources=self.strip_empty_entries(self.sources_files()), extra_include_paths=self.strip_empty_entries(self.include_dirs()), extra_cflags=self.cxx_flags(), extra_cuda_cflags=self.nvcc_flags(), extra_ldflags=[], build_directory=build_directory, verbose=verbose, ) build_duration = time.time() - start_build # log jit compilation time if verbose: print_rank_0(f"[extension] Time to compile or load {self.name} op: {build_duration} seconds") # cache the built/loaded kernel self.cached_op_module = op_module def load(self, verbose: Optional[bool] = None): """ load the kernel during runtime. Args: verbose (bool, optional): show detailed info. Defaults to True. """ # if the kernel has be compiled and cached, we directly use it assert self.cached_op_module is not None, "Please build the kernel first before loading it." return self.cached_op_module def builder(self) -> Union["CUDAExtension", "CppExtension"]: """ get a CUDAExtension instance used for setup.py """ from torch.utils.cpp_extension import CppExtension, CUDAExtension if self.ext_type == "cpp": return CppExtension( name=self.prebuilt_import_path, sources=self.strip_empty_entries(self.sources_files()), include_dirs=self.strip_empty_entries(self.include_dirs()), extra_compile_args=self.strip_empty_entries(self.cxx_flags()), ) return CUDAExtension( name=self.prebuilt_import_path, sources=self.strip_empty_entries(self.sources_files()), include_dirs=self.strip_empty_entries(self.include_dirs()), extra_compile_args={ "cxx": self.strip_empty_entries(self.cxx_flags()), "nvcc": self.strip_empty_entries(self.nvcc_flags()), }, )