From 86ff5c152b99f13d36623acb06a252ab5e94e457 Mon Sep 17 00:00:00 2001 From: Frank Lee Date: Fri, 9 Jun 2023 16:38:53 +0800 Subject: [PATCH] [elixir] moved simulator build to op_builder (#3939) --- colossalai/elixir/search/simulator.py | 6 ++-- op_builder/__init__.py | 4 ++- op_builder/elixir_simulator.py | 39 +++++++++++++++++++++++++ setup.py | 8 ++--- tests/test_elixir/test_src/test_move.py | 5 ++-- 5 files changed, 51 insertions(+), 11 deletions(-) create mode 100644 op_builder/elixir_simulator.py diff --git a/colossalai/elixir/search/simulator.py b/colossalai/elixir/search/simulator.py index 21cc1b96f..50f5fe74f 100644 --- a/colossalai/elixir/search/simulator.py +++ b/colossalai/elixir/search/simulator.py @@ -1,10 +1,12 @@ import math +from colossalai.kernel.op_builder import ElixirSimulatorBuilder + from .utils import to_divide def calc_move_times(param_per_step: list, param_to_chunk: dict, n_blocks: int): - from colossalai.elixir.simulator import move_count + simulator = ElixirSimulatorBuilder().load() chunk_per_step = list() for param_set in param_per_step: @@ -17,7 +19,7 @@ def calc_move_times(param_per_step: list, param_to_chunk: dict, n_blocks: int): if len(id_set) > 0: chunk_per_step.append(list(id_set)) - return move_count(chunk_per_step, n_blocks) + return simulator.move_count(chunk_per_step, n_blocks) def find_optimal_chunk_size( diff --git a/op_builder/__init__.py b/op_builder/__init__.py index 5ae7223b8..26d167bd2 100644 --- a/op_builder/__init__.py +++ b/op_builder/__init__.py @@ -1,4 +1,5 @@ from .cpu_adam import CPUAdamBuilder +from .elixir_simulator import ElixirSimulatorBuilder from .fused_optim import FusedOptimBuilder from .layernorm import LayerNormBuilder from .moe import MOEBuilder @@ -14,10 +15,11 @@ ALL_OPS = { 'scaled_masked_softmax': ScaledMaskedSoftmaxBuilder, 'scaled_upper_triangle_masked_softmax': ScaledUpperTrainglemaskedSoftmaxBuilder, 'layernorm': LayerNormBuilder, + 'elixir_simulator': ElixirSimulatorBuilder } __all__ = [ 'ALL_OPS', 'CPUAdamBuilder', 'FusedOptimBuilder', 'MultiHeadAttnBuilder', 'ScaledMaskedSoftmaxBuilder', 'ScaledUpperTrainglemaskedSoftmaxBuilder', 'MOEBuilder', 'MultiTensorSGDBuilder', 'MultiTensorAdamBuilder', - 'MultiTensorLambBuilder', 'MultiTensorScaleBuilder', 'MultiTensorL2NormBuilder' + 'MultiTensorLambBuilder', 'MultiTensorScaleBuilder', 'MultiTensorL2NormBuilder', 'ElixirSimulatorBuilder' ] diff --git a/op_builder/elixir_simulator.py b/op_builder/elixir_simulator.py new file mode 100644 index 000000000..208723392 --- /dev/null +++ b/op_builder/elixir_simulator.py @@ -0,0 +1,39 @@ +from .builder import Builder + + +class ElixirSimulatorBuilder(Builder): + NAME = "elixir_simulator" + PREBUILT_IMPORT_PATH = "colossalai._C.elixir_simulator" + + def __init__(self): + super().__init__(name=ElixirSimulatorBuilder.NAME, + prebuilt_import_path=ElixirSimulatorBuilder.PREBUILT_IMPORT_PATH) + self.version_dependent_macros = ['-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5'] + + # necessary 4 functions + def sources_files(self): + ret = [ + self.relative_to_abs_path('elixir/simulator.cpp'), + ] + return ret + + def include_dirs(self): + return [] + + def cxx_flags(self): + return ['-O3'] + self.version_dependent_macros + + def nvcc_flags(self): + return [] + + def builder(self) -> 'CppExtension': + """ + This function should return a CppExtension object. + """ + from torch.utils.cpp_extension import CppExtension + + return CppExtension(name=self.prebuilt_import_path, + sources=self.strip_empty_entries(self.sources_files()), + extra_compile_args={ + 'cxx': self.strip_empty_entries(self.cxx_flags()), + }) diff --git a/setup.py b/setup.py index 80bdccca9..5d8f83121 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ from op_builder.utils import ( try: import torch - from torch.utils.cpp_extension import CUDA_HOME, BuildExtension, CppExtension + from torch.utils.cpp_extension import CUDA_HOME, BuildExtension TORCH_AVAILABLE = True except ImportError: TORCH_AVAILABLE = False @@ -30,11 +30,7 @@ BUILD_CUDA_EXT = int(os.environ.get('CUDA_EXT', '0')) == 1 IS_NIGHTLY = int(os.environ.get('NIGHTLY', '0')) == 1 # a variable to store the op builder -ext_modules = [ - CppExtension(name='colossalai.elixir.simulator', - sources=['colossalai/elixir/simulator.cpp'], - extra_compile_args=['-O3', '-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']) -] +ext_modules = [] # we do not support windows currently if sys.platform == 'win32': diff --git a/tests/test_elixir/test_src/test_move.py b/tests/test_elixir/test_src/test_move.py index 74a7cd31a..ef24a61fd 100644 --- a/tests/test_elixir/test_src/test_move.py +++ b/tests/test_elixir/test_src/test_move.py @@ -1,4 +1,4 @@ -from colossalai.elixir.simulator import move_count +from colossalai.kernel.op_builder import ElixirSimulatorBuilder from colossalai.testing import run_on_environment_flag @@ -6,7 +6,8 @@ from colossalai.testing import run_on_environment_flag def test_move_count(): steps = [[0], [1, 2], [3], [3], [1, 2], [0]] size = 2 - assert move_count(steps, size) == 12 + simulator = ElixirSimulatorBuilder().load() + assert simulator.move_count(steps, size) == 12 if __name__ == '__main__':