diff --git a/colossalai/context/process_group_initializer/initializer_sequence.py b/colossalai/context/process_group_initializer/initializer_sequence.py index 5bf405a20..682fe4bb7 100644 --- a/colossalai/context/process_group_initializer/initializer_sequence.py +++ b/colossalai/context/process_group_initializer/initializer_sequence.py @@ -69,8 +69,8 @@ class Initializer_Sequence(ProcessGroupInitializer): pipeline_parallel_size (int): Size of pipeline parallel. tensor_parallel_size (int): Size of tensor parallel. """ - def __init__(self, - *args, **kwargs): + + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # reuse tensor parallel initializer code self._sequence_initializer = Initializer_Tensor(*args, **kwargs) diff --git a/colossalai/context/process_group_initializer/initializer_tensor.py b/colossalai/context/process_group_initializer/initializer_tensor.py index 3724fc361..d2b5be9cf 100644 --- a/colossalai/context/process_group_initializer/initializer_tensor.py +++ b/colossalai/context/process_group_initializer/initializer_tensor.py @@ -20,6 +20,7 @@ class Initializer_Tensor(ProcessGroupInitializer): pipeline_parallel_size (int): Size of pipeline parallel. tensor_parallel_size (int): Size of tensor parallel. """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.num_tensor_parallel_group = self.world_size // self.tensor_parallel_size