From e83970e3dc4dc838db889159ef273baaa0a80061 Mon Sep 17 00:00:00 2001
From: Maruyama_Aya <38985202+MaruyamaAya@users.noreply.github.com>
Date: Wed, 9 Mar 2022 16:23:33 +0800
Subject: [PATCH] fix format ColossalAI/colossalai/context/process_group_initializer

---
 .../initializer_1d.py    |  1 -
 .../initializer_2d.py    |  7 +-
 .../initializer_2p5d.py  | 69 ++++++++-----------
 .../initializer_3d.py    |  9 +--
 .../initializer_model.py |  2 -
 5 files changed, 37 insertions(+), 51 deletions(-)

diff --git a/colossalai/context/process_group_initializer/initializer_1d.py b/colossalai/context/process_group_initializer/initializer_1d.py
index 4d454f2a6..afb1d05c9 100644
--- a/colossalai/context/process_group_initializer/initializer_1d.py
+++ b/colossalai/context/process_group_initializer/initializer_1d.py
@@ -20,7 +20,6 @@ class Initializer_1D(ProcessGroupInitializer):
 
     def init_dist_group(self):
         """Initialize 1D tensor parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: (local_rank, group_world_size, process_group, ranks_in_group, mode)
         :rtype: Tuple
         """
diff --git a/colossalai/context/process_group_initializer/initializer_2d.py b/colossalai/context/process_group_initializer/initializer_2d.py
index b48ce60f9..0db05deb2 100644
--- a/colossalai/context/process_group_initializer/initializer_2d.py
+++ b/colossalai/context/process_group_initializer/initializer_2d.py
@@ -49,8 +49,7 @@ class Initializer_2D_Row(ProcessGroupInitializer):
 
         for i in range(self.num_group):
             for j in range(self.summa_dim):
-                ranks = [i * self.tensor_parallel_size + j * self.summa_dim + k
-                         for k in range(self.summa_dim)]
+                ranks = [i * self.tensor_parallel_size + j * self.summa_dim + k for k in range(self.summa_dim)]
                 group = dist.new_group(ranks)
 
                 if self.rank in ranks:
@@ -93,8 +92,7 @@ class Initializer_2D_Col(ProcessGroupInitializer):
 
         for i in range(self.num_group):
             for j in range(self.summa_dim):
-                ranks = [i * self.tensor_parallel_size + j + k * self.summa_dim
-                         for k in range(self.summa_dim)]
+                ranks = [i * self.tensor_parallel_size + j + k * self.summa_dim for k in range(self.summa_dim)]
                 group = dist.new_group(ranks)
 
                 if self.rank in ranks:
@@ -129,7 +127,6 @@ class Initializer_2D(ProcessGroupInitializer):
 
     def init_dist_group(self):
         """Initialize 2D tensor row and col parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: 2D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """
diff --git a/colossalai/context/process_group_initializer/initializer_2p5d.py b/colossalai/context/process_group_initializer/initializer_2p5d.py
index 3c3e1b978..102302061 100644
--- a/colossalai/context/process_group_initializer/initializer_2p5d.py
+++ b/colossalai/context/process_group_initializer/initializer_2p5d.py
@@ -12,8 +12,7 @@ from ..parallel_mode import ParallelMode
 from .process_group_initializer import ProcessGroupInitializer
 
 
-def _check_tesseract_env_var(tesseract_dim: int,
-                             tesseract_dep: int):
+def _check_tesseract_env_var(tesseract_dim: int, tesseract_dep: int):
     # check global variable for TESSERACT
     env_tesseract_dim = env.tesseract_dim
     env_tesseract_dep = env.tesseract_dep
@@ -42,10 +41,7 @@ class Initializer_2p5D_ROW(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
 
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_ROW, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -68,8 +64,10 @@ class Initializer_2p5D_ROW(ProcessGroupInitializer):
         for h in range(self.num_group):
             for j in range(self.tesseract_dim):
                 for k in range(self.tesseract_dep):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for i in range(self.tesseract_dim)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for i in range(self.tesseract_dim)
+                    ]
                     group = dist.new_group(ranks)
 
                     if self.rank in ranks:
@@ -92,10 +90,7 @@ class Initializer_2p5D_Col(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
 
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_Col, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -118,8 +113,10 @@ class Initializer_2p5D_Col(ProcessGroupInitializer):
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
                 for k in range(self.tesseract_dep):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for j in range(self.tesseract_dim)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for j in range(self.tesseract_dim)
+                    ]
                     group = dist.new_group(ranks)
 
                     if self.rank in ranks:
@@ -142,10 +139,7 @@ class Initializer_2p5D_Dep(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
 
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_Dep, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -168,8 +162,10 @@ class Initializer_2p5D_Dep(ProcessGroupInitializer):
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
                 for j in range(self.tesseract_dim):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for k in range(self.tesseract_dep)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for k in range(self.tesseract_dep)
+                    ]
                     group = dist.new_group(ranks)
 
                     if self.rank in ranks:
@@ -193,10 +189,7 @@ class Initializer_2p5D_XZ(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
 
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_XZ, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -218,9 +211,11 @@ class Initializer_2p5D_XZ(ProcessGroupInitializer):
 
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
-                ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                    j + self.tesseract_dim * k) for k in range(self.tesseract_dep) for j in
-                    range(self.tesseract_dim)]
+                ranks = [
+                    h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                    for k in range(self.tesseract_dep)
+                    for j in range(self.tesseract_dim)
+                ]
                 group = dist.new_group(ranks)
 
                 if self.rank in ranks:
@@ -253,15 +248,8 @@ class Initializer_2p5D(ProcessGroupInitializer):
     :type depth: int
    """
 
-    def __init__(self,
-                 rank: int,
-                 world_size: int,
-                 config: Config,
-                 data_parallel_size: int,
-                 pipeline_parallel_size: int,
-                 tensor_parallel_size: int,
-                 depth: int
-                 ):
+    def __init__(self, rank: int, world_size: int, config: Config, data_parallel_size: int, pipeline_parallel_size: int,
+                 tensor_parallel_size: int, depth: int):
         args = (rank, world_size, config, data_parallel_size, pipeline_parallel_size, tensor_parallel_size)
         super().__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
@@ -279,10 +267,13 @@ class Initializer_2p5D(ProcessGroupInitializer):
 
     def init_dist_group(self):
         """Initialize 2p5D tensor row, col, depth, and colXdepth parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: Whole 2p5D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """
-        parallel_setting = [self.col_initializer.init_dist_group(), self.row_initializer.init_dist_group(),
-                            self.dep_initializer.init_dist_group(), self.xz_initializer.init_dist_group()]
+        parallel_setting = [
+            self.col_initializer.init_dist_group(),
+            self.row_initializer.init_dist_group(),
+            self.dep_initializer.init_dist_group(),
+            self.xz_initializer.init_dist_group()
+        ]
         return parallel_setting
diff --git a/colossalai/context/process_group_initializer/initializer_3d.py b/colossalai/context/process_group_initializer/initializer_3d.py
index edd8b4694..674761ac9 100644
--- a/colossalai/context/process_group_initializer/initializer_3d.py
+++ b/colossalai/context/process_group_initializer/initializer_3d.py
@@ -158,7 +158,6 @@ class Initializer_3D_Output(ProcessGroupInitializer):
 
 @DIST_GROUP_INITIALIZER.register_module
 class Initializer_3D(ProcessGroupInitializer):
     """Serve as the single entry point to 3D parallel initialization.
-
     :param args: Args used to initialize ProcessGroupInitializer
     """
@@ -176,10 +175,12 @@ class Initializer_3D(ProcessGroupInitializer):
 
     def init_dist_group(self):
         """Initialize 3D tensor parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: 3D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """
-        parallel_setting = [self.input_initializer.init_dist_group(), self.weight_initializer.init_dist_group(),
-                            self.output_initializer.init_dist_group()]
+        parallel_setting = [
+            self.input_initializer.init_dist_group(),
+            self.weight_initializer.init_dist_group(),
+            self.output_initializer.init_dist_group()
+        ]
         return parallel_setting
diff --git a/colossalai/context/process_group_initializer/initializer_model.py b/colossalai/context/process_group_initializer/initializer_model.py
index e4fe0e5e1..64394d6cf 100644
--- a/colossalai/context/process_group_initializer/initializer_model.py
+++ b/colossalai/context/process_group_initializer/initializer_model.py
@@ -2,8 +2,6 @@
 # -*- encoding: utf-8 -*-
 
 import torch.distributed as dist
-
-from colossalai.context import Config
 from colossalai.registry import DIST_GROUP_INITIALIZER
 from .process_group_initializer import ProcessGroupInitializer
 from ..parallel_mode import ParallelMode