fix format ColossalAI\colossalai\context\process_group_initializer

pull/394/head
Maruyama_Aya 2022-03-09 16:23:33 +08:00 committed by Frank Lee
parent 3b88eb2259
commit e83970e3dc
5 changed files with 37 additions and 51 deletions

@@ -20,7 +20,6 @@ class Initializer_1D(ProcessGroupInitializer):
     def init_dist_group(self):
         """Initialize 1D tensor parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: (local_rank, group_world_size, process_group, ranks_in_group, mode)
         :rtype: Tuple
         """

@@ -49,8 +49,7 @@ class Initializer_2D_Row(ProcessGroupInitializer):
         for i in range(self.num_group):
             for j in range(self.summa_dim):
-                ranks = [i * self.tensor_parallel_size + j * self.summa_dim + k
-                         for k in range(self.summa_dim)]
+                ranks = [i * self.tensor_parallel_size + j * self.summa_dim + k for k in range(self.summa_dim)]
                 group = dist.new_group(ranks)
                 if self.rank in ranks:
@@ -93,8 +92,7 @@ class Initializer_2D_Col(ProcessGroupInitializer):
         for i in range(self.num_group):
             for j in range(self.summa_dim):
-                ranks = [i * self.tensor_parallel_size + j + k * self.summa_dim
-                         for k in range(self.summa_dim)]
+                ranks = [i * self.tensor_parallel_size + j + k * self.summa_dim for k in range(self.summa_dim)]
                 group = dist.new_group(ranks)
                 if self.rank in ranks:
@@ -129,7 +127,6 @@ class Initializer_2D(ProcessGroupInitializer):
     def init_dist_group(self):
         """Initialize 2D tensor row and col parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: 2D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """

@@ -12,8 +12,7 @@ from ..parallel_mode import ParallelMode
 from .process_group_initializer import ProcessGroupInitializer
-def _check_tesseract_env_var(tesseract_dim: int,
-                             tesseract_dep: int):
+def _check_tesseract_env_var(tesseract_dim: int, tesseract_dep: int):
     # check global variable for TESSERACT
     env_tesseract_dim = env.tesseract_dim
     env_tesseract_dep = env.tesseract_dep
@@ -42,10 +41,7 @@ class Initializer_2p5D_ROW(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_ROW, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -68,8 +64,10 @@ class Initializer_2p5D_ROW(ProcessGroupInitializer):
         for h in range(self.num_group):
             for j in range(self.tesseract_dim):
                 for k in range(self.tesseract_dep):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for i in range(self.tesseract_dim)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for i in range(self.tesseract_dim)
+                    ]
                     group = dist.new_group(ranks)
                     if self.rank in ranks:
@@ -92,10 +90,7 @@ class Initializer_2p5D_Col(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_Col, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -118,8 +113,10 @@ class Initializer_2p5D_Col(ProcessGroupInitializer):
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
                 for k in range(self.tesseract_dep):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for j in range(self.tesseract_dim)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for j in range(self.tesseract_dim)
+                    ]
                     group = dist.new_group(ranks)
                     if self.rank in ranks:
@@ -142,10 +139,7 @@ class Initializer_2p5D_Dep(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_Dep, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -168,8 +162,10 @@ class Initializer_2p5D_Dep(ProcessGroupInitializer):
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
                 for j in range(self.tesseract_dim):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for k in range(self.tesseract_dep)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for k in range(self.tesseract_dep)
+                    ]
                     group = dist.new_group(ranks)
                     if self.rank in ranks:
@@ -193,10 +189,7 @@ class Initializer_2p5D_XZ(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_XZ, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -218,9 +211,11 @@ class Initializer_2p5D_XZ(ProcessGroupInitializer):
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
-                ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                    j + self.tesseract_dim * k) for k in range(self.tesseract_dep) for j in
-                    range(self.tesseract_dim)]
+                ranks = [
+                    h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                    for k in range(self.tesseract_dep)
+                    for j in range(self.tesseract_dim)
+                ]
                 group = dist.new_group(ranks)
                 if self.rank in ranks:
@@ -253,15 +248,8 @@ class Initializer_2p5D(ProcessGroupInitializer):
     :type depth: int
     """
-    def __init__(self,
-                 rank: int,
-                 world_size: int,
-                 config: Config,
-                 data_parallel_size: int,
-                 pipeline_parallel_size: int,
-                 tensor_parallel_size: int,
-                 depth: int
-                 ):
+    def __init__(self, rank: int, world_size: int, config: Config, data_parallel_size: int, pipeline_parallel_size: int,
+                 tensor_parallel_size: int, depth: int):
         args = (rank, world_size, config, data_parallel_size, pipeline_parallel_size, tensor_parallel_size)
         super().__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
@@ -279,10 +267,13 @@ class Initializer_2p5D(ProcessGroupInitializer):
     def init_dist_group(self):
         """Initialize 2p5D tensor row, col, depth, and colXdepth parallel groups, and assign local_ranks and groups to each gpu.
         :return: Whole 2p5D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """
-        parallel_setting = [self.col_initializer.init_dist_group(), self.row_initializer.init_dist_group(),
-                            self.dep_initializer.init_dist_group(), self.xz_initializer.init_dist_group()]
+        parallel_setting = [
+            self.col_initializer.init_dist_group(),
+            self.row_initializer.init_dist_group(),
+            self.dep_initializer.init_dist_group(),
+            self.xz_initializer.init_dist_group()
+        ]
         return parallel_setting
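
For reference, a minimal sketch (not part of this commit) of the rank groups produced by the shared 2.5D index expression reflowed above. The toy sizes (tesseract_dim = 2, tesseract_dep = 2, tensor_parallel_size = 8) and the helper name enumerate_2p5d_groups are assumptions for illustration.

# Minimal sketch (illustration only): list the rank groups produced by the
# 2.5D index expression used by Initializer_2p5D_ROW / _Col / _Dep above.
# Assumed toy sizes: tesseract_dim = 2, tesseract_dep = 2, tensor_parallel_size = 2 * 2 * 2 = 8.
def enumerate_2p5d_groups(world_size=8, tensor_parallel_size=8, tesseract_dim=2, tesseract_dep=2):
    num_group = world_size // tensor_parallel_size

    def rank(h, i, j, k):
        # same expression as in the diff hunks above
        return h * tensor_parallel_size + i + tesseract_dim * (j + tesseract_dim * k)

    groups = {"row": [], "col": [], "dep": []}
    for h in range(num_group):
        for j in range(tesseract_dim):
            for k in range(tesseract_dep):
                groups["row"].append([rank(h, i, j, k) for i in range(tesseract_dim)])
        for i in range(tesseract_dim):
            for k in range(tesseract_dep):
                groups["col"].append([rank(h, i, j, k) for j in range(tesseract_dim)])
            for j in range(tesseract_dim):
                groups["dep"].append([rank(h, i, j, k) for k in range(tesseract_dep)])
    return groups

print(enumerate_2p5d_groups())
# {'row': [[0, 1], [4, 5], [2, 3], [6, 7]],
#  'col': [[0, 2], [4, 6], [1, 3], [5, 7]],
#  'dep': [[0, 4], [2, 6], [1, 5], [3, 7]]}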

@@ -158,7 +158,6 @@ class Initializer_3D_Output(ProcessGroupInitializer):
 @DIST_GROUP_INITIALIZER.register_module
 class Initializer_3D(ProcessGroupInitializer):
     """Serve as the single entry point to 3D parallel initialization.
-
     :param args: Args used to initialize ProcessGroupInitializer
     """
@@ -176,10 +175,12 @@ class Initializer_3D(ProcessGroupInitializer):
     def init_dist_group(self):
         """Initialize 3D tensor parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: 3D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """
-        parallel_setting = [self.input_initializer.init_dist_group(), self.weight_initializer.init_dist_group(),
-                            self.output_initializer.init_dist_group()]
+        parallel_setting = [
+            self.input_initializer.init_dist_group(),
+            self.weight_initializer.init_dist_group(),
+            self.output_initializer.init_dist_group()
+        ]
         return parallel_setting
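
For reference, a minimal sketch (not part of this commit) of how a caller might unpack the documented return value: each entry of the returned list is a tuple (local_rank, group_world_size, process_group, ranks_in_group, mode). The helper name summarize_parallel_setting is an assumption for illustration.

# Minimal sketch (illustration only): iterate over the list returned by
# init_dist_group(), whose entries are documented above.
def summarize_parallel_setting(parallel_setting):
    for local_rank, group_world_size, process_group, ranks_in_group, mode in parallel_setting:
        print(f"{mode}: local rank {local_rank} of {group_world_size}, ranks {ranks_in_group}")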

@@ -2,8 +2,6 @@
 # -*- encoding: utf-8 -*-
 import torch.distributed as dist
-from colossalai.context import Config
 from colossalai.registry import DIST_GROUP_INITIALIZER
 from .process_group_initializer import ProcessGroupInitializer
 from ..parallel_mode import ParallelMode