
fix format ColossalAI\colossalai\context\process_group_initializer

pull/394/head
Maruyama_Aya 3 years ago committed by Frank Lee
commit e83970e3dc
1. colossalai/context/process_group_initializer/initializer_1d.py (1 change)
2. colossalai/context/process_group_initializer/initializer_2d.py (7 changes)
3. colossalai/context/process_group_initializer/initializer_2p5d.py (69 changes)
4. colossalai/context/process_group_initializer/initializer_3d.py (9 changes)
5. colossalai/context/process_group_initializer/initializer_model.py (2 changes)

colossalai/context/process_group_initializer/initializer_1d.py (1 change)

@@ -20,7 +20,6 @@ class Initializer_1D(ProcessGroupInitializer):
     def init_dist_group(self):
         """Initialize 1D tensor parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: (local_rank, group_world_size, process_group, ranks_in_group, mode)
         :rtype: Tuple
         """

colossalai/context/process_group_initializer/initializer_2d.py (7 changes)

@@ -49,8 +49,7 @@ class Initializer_2D_Row(ProcessGroupInitializer):
         for i in range(self.num_group):
             for j in range(self.summa_dim):
-                ranks = [i * self.tensor_parallel_size + j * self.summa_dim + k
-                         for k in range(self.summa_dim)]
+                ranks = [i * self.tensor_parallel_size + j * self.summa_dim + k for k in range(self.summa_dim)]
                 group = dist.new_group(ranks)
                 if self.rank in ranks:
@@ -93,8 +92,7 @@ class Initializer_2D_Col(ProcessGroupInitializer):
         for i in range(self.num_group):
             for j in range(self.summa_dim):
-                ranks = [i * self.tensor_parallel_size + j + k * self.summa_dim
-                         for k in range(self.summa_dim)]
+                ranks = [i * self.tensor_parallel_size + j + k * self.summa_dim for k in range(self.summa_dim)]
                 group = dist.new_group(ranks)
                 if self.rank in ranks:
@@ -129,7 +127,6 @@ class Initializer_2D(ProcessGroupInitializer):
     def init_dist_group(self):
         """Initialize 2D tensor row and col parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: 2D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """

colossalai/context/process_group_initializer/initializer_2p5d.py (69 changes)

@@ -12,8 +12,7 @@ from ..parallel_mode import ParallelMode
 from .process_group_initializer import ProcessGroupInitializer
 
-def _check_tesseract_env_var(tesseract_dim: int,
-                             tesseract_dep: int):
+def _check_tesseract_env_var(tesseract_dim: int, tesseract_dep: int):
     # check global variable for TESSERACT
     env_tesseract_dim = env.tesseract_dim
     env_tesseract_dep = env.tesseract_dep
@@ -42,10 +41,7 @@ class Initializer_2p5D_ROW(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
 
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_ROW, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -68,8 +64,10 @@ class Initializer_2p5D_ROW(ProcessGroupInitializer):
         for h in range(self.num_group):
             for j in range(self.tesseract_dim):
                 for k in range(self.tesseract_dep):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for i in range(self.tesseract_dim)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for i in range(self.tesseract_dim)
+                    ]
                     group = dist.new_group(ranks)
                     if self.rank in ranks:
@@ -92,10 +90,7 @@ class Initializer_2p5D_Col(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
 
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_Col, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -118,8 +113,10 @@ class Initializer_2p5D_Col(ProcessGroupInitializer):
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
                 for k in range(self.tesseract_dep):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for j in range(self.tesseract_dim)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for j in range(self.tesseract_dim)
+                    ]
                     group = dist.new_group(ranks)
                     if self.rank in ranks:
@@ -142,10 +139,7 @@ class Initializer_2p5D_Dep(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
 
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_Dep, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -168,8 +162,10 @@ class Initializer_2p5D_Dep(ProcessGroupInitializer):
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
                 for j in range(self.tesseract_dim):
-                    ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                        j + self.tesseract_dim * k) for k in range(self.tesseract_dep)]
+                    ranks = [
+                        h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                        for k in range(self.tesseract_dep)
+                    ]
                     group = dist.new_group(ranks)
                     if self.rank in ranks:
@@ -193,10 +189,7 @@ class Initializer_2p5D_XZ(ProcessGroupInitializer):
     :type tesseract_dep: int
     """
 
-    def __init__(self,
-                 tesseract_dim: int,
-                 tesseract_dep: int,
-                 *args):
+    def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
         super(Initializer_2p5D_XZ, self).__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
         self.tesseract_dep = tesseract_dep
@@ -218,9 +211,11 @@ class Initializer_2p5D_XZ(ProcessGroupInitializer):
         for h in range(self.num_group):
             for i in range(self.tesseract_dim):
-                ranks = [h * self.tensor_parallel_size + i + self.tesseract_dim * (
-                    j + self.tesseract_dim * k) for k in range(self.tesseract_dep) for j in
-                    range(self.tesseract_dim)]
+                ranks = [
+                    h * self.tensor_parallel_size + i + self.tesseract_dim * (j + self.tesseract_dim * k)
+                    for k in range(self.tesseract_dep)
+                    for j in range(self.tesseract_dim)
+                ]
                 group = dist.new_group(ranks)
                 if self.rank in ranks:
@@ -253,15 +248,8 @@ class Initializer_2p5D(ProcessGroupInitializer):
     :type depth: int
     """
 
-    def __init__(self,
-                 rank: int,
-                 world_size: int,
-                 config: Config,
-                 data_parallel_size: int,
-                 pipeline_parallel_size: int,
-                 tensor_parallel_size: int,
-                 depth: int
-                 ):
+    def __init__(self, rank: int, world_size: int, config: Config, data_parallel_size: int, pipeline_parallel_size: int,
+                 tensor_parallel_size: int, depth: int):
         args = (rank, world_size, config, data_parallel_size, pipeline_parallel_size, tensor_parallel_size)
         super().__init__(*args)
         self.num_group = self.world_size // self.tensor_parallel_size
@@ -279,10 +267,13 @@ class Initializer_2p5D(ProcessGroupInitializer):
     def init_dist_group(self):
         """Initialize 2p5D tensor row, col, depth, and colXdepth parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: Whole 2p5D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """
-        parallel_setting = [self.col_initializer.init_dist_group(), self.row_initializer.init_dist_group(),
-                            self.dep_initializer.init_dist_group(), self.xz_initializer.init_dist_group()]
+        parallel_setting = [
+            self.col_initializer.init_dist_group(),
+            self.row_initializer.init_dist_group(),
+            self.dep_initializer.init_dist_group(),
+            self.xz_initializer.init_dist_group()
+        ]
         return parallel_setting
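
The 2.5D hunks above all reformat the same indexing pattern: a rank decomposes as h * tensor_parallel_size + i + tesseract_dim * (j + tesseract_dim * k), and each initializer fixes two of the indices while iterating the third (ROW iterates i, Col iterates j, Dep iterates k, XZ iterates j and k together). A minimal sketch of the ROW case, assuming an illustrative toy configuration (tesseract_dim = 2, tesseract_dep = 2, world_size = 8; these values are not from the diff):

tesseract_dim = 2                                          # illustrative, not from the diff
tesseract_dep = 2
tensor_parallel_size = tesseract_dim ** 2 * tesseract_dep  # 8 ranks per 2.5D group
world_size = 8                                             # exactly one group here
num_group = world_size // tensor_parallel_size

row_groups = []
for h in range(num_group):
    for j in range(tesseract_dim):
        for k in range(tesseract_dep):
            # Fix (j, k) and sweep i: one row of one depth slice of the tesseract.
            row_groups.append([
                h * tensor_parallel_size + i + tesseract_dim * (j + tesseract_dim * k)
                for i in range(tesseract_dim)
            ])

print(row_groups)  # [[0, 1], [4, 5], [2, 3], [6, 7]]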

colossalai/context/process_group_initializer/initializer_3d.py (9 changes)

@@ -158,7 +158,6 @@ class Initializer_3D_Output(ProcessGroupInitializer):
 @DIST_GROUP_INITIALIZER.register_module
 class Initializer_3D(ProcessGroupInitializer):
     """Serve as the single entry point to 3D parallel initialization.
-
     :param args: Args used to initialize ProcessGroupInitializer
     """
@@ -176,10 +175,12 @@ class Initializer_3D(ProcessGroupInitializer):
     def init_dist_group(self):
         """Initialize 3D tensor parallel groups, and assign local_ranks and groups to each gpu.
-
         :return: 3D tensor parallelism's information
         :rtype: list of Tuples (local_rank, group_world_size, process_group, ranks_in_group, mode)
         """
-        parallel_setting = [self.input_initializer.init_dist_group(), self.weight_initializer.init_dist_group(),
-                            self.output_initializer.init_dist_group()]
+        parallel_setting = [
+            self.input_initializer.init_dist_group(),
+            self.weight_initializer.init_dist_group(),
+            self.output_initializer.init_dist_group()
+        ]
         return parallel_setting
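
As the docstrings state, each init_dist_group call yields a (local_rank, group_world_size, process_group, ranks_in_group, mode) tuple, so Initializer_3D now returns a uniformly formatted list of such tuples. A hedged sketch of consuming that list (the helper name is hypothetical, not part of ColossalAI's API):

from typing import Any, List, Tuple

def summarize_parallel_setting(parallel_setting: List[Tuple[Any, ...]]) -> None:
    # Each entry follows the tuple layout documented in the docstrings above.
    for local_rank, group_world_size, process_group, ranks_in_group, mode in parallel_setting:
        print(f"{mode}: local_rank={local_rank}, group_world_size={group_world_size}, ranks={ranks_in_group}")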

colossalai/context/process_group_initializer/initializer_model.py (2 changes)

@@ -2,8 +2,6 @@
 # -*- encoding: utf-8 -*-
-
 import torch.distributed as dist
-
 from colossalai.context import Config
 from colossalai.registry import DIST_GROUP_INITIALIZER
 from .process_group_initializer import ProcessGroupInitializer
 from ..parallel_mode import ParallelMode
