|
|
|
@ -62,6 +62,7 @@ class Initializer_2p5D_ROW(ProcessGroupInitializer):
|
|
|
|
|
local_rank = None
|
|
|
|
|
ranks_in_group = None
|
|
|
|
|
process_group = None
|
|
|
|
|
cpu_group = None
|
|
|
|
|
group_world_size = None
|
|
|
|
|
mode = ParallelMode.PARALLEL_2P5D_ROW
|
|
|
|
|
|
|
|
|
@ -73,14 +74,16 @@ class Initializer_2p5D_ROW(ProcessGroupInitializer):
|
|
|
|
|
for i in range(self.tesseract_dim)
|
|
|
|
|
]
|
|
|
|
|
group = dist.new_group(ranks)
|
|
|
|
|
group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group
|
|
|
|
|
|
|
|
|
|
if self.rank in ranks:
|
|
|
|
|
local_rank = ranks.index(self.rank)
|
|
|
|
|
group_world_size = len(ranks)
|
|
|
|
|
process_group = group
|
|
|
|
|
cpu_group = group_cpu
|
|
|
|
|
ranks_in_group = ranks
|
|
|
|
|
|
|
|
|
|
return local_rank, group_world_size, process_group, ranks_in_group, mode
|
|
|
|
|
return local_rank, group_world_size, process_group, cpu_group, ranks_in_group, mode
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Initializer_2p5D_Col(ProcessGroupInitializer):
|
|
|
|
@ -115,6 +118,7 @@ class Initializer_2p5D_Col(ProcessGroupInitializer):
|
|
|
|
|
local_rank = None
|
|
|
|
|
ranks_in_group = None
|
|
|
|
|
process_group = None
|
|
|
|
|
cpu_group = None
|
|
|
|
|
group_world_size = None
|
|
|
|
|
mode = ParallelMode.PARALLEL_2P5D_COL
|
|
|
|
|
|
|
|
|
@ -126,14 +130,16 @@ class Initializer_2p5D_Col(ProcessGroupInitializer):
|
|
|
|
|
for j in range(self.tesseract_dim)
|
|
|
|
|
]
|
|
|
|
|
group = dist.new_group(ranks)
|
|
|
|
|
group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group
|
|
|
|
|
|
|
|
|
|
if self.rank in ranks:
|
|
|
|
|
local_rank = ranks.index(self.rank)
|
|
|
|
|
group_world_size = len(ranks)
|
|
|
|
|
process_group = group
|
|
|
|
|
cpu_group = group_cpu
|
|
|
|
|
ranks_in_group = ranks
|
|
|
|
|
|
|
|
|
|
return local_rank, group_world_size, process_group, ranks_in_group, mode
|
|
|
|
|
return local_rank, group_world_size, process_group, cpu_group, ranks_in_group, mode
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Initializer_2p5D_Dep(ProcessGroupInitializer):
|
|
|
|
@ -168,6 +174,7 @@ class Initializer_2p5D_Dep(ProcessGroupInitializer):
|
|
|
|
|
local_rank = None
|
|
|
|
|
ranks_in_group = None
|
|
|
|
|
process_group = None
|
|
|
|
|
cpu_group = None
|
|
|
|
|
group_world_size = None
|
|
|
|
|
mode = ParallelMode.PARALLEL_2P5D_DEP
|
|
|
|
|
|
|
|
|
@ -179,14 +186,16 @@ class Initializer_2p5D_Dep(ProcessGroupInitializer):
|
|
|
|
|
for k in range(self.tesseract_dep)
|
|
|
|
|
]
|
|
|
|
|
group = dist.new_group(ranks)
|
|
|
|
|
group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group
|
|
|
|
|
|
|
|
|
|
if self.rank in ranks:
|
|
|
|
|
local_rank = ranks.index(self.rank)
|
|
|
|
|
group_world_size = len(ranks)
|
|
|
|
|
process_group = group
|
|
|
|
|
cpu_group = group_cpu
|
|
|
|
|
ranks_in_group = ranks
|
|
|
|
|
|
|
|
|
|
return local_rank, group_world_size, process_group, ranks_in_group, mode
|
|
|
|
|
return local_rank, group_world_size, process_group, cpu_group, ranks_in_group, mode
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# i row j col k dep
|
|
|
|
@ -222,6 +231,7 @@ class Initializer_2p5D_XZ(ProcessGroupInitializer):
|
|
|
|
|
local_rank = None
|
|
|
|
|
ranks_in_group = None
|
|
|
|
|
process_group = None
|
|
|
|
|
cpu_group = None
|
|
|
|
|
group_world_size = None
|
|
|
|
|
mode = ParallelMode.PARALLEL_2P5D_XZ
|
|
|
|
|
|
|
|
|
@ -233,14 +243,16 @@ class Initializer_2p5D_XZ(ProcessGroupInitializer):
|
|
|
|
|
for j in range(self.tesseract_dim)
|
|
|
|
|
]
|
|
|
|
|
group = dist.new_group(ranks)
|
|
|
|
|
group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group
|
|
|
|
|
|
|
|
|
|
if self.rank in ranks:
|
|
|
|
|
local_rank = ranks.index(self.rank)
|
|
|
|
|
group_world_size = len(ranks)
|
|
|
|
|
process_group = group
|
|
|
|
|
cpu_group = group_cpu
|
|
|
|
|
ranks_in_group = ranks
|
|
|
|
|
|
|
|
|
|
return local_rank, group_world_size, process_group, ranks_in_group, mode
|
|
|
|
|
return local_rank, group_world_size, process_group, cpu_group, ranks_in_group, mode
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@DIST_GROUP_INITIALIZER.register_module
|
|
|
|
|