|
|
|
@ -10,10 +10,7 @@ from colossalai.context import ParallelMode
|
|
|
|
|
from colossalai.core import global_context as gpc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def all_gather(tensor: Tensor,
|
|
|
|
|
dim: int,
|
|
|
|
|
parallel_mode: ParallelMode,
|
|
|
|
|
async_op: bool = False) -> Tensor:
|
|
|
|
|
def all_gather(tensor: Tensor, dim: int, parallel_mode: ParallelMode, async_op: bool = False) -> Tensor:
|
|
|
|
|
r"""Gathers all tensors from the parallel group and concatenates them in a
|
|
|
|
|
specific dimension.
|
|
|
|
|
|
|
|
|
@ -163,11 +160,7 @@ def broadcast(tensor: Tensor, src: int, parallel_mode: ParallelMode, async_op: b
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def reduce(tensor: Tensor,
|
|
|
|
|
dst: int,
|
|
|
|
|
parallel_mode: ParallelMode,
|
|
|
|
|
op: ReduceOp = ReduceOp.SUM,
|
|
|
|
|
async_op: bool = False):
|
|
|
|
|
def reduce(tensor: Tensor, dst: int, parallel_mode: ParallelMode, op: ReduceOp = ReduceOp.SUM, async_op: bool = False):
|
|
|
|
|
r"""Reduce tensors across whole parallel group. Only the process with
|
|
|
|
|
rank ``dst`` is going to receive the final result.
|
|
|
|
|
|
|
|
|
|