@@ -31,7 +31,7 @@ PYTORCHPGDICT_ = PyTorchProcessGroupDict()
 class ProcessGroup:
     """ProcessGroup
-    Process Group contains group partition for Tensor Parallel and Data Parallel.
+    Process Group indicates how processes are organized in groups for parallel execution using Tensor Parallelism and Data Parallelism.
 
     NOTE: the ProcessGroup must be used after `torch.distributed.init_process_group()`
@@ -40,8 +40,8 @@ class ProcessGroup:
         rank: the global rank of the current process.
         ranks: List[int], a list of rank ids belonging to this process group.
         backend: str, the backend of the process group.
-        tp_degree: Optional[int], tensor parallelism degree, default None means 1
-        dp_degree: Optional[int], data parallelism degree, default None means len(ranks)
+        tp_degree: Optional[int], tensor parallelism degree, i.e. how many processes are inside a tp process group. Default None means 1.
+        dp_degree: Optional[int], data parallelism degree, i.e. how many processes are inside a dp process group. Default None means len(ranks).
"""
|
|
|
|
|
|
|
|
|
|
def __init__(self,
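A minimal usage sketch of the constructor these docstring lines describe. This is an illustration under assumptions, not part of the diff: the import path `colossalai.tensor.ProcessGroup`, the 4-process `torchrun` launch, and passing `backend` as a keyword are taken from the docstring and common usage, since the `__init__` signature above is truncated.

# Sketch only: assumes 4 processes launched via `torchrun --nproc_per_node=4`,
# which sets the environment variables that init_process_group() reads.
import torch.distributed as dist

from colossalai.tensor import ProcessGroup  # assumed import path

# Per the NOTE in the docstring: torch.distributed must be initialized
# before a ProcessGroup is constructed.
dist.init_process_group(backend='nccl')

rank = dist.get_rank()
world_size = dist.get_world_size()  # 4 in this sketch

# tp_degree * dp_degree partitions the ranks: here 2 * 2 = 4, i.e. 2-way
# tensor parallelism combined with 2-way data parallelism over 4 processes.
pg = ProcessGroup(rank=rank,
                  ranks=list(range(world_size)),
                  backend='nccl',  # assumed keyword, per the documented Args
                  tp_degree=2,
                  dp_degree=2)

# Per the documented defaults, omitting both degrees would yield
# tp_degree=1 and dp_degree=len(ranks): pure data parallelism.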