[fx] get communication size between partitions (#1224)

* [CLI] add CLI launcher

* Revert "[CLI] add CLI launcher"

This reverts commit df7e6506d4.

* [fx] get communication size between partitions.

* polish
YuliangLiu0306 2022-07-07 16:22:00 +08:00 committed by GitHub
parent 4951f7d80c
commit 2b7dca44b5
4 changed files with 209 additions and 0 deletions

colossalai/fx/passes/meta_info_prop.py

@@ -0,0 +1,101 @@
import torch
import torch.fx
from torch.fx.node import Node, map_aggregate
from typing import Any, Tuple, NamedTuple, Optional, Dict
from functools import reduce
from torch.fx._compatibility import compatibility
@compatibility(is_backward_compatible=True)
class TensorMetadata(NamedTuple):
# TensorMetadata is a structure containing pertinent information
# about a tensor within a PyTorch program.
shape: torch.Size
dtype: torch.dtype
requires_grad: bool
    stride: Tuple[int, ...]
numel: int
    # TODO: we can add a list of sharding specs here, and record the sharding
    # behaviour by appending a sharding spec to the list.
def _extract_tensor_metadata(result: torch.Tensor) -> TensorMetadata:
"""
Extract a TensorMetadata NamedTuple describing `result`.
"""
shape = result.shape
dtype = result.dtype
requires_grad = result.requires_grad
stride = result.stride()
numel = result.numel()
return TensorMetadata(shape, dtype, requires_grad, stride, numel)
@compatibility(is_backward_compatible=True)
class MetaInfoProp(torch.fx.Interpreter):
"""
    Execute an FX graph node-by-node and record the shape and type of
    each result in the corresponding node's ``meta`` attribute.
Usage:
BATCH_SIZE = 2
DIM_IN = 4
DIM_OUT = 16
model = torch.nn.Linear(DIM_IN, DIM_OUT)
input_sample = torch.rand(BATCH_SIZE, DIM_IN)
orig_output = model(input_sample)
gm = symbolic_trace(model)
MetaInfoProp(gm).run(input_sample)
for node in gm.graph.nodes:
print(node.name, node.meta['tensor_meta'].dtype,
node.meta['tensor_meta'].shape, node.meta['tensor_meta'].numel)
# output of above code is
# input_1 torch.float32 torch.Size([2, 4]) 8
# weight torch.float32 torch.Size([16, 4]) 64
# bias torch.float32 torch.Size([16]) 16
# linear torch.float32 torch.Size([2, 16]) 32
# output torch.float32 torch.Size([2, 16]) 32
Args:
module (GraphModule): The module to be executed
"""
def run_node(self, n: Node) -> Any:
result = super().run_node(n)
found_tensor = False
def extract_tensor_meta(obj):
if isinstance(obj, torch.Tensor):
nonlocal found_tensor
found_tensor = True
return _extract_tensor_metadata(obj)
else:
return obj
meta = map_aggregate(result, extract_tensor_meta)
if found_tensor:
n.meta['tensor_meta'] = meta
else:
n.meta['tensor_meta'] = TensorMetadata(None, None, False, None, 0)
n.meta['type'] = type(result)
return result
def propagate(self, *args):
"""
        Run `module` via interpretation, recording the shape and
        type of each node, and return the result.
Args:
*args (Tensor): the sample input.
Returns:
Any: The value returned from executing the Module
"""
return super().run(*args)
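
Note (not part of the diff): because run_node uses map_aggregate, nested outputs are mirrored by a matching nested structure of TensorMetadata. A minimal sketch, assuming the file above is importable as colossalai.fx.passes.meta_info_prop; the TwoHead module is hypothetical:

import torch
from torch.fx import symbolic_trace
from colossalai.fx.passes.meta_info_prop import MetaInfoProp

class TwoHead(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Linear(4, 8)
        self.b = torch.nn.Linear(4, 2)

    def forward(self, x):
        # Tuple output: map_aggregate records a tuple of TensorMetadata
        return self.a(x), self.b(x)

gm = symbolic_trace(TwoHead())
MetaInfoProp(gm).run(torch.rand(2, 4))
output_node = list(gm.graph.nodes)[-1]  # the 'output' node
meta_a, meta_b = output_node.meta['tensor_meta']
assert meta_a.shape == torch.Size([2, 8])
assert meta_b.shape == torch.Size([2, 2])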

colossalai/fx/passes/utils.py

@@ -0,0 +1,27 @@
import torch
from typing import Dict, Set
from torch.fx.node import Node, map_arg
def get_comm_size(prev_partition, next_partition):
"""Given two partitions (parent and child),
calculate the communication size between the two.
"""
    # Keep track of the communication size between parent and child
    comm_size = 0
    # Keep track of all the nodes that have already been counted
visited_nodes = set()
# Go through all nodes in the child partition
# If a node has input nodes from the parent partition,
# the output size of those input nodes will be counted
# and added to comm_size
    parent_node_names = [n.name for n in prev_partition.graph.nodes]
    for node in next_partition.graph.nodes:
input_nodes: Dict[Node, None] = {}
map_arg(node.args, lambda n: input_nodes.setdefault(n))
map_arg(node.kwargs, lambda n: input_nodes.setdefault(n))
for n in input_nodes:
if n.name in parent_node_names and n not in visited_nodes:
comm_size += n.meta['tensor_meta'].numel
visited_nodes.add(n)
return comm_size
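
Note (not part of the diff): visited_nodes ensures that a parent output consumed by several child nodes is counted only once. A minimal, self-contained sketch of that behaviour with two hand-built partitions; the graphs are hypothetical and only mimic what the split pass produces (matching node names plus tensor_meta on the boundary node):

import torch
from torch.fx import Graph, GraphModule
from colossalai.fx.passes.meta_info_prop import TensorMetadata
from colossalai.fx.passes.utils import get_comm_size

# Parent partition: its boundary node is named 'relu'.
parent_graph = Graph()
x = parent_graph.placeholder('x')
y = parent_graph.call_function(torch.relu, (x,))  # node name: 'relu'
parent_graph.output(y)

# Child partition: a placeholder with the matching name 'relu',
# consumed by two different nodes.
child_graph = Graph()
inp = child_graph.placeholder('relu')
inp.meta['tensor_meta'] = TensorMetadata(torch.Size([8, 16]), torch.float32, False, (16, 1), 128)
h1 = child_graph.call_function(torch.relu, (inp,))
h2 = child_graph.call_function(torch.sigmoid, (inp,))
child_graph.output((h1, h2))

parent = GraphModule(torch.nn.Module(), parent_graph)
child = GraphModule(torch.nn.Module(), child_graph)
# 'relu' feeds both h1 and h2, but it is only counted once.
assert get_comm_size(parent, child) == 128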

tests/test_fx/test_comm_size_compute.py

@@ -0,0 +1,46 @@
import torch
import torch.nn as nn
import colossalai
import colossalai.nn as col_nn
from torch.fx import symbolic_trace
from colossalai.fx.passes.meta_info_prop import MetaInfoProp
from colossalai.fx.passes.adding_split_node_pass import split_with_split_nodes_pass, uniform_split_pass
from colossalai.fx.passes.utils import get_comm_size
MODEL_DIM = 16
BATCH_SIZE = 8
PIPELINE_SIZE = 2
class MLP(torch.nn.Module):
def __init__(self, dim: int):
super().__init__()
self.linear1 = torch.nn.Linear(dim, dim)
self.linear2 = torch.nn.Linear(dim, dim)
self.linear3 = torch.nn.Linear(dim, dim)
self.linear4 = torch.nn.Linear(dim, dim)
def forward(self, x):
x = self.linear1(x)
x = self.linear2(x)
x = self.linear3(x)
x = self.linear4(x)
return x
def test_comm_size_compute():
model = MLP(MODEL_DIM)
input_sample = torch.rand(BATCH_SIZE, MODEL_DIM)
gm = symbolic_trace(model)
MetaInfoProp(gm).run(input_sample)
annotated_model = uniform_split_pass(gm, PIPELINE_SIZE)
split_model, split_submodules = split_with_split_nodes_pass(annotated_model)
submodule_list = list(split_model.children())
comm_size = get_comm_size(submodule_list[0], submodule_list[1])
    # The tensor sent from partition 0 to partition 1 has shape (8, 16),
    # so the communication size is 8 * 16 = 128 elements.
assert comm_size == 128
if __name__ == '__main__':
test_comm_size_compute()

tests/test_fx/test_meta_info_prop.py

@@ -0,0 +1,35 @@
import torch
import torch.nn as nn
import colossalai
import colossalai.nn as col_nn
from torch.fx import symbolic_trace
from colossalai.fx.passes.meta_info_prop import MetaInfoProp, TensorMetadata
BATCH_SIZE = 2
DIM_IN = 4
DIM_OUT = 16
def meta_check(meta_info_spec: TensorMetadata, orig_tensor: torch.Tensor):
assert meta_info_spec.shape == orig_tensor.shape
assert meta_info_spec.dtype == orig_tensor.dtype
assert meta_info_spec.requires_grad == orig_tensor.requires_grad
assert meta_info_spec.stride == orig_tensor.stride()
assert meta_info_spec.numel == orig_tensor.numel()
def test_meta_info_prop():
model = torch.nn.Linear(DIM_IN, DIM_OUT)
input_sample = torch.rand(BATCH_SIZE, DIM_IN)
orig_output = model(input_sample)
gm = symbolic_trace(model)
MetaInfoProp(gm).run(input_sample)
for node in gm.graph.nodes:
if node.op == 'placeholder':
meta_check(node.meta['tensor_meta'], input_sample)
if node.op == 'output':
meta_check(node.meta['tensor_meta'], orig_output)
if __name__ == '__main__':
test_meta_info_prop()
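
A final sketch (not part of the diff): when a node's result is not a tensor, run_node falls back to TensorMetadata(None, None, False, None, 0) and still records the Python type under node.meta['type']. The SizeHead module below is hypothetical:

import torch
from torch.fx import symbolic_trace
from colossalai.fx.passes.meta_info_prop import MetaInfoProp

class SizeHead(torch.nn.Module):
    def forward(self, x):
        # x.size(0) evaluates to a plain int at runtime, so its node
        # gets the placeholder TensorMetadata and meta['type'] == int
        return x + x.size(0)

gm = symbolic_trace(SizeHead())
MetaInfoProp(gm).run(torch.rand(3, 4))
size_node = [n for n in gm.graph.nodes if n.op == 'call_method'][0]
assert size_node.meta['type'] is int
assert size_node.meta['tensor_meta'].shape is None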