ColossalAI/colossalai/fx/tracer/_meta_trace.py

import torch
from torch.fx import Graph, Node
from torch.utils._pytree import tree_map


def normalize_tuple(x):
    """Return ``x`` unchanged when it is a tuple, otherwise wrap it in a 1-tuple.

    Only ``tuple`` instances pass through as-is; any other object (including
    lists) becomes the single element of a new tuple.
    """
    return x if isinstance(x, tuple) else (x,)


def is_autogradable(x):
    """Tell whether ``x`` is a ``torch.Tensor`` with a floating-point dtype.

    Anything that is not a ``torch.Tensor`` yields ``False``.
    """
    if not isinstance(x, torch.Tensor):
        return False
    return x.is_floating_point()


def meta_trace(module: torch.nn.Module, fake_device=None, *args, **kwargs) -> Graph:
    """Trace forward and backward graph with MetaTensor

    Every tensor found in ``args``/``kwargs`` is wrapped in a ``MetaProxy`` and
    registered as a placeholder node. ``module`` is then called on the wrapped
    inputs, and each op that reaches ``MetaProxy.__torch_dispatch__`` is recorded
    as a ``call_function`` node while being executed on the ``meta`` device.
    Finally ``torch.autograd.backward`` is run on every floating-point output
    that requires grad so that the backward ops are recorded as well.

    Args:
        module (torch.nn.Module): The target module for tracing.
        fake_device (torch.device, optional): Device the wrapped inputs report to
            the frontend. ``None`` keeps each input's own device. This is the
            second positional parameter, so module inputs must be passed after it.
        *args: Positional inputs forwarded to ``module``.
        **kwargs: Keyword inputs forwarded to ``module``.

    Returns:
        graph (torch.fx.Graph): The computation graph.

    Usage:
        >>> import torchvision.models as tm
        >>> model = tm.alexnet()
        >>> graph = meta_trace(model, None, torch.rand(1000, 3, 224, 224))
        >>> graph.print_tabular()
    """
    graph = Graph()
    namespace = graph._graph_namespace

    class MetaProxy(torch.Tensor):
        """
        A wrapping tensor that hacks `torch.autograd` without patching more `torch.ops.aten` ops.
        """

        _tensor: torch.Tensor
        _node: Node

        __slots__ = ["_tensor", "_node"]

        @staticmethod
        def __new__(cls, tensor, fake_device=None, placeholder=False, name=None):
            r = torch.Tensor._make_wrapper_subclass(
                cls,
                tensor.size(),
                strides=tensor.stride(),
                storage_offset=tensor.storage_offset(),
                dtype=tensor.dtype,
                layout=tensor.layout,
                device=fake_device if fake_device is not None else tensor.device,
                requires_grad=tensor.requires_grad,
            )  # deceive the frontend for aten selections
            r._tensor = tensor
            if placeholder:
                if name is None:
                    name = "input"
                # The placeholder is named after the tensor as it was passed in,
                # i.e. before the conversion to the meta device below.
                r._node = graph.create_node(
                    "placeholder", "placeholder", (graph._root,), name=namespace.create_name(name, tensor)
                )
            # ...the real tensor is held as an element on the tensor.
            if not r._tensor.is_meta:
                r._tensor = r._tensor.to(torch.device("meta"))
            return r

        @classmethod
        def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
            # The signature allows ``kwargs=None``; normalise it so the membership
            # test and the ``**kwargs`` expansion below cannot fail on ``None``.
            kwargs = {} if kwargs is None else kwargs

            # NOTE: ``fake_device`` is assigned inside this method, so it is a
            # local of ``__torch_dispatch__`` (shared with the nested helpers via
            # ``nonlocal``), not the ``fake_device`` parameter of ``meta_trace``.
            # Give it a defined value so ``wrap`` can always read it.
            fake_device = None

            def unwrap(x):
                nonlocal fake_device
                if isinstance(x, MetaProxy):
                    fake_device = x.device
                    x = x._tensor
                    # assert not isinstance(x, MetaProxy)
                elif isinstance(x, torch.Tensor):
                    fake_device = x.device
                    x = x.to(torch.device("meta"))
                return x

            def get_node(x):
                # A tensor without a ``_node`` has not been seen by the tracer yet;
                # register it as a placeholder named "weight".
                if isinstance(x, torch.Tensor) and not hasattr(x, "_node"):
                    x = MetaProxy(x, placeholder=True, name="weight")
                return x if not hasattr(x, "_node") else x._node

            args_node = tree_map(get_node, args)
            kwargs_node = tree_map(get_node, kwargs)
            node = graph.create_node("call_function", func, args_node, kwargs_node)

            # Remember an explicitly requested device and run the op on meta instead.
            requested_device = None
            if "device" in kwargs:
                requested_device = kwargs["device"]
                kwargs["device"] = torch.device("meta")

            args = tree_map(unwrap, args)
            kwargs = tree_map(unwrap, kwargs)

            # ``unwrap`` overwrites ``fake_device`` with the device of each tensor
            # argument, so the explicit ``device=`` request is applied afterwards
            # to make sure it is the one the outputs report.
            if requested_device is not None:
                fake_device = requested_device

            # run aten for backend=CPU but actually on backend=Meta
            out = func(*args, **kwargs)

            # Now, we want to continue propagating this tensor, so we rewrap Tensors in
            # our custom tensor subclass
            def wrap(x):
                if not isinstance(x, torch.Tensor):
                    return x
                if not x.is_meta:
                    x = x.to(torch.device("meta"))
                return MetaProxy(x, fake_device=fake_device) if not hasattr(x, "_tensor") else x

            def set_node(x):
                # Outputs may contain non-tensor leaves (e.g. ``None`` or Python
                # scalars); only tensors can carry the producing node.
                if isinstance(x, torch.Tensor):
                    x._node = node

            out = tree_map(wrap, out)
            tree_map(set_node, out)

            return out

    def wrap(x):
        return MetaProxy(x, fake_device=fake_device, placeholder=True) if isinstance(x, torch.Tensor) else x

    args = tree_map(wrap, args)
    kwargs = tree_map(wrap, kwargs)

    out = module(*args, **kwargs)

    for tensor in normalize_tuple(out):
        if is_autogradable(tensor) and tensor.requires_grad:
            # Seed the backward pass with an uninitialised meta gradient shaped
            # like the output; it is registered as a placeholder of its own.
            grad = (
                torch.empty_like(tensor._tensor, device=torch.device("meta"))
                if isinstance(tensor, MetaProxy)
                else torch.empty_like(tensor, device=torch.device("meta"))
            )
            torch.autograd.backward(
                tensor, MetaProxy(grad, fake_device=tensor.device, placeholder=True), retain_graph=True
            )
    return graph
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`import torch`
[fx] refactor memory utils and extend shard utils. (#1754) * [fx] change memory.py to memory_utils.py. * [fx] add shard utils. * [fx] fix import. * [fx] check code style. * [fx] add comment. * [autoparallel] first move. * [fx] add time computations. 2 years ago			`from torch.fx import Graph, Node`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`from torch.utils._pytree import tree_map`


[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`def normalize_tuple(x):`
			`if not isinstance(x, tuple):`
			`return (x,)`
			`return x`


			`def is_autogradable(x):`
			`return isinstance(x, torch.Tensor) and x.is_floating_point()`


			`def meta_trace(module: torch.nn.Module, fake_device=None, *args, **kwargs) -> Graph:`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`"""Trace forward and backward graph with MetaTensor`

			`Args:`
			`module (torch.nn.Module): The target module for tracing.`

			`Returns:`
			`graph (torch.fx.Graph): The computation graph.`

			`Usage:`
			`>>> import torchvision.models as tm`
			`>>> model = tm.alexnet()`
			`>>> graph = meta_trace(model, torch.rand(1000, 3, 224, 224))`
			`>>> graph.print_tabular()`
			`"""`
			`graph = Graph()`
[hotfix] change namespace for meta_trace. (#1541) 2 years ago			`namespace = graph._graph_namespace`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago
			`class MetaProxy(torch.Tensor):`
			`"""`
			A wrapping tensor that hacks `torch.autograd` without patching more `torch.ops.aten` ops.
			`"""`

			`_tensor: torch.Tensor`
			`_node: Node`

[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`__slots__ = ["_tensor", "_node"]`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago
			`@staticmethod`
[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`def __new__(cls, tensor, fake_device=None, placeholder=False, name=None):`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`r = torch.Tensor._make_wrapper_subclass(`
			`cls,`
			`tensor.size(),`
			`strides=tensor.stride(),`
			`storage_offset=tensor.storage_offset(),`
			`dtype=tensor.dtype,`
			`layout=tensor.layout,`
[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`device=fake_device if fake_device is not None else tensor.device,`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`requires_grad=tensor.requires_grad,`
			`) # deceive the frontend for aten selections`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`r._tensor = tensor`
			`if placeholder:`
			`if name is None:`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`name = "input"`
			`r._node = graph.create_node(`
			`"placeholder", "placeholder", (graph._root,), name=namespace.create_name(name, tensor)`
			`)`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`# ...the real tensor is held as an element on the tensor.`
[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`if not r._tensor.is_meta:`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`r._tensor = r._tensor.to(torch.device("meta"))`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`return r`

			`@classmethod`
			`def __torch_dispatch__(cls, func, types, args=(), kwargs=None):`
			`def unwrap(x):`
[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`nonlocal fake_device`
			`if isinstance(x, MetaProxy):`
			`fake_device = x.device`
			`x = x._tensor`
			`# assert not isinstance(x, MetaProxy)`
			`elif isinstance(x, torch.Tensor):`
			`fake_device = x.device`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`x = x.to(torch.device("meta"))`
[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`return x`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago
			`def get_node(x):`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`if isinstance(x, torch.Tensor) and not hasattr(x, "_node"):`
			`x = MetaProxy(x, placeholder=True, name="weight")`
			`return x if not hasattr(x, "_node") else x._node`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago
			`args_node = tree_map(get_node, args)`
			`kwargs_node = tree_map(get_node, kwargs)`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`node = graph.create_node("call_function", func, args_node, kwargs_node)`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`if "device" in kwargs:`
			`fake_device = kwargs["device"]`
			`kwargs["device"] = torch.device("meta")`
[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`args = tree_map(unwrap, args)`
			`kwargs = tree_map(unwrap, kwargs)`

			`# run aten for backend=CPU but actually on backend=Meta`
			`out = func(*args, **kwargs)`

			`# Now, we want to continue propagating this tensor, so we rewrap Tensors in`
			`# our custom tensor subclass`
			`def wrap(x):`
[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`if isinstance(x, torch.Tensor):`
			`nonlocal fake_device`
			`if not x.is_meta:`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`x = x.to(torch.device("meta"))`
			`return (`
			`MetaProxy(x, fake_device=fake_device)`
			`if isinstance(x, torch.Tensor) and not hasattr(x, "_tensor")`
			`else x`
			`)`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago
			`def set_node(x):`
			`x._node = node`

			`out = tree_map(wrap, out)`
			`tree_map(set_node, out)`

			`return out`

			`def wrap(x):`
[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`return MetaProxy(x, fake_device=fake_device, placeholder=True) if isinstance(x, torch.Tensor) else x`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago
			`args = tree_map(wrap, args)`
			`kwargs = tree_map(wrap, kwargs)`

[fx/profiler] tuned the calculation of memory estimation (#1619) * [fx] tuned the meta info and rotor solver. * [fx] remove import. * [fx] remove import. * [fx] remove import. * [fx] tune the meta calculations. * [fx] polish comments. * [fx] remove assertions. * [fx] modify test cases. * [fx] modify test cases. * [fx] optimize import. * [fx 2 years ago			`out = module(*args, **kwargs)`

			`for tensor in normalize_tuple(out):`
			`if is_autogradable(tensor) and tensor.requires_grad:`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 1 year ago			`grad = (`
			`torch.empty_like(tensor._tensor, device=torch.device("meta"))`
			`if isinstance(tensor, MetaProxy)`
			`else torch.empty_like(tensor, device=torch.device("meta"))`
			`)`
			`torch.autograd.backward(`
			`tensor, MetaProxy(grad, fake_device=tensor.device, placeholder=True), retain_graph=True`
			`)`
[fx] support meta tracing for aten level computation graphs like functorch. (#1536) * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] support meta tracing for aten level computation graphs like functorch. * [fx] remove redundant import. * [fx] add docstring. 2 years ago			`return graph`