# ColossalAI/colossalai/tensor/utils.py

import torch

from typing import Iterator, Tuple, Union
import torch.nn as nn
from colossalai.tensor.colo_tensor import ColoTensor


# The function is credited to PyTorch Team
def named_params_with_colotensor(
    module: nn.Module,
    prefix: str = '',
    recurse: bool = True,
) -> Iterator[Tuple[str, Union[nn.Parameter, ColoTensor]]]:
    r"""Returns an iterator over module parameters (together with the
    ColoTensor parameters), yielding both the name of the parameter
    as well as the parameter itself. This is typically passed to a
    :class:torchshard._shard.sharded_optim.ShardedOptimizer

    ColoTensor attributes are yielded first, then the regular
    ``nn.Parameter`` objects reported by ``module.named_parameters``.
    Both groups honour ``prefix`` and ``recurse``.

    Args:
        module (nn.Module): module whose parameters are enumerated.
        prefix (str): prefix to prepend to all parameter names.
        recurse (bool): if True, then yields parameters of this module
            and all submodules. Otherwise, yields only parameters that
            are direct members of this module.

    Yields:
        (string, Union[Tensor, ColoTensor]): Tuple containing
            the name and parameter (or ColoTensor parameter)

    Example::

        >>> model = torch.nn.Linear(*linear_size)
        >>> delattr(model, 'weight')
        >>> setattr(model, 'weight', ColoTensor(...))
        >>> for name, param in named_params_with_colotensor(model):
        >>>    if name in ['weight']:
        >>>        print(param.size())

    """
    modules = module.named_modules(prefix=prefix) if recurse else [(prefix, module)]

    # Tracks ColoTensors already yielded so that one shared between several
    # modules is reported only once (under the first name encountered).
    memo = set()
    for mod_prefix, mod in modules:
        # ColoTensors are looked up among the module's instance attributes,
        # since named_parameters() only reports nn.Parameter objects.
        for name, val in vars(mod).items():
            if isinstance(val, ColoTensor) and val not in memo:
                memo.add(val)
                name = mod_prefix + ('.' if mod_prefix else '') + name
                yield name, val

    # Regular nn.Parameters. `prefix` and `recurse` are forwarded so these
    # names and this scope stay consistent with the ColoTensor entries above.
    yield from module.named_parameters(prefix=prefix, recurse=recurse)


def _convert_tensor(tensor: torch.Tensor) -> ColoTensor:
    """Wrap ``tensor`` in a ColoTensor by passing it to the ColoTensor constructor."""
    wrapped = ColoTensor(tensor)
    return wrapped


def convert_parameter(module: torch.nn.Module, param_name: str):
    """Replace the tensor attribute ``param_name`` of ``module`` with its converted form.

    The current attribute value is validated, passed through
    ``_convert_tensor`` and the result is stored back on ``module``
    under the same name.

    Args:
        module: module that owns the attribute.
        param_name: name of the attribute to convert.

    Raises:
        ValueError: if ``module`` has no attribute ``param_name``, if the
            attribute is not a ``torch.Tensor``, or if the tensor is not
            contiguous.
    """
    # Validation: the attribute must exist, be a Tensor, and be contiguous.
    if hasattr(module, param_name):
        current = getattr(module, param_name)
    else:
        raise ValueError(f'module: {module} does not have parameter with name: {param_name}')

    if not isinstance(current, torch.Tensor):
        owner_name = type(module).__name__
        found_name = type(current).__name__
        raise ValueError(f'Expected {owner_name}.{param_name} to be a Tensor, but found {found_name}')

    if not current.is_contiguous():
        raise ValueError(f'param: {param_name} is not a contiguous Tensor')

    # Convert before touching the module, so a failed conversion leaves the
    # original attribute in place.
    converted = _convert_tensor(current)

    # The old attribute may be a torch.nn.Parameter, which cannot simply be
    # overwritten by a value that is not a torch.nn.Parameter; remove it
    # first and then bind the converted tensor under the same name.
    delattr(module, param_name)
    setattr(module, param_name, converted)
[gemini] a new tensor structure (#818) * Revert "[zero] add ZeroTensorShardStrategy (#793)" This reverts commit 88759e289efd0a7b5e0d7bf8e01dbe29db85cf71. * [gemini] set cpu memory capacity * [log] local throughput collecting * polish * polish * polish * polish code * polish * polish code * add a new tensor structure and override linear for it * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish 3 years ago			`import torch`

[Tensor] get named parameters for model using ColoTensors (#874) 3 years ago			`from typing import Iterator, Tuple, Union`
			`import torch.nn as nn`
reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago			`from colossalai.tensor.colo_tensor import ColoTensor`
[Tensor] get named parameters for model using ColoTensors (#874) 3 years ago

			`# The function is credited to PyTorch Team`
			`def named_params_with_colotensor(`
			`module: nn.Module,`
			`prefix: str = '',`
			`recurse: bool = True,`
			`) -> Iterator[Tuple[str, Union[nn.Parameter, ColoTensor]]]:`
			`r"""Returns an iterator over module parameters (together with the`
			`ColoTensor parameters), yielding both the name of the parameter`
			`as well as the parameter itself. This is typically passed to a`
			`:class:torchshard._shard.sharded_optim.ShardedOptimizer`

			`Args:`
			`prefix (str): prefix to prepend to all parameter names.`
			`recurse (bool): if True, then yields parameters of this module`
			`and all submodules. Otherwise, yields only parameters that`
			`are direct members of this module.`

			`Yields:`
			`(string, Union[Tensor, ColoTensor]): Tuple containing`
			`the name and parameter (or ColoTensor parameter)`

			`Example::`

			`>>> model = torch.nn.Linear(*linear_size)`
			`>>> delattr(model.weight)`
			`>>> setattr(model.weight, ColoTensor(...))`
			`>>> for name, param in named_params_with_colotensor(model):`
			`>>> if name in ['weight']:`
			`>>> print(param.size())`

			`"""`
			`modules = module.named_modules(prefix=prefix) if recurse else [(prefix, module)]`

			`memo = set()`
			`for mod_prefix, mod in modules:`
			`# find all sharded tensor params`
			`for name, val in vars(mod).items():`
			`if isinstance(val, ColoTensor) and val not in memo:`
			`memo.add(val)`
			`name = mod_prefix + ('.' if mod_prefix else '') + name`
			`yield name, val`

			`# find all nn.Parameters`
			`for name, val in module.named_parameters():`
			`yield name, val`

[gemini] a new tensor structure (#818) * Revert "[zero] add ZeroTensorShardStrategy (#793)" This reverts commit 88759e289efd0a7b5e0d7bf8e01dbe29db85cf71. * [gemini] set cpu memory capacity * [log] local throughput collecting * polish * polish * polish * polish code * polish * polish code * add a new tensor structure and override linear for it * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish 3 years ago
[tensor] reorganize files (#820) 3 years ago			`def _convert_tensor(tensor: torch.Tensor) -> ColoTensor:`
			`return ColoTensor(tensor)`
[gemini] a new tensor structure (#818) * Revert "[zero] add ZeroTensorShardStrategy (#793)" This reverts commit 88759e289efd0a7b5e0d7bf8e01dbe29db85cf71. * [gemini] set cpu memory capacity * [log] local throughput collecting * polish * polish * polish * polish code * polish * polish code * add a new tensor structure and override linear for it * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish 3 years ago

			`def convert_parameter(module: torch.nn.Module, param_name: str):`
			`# Perform some validation first.`
			`if not hasattr(module, param_name):`
			`raise ValueError(f'module: {module} does not have parameter with name: {param_name}')`

			`tensor = getattr(module, param_name)`
			`if not isinstance(tensor, torch.Tensor):`
			`raise ValueError(`
			`f'Expected {type(module).__name__}.{param_name} to be a Tensor, but found {type(tensor).__name__}')`

			`if not tensor.is_contiguous():`
			`raise ValueError(f'param: {param_name} is not a contiguous Tensor')`

			`st = _convert_tensor(tensor)`

[tensor] reorganize files (#820) 3 years ago			`# Replace param with ColoTensor.`
[gemini] a new tensor structure (#818) * Revert "[zero] add ZeroTensorShardStrategy (#793)" This reverts commit 88759e289efd0a7b5e0d7bf8e01dbe29db85cf71. * [gemini] set cpu memory capacity * [log] local throughput collecting * polish * polish * polish * polish code * polish * polish code * add a new tensor structure and override linear for it * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish 3 years ago
			`# Need to delete the attribute first since param_name might be`
[tensor] reorganize files (#820) 3 years ago			`# torch.nn.Parameter and can't be replaced with ColoTensor which is`
[gemini] a new tensor structure (#818) * Revert "[zero] add ZeroTensorShardStrategy (#793)" This reverts commit 88759e289efd0a7b5e0d7bf8e01dbe29db85cf71. * [gemini] set cpu memory capacity * [log] local throughput collecting * polish * polish * polish * polish code * polish * polish code * add a new tensor structure and override linear for it * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish 3 years ago			`# not torch.nn.Parameter.`
			`delattr(module, param_name)`

			`# Now we can set the attribute appropriately.`
			`setattr(module, param_name, st)`