# ColossalAI/colossalai/utils/model/colo_init_context.py

import types
from typing import Iterator, Tuple, Union

import torch
from torch import nn

from colossalai.tensor import ColoTensor

from .utils import InsertPostInitMethodToModuleSubClasses

def ColoModulize(module):
    """
    Replace the parameters() and named_parameters() of the module with our customized,
    ColoTensor-aware versions. The originals remain available as old_parameters() and
    old_named_parameters(), while colo_parameters() and colo_named_parameters() yield
    the ColoTensor objects themselves.
    """

    def named_params_with_colotensor(
        module: nn.Module,
        prefix: str = '',
        recurse: bool = True,
    ) -> Iterator[Tuple[str, Union[nn.Parameter, ColoTensor]]]:
        modules = module.named_modules(prefix=prefix) if recurse else [(prefix, module)]

        memo = set()
        for mod_prefix, mod in modules:
            # find all ColoTensor params attached directly to each submodule
            for name, val in vars(mod).items():
                if isinstance(val, ColoTensor) and val not in memo:
                    memo.add(val)
                    name = mod_prefix + ('.' if mod_prefix else '') + name
                    yield name, val

        # find all remaining nn.Parameters via the original named_parameters()
        for name, val in module.old_named_parameters(recurse=recurse):
            yield name, val

    def fake_parameters(self, *args, **kwargs):
        # yield the raw torch.Tensor payload for ColoTensors, plain tensors otherwise
        for name, p in named_params_with_colotensor(self, *args, **kwargs):
            if isinstance(p, ColoTensor):
                yield p.torch_tensor()
            elif isinstance(p, torch.Tensor):
                yield p

    def fake_named_parameters(self, *args, **kwargs):
        for name, p in named_params_with_colotensor(self, *args, **kwargs):
            if isinstance(p, ColoTensor):
                yield name, p.torch_tensor()
            elif isinstance(p, torch.Tensor):
                yield name, p

    def colo_parameters(self, *args, **kwargs):
        for _, p in named_params_with_colotensor(self, *args, **kwargs):
            yield p

    def colo_named_parameters(self, *args, **kwargs):
        for name, p in named_params_with_colotensor(self, *args, **kwargs):
            yield name, p

    # keep the original accessors and install the patched ones as bound methods
    module.old_named_parameters = module.named_parameters
    module.old_parameters = module.parameters

    funcType = types.MethodType
    module.parameters = funcType(fake_parameters, module)
    module.named_parameters = funcType(fake_named_parameters, module)
    module.colo_parameters = funcType(colo_parameters, module)
    module.colo_named_parameters = funcType(colo_named_parameters, module)
    module._colo_visited = True
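
# A minimal sketch of the behaviour installed above, assuming the module holds
# ColoTensor attributes created by ColoInitContext below (illustrative, not executed):
#
#   module.parameters() / module.named_parameters()
#       -> yield raw torch.Tensor payloads (ColoTensor.torch_tensor() for ColoTensors)
#   module.colo_parameters() / module.colo_named_parameters()
#       -> yield the ColoTensor objects themselves, plus any remaining nn.Parameters
#   module.old_parameters() / module.old_named_parameters()
#       -> the original, unpatched nn.Module accessors
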
class ColoInitContext(InsertPostInitMethodToModuleSubClasses):

    def __init__(self, lazy_memory_allocate: bool = False, device: torch.device = torch.device('cpu')):
        """
        Args:
            lazy_memory_allocate (bool, optional): whether to delay allocating memory for the
                parameter payloads. If False (the default), the torch payload is saved immediately.
            device (torch.device, optional): the device on which initialized parameters are placed.
                Defaults to torch.device('cpu').
        """
        super().__init__()
        self._lazy_memory_allocate = lazy_memory_allocate
        self._device = device

    def _post_init_method(self, module: torch.nn.Module, *args, **kwargs):
        """
        The function to call at the end of the constructor of each module.
        FIXME(fjr) The module may be passed to this function multiple times?
        """
        if hasattr(module, '_colo_visited'):
            return

        # collect parameters that have not been converted to ColoTensor yet
        name_list = []
        for name, param in module.named_parameters(recurse=False):
            if isinstance(param, ColoTensor):
                continue
            name_list.append((name, param))

        save_torch_payload = not self._lazy_memory_allocate
        for name, param in name_list:
            delattr(module, name)

            # detaching the tensor is necessary for optimizers.
            requires_grad = param.requires_grad
            tensor_detached = param.to(self._device).detach()
            tensor_detached.requires_grad = requires_grad

            setattr(
                module, name,
                ColoTensor.init_from_torch_tensor(tensor=tensor_detached,
                                                  save_payload=save_torch_payload,
                                                  is_model_data=True))

        ColoModulize(module)
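

if __name__ == '__main__':
    # A minimal usage sketch with a hypothetical toy model. It assumes that the base
    # class InsertPostInitMethodToModuleSubClasses implements the context-manager
    # protocol (__enter__/__exit__), so that modules constructed inside the `with`
    # block have their parameters converted to ColoTensors by _post_init_method.
    with ColoInitContext(device=torch.device('cpu')):
        model = nn.Linear(8, 8)

    # The patched accessors installed by ColoModulize expose the parameters two ways:
    for name, p in model.colo_named_parameters():
        print(name, type(p))    # ColoTensor objects
    for p in model.parameters():
        print(type(p))          # raw torch.Tensor payloads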