ColossalAI/colossalai/utils/model/colo_init_context.py

from .utils import InsertPostInitMethodToModuleSubClasses
import torch
from colossalai.tensor import ColoTensor, ColoParameter, register_colo_module, init_colo_module, \
    ColoLinear, ColoEmbedding
import types

from torch import nn
from typing import Iterator, Tuple, Union, Optional

# Enumerate named parameters, yielding a shared parameter once for every module that holds it.


def _named_params_with_replica(
    module: nn.Module,
    prefix: str = '',
    recurse: bool = True,
) -> Iterator[Tuple[str, Union[nn.Parameter, ColoTensor]]]:
    """
    Yield ``(qualified_name, parameter)`` for every non-``None`` parameter slot.

    The walk goes module by module and reads each module's own ``_parameters``
    dict, without de-duplicating by parameter identity. A parameter object that
    is registered on several modules is therefore yielded once per owner.

    Args:
        module (nn.Module): root module to inspect.
        prefix (str, optional): string prepended (dot-separated) to every name. Defaults to ''.
        recurse (bool, optional): if True, visit all modules returned by
            ``module.named_modules``; otherwise only ``module`` itself. Defaults to True.

    Yields:
        Tuple[str, parameter]: the dotted name and the parameter stored under it.
    """
    if recurse:
        owners = module.named_modules(prefix=prefix)
    else:
        owners = [(prefix, module)]

    for owner_name, owner in owners:
        for local_name, param in owner._parameters.items():
            # Slots registered as None (e.g. a disabled bias) are not reported.
            if param is not None:
                qualified = f'{owner_name}.{local_name}' if owner_name else local_name
                yield qualified, param

def ColoModulize(module):
    """
    Mark ``module`` as processed by setting its ``_colo_visited`` attribute to ``True``.

    This function only sets that flag; it does not touch ``parameters()`` or
    ``named_parameters()``.
    """
    setattr(module, '_colo_visited', True)


class ColoInitContext(InsertPostInitMethodToModuleSubClasses):
    """
    Init context whose post-init hook swaps a module's parameters for ``ColoParameter``s.

    Constructing the context also registers the Colo module handlers for
    ``torch.nn.Linear`` and ``torch.nn.Embedding``.
    """

    def __init__(self, lazy_memory_allocate: bool = False, device: torch.device = torch.device('cpu')):
        """
        Args:
            lazy_memory_allocate (bool, optional): whether to allocate memory for the parameter tensors. Defaults to False.
                The flag is stored on the instance; the code visible in this class does not otherwise act on it.
            device (torch.device, optional): the device parameters initialized are resident on. Defaults to torch.device('cpu').
        """
        super().__init__()
        self._lazy_memory_allocate = lazy_memory_allocate
        self._device = device

        self._register_colo_modules()

    def _register_colo_modules(self):
        """Register the Colo handlers for ``torch.nn.Linear`` and ``torch.nn.Embedding``."""
        register_colo_module(torch.nn.Linear, ColoLinear())
        register_colo_module(torch.nn.Embedding, ColoEmbedding())

    def _post_init_method(self, module: torch.nn.Module, *args, **kwargs):
        """
        The function to call at the end of the constructor of each module.

        Every parameter under ``module`` that is not already a ``ColoTensor`` is
        moved to ``self._device`` and re-registered as a ``ColoParameter``. A
        parameter object shared by several submodules is converted once and the
        same ``ColoParameter`` is installed on each of them. Modules already
        carrying ``_colo_visited`` are left untouched.

        Args:
            module (torch.nn.Module): the module whose constructor just finished.
            *args, **kwargs: accepted for hook compatibility; unused here.

        FIXME(fjr) The module may be passed to this function multiple times?
        """

        if hasattr(module, '_colo_visited'):
            return

        # First pass: record where each plain parameter lives. Replacement is
        # deferred to a second pass, presumably so the per-module parameter
        # dicts are not modified while they are being iterated.
        name_list = []
        for name, param in _named_params_with_replica(module):
            if isinstance(param, ColoTensor):
                continue

            # 'a.b.weight' -> ('a.b', 'weight'); 'weight' -> ('', 'weight'),
            # where '' addresses `module` itself in get_submodule().
            module_name, _, param_name = name.rpartition('.')
            name_list.append((module_name, param_name))

        # Maps each original parameter to its ColoParameter so that shared
        # references keep pointing at one common object after conversion.
        replaced_tensors = {}
        for module_name, param_name in name_list:
            submodule = module.get_submodule(module_name)
            param = submodule.get_parameter(param_name)
            if param in replaced_tensors:
                colo_param = replaced_tensors[param]
            else:
                colo_param = ColoParameter(param.to(self._device), requires_grad=param.requires_grad)
                # add mapping record
                replaced_tensors[param] = colo_param
            delattr(submodule, param_name)
            setattr(submodule, param_name, colo_param)
            colo_param.shared_param_modules.append(submodule)

        ColoModulize(module)
Init Conext supports lazy allocate model memory (#842) 3 years ago			`from .utils import InsertPostInitMethodToModuleSubClasses`
			`import torch`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`from colossalai.tensor import ColoTensor, ColoParameter, register_colo_module, init_colo_module, \`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`ColoLinear, ColoEmbedding`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`import types`
Init Conext supports lazy allocate model memory (#842) 3 years ago
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`from torch import nn`
[Tensor] add from_pretrained support and bert pretrained test (#921) * add from_pretrained support and test * polish * polish * polish * polish 3 years ago			`from typing import Iterator, Tuple, Union, Optional`

[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`# find named_params includes replica`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago

[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`def _named_params_with_replica(`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`module: nn.Module,`
			`prefix: str = '',`
			`recurse: bool = True,`
			`) -> Iterator[Tuple[str, Union[nn.Parameter, ColoTensor]]]:`
[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`modules = module.named_modules(prefix=prefix) if recurse else [(prefix, module)]`

			`for mod_prefix, mod in modules:`
			`for name, val in mod._parameters.items():`
			`if val is None:`
			`continue`
			`name = mod_prefix + ('.' if mod_prefix else '') + name`
			`yield name, val`

[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`def ColoModulize(module):`
			`"""`
			`Replacing the parameters() and named_parameters() with our customized ones`
			`"""`

			`module._colo_visited = True`
Init Conext supports lazy allocate model memory (#842) 3 years ago
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago
Init Conext supports lazy allocate model memory (#842) 3 years ago			`class ColoInitContext(InsertPostInitMethodToModuleSubClasses):`

colo init context add device attr. (#866) 3 years ago			`def __init__(self, lazy_memory_allocate: bool = False, device: torch.device = torch.device('cpu')):`
			`"""`
			`Args:`
			`lazy_memory_allocate (bool, optional): whether to allocate memory for the parameter tensors. Defaults to False.`
			`device (torch.device, optional): the device parameters initialized are resident on. Defaults to torch.device('cpu').`
			`"""`
Init Conext supports lazy allocate model memory (#842) 3 years ago			`super().__init__()`
			`self._lazy_memory_allocate = lazy_memory_allocate`
colo init context add device attr. (#866) 3 years ago			`self._device = device`
Init Conext supports lazy allocate model memory (#842) 3 years ago
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`self._register_colo_modules()`

			`def _register_colo_modules(self):`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`register_colo_module(torch.nn.Linear, ColoLinear())`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`register_colo_module(torch.nn.Embedding, ColoEmbedding())`
[Tensor] add from_pretrained support and bert pretrained test (#921) * add from_pretrained support and test * polish * polish * polish * polish 3 years ago
[hotfix] add deconstructor for stateful tensor (#848) * add deconstructor for stateful tensor * fix colo init context 3 years ago			`def _post_init_method(self, module: torch.nn.Module, *args, **kwargs):`
Init Conext supports lazy allocate model memory (#842) 3 years ago			`"""`
			`The function to call at the end of the constructor of each module.`
			`FIXME(fjr) The module may be passed to this function multiple times?`
			`"""`
[Tensor] add from_pretrained support and bert pretrained test (#921) * add from_pretrained support and test * polish * polish * polish * polish 3 years ago
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`if hasattr(module, '_colo_visited'):`
			`return`

Init Conext supports lazy allocate model memory (#842) 3 years ago			`name_list = []`
[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`for name, param in _named_params_with_replica(module):`
Init Conext supports lazy allocate model memory (#842) 3 years ago			`if isinstance(param, ColoTensor):`
			`continue`
[Tensor] initialize the ColoOptimizer (#898) * [Tensor] activation is an attr of ColoTensor * [Tensor] add optimizer * only detach parameters in context * polish code 3 years ago
[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`split = name.rfind('.')`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`if split >= 0: # param in submodule`
[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`module_name = name[:split]`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`param_name = name[split + 1:]`
[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`else:`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`module_name = '' # param in current module`
[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`param_name = name`
			`name_list.append((module_name, param_name))`

[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`replaced_tensors = dict(`
			`) # record mapping between (torch.Tensor, ColoTensor) to distinguish the same reference`
[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`for module_name, param_name in name_list:`
			`submodule = module.get_submodule(module_name)`
			`param = submodule.get_parameter(param_name)`
			`if param in replaced_tensors:`
			`colo_param = replaced_tensors[param]`
			`else:`
			`save_torch_payload = True if not self._lazy_memory_allocate else False`
			`# detaching tensor is necessary for optimizers.`
			`requires_grad = param.requires_grad`

[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`colo_param = ColoParameter(param.to(self._device), requires_grad=requires_grad)`
[Tensor] fix init context (#931) * change torch.Parameter to ColoParameter * fix post assignment for init context * polish * polish 3 years ago			`# add mapping record`
			`replaced_tensors[param] = colo_param`
			`delattr(submodule, param_name)`
			`setattr(submodule, param_name, colo_param)`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`colo_param.shared_param_modules.append(submodule)`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`ColoModulize(module)`