ColossalAI/colossalai/utils/model/colo_init_context.py

from .utils import InsertPostInitMethodToModuleSubClasses
import torch
from colossalai.tensor import ColoTensor, ColoParameter
import types

from torch import nn
from typing import Iterator, Tuple, Union, Optional

# Adapted from torch.nn.module.Module.register_param
def _register_parameter_with_colotensor(self, name: str, param):
    if '_parameters' not in self.__dict__:
        raise AttributeError(
            "cannot assign parameter before Module.__init__() call")

    if not isinstance(name, torch._six.string_classes):
        raise TypeError("parameter name should be a string. "
                        "Got {}".format(torch.typename(name)))
    if '.' in name:
        raise KeyError("parameter name can't contain \".\"")
    if name == '':
        raise KeyError("parameter name can't be empty string \"\"")
    if hasattr(self, name) and name not in self._parameters:
        raise KeyError("attribute '{}' already exists".format(name))

    if param is None:
        self._parameters[name] = None
    elif not isinstance(param, (torch.nn.Parameter, ColoParameter)):
        raise TypeError("cannot assign '{}' object to parameter '{}' "
                        "(torch.nn.Parameter or ColoParameter or None required)"
                        .format(torch.typename(param), name))
    elif param.grad_fn:
        raise ValueError(
            "Cannot assign non-leaf Tensor to parameter '{0}'. Model "
            "parameters must be created explicitly. To express '{0}' "
            "as a function of another Tensor, compute the value in "
            "the forward() method.".format(name))
    else:
        self._parameters[name] = param

# Adapted from torch.nn.module.Module.__setattr__
def _setattr_with_colotensor(self, name: str, value: Union[torch.Tensor, torch.nn.Module, ColoTensor]):
    """Attribute setter that routes ``ColoTensor`` values to parameter registration.

    Dispatch order:

    1. ``ColoTensor`` / ``torch.nn.Parameter`` values are registered through
       ``self.register_parameter`` after ``name`` is removed from every other
       bookkeeping container.
    2. A name that is already a registered parameter may only be reset to
       ``None``.
    3. ``torch.nn.Module`` values are stored in ``self._modules``.
    4. A name that is already a child module may only be reset to ``None``.
    5. A name that is already a buffer accepts a ``torch.Tensor`` or ``None``.
    6. Anything else falls back to ``object.__setattr__``.

    Raises:
        AttributeError: a parameter or module is assigned before the
            corresponding ``_parameters`` / ``_modules`` container exists.
        TypeError: an existing parameter, child module or buffer slot is
            overwritten with a value of the wrong kind.
    """

    def _purge(*containers):
        # Forget ``name`` in every dict or set that currently holds it.
        for container in containers:
            if name not in container:
                continue
            if isinstance(container, dict):
                del container[name]
            else:
                container.discard(name)

    registered_params = self.__dict__.get('_parameters')

    # 1. Parameter-like value.
    if isinstance(value, (ColoTensor, torch.nn.Parameter)):
        if registered_params is None:
            raise AttributeError(
                "cannot assign parameters before Module.__init__() call")
        _purge(self.__dict__, self._buffers, self._modules, self._non_persistent_buffers_set)
        self.register_parameter(name, value)
        return

    # 2. Existing parameter slot: only clearing with None is permitted.
    if registered_params is not None and name in registered_params:
        if value is not None:
            raise TypeError("cannot assign '{}' as parameter '{}' "
                            "(torch.nn.Parameter or None expected)"
                            .format(torch.typename(value), name))
        self.register_parameter(name, value)
        return

    child_modules = self.__dict__.get('_modules')

    # 3. Child module value.
    if isinstance(value, torch.nn.Module):
        if child_modules is None:
            raise AttributeError(
                "cannot assign module before Module.__init__() call")
        _purge(self.__dict__, self._parameters, self._buffers, self._non_persistent_buffers_set)
        child_modules[name] = value
        return

    # 4. Existing child-module slot: only clearing with None is permitted.
    if child_modules is not None and name in child_modules:
        if value is not None:
            raise TypeError("cannot assign '{}' as child module '{}' "
                            "(torch.nn.Module or None expected)"
                            .format(torch.typename(value), name))
        child_modules[name] = value
        return

    known_buffers = self.__dict__.get('_buffers')

    # 6. Not a buffer either: plain attribute.
    if known_buffers is None or name not in known_buffers:
        object.__setattr__(self, name, value)
        return

    # 5. Existing buffer slot.
    if value is not None and not isinstance(value, torch.Tensor):
        raise TypeError("cannot assign '{}' as buffer '{}' "
                        "(torch.Tensor or None expected)"
                        .format(torch.typename(value), name))
    known_buffers[name] = value

def ColoModulize(module):
    """Swap ``module.parameters`` / ``module.named_parameters`` for unwrapping versions.

    The replacements iterate the original methods and yield
    ``p.torch_tensor()`` for every ``ColoTensor``, the tensor itself for any
    other ``torch.Tensor``, and skip everything else.

    The original bound methods stay reachable as ``old_parameters`` /
    ``old_named_parameters`` and under the aliases ``colo_parameters`` /
    ``colo_named_parameters``. ``module._colo_visited`` is set to ``True``.
    """

    def fake_parameters(self, *args, **kargs):
        for tensor in module.old_parameters(*args, **kargs):
            if isinstance(tensor, ColoTensor):
                tensor = tensor.torch_tensor()
            elif not isinstance(tensor, torch.Tensor):
                continue
            yield tensor

    def fake_named_parameters(self, *args, **kargs):
        for key, tensor in module.old_named_parameters(*args, **kargs):
            if isinstance(tensor, ColoTensor):
                tensor = tensor.torch_tensor()
            elif not isinstance(tensor, torch.Tensor):
                continue
            yield key, tensor

    # Capture the current accessors before they are overridden below.
    original_named = module.named_parameters
    original_plain = module.parameters
    module.old_named_parameters = original_named
    module.old_parameters = original_plain

    module.parameters = types.MethodType(fake_parameters, module)
    module.named_parameters = types.MethodType(fake_named_parameters, module)

    # Aliases that give access to the un-unwrapped iterators.
    module.colo_parameters = module.old_parameters
    module.colo_named_parameters = module.old_named_parameters
    module._colo_visited = True

class ColoInitContext(InsertPostInitMethodToModuleSubClasses):
    """Replace the direct parameters of each constructed module with ``ColoParameter``.

    ``_post_init_method`` is the per-module hook; when and how it is invoked
    is decided by the base class, which is not shown here.
    """

    def __init__(self, lazy_memory_allocate: bool = False, device: torch.device = torch.device('cpu')):
        """
        Args:
            lazy_memory_allocate (bool, optional): when True, ``ColoParameter`` objects are created
                with ``save_payload=False``. Defaults to False.
            device (torch.device, optional): device every parameter is moved to before it is wrapped.
                Defaults to torch.device('cpu').
        """
        super().__init__()
        self._lazy_memory_allocate = lazy_memory_allocate
        self._device = device

        # Make every torch.nn.Module accept ColoTensor/ColoParameter attributes.
        # NOTE(review): these class-level patches are installed at construction
        # time and nothing in this class restores them; confirm whether the
        # base class does.
        torch.nn.Module.__setattr__ = _setattr_with_colotensor
        torch.nn.Module.register_parameter = _register_parameter_with_colotensor

    def _post_init_method(self, module: torch.nn.Module, *args, **kwargs):
        """
        Hook run at the end of each module's constructor.

        Converts every non-``ColoTensor`` parameter owned directly by ``module``
        into a ``ColoParameter`` and then applies ``ColoModulize``. A module that
        already has a ``_colo_visited`` attribute is left untouched.
        FIXME(fjr) The module may be passed to this function multiple times?
        """

        if hasattr(module, '_colo_visited'):
            return

        # Snapshot first: the attributes are deleted and re-set in the loop below.
        pending = [(param_name, torch_param)
                   for param_name, torch_param in module.named_parameters(recurse=False)
                   if not isinstance(torch_param, ColoTensor)]

        keep_payload = not self._lazy_memory_allocate
        for param_name, torch_param in pending:
            delattr(module, param_name)

            # detaching tensor is necessary for optimizers.
            wants_grad = torch_param.requires_grad
            detached = torch_param.to(self._device).detach()
            detached.requires_grad = wants_grad

            setattr(module, param_name,
                    ColoParameter.init_from_torch_tensor(tensor=detached, save_payload=keep_payload))

        ColoModulize(module)
Init Conext supports lazy allocate model memory (#842) 3 years ago			`from .utils import InsertPostInitMethodToModuleSubClasses`
			`import torch`
[Tensor] init ColoParameter (#914) 3 years ago			`from colossalai.tensor import ColoTensor, ColoParameter`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`import types`
Init Conext supports lazy allocate model memory (#842) 3 years ago
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`from torch import nn`
[Tensor] add from_pretrained support and bert pretrained test (#921) * add from_pretrained support and test * polish * polish * polish * polish 3 years ago			`from typing import Iterator, Tuple, Union, Optional`

			`# Adapted from torch.nn.module.Module.register_param`
			`def _register_parameter_with_colotensor(self, name: str, param):`
			`if '_parameters' not in self.__dict__:`
			`raise AttributeError(`
			`"cannot assign parameter before Module.__init__() call")`

			`if not isinstance(name, torch._six.string_classes):`
			`raise TypeError("parameter name should be a string. "`
			`"Got {}".format(torch.typename(name)))`
			`if '.' in name:`
			`raise KeyError("parameter name can't contain \".\"")`
			`if name == '':`
			`raise KeyError("parameter name can't be empty string \"\"")`
			`if hasattr(self, name) and name not in self._parameters:`
			`raise KeyError("attribute '{}' already exists".format(name))`

			`if param is None:`
			`self._parameters[name] = None`
			`elif not isinstance(param, (torch.nn.Parameter, ColoParameter)):`
			`raise TypeError("cannot assign '{}' object to parameter '{}' "`
			`"(torch.nn.Parameter or ColoParameter or None required)"`
			`.format(torch.typename(param), name))`
			`elif param.grad_fn:`
			`raise ValueError(`
			`"Cannot assign non-leaf Tensor to parameter '{0}'. Model "`
			`"parameters must be created explicitly. To express '{0}' "`
			`"as a function of another Tensor, compute the value in "`
			`"the forward() method.".format(name))`
			`else:`
			`self._parameters[name] = param`

			`# Adapted from torch.nn.module.Module.__setattr__`
			`def _setattr_with_colotensor(self, name: str, value: Union[torch.Tensor, torch.nn.Module, ColoTensor]):`
			`def remove_from(*dicts_or_sets):`
			`for d in dicts_or_sets:`
			`if name in d:`
			`if isinstance(d, dict):`
			`del d[name]`
			`else:`
			`d.discard(name)`

			`params = self.__dict__.get('_parameters')`
			`if isinstance(value, (ColoTensor, torch.nn.Parameter)):`
			`if params is None:`
			`raise AttributeError(`
			`"cannot assign parameters before Module.__init__() call")`
			`remove_from(self.__dict__, self._buffers, self._modules, self._non_persistent_buffers_set)`
			`self.register_parameter(name, value)`
			`elif params is not None and name in params:`
			`if value is not None:`
			`raise TypeError("cannot assign '{}' as parameter '{}' "`
			`"(torch.nn.Parameter or None expected)"`
			`.format(torch.typename(value), name))`
			`self.register_parameter(name, value)`
			`else:`
			`modules = self.__dict__.get('_modules')`
			`if isinstance(value, torch.nn.Module):`
			`if modules is None:`
			`raise AttributeError(`
			`"cannot assign module before Module.__init__() call")`
			`remove_from(self.__dict__, self._parameters, self._buffers, self._non_persistent_buffers_set)`
			`modules[name] = value`
			`elif modules is not None and name in modules:`
			`if value is not None:`
			`raise TypeError("cannot assign '{}' as child module '{}' "`
			`"(torch.nn.Module or None expected)"`
			`.format(torch.typename(value), name))`
			`modules[name] = value`
			`else:`
			`buffers = self.__dict__.get('_buffers')`
			`if buffers is not None and name in buffers:`
			`if value is not None and not isinstance(value, torch.Tensor):`
			`raise TypeError("cannot assign '{}' as buffer '{}' "`
			`"(torch.Tensor or None expected)"`
			`.format(torch.typename(value), name))`
			`buffers[name] = value`
			`else:`
			`object.__setattr__(self, name, value)`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago
			`def ColoModulize(module):`
			`"""`
			`Replacing the parameters() and named_parameters() with our customized ones`
			`"""`

			`def fake_parameters(self, *args, **kargs):`
[Tensor] simplify named param (#928) * simplify ColoModulize * simplify ColoModulize * polish * polish 3 years ago			`for p in module.old_parameters(*args, **kargs):`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`if isinstance(p, ColoTensor):`
			`yield p.torch_tensor()`
			`elif isinstance(p, torch.Tensor):`
			`yield p`

			`def fake_named_parameters(self, *args, **kargs):`
[Tensor] simplify named param (#928) * simplify ColoModulize * simplify ColoModulize * polish * polish 3 years ago			`for name, p in module.old_named_parameters(*args, **kargs):`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`if isinstance(p, ColoTensor):`
			`yield name, p.torch_tensor()`
			`elif isinstance(p, torch.Tensor):`
			`yield name, p`

			`module.old_named_parameters = module.named_parameters`
			`module.old_parameters = module.parameters`

			`funcType = types.MethodType`
			`module.parameters = funcType(fake_parameters, module)`
			`module.named_parameters = funcType(fake_named_parameters, module)`
[Tensor] simplify named param (#928) * simplify ColoModulize * simplify ColoModulize * polish * polish 3 years ago			`module.colo_parameters = module.old_parameters`
			`module.colo_named_parameters = module.old_named_parameters`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`module._colo_visited = True`
Init Conext supports lazy allocate model memory (#842) 3 years ago
			`class ColoInitContext(InsertPostInitMethodToModuleSubClasses):`

colo init context add device attr. (#866) 3 years ago			`def __init__(self, lazy_memory_allocate: bool = False, device: torch.device = torch.device('cpu')):`
			`"""`
			`Args:`
			`lazy_memory_allocate (bool, optional): whether to allocate memory for the parameter tensors. Defaults to False.`
			`device (torch.device, optional): the device parameters initialized are resident on. Defaults to torch.device('cpu').`
			`"""`
Init Conext supports lazy allocate model memory (#842) 3 years ago			`super().__init__()`
			`self._lazy_memory_allocate = lazy_memory_allocate`
colo init context add device attr. (#866) 3 years ago			`self._device = device`
Init Conext supports lazy allocate model memory (#842) 3 years ago
[Tensor] add from_pretrained support and bert pretrained test (#921) * add from_pretrained support and test * polish * polish * polish * polish 3 years ago			`torch.nn.Module.__setattr__ = _setattr_with_colotensor`
			`torch.nn.Module.register_parameter = _register_parameter_with_colotensor`

[hotfix] add deconstructor for stateful tensor (#848) * add deconstructor for stateful tensor * fix colo init context 3 years ago			`def _post_init_method(self, module: torch.nn.Module, *args, **kwargs):`
Init Conext supports lazy allocate model memory (#842) 3 years ago			`"""`
			`The function to call at the end of the constructor of each module.`
			`FIXME(fjr) The module may be passed to this function multiple times?`
			`"""`
[Tensor] add from_pretrained support and bert pretrained test (#921) * add from_pretrained support and test * polish * polish * polish * polish 3 years ago
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`if hasattr(module, '_colo_visited'):`
			`return`

Init Conext supports lazy allocate model memory (#842) 3 years ago			`name_list = []`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago			`for name, param in module.named_parameters(recurse=False):`
Init Conext supports lazy allocate model memory (#842) 3 years ago			`if isinstance(param, ColoTensor):`
			`continue`
			`name_list.append((name, param))`

			`save_torch_payload = True if not self._lazy_memory_allocate else False`
			`for name, param in name_list:`
			`delattr(module, name)`
[Tensor] initialize the ColoOptimizer (#898) * [Tensor] activation is an attr of ColoTensor * [Tensor] add optimizer * only detach parameters in context * polish code 3 years ago
			`# detaching tensor is necessary for optimizers.`
			`requires_grad = param.requires_grad`
			`tensor_detached = param.to(self._device).detach()`
			`tensor_detached.requires_grad = requires_grad`

[Tensor] add from_pretrained support and bert pretrained test (#921) * add from_pretrained support and test * polish * polish * polish * polish 3 years ago			`colo_param = ColoParameter.init_from_torch_tensor(tensor=tensor_detached, save_payload=save_torch_payload)`
			`setattr(module, name, colo_param)`
[Tensor] overriding paramters() for Module using ColoTensor (#889) 3 years ago
[Tensor] add from_pretrained support and bert pretrained test (#921) * add from_pretrained support and test * polish * polish * polish * polish 3 years ago			`ColoModulize(module)`