ColossalAI/colossalai/tensor/colo_parameter.py

from typing import Optional

import torch

from colossalai.tensor.colo_tensor import ColoTensor
from colossalai.tensor.const import TensorType
from colossalai.tensor.param_op_hook import ColoParamOpHookManager
from colossalai.tensor.tensor_spec import ColoTensorSpec


def filter_colo_parameters(*args, **kwargs):
    """Collect every ``ColoParameter`` found in the given call arguments.

    Positional arguments are scanned first, then keyword-argument values.
    Lists and tuples are searched recursively (depth-first, left to right);
    any other non-``ColoParameter`` value is ignored.

    Returns:
        list: the ``ColoParameter`` instances in the order they were found.

    Raises:
        RuntimeError: if a ``dict`` is encountered anywhere in the arguments.
    """

    def _walk(item):
        # Yield ColoParameters contained in `item`, descending into sequences.
        if isinstance(item, (list, tuple)):
            for child in item:
                yield from _walk(child)
        elif isinstance(item, dict):
            raise RuntimeError("Found Dict: ColoParameter can't deal with complicated arguments.")
        elif isinstance(item, ColoParameter):
            yield item

    # Treat all top-level arguments as one tuple so a single walk covers them.
    return list(_walk((*args, *kwargs.values())))


def replace_args(args, kwargs, new_args):
    """Split a flat sequence of new values back into positional and keyword arguments.

    The first ``len(args)`` entries of ``new_args`` become the positional
    arguments; the entries after them are assigned, in order, to the existing
    keys of ``kwargs``.

    Args:
        args: the original positional arguments (only their count is used).
        kwargs: the original keyword arguments; updated in place.
        new_args: sliceable sequence of replacement values, positional first.

    Returns:
        tuple: ``(positional, kwargs)`` where ``positional`` is a tuple and
        ``kwargs`` is the same dict object that was passed in.
    """
    positional = tuple(new_args[:len(args)])
    # The keyword values start right after the positional values actually taken.
    keyword_values = new_args[len(positional):]
    for key, value in zip(tuple(kwargs), keyword_values):
        kwargs[key] = value
    return positional, kwargs


class ColoParameter(ColoTensor, torch.nn.Parameter):
    r"""A kind of ColoTensor to be considered as a module parameter.

    The class derives from both ``ColoTensor`` and ``torch.nn.Parameter``.
    Its ``__torch_function__`` override wraps operations that involve a
    ``ColoParameter`` with ``ColoParamOpHookManager.pre_op`` / ``post_op``
    whenever the manager reports that a hook is installed.
    """

    def __new__(cls,
                data: Optional[torch.Tensor] = None,
                requires_grad: bool = True,
                spec: Optional[ColoTensorSpec] = None) -> 'ColoParameter':
        """Allocate the parameter as a tensor subclass instance.

        Args:
            data: tensor to wrap; an empty tensor is used when ``None``.
            requires_grad: forwarded to ``torch.Tensor._make_subclass``.
            spec: not used here; accepted so the signature matches ``__init__``.
        """
        if data is None:
            data = torch.empty(0)
        return torch.Tensor._make_subclass(cls, data, requires_grad)

    def __init__(self,
                 data: Optional[torch.Tensor] = None,
                 requires_grad: bool = True,
                 spec: Optional[ColoTensorSpec] = None) -> None:
        """Initialise the ColoTensor part and mark the tensor as a model tensor.

        Args:
            data: forwarded to ``ColoTensor.__init__``.
            requires_grad: not used here; it is consumed by ``__new__``.
            spec: forwarded to ``ColoTensor.__init__``.
        """
        ColoTensor.__init__(self, data, spec)
        self._type = TensorType.MODEL
        # a list contains modules sharing this ColoParameter with others.
        self._shared_param_modules = []

    @property
    def shared_param_modules(self):
        """The list of modules recorded as sharing this parameter."""
        return self._shared_param_modules

    @staticmethod
    def from_torch_tensor(tensor: torch.Tensor,
                          requires_grad: bool = True,
                          spec: Optional[ColoTensorSpec] = None) -> 'ColoParameter':
        """Re-type an existing tensor as a ``ColoParameter`` and initialise it.

        Args:
            tensor: the tensor to convert via ``Tensor.as_subclass``.
            requires_grad: forwarded to ``__init__``.
            spec: forwarded to ``__init__``.

        Returns:
            The converted tensor.
        """
        tensor = tensor.as_subclass(ColoParameter)
        # NOTE(review): `__init__` ignores `requires_grad` and `__new__` is not
        # run on this path, so the flag appears to have no effect here --
        # confirm whether callers expect it to be applied.
        tensor.__init__(tensor, requires_grad=requires_grad, spec=spec)
        return tensor

    def __repr__(self):
        return super(ColoParameter, self).__repr__()

    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        """Dispatch ``func``, wrapping it with the parameter-op hooks when active.

        The hooks are applied only if ``ColoParamOpHookManager.has_hook()`` is
        true, ``func`` is not a dunder function, and at least one
        ``ColoParameter`` is found among the arguments. Otherwise the call is
        passed straight to the parent implementation.

        Args:
            func: the torch function being dispatched.
            types: the types taking part in the dispatch.
            args: positional arguments of the call (defaults to an empty tuple).
            kwargs: keyword arguments of the call, or ``None``.
        """
        if ColoParamOpHookManager.has_hook() and not func.__name__.startswith('__'):
            if kwargs is None:
                kwargs = {}
            params = filter_colo_parameters(*args, **kwargs)
            if params:
                # The hooks run with torch-function dispatch disabled;
                # presumably so their own tensor ops do not re-enter this method.
                with torch._C.DisableTorchFunction():
                    new_args = ColoParamOpHookManager.pre_op(params, *args, *kwargs.values())
                # pre_op returns one flat sequence; split it back into args / kwargs.
                args, kwargs = replace_args(args, kwargs, new_args)
                ret = super().__torch_function__(func, types, args, kwargs)
                with torch._C.DisableTorchFunction():
                    ret = ColoParamOpHookManager.post_op(params, ret)
                return ret
        return super().__torch_function__(func, types, args, kwargs)

    def __deepcopy__(self, memo):
        """Return a copy with cloned data and a freshly built ``ColoTensorSpec``.

        The copy is stored in ``memo`` under ``id(self)`` so repeated requests
        for the same object return the same copy.
        """
        if id(self) in memo:
            return memo[id(self)]

        # Clone the raw data with torch-function dispatch disabled.
        with torch._C.DisableTorchFunction():
            data = self.data.clone()
        tensor = ColoParameter(data,
                               self.requires_grad,
                               spec=ColoTensorSpec(self.get_process_group(), self.dist_spec, self.compute_spec))
        memo[id(self)] = tensor
        return tensor

    def __reduce_ex__(self, proto):
        """Pickling is not supported; always raises ``NotImplementedError``."""
        # Adapted from torch._utils._rebuild_parameter
        # def _rebuild_colo_parameter(data, requires_grad, backward_hooks):
        #     colo_param = ColoParameter(data, requires_grad)
        #     colo_param._backward_hooks = backward_hooks
        #     return colo_param

        # return (
        #     _rebuild_colo_parameter,
        #     (self.data, self.requires_grad, OrderedDict())
        # )

        # TODO(jzy) we don't support object reflection now.
        # distspec cannot be pickled or rebuilt because it's tightly connected to runtime attribute `process_group`.
        raise NotImplementedError
[Tensor] remove ParallelAction, use ComputeSpec instread (#1166) 2 years ago			`from typing import Optional`

[autoparallel] fix bugs caused by negative dim key (#1808) * [autoparallel] fix bugs caused by negative dim key * fix import error * fix matmul test issue * fix unit test issue 2 years ago			`import torch`

reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago			`from colossalai.tensor.colo_tensor import ColoTensor`
			`from colossalai.tensor.const import TensorType`
[Gemini] ParamOpHook -> ColoParamOpHook (#2080) 2 years ago			`from colossalai.tensor.param_op_hook import ColoParamOpHookManager`
[autoparallel] fix bugs caused by negative dim key (#1808) * [autoparallel] fix bugs caused by negative dim key * fix import error * fix matmul test issue * fix unit test issue 2 years ago			`from colossalai.tensor.tensor_spec import ColoTensorSpec`
[Tensor] init ColoParameter (#914) 3 years ago
[tensor] ColoTensor supports ZeRo (#1015) * impl chunk manager * impl param op hook * add reduce_chunk * add zero hook v2 * add zero dp * fix TensorInfo * impl load balancing when using zero without chunk * fix zero hook * polish chunk * fix bugs * ddp ok * zero ok * polish code * fix bugs about load balancing * polish code * polish code * add ene-to-end test * polish code * polish code * polish code * fix typo * add test_chunk * fix bugs * fix bugs * polish code 3 years ago
[zero] fix error for BEiT models (#2169) * [zero] fix error for BEiT models * [ColoParameter] add unpack operation for tuple arguments * fix bugs * fix chunkv2 unit testing * add assertion for gradient state 2 years ago			`def filter_colo_parameters(*args, **kwargs):`
			`param_list = []`

			`def get_colo_parameters(element) -> None:`
			`if isinstance(element, list) or isinstance(element, tuple):`
			`for e in element:`
			`get_colo_parameters(e)`
			`elif isinstance(element, dict):`
			`raise RuntimeError("Found Dict: ColoParameter can't deal with complicated arguments.")`
			`elif isinstance(element, ColoParameter):`
			`param_list.append(element)`
			`return`

			`for a in args:`
			`get_colo_parameters(a)`
			`for v in kwargs.values():`
			`get_colo_parameters(v)`

			`return param_list`
fix colo parameter torch function (#1117) 2 years ago

			`def replace_args(args, kwargs, new_args):`
			`args = new_args[:len(args)]`
			`for k, v in zip(kwargs.keys(), new_args[len(args):]):`
			`kwargs[k] = v`
[hotfix] fix param op hook (#1131) * fix param op hook * update zero tp test * fix bugs 2 years ago			`return tuple(args), kwargs`
fix colo parameter torch function (#1117) 2 years ago

[Tensor] add Parameter inheritance for ColoParameter (#1041) * add Parameter inheritance for ColoParameter * remove tricks * remove tricks * polish * polish 3 years ago			`class ColoParameter(ColoTensor, torch.nn.Parameter):`
[Tensor] init ColoParameter (#914) 3 years ago			`r"""A kind of ColoTensor to be considered as a module parameter.`

			`"""`

[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`def __new__(cls,`
[Tensor] add Parameter inheritance for ColoParameter (#1041) * add Parameter inheritance for ColoParameter * remove tricks * remove tricks * polish * polish 3 years ago			`data: Optional[torch.Tensor] = None,`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`requires_grad: bool = True,`
[refactor] move process group from _DistSpec to ColoTensor. (#1203) 2 years ago			`spec: ColoTensorSpec = None) -> 'ColoParameter':`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`if data is None:`
			`data = torch.empty(0)`
			`return torch.Tensor._make_subclass(cls, data, requires_grad)`
[Tensor] init ColoParameter (#914) 3 years ago
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`def __init__(self,`
[Tensor] add Parameter inheritance for ColoParameter (#1041) * add Parameter inheritance for ColoParameter * remove tricks * remove tricks * polish * polish 3 years ago			`data: Optional[torch.Tensor] = None,`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`requires_grad: bool = True,`
[refactor] move process group from _DistSpec to ColoTensor. (#1203) 2 years ago			`spec: ColoTensorSpec = None) -> None:`
			`ColoTensor.__init__(self, data, spec)`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`self._type = TensorType.MODEL`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`# a list contains modules sharing this ColoParameter with others.`
			`self._shared_param_modules = []`

			`@property`
			`def shared_param_modules(self):`
			`return self._shared_param_modules`

[Tensor] init ColoParameter (#914) 3 years ago			`@staticmethod`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`def from_torch_tensor(tensor: torch.Tensor,`
			`requires_grad: bool = True,`
[refactor] move process group from _DistSpec to ColoTensor. (#1203) 2 years ago			`spec: ColoTensorSpec = None) -> 'ColoParameter':`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`tensor = tensor.as_subclass(ColoParameter)`
			`tensor.__init__(tensor, requires_grad=requires_grad, spec=spec)`
			`return tensor`
[tensor] refactor parallel action (#1007) * refactor parallel action * polish unit tests 3 years ago
			`def __repr__(self):`
[polish] polish ColoTensor and its submodules (#2537) 2 years ago			`return super(ColoParameter, self).__repr__()`
[Tensor] add Parameter inheritance for ColoParameter (#1041) * add Parameter inheritance for ColoParameter * remove tricks * remove tricks * polish * polish 3 years ago
[tensor] ColoTensor supports ZeRo (#1015) * impl chunk manager * impl param op hook * add reduce_chunk * add zero hook v2 * add zero dp * fix TensorInfo * impl load balancing when using zero without chunk * fix zero hook * polish chunk * fix bugs * ddp ok * zero ok * polish code * fix bugs about load balancing * polish code * polish code * add ene-to-end test * polish code * polish code * polish code * fix typo * add test_chunk * fix bugs * fix bugs * polish code 3 years ago			`@classmethod`
			`def __torch_function__(cls, func, types, args=..., kwargs=None):`
[Gemini] ParamOpHook -> ColoParamOpHook (#2080) 2 years ago			`if ColoParamOpHookManager.has_hook():`
[tensor] ColoTensor supports ZeRo (#1015) * impl chunk manager * impl param op hook * add reduce_chunk * add zero hook v2 * add zero dp * fix TensorInfo * impl load balancing when using zero without chunk * fix zero hook * polish chunk * fix bugs * ddp ok * zero ok * polish code * fix bugs about load balancing * polish code * polish code * add ene-to-end test * polish code * polish code * polish code * fix typo * add test_chunk * fix bugs * fix bugs * polish code 3 years ago			`if not func.__name__.startswith('__'):`
fix colo parameter torch function (#1117) 2 years ago			`if kwargs is None:`
			`kwargs = {}`
[zero] fix error for BEiT models (#2169) * [zero] fix error for BEiT models * [ColoParameter] add unpack operation for tuple arguments * fix bugs * fix chunkv2 unit testing * add assertion for gradient state 2 years ago			`params = filter_colo_parameters(*args, **kwargs)`
[tensor] ColoTensor supports ZeRo (#1015) * impl chunk manager * impl param op hook * add reduce_chunk * add zero hook v2 * add zero dp * fix TensorInfo * impl load balancing when using zero without chunk * fix zero hook * polish chunk * fix bugs * ddp ok * zero ok * polish code * fix bugs about load balancing * polish code * polish code * add ene-to-end test * polish code * polish code * polish code * fix typo * add test_chunk * fix bugs * fix bugs * polish code 3 years ago			`if len(params) > 0:`
			`with torch._C.DisableTorchFunction():`
[Gemini] ParamOpHook -> ColoParamOpHook (#2080) 2 years ago			`new_args = ColoParamOpHookManager.pre_op(params, *args, *kwargs.values())`
fix colo parameter torch function (#1117) 2 years ago			`args, kwargs = replace_args(args, kwargs, new_args)`
[tensor] ColoTensor supports ZeRo (#1015) * impl chunk manager * impl param op hook * add reduce_chunk * add zero hook v2 * add zero dp * fix TensorInfo * impl load balancing when using zero without chunk * fix zero hook * polish chunk * fix bugs * ddp ok * zero ok * polish code * fix bugs about load balancing * polish code * polish code * add ene-to-end test * polish code * polish code * polish code * fix typo * add test_chunk * fix bugs * fix bugs * polish code 3 years ago			`ret = super().__torch_function__(func, types, args, kwargs)`
			`with torch._C.DisableTorchFunction():`
[Gemini] ParamOpHook -> ColoParamOpHook (#2080) 2 years ago			`ret = ColoParamOpHookManager.post_op(params, ret)`
[tensor] ColoTensor supports ZeRo (#1015) * impl chunk manager * impl param op hook * add reduce_chunk * add zero hook v2 * add zero dp * fix TensorInfo * impl load balancing when using zero without chunk * fix zero hook * polish chunk * fix bugs * ddp ok * zero ok * polish code * fix bugs about load balancing * polish code * polish code * add ene-to-end test * polish code * polish code * polish code * fix typo * add test_chunk * fix bugs * fix bugs * polish code 3 years ago			`return ret`
			`return super().__torch_function__(func, types, args, kwargs)`

[Tensor] add Parameter inheritance for ColoParameter (#1041) * add Parameter inheritance for ColoParameter * remove tricks * remove tricks * polish * polish 3 years ago			`def __deepcopy__(self, memo):`
			`if id(self) in memo:`
			`return memo[id(self)]`
			`else:`
			`with torch._C.DisableTorchFunction():`
			`data = self.data.clone()`
[refactor] move process group from _DistSpec to ColoTensor. (#1203) 2 years ago			`tensor = ColoParameter(data,`
			`self.requires_grad,`
			`spec=ColoTensorSpec(self.get_process_group(), self.dist_spec, self.compute_spec))`
[Tensor] add Parameter inheritance for ColoParameter (#1041) * add Parameter inheritance for ColoParameter * remove tricks * remove tricks * polish * polish 3 years ago			`memo[id(self)] = tensor`
			`return tensor`

			`def __reduce_ex__(self, proto):`
			`# Adapted from torch._utils._rebuild_parameter`
			`# def _rebuild_colo_parameter(data, requires_grad, backward_hooks):`
			`# colo_param = ColoParameter(data, requires_grad)`
			`# colo_param._backward_hooks = backward_hooks`
			`# return colo_param`

			`# return (`
			`# _rebuild_colo_parameter,`
			`# (self.data, self.requires_grad, OrderedDict())`
			`# )`

			`# TODO(jzy) we don't support object reflection now.`
			``# distspec cannot be pickled or rebuilt because it's tightly connected to runtime attribute `process_group`.``
			`raise NotImplementedError`