2022-06-23 09:34:59 +00:00
|
|
|
from typing import Optional
|
|
|
|
|
2022-11-08 09:03:50 +00:00
|
|
|
import torch
|
|
|
|
|
2022-06-03 10:04:22 +00:00
|
|
|
from colossalai.tensor.colo_tensor import ColoTensor
|
2022-12-05 09:11:06 +00:00
|
|
|
from colossalai.tensor.param_op_hook import ColoParamOpHookManager
|
2023-08-24 01:29:25 +00:00
|
|
|
|
|
|
|
from .colo_tensor import _convert_output
|
|
|
|
|
|
|
|
# Dunder functions that must still go through the parameter-op hooks even
# though their name starts with a double underscore.
WHITE_LIST_FUNCS = {torch.Tensor.__getitem__}


def is_no_hook_op(func) -> bool:
    """Return True when ``func`` should bypass the parameter-op hooks.

    A function bypasses the hooks when its ``__name__`` starts with a double
    underscore and it is not listed in ``WHITE_LIST_FUNCS``.
    """
    # Check the name first so that non-dunder callables are never looked up
    # in the whitelist set.
    if not func.__name__.startswith("__"):
        return False
    return func not in WHITE_LIST_FUNCS
|
2022-05-06 04:57:14 +00:00
|
|
|
|
2022-05-31 04:00:12 +00:00
|
|
|
|
2022-12-26 07:03:54 +00:00
|
|
|
def filter_colo_parameters(*args, **kwargs):
    """Collect the ``ColoParameter`` objects found in the call arguments.

    Positional arguments are scanned first, then keyword-argument values.
    Lists and tuples are searched recursively, depth-first and left to right.

    Returns:
        list: every ``ColoParameter`` encountered, in traversal order.

    Raises:
        RuntimeError: if a ``dict`` is encountered anywhere in the arguments.
    """

    def _iter_params(node):
        # Yield the ColoParameters reachable from ``node``.
        if isinstance(node, (list, tuple)):
            for child in node:
                yield from _iter_params(child)
        elif isinstance(node, dict):
            raise RuntimeError("Found Dict: ColoParameter can't deal with complicated arguments.")
        elif isinstance(node, ColoParameter):
            yield node

    found = []
    for root in (*args, *kwargs.values()):
        found.extend(_iter_params(root))
    return found
|
2022-06-15 06:23:27 +00:00
|
|
|
|
|
|
|
|
|
|
|
def replace_args(args, kwargs, new_args):
    """Split a flat sequence of new values back into ``args`` and ``kwargs``.

    The first ``len(args)`` items of ``new_args`` become the positional
    arguments; the remaining items are assigned, in order, to the existing
    keys of ``kwargs``.

    Note:
        ``kwargs`` is updated in place and the same dict object is returned.

    Returns:
        tuple: ``(positional_tuple, kwargs)``.
    """
    positional = new_args[: len(args)]
    remainder = new_args[len(positional) :]
    # Only existing keys are overwritten, so the dict size never changes
    # while it is being iterated.
    for key, value in zip(kwargs, remainder):
        kwargs[key] = value
    return tuple(positional), kwargs
|
2022-06-15 06:23:27 +00:00
|
|
|
|
|
|
|
|
2022-05-30 09:23:44 +00:00
|
|
|
class ColoParameter(ColoTensor, torch.nn.Parameter):
    r"""A kind of ColoTensor to be considered as a module parameter.

    When ``ColoParamOpHookManager`` has hooks registered, torch functions that
    receive a ``ColoParameter`` are wrapped with the manager's ``pre_op`` and
    ``post_op`` calls (see ``__torch_function__``).
    """

    def __new__(cls, data: Optional[torch.Tensor] = None, requires_grad: bool = True) -> "ColoParameter":
        """Create a parameter wrapping ``data``.

        Args:
            data: tensor to wrap; an empty tensor is used when ``None``.
            requires_grad: whether the parameter requires gradients.
        """
        if data is None:
            data = torch.empty(0)
        return torch.Tensor._make_subclass(cls, data, requires_grad)

    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        """Dispatch ``func``, running the parameter-op hooks around it if needed.

        The hooks run only when a hook is registered, ``func`` is not excluded
        by ``is_no_hook_op`` and at least one ``ColoParameter`` appears in the
        arguments. Otherwise the call is forwarded unchanged to the parent
        implementation.

        Raises:
            RuntimeError: propagated from ``filter_colo_parameters`` when a
                dict is found among the arguments.
        """
        # ``args`` defaults to an empty tuple (the torch protocol default) so
        # that ``*args`` below is always valid.
        if kwargs is None:
            kwargs = {}
        if ColoParamOpHookManager.has_hook() and not is_no_hook_op(func):
            params = filter_colo_parameters(*args, **kwargs)
            if params:
                # Torch-function dispatch is disabled while the hooks run so
                # that tensor ops inside them do not re-enter this method.
                with torch._C.DisableTorchFunction():
                    # pre_op receives positional args followed by the kwargs
                    # values and returns their replacements in the same flat
                    # order (as consumed by replace_args).
                    new_args = ColoParamOpHookManager.pre_op(params, *args, *kwargs.values())
                args, kwargs = replace_args(args, kwargs, new_args)
                ret = super().__torch_function__(func, types, args, kwargs)
                with torch._C.DisableTorchFunction():
                    ret = ColoParamOpHookManager.post_op(params, ret)
                return _convert_output(ret, func)
        return super().__torch_function__(func, types, args, kwargs)

    def __deepcopy__(self, memo):
        """Return a copy backed by a clone of the data, memoized in ``memo``."""
        if id(self) in memo:
            return memo[id(self)]
        # Clone with torch-function dispatch disabled so the op hooks are not
        # triggered by the copy itself.
        with torch._C.DisableTorchFunction():
            data = self.data.clone()
        tensor = ColoParameter(data, self.requires_grad)
        memo[id(self)] = tensor
        return tensor

    def __reduce_ex__(self, proto):
        """Pickling hook; always raises because pickling is not supported.

        Raises:
            NotImplementedError: always.
        """
        # Adapted from torch._utils._rebuild_parameter
        # def _rebuild_colo_parameter(data, requires_grad, backward_hooks):
        #     colo_param = ColoParameter(data, requires_grad)
        #     colo_param._backward_hooks = backward_hooks
        #     return colo_param

        # return (
        #     _rebuild_colo_parameter,
        #     (self.data, self.requires_grad, OrderedDict())
        # )

        # TODO(jzy) we don't support object reflection now.
        # distspec cannot be pickled or rebuilt because it's tightly connected to runtime attribute `process_group`.
        raise NotImplementedError("ColoParameter does not support pickling (__reduce_ex__ is not implemented).")
|