from functools import lru_cache
from typing import Callable, Set

import torch

# Map each in-place tensor method to its out-of-place counterpart. On recent
# torch versions, in-place calls are rewritten with this table inside
# __torch_function__ (see below).
INPLACE_MAPPING = {
    torch.Tensor.add_: torch.Tensor.add,
    torch.Tensor.sub_: torch.Tensor.sub,
    torch.Tensor.mul_: torch.Tensor.mul,
    torch.Tensor.div_: torch.Tensor.div,
}


@lru_cache(None)
def _get_my_nowrap_functions() -> Set[Callable]:
    """Accessors whose results should NOT be wrapped back into ColoTensor."""
    Tensor = torch.Tensor
    return {
        Tensor._base.__get__,
        Tensor.grad.__get__,
        Tensor._grad.__get__,
        Tensor.data.__get__,    # make .data return a torch.Tensor rather than a ColoTensor
    }


def _convert(output):
    """Recursively re-class plain torch.Tensor outputs as ColoTensor."""
    if isinstance(output, torch.Tensor) and not isinstance(output, ColoTensor):
        output.__class__ = ColoTensor
    elif isinstance(output, (list, tuple)):
        output = type(output)(_convert(o) for o in output)
    return output


def _convert_output(output, func):
    if func in _get_my_nowrap_functions():
        return output
    return _convert(output)


class ColoTensor(torch.Tensor):
    """Data structure for tensors in Colossal-AI. It is a subclass of torch.Tensor.

    It is only used to trigger the torch function hook.

    Args:
        data (torch.Tensor): a torch tensor used as the payload of the ColoTensor.
    """
    torch_major = int(torch.__version__.split('.')[0])
    torch_minor = int(torch.__version__.split('.')[1])

    def __new__(cls, data: torch.Tensor) -> 'ColoTensor':
        """
        The signature of __new__ has to be consistent with that of torch.Tensor.

        Args:
            data (torch.Tensor): a torch tensor used as the payload of the ColoTensor.

        Returns:
            ColoTensor: a ColoTensor wrapping the data.
        """
        if data is None:
            data = torch.empty(0)
        return torch.Tensor._make_subclass(cls, data, data.requires_grad)

    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        if kwargs is None:
            kwargs = {}
        if not all(issubclass(cls, t) for t in types):
            return NotImplemented

        if cls.torch_major > 1 or (cls.torch_major == 1 and cls.torch_minor >= 12):
            # In order to trigger the pre-op hook in the forward of a checkpoint
            # module, we have to capture the `backward` function and make sure
            # that it does not run in the `torch._C.DisableTorchFunction()` context.
            if func is torch.Tensor.backward:
                assert len(args) == 1    # backward takes only 1 positional parameter: the tensor itself
                backward_tensor = torch.Tensor(args[0])
                tensor_kwargs = {k: torch.Tensor(v) if torch.is_tensor(v) else v for k, v in kwargs.items()}
                return backward_tensor.backward(**tensor_kwargs)

            # replace the in-place function with its out-of-place counterpart
            if func in INPLACE_MAPPING:
                func = INPLACE_MAPPING[func]
            # set the 'inplace' kwarg to False
            if 'inplace' in kwargs:
                kwargs['inplace'] = False

        with torch._C.DisableTorchFunction():
            ret = func(*args, **kwargs)
            return _convert_output(ret, func)

    def __deepcopy__(self, memo):
        if id(self) in memo:
            return memo[id(self)]
        else:
            with torch._C.DisableTorchFunction():
                data = self.data.clone()
                tensor = ColoTensor(data)
                memo[id(self)] = tensor
                return tensor
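
# --- Minimal usage sketch (an editorial addition, not part of the original
# module). Assuming torch >= 1.12, it illustrates the three behaviors coded
# above: ordinary ops dispatched through __torch_function__ come back as
# ColoTensor, in-place ops are rewritten to their out-of-place counterparts
# (so the receiver is left untouched), and __deepcopy__ yields an independent
# ColoTensor with its own storage.
if __name__ == "__main__":
    import copy

    x = ColoTensor(torch.ones(2, 2))

    y = x + x                       # routed through __torch_function__
    print(type(y).__name__)         # ColoTensor

    z = x.mul_(2)                   # rewritten to torch.Tensor.mul on torch >= 1.12,
    print(type(z).__name__)         # so z is a new ColoTensor ...
    print(x)                        # ... and x still holds ones

    c = copy.deepcopy(x)
    print(type(c).__name__, c.data_ptr() != x.data_ptr())    # ColoTensor True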