from enum import Enum
from typing import Optional

import torch


class TensorState(Enum):
    FREE = 0
    HOLD = 1
    HOLD_AFTER_FWD = 2
    HOLD_AFTER_BWD = 3
    COMPUTE = 4


class StatefulTensor(object):
    """A structure that stores a Torch Tensor and labels its state.

    Inspired by the paper:
    PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management
    https://arxiv.org/abs/2108.05818
    """

    def __init__(self, tensor: Optional[torch.Tensor], state: Optional[TensorState] = TensorState.HOLD) -> None:
        self._state = state
        self._payload = tensor
        if self._state == TensorState.FREE:
            assert self._payload is None, f"payload has to be None if state is {self._state}"

    def data_ptr(self):
        if self._payload is None:
            return None
        return self._payload.data_ptr()

    @property
    def state(self) -> TensorState:
        return self._state

    def set_null(self) -> None:
        self._state = TensorState.FREE
        self._payload = None

    def is_null(self) -> bool:
        if self._state == TensorState.FREE:
            assert self._payload is None
            return True
        return False

    def trans_state(self, state: TensorState) -> None:
        self._state = state
        if state == TensorState.FREE:
            # a FREE tensor holds no payload
            self._payload = None

    @property
    def payload(self) -> Optional[torch.Tensor]:
        return self._payload

    def copy_payload(self, tensor: torch.Tensor) -> None:
        # copy data into the existing payload in place, without reallocating its storage
        self._payload.view(-1).copy_(tensor.view(-1))

    def reset_payload(self, tensor: torch.Tensor) -> None:
        # replace the payload with a new tensor and reset the state to HOLD
        del self._payload
        self._payload = tensor
        self.trans_state(TensorState.HOLD)

    @property
    def device(self) -> torch.device:
        return self._payload.device

    @property
    def dtype(self) -> torch.dtype:
        return self._payload.dtype

    @property
    def shape(self):
        return self._payload.shape

    def to(self, device: torch.device):
        raise RuntimeError("Use colo_model_tensor_move instead of calling .to() on a StatefulTensor")

    def to_(self, device: torch.device):
        raise RuntimeError("Use colo_model_tensor_move instead of calling .to_() on a StatefulTensor")
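

# The block below is not part of the original module; it is a minimal usage
# sketch showing the intended life cycle of a StatefulTensor
# (HOLD -> COMPUTE -> HOLD_AFTER_FWD -> FREE). The concrete tensor shapes and
# the __main__ guard are illustrative assumptions, not an API defined above.
if __name__ == "__main__":
    # a freshly wrapped tensor starts in the HOLD state by default
    t = StatefulTensor(torch.zeros(4, 4))
    assert t.state == TensorState.HOLD

    # mark the payload as in use by the forward computation
    t.trans_state(TensorState.COMPUTE)
    assert t.state == TensorState.COMPUTE

    # after the forward pass, park the payload until backward needs it
    t.trans_state(TensorState.HOLD_AFTER_FWD)

    # overwrite the payload in place without changing its storage
    t.copy_payload(torch.ones(4, 4))
    assert t.payload.sum().item() == 16

    # releasing the tensor drops the payload and moves it to FREE
    t.set_null()
    assert t.is_null() and t.payload is None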