2023-03-21 09:39:30 +00:00
|
|
|
from abc import ABC, abstractmethod
|
2023-09-19 06:20:26 +00:00
|
|
|
from typing import Callable, Iterator, List, Optional, Tuple
|
2023-03-21 09:39:30 +00:00
|
|
|
|
|
|
|
import torch.nn as nn
|
|
|
|
from torch.optim import Optimizer
|
|
|
|
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
|
2023-05-08 07:44:03 +00:00
|
|
|
from torch.utils.data import DataLoader, Dataset
|
2023-03-21 09:39:30 +00:00
|
|
|
|
2023-03-27 02:24:14 +00:00
|
|
|
from colossalai.checkpoint_io import CheckpointIO
|
|
|
|
from colossalai.interface import OptimizerWrapper
|
2023-03-21 09:39:30 +00:00
|
|
|
|
2023-09-19 06:20:26 +00:00
|
|
|
__all__ = ["Plugin"]
|
2023-03-21 09:39:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Plugin(ABC):
    """Abstract interface that a plugin has to implement.

    Every method below is decorated with ``@abstractmethod``, so a subclass
    can only be instantiated once it overrides all of them.
    """

    @abstractmethod
    def supported_devices(self) -> List[str]:
        """Return the devices supported by the plugin, as a list of strings."""

    @abstractmethod
    def supported_precisions(self) -> List[str]:
        """Return the precisions supported by the plugin, as a list of strings."""

    @abstractmethod
    def control_precision(self) -> bool:
        """
        Whether the plugin controls the precision
        """

    @abstractmethod
    def control_device(self) -> bool:
        """
        Whether the plugin controls the device
        """

    @abstractmethod
    def support_no_sync(self) -> bool:
        """
        Whether the plugin supports ``no_sync``
        """

    @abstractmethod
    def configure(
        self,
        model: nn.Module,
        optimizer: Optional[Optimizer] = None,
        criterion: Optional[Callable] = None,
        dataloader: Optional[DataLoader] = None,
        lr_scheduler: Optional[LRScheduler] = None,
    ) -> Tuple[nn.Module, OptimizerWrapper, Callable, DataLoader, LRScheduler]:
        """Configure the given training components; subclasses implement this.

        Args:
            model: The model to configure (required).
            optimizer: Optional optimizer, defaults to ``None``.
            criterion: Optional callable, defaults to ``None``.
            dataloader: Optional dataloader, defaults to ``None``.
            lr_scheduler: Optional learning-rate scheduler, defaults to ``None``.

        Returns:
            A 5-tuple in the order (model, optimizer, criterion, dataloader,
            lr_scheduler); the optimizer slot is annotated as an
            ``OptimizerWrapper``.

        NOTE(review): the return annotation does not mark any element as
        ``Optional`` although four of the inputs may be ``None`` -- confirm
        what implementations return for components that were not passed in.
        """

    @abstractmethod
    def control_checkpoint_io(self) -> bool:
        """
        Whether the plugin controls the checkpoint io
        """

    @abstractmethod
    def get_checkpoint_io(self) -> CheckpointIO:
        """
        Get checkpoint io object for this plugin, only invoked when control_checkpoint_io is True.
        """

    @abstractmethod
    def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> Iterator[None]:
        """
        Context manager to disable gradient synchronization.

        NOTE(review): presumably only meaningful when ``support_no_sync``
        returns True -- confirm against the implementations.
        """

    @abstractmethod
    def prepare_dataloader(
        self,
        dataset: Dataset,
        batch_size: int,
        shuffle: bool = False,
        seed: int = 1024,
        drop_last: bool = False,
        pin_memory: bool = False,
        num_workers: int = 0,
        **kwargs,
    ) -> DataLoader:
        """Prepare a dataloader for distributed training. The dataloader will be wrapped by
        `torch.utils.data.DataLoader`

        Args:
            dataset: The dataset to load from.
            batch_size: Batch size for the dataloader.
            shuffle: Defaults to ``False``.
            seed: Defaults to ``1024``. NOTE(review): presumably seeds the
                shuffling/sampling -- confirm against the implementations.
            drop_last: Defaults to ``False``.
            pin_memory: Defaults to ``False``.
            num_workers: Defaults to ``0``.
            **kwargs: Additional keyword arguments; how they are consumed is
                up to the implementation.
        """
|