[booster] init module structure and definition (#3056)

2023-03-09 11:27:46 +08:00 · 2023-03-09 11:27:46 +08:00 · f19b49e164
parent faa8526b85
commit f19b49e164
6 changed files with 174 additions and 0 deletions
--- a/colossalai/booster/__init__.py
+++ b/colossalai/booster/__init__.py
@@ -0,0 +1,5 @@
+from .accelerator import Accelerator
+from .booster import Booster
+from .environment_table import EnvironmentTable
+from .plugin import Plugin
+from .precision import Precision
--- a/colossalai/booster/accelerator.py
+++ b/colossalai/booster/accelerator.py
@@ -0,0 +1,14 @@
+import torch
+import torch.nn as nn
+
+__all__ = ['Accelerator']
+
+
+class Accelerator:  # holds the target torch.device; setup_model is still a stub
+
+    def __init__(self, device: torch.device):
+        self.device = device  # stored as given; no validation or conversion happens here
+
+    def setup_model(self, model: nn.Module) -> nn.Module:
+        # TODO: implement this method; until then it ignores `model` and returns None
+        pass
--- a/colossalai/booster/booster.py
+++ b/colossalai/booster/booster.py
@@ -0,0 +1,66 @@
+from contextlib import contextmanager
+from typing import Callable, Iterator, List, Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
+from torch.utils.data import DataLoader
+
+from .plugin import Plugin
+
+__all__ = ['Booster']
+
+
+class Booster:  # API skeleton only: every method body below is an unimplemented stub
+
+    def __init__(self,
+                 device: Union[str, torch.device] = 'cuda',
+                 precision: str = 'fp32',
+                 grad_clipping_type: str = 'norm',
+                 grad_clipping_value: float = 0.0,
+                 plugin: Optional[Plugin] = None) -> None:
+        # TODO: implement this method; none of the arguments are stored on the instance yet
+        pass
+
+    def boost(
+        self, *args: Union[nn.Module, Optimizer, LRScheduler, DataLoader]
+    ) -> List[Union[nn.Module, Optimizer, LRScheduler, DataLoader]]:
+        # TODO: implement this method; it currently returns None rather than the annotated List
+        pass
+
+    def backward(self, loss: torch.Tensor, optimizer: Optimizer) -> None:
+        # TODO: implement this method; currently a no-op (no backward pass is triggered)
+        pass
+
+    def execute_pipeline(self,
+                         data_iter: Iterator,
+                         model: nn.Module,
+                         criterion: Callable[[torch.Tensor], torch.Tensor],
+                         optimizer: Optimizer,
+                         return_loss: bool = True,
+                         return_outputs: bool = False) -> Tuple[Optional[torch.Tensor], ...]:
+        # TODO: implement this method; it currently returns None rather than the annotated Tuple
+        # planned: run the pipeline forward and backward passes
+        # planned: return loss or outputs if needed (presumably per return_loss / return_outputs -- confirm)
+        pass
+
+    def no_sync(self, model: nn.Module) -> contextmanager:  # NOTE(review): `contextmanager` is a decorator, not a type
+        # TODO: implement this method; it currently returns None, so it cannot be used in a `with` statement
+        pass
+
+    def save(self,
+             obj: Union[nn.Module, Optimizer, LRScheduler],
+             path_like: str,
+             plan: str = 'torch',
+             **kwargs) -> None:
+        # TODO: implement this method; nothing is written yet and all arguments are ignored
+        pass
+
+    def load(self,
+             obj: Union[nn.Module, Optimizer, LRScheduler],
+             path_like: str,
+             plan: str = 'torch',
+             **kwargs) -> None:
+        # TODO: implement this method; nothing is read yet and all arguments are ignored
+        pass
--- a/colossalai/booster/environment_table.py
+++ b/colossalai/booster/environment_table.py
@@ -0,0 +1,18 @@
+from typing import List
+
+__all__ = ['EnvironmentTable']
+
+
+class EnvironmentTable:  # placeholder: nothing is computed or stored yet
+
+    def __init__(self, intra_op_world_sizes: List[int]):
+        # TODO: implement this method; `intra_op_world_sizes` is currently discarded
+        pass
+
+    @property
+    def is_master(self) -> bool:
+        # TODO: implement this method; until then the property evaluates to None (falsy), not a bool
+        pass
+
+    # TODO: implement more utility methods as given in
+    # https://github.com/hpcaitech/ColossalAI/issues/3051
--- a/colossalai/booster/plugin.py
+++ b/colossalai/booster/plugin.py
@@ -0,0 +1,46 @@
+from typing import List, Tuple
+
+import torch
+import torch.nn as nn
+from torch.optim import Optimizer
+from torch.utils.data import DataLoader
+
+from colossalai.device.device_mesh import DeviceMesh
+
+__all__ = ['Plugin']
+
+
+class Plugin:  # skeleton: every member below is an unimplemented stub that yields None
+
+    @property
+    def supported_devices(self) -> List[torch.device]:
+        pass  # TODO: not implemented; evaluates to None rather than a list of devices
+
+    @property
+    def supported_precisions(self) -> List[str]:
+        pass  # TODO: not implemented; evaluates to None rather than a list of strings
+
+    @property
+    def control_precision(self) -> bool:
+        pass  # TODO: not implemented; evaluates to None (falsy)
+
+    @property
+    def control_device(self) -> bool:
+        pass  # TODO: not implemented; evaluates to None (falsy)
+
+    @property
+    def support_no_sync(self) -> bool:
+        pass  # TODO: not implemented; evaluates to None (falsy)
+
+    def setup_model(self, model: nn.Module, device_mesh_pool: DeviceMesh) -> nn.Module:
+        pass  # TODO: not implemented; both arguments are ignored and None is returned
+
+    def setup_optimizer(self, optimizer: Optimizer) -> Optimizer:
+        pass  # TODO: not implemented; `optimizer` is ignored and None is returned
+
+    def setup_dataloader(self, dataloader: DataLoader) -> DataLoader:
+        pass  # TODO: not implemented; `dataloader` is ignored and None is returned
+
+    @property
+    def device_mesh_shape(self) -> List[Tuple[int, ...]]:
+        pass  # TODO: not implemented; evaluates to None rather than a list of shape tuples
--- a/colossalai/booster/precision.py
+++ b/colossalai/booster/precision.py
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+from torch.optim import Optimizer
+
+__all__ = ['Precision']
+
+
+class Precision:  # records the precision and grad-clipping settings; the setup/scale hooks are stubs
+
+    def __init__(self, precision_type: torch.dtype, grad_clipping_type: str, grad_clipping_value: float):
+        self.precision_type = precision_type
+        self.grad_clipping_type = grad_clipping_type
+        self.grad_clipping_value = grad_clipping_value  # all three values are stored as given, unvalidated
+
+    def setup_model(self, model: nn.Module) -> nn.Module:
+        # TODO: implement this method; until then it ignores `model` and returns None
+        pass
+
+    def setup_optimizer(self, optimizer: Optimizer) -> Optimizer:
+        # TODO: implement this method; until then it ignores `optimizer` and returns None
+        # planned: inject grad clipping and unscale loss
+        pass
+
+    def scale_loss(self, loss: torch.Tensor) -> torch.Tensor:
+        pass  # TODO: not implemented; `loss` is ignored and None is returned instead of a tensor