# ColossalAI/tests/test_booster/test_plugin/test_dp_plugin_base.py

from typing import Callable, Iterator, List, Tuple, Union

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
from torch.utils.data import DataLoader, TensorDataset

import colossalai
from colossalai.booster.plugin.dp_plugin_base import DPPluginBase
from colossalai.checkpoint_io import CheckpointIO
from colossalai.interface import OptimizerWrapper
from colossalai.testing import rerun_if_address_is_in_use, spawn


class DPPluginWrapper(DPPluginBase):
    """Minimal concrete subclass of ``DPPluginBase`` used only by this test module.

    Every method defined here is an empty stub that returns ``None``. The test
    only calls ``prepare_dataloader``, which is not defined in this class and is
    therefore expected to come from the base class.
    """

    def configure(
        self,
        model: nn.Module,
        optimizer: Optimizer,
        criterion: Callable = None,
        dataloader: DataLoader = None,
        lr_scheduler: LRScheduler = None,
    ) -> Tuple[Union[nn.Module, OptimizerWrapper, LRScheduler, DataLoader]]:
        """Stub: ignores all arguments and returns ``None``."""

    def control_checkpoint_io(self) -> bool:
        """Stub: returns ``None``."""

    def control_device(self) -> bool:
        """Stub: returns ``None``."""

    def control_precision(self) -> bool:
        """Stub: returns ``None``."""

    def get_checkpoint_io(self) -> CheckpointIO:
        """Stub: returns ``None``."""

    def support_no_sync(self) -> bool:
        """Stub: returns ``None``."""

    def supported_devices(self) -> List[str]:
        """Stub: returns ``None``."""

    def supported_precisions(self) -> List[str]:
        """Stub: returns ``None``."""

    def no_sync(self, model: nn.Module) -> Iterator[None]:
        """Stub: ignores ``model`` and returns ``None``."""


def check_dataloader_sharding():
    """Check that rank 0 and rank 1 receive different first batches.

    Builds a dataloader through ``DPPluginWrapper.prepare_dataloader`` over the
    integers 0..9, takes the first batch on every rank, ships rank 1's batch to
    rank 0 via a broadcast, and asserts on rank 0 that the two batches differ.
    Must be called inside an initialized ``torch.distributed`` process group
    that contains a rank 1.
    """
    wrapper = DPPluginWrapper()

    # dataset holding the integers 0, 1, ..., 9
    samples = TensorDataset(torch.arange(0, 10))
    loader = wrapper.prepare_dataloader(samples, batch_size=2)

    # first batch produced on this rank, moved to the GPU
    first_item = next(iter(loader))
    local_batch = first_item[0].cuda()

    on_rank_0 = dist.get_rank() == 0

    # rank 0 receives into a copy so that its own batch is left intact;
    # every other rank hands its batch tensor to the broadcast directly
    received = local_batch.clone() if on_rank_0 else local_batch

    # ship rank 1's batch to the other ranks
    dist.broadcast(received, src=1)

    # only rank 0 performs the comparison
    if not on_rank_0:
        return
    assert not torch.equal(local_batch,
                           received), 'Same number was found across ranks but expected it to be different'


def run_dist(rank, world_size, port):
    """Per-process entry point: launch colossalai on localhost, then run the sharding check.

    Args:
        rank: forwarded to ``colossalai.launch`` as ``rank``.
        world_size: forwarded to ``colossalai.launch`` as ``world_size``.
        port: forwarded to ``colossalai.launch`` as ``port``.
    """
    # set up the distributed environment with an empty config
    colossalai.launch(
        config=dict(),
        rank=rank,
        world_size=world_size,
        port=port,
        host='localhost',
    )

    check_dataloader_sharding()


@rerun_if_address_is_in_use()
def test_dp_plugin_dataloader():
    """Run ``run_dist`` through ``spawn`` with a process count of 2."""
    # the sharding check broadcasts from rank 1, so two processes are needed
    num_procs = 2
    spawn(run_dist, num_procs)
[booster] fix no_sync method (#3709) * [booster] fix no_sync method * [booster] add test for ddp no_sync * [booster] fix merge * [booster] update unit test * [booster] update unit test * [booster] update unit test 2023-05-09 03:10:02 +00:00			`from typing import Callable, Iterator, List, Tuple, Union`
[booster] refactor all dp fashion plugins (#3684) * [booster] add dp plugin base * [booster] inherit dp plugin base * [booster] refactor unit tests 2023-05-05 11:36:10 +00:00
			`import torch`
			`import torch.distributed as dist`
			`import torch.nn as nn`
			`from torch.optim import Optimizer`
			`from torch.optim.lr_scheduler import _LRScheduler as LRScheduler`
			`from torch.utils.data import DataLoader, TensorDataset`

			`import colossalai`
			`from colossalai.booster.plugin.dp_plugin_base import DPPluginBase`
			`from colossalai.checkpoint_io import CheckpointIO`
			`from colossalai.interface import OptimizerWrapper`
			`from colossalai.testing import rerun_if_address_is_in_use, spawn`


			`class DPPluginWrapper(DPPluginBase):`
			`"""This is a wrapper class for testing DP plugin initialization and dataloader creation.`
			`"""`

			`def configure(`
			`self,`
			`model: nn.Module,`
			`optimizer: Optimizer,`
			`criterion: Callable = None,`
			`dataloader: DataLoader = None,`
			`lr_scheduler: LRScheduler = None,`
			`) -> Tuple[Union[nn.Module, OptimizerWrapper, LRScheduler, DataLoader]]:`
			`pass`

			`def control_checkpoint_io(self) -> bool:`
			`pass`

			`def control_device(self) -> bool:`
			`pass`

			`def control_precision(self) -> bool:`
			`pass`

			`def get_checkpoint_io(self) -> CheckpointIO:`
			`pass`

			`def support_no_sync(self) -> bool:`
			`pass`

			`def supported_devices(self) -> List[str]:`
			`pass`

			`def supported_precisions(self) -> List[str]:`
			`pass`
[booster] fix no_sync method (#3709) * [booster] fix no_sync method * [booster] add test for ddp no_sync * [booster] fix merge * [booster] update unit test * [booster] update unit test * [booster] update unit test 2023-05-09 03:10:02 +00:00
			`def no_sync(self, model: nn.Module) -> Iterator[None]:`
			`pass`
[booster] refactor all dp fashion plugins (#3684) * [booster] add dp plugin base * [booster] inherit dp plugin base * [booster] refactor unit tests 2023-05-05 11:36:10 +00:00

			`def check_dataloader_sharding():`
			`plugin = DPPluginWrapper()`

[CI] fix typo with tests/ etc. (#3727) * fix spelling error with examples/comminity/ * fix spelling error with tests/ * fix some spelling error with tests/ colossalai/ etc. * fix spelling error with tests/ etc. date:2023.5.10 2023-05-11 08:30:58 +00:00			`# create a custom dataset with 0 to 10`
[booster] refactor all dp fashion plugins (#3684) * [booster] add dp plugin base * [booster] inherit dp plugin base * [booster] refactor unit tests 2023-05-05 11:36:10 +00:00			`dataset = TensorDataset(torch.arange(0, 10))`
[booster] update prepare dataloader method for plugin (#3706) * [booster] add prepare dataloader method for plug * [booster] update examples and docstr 2023-05-08 07:44:03 +00:00			`train_dataloader = plugin.prepare_dataloader(dataset, batch_size=2)`
[booster] refactor all dp fashion plugins (#3684) * [booster] add dp plugin base * [booster] inherit dp plugin base * [booster] refactor unit tests 2023-05-05 11:36:10 +00:00
			`# get the first batch of data`
			`batch = next(iter(train_dataloader))[0].cuda()`
			`is_rank_0 = dist.get_rank() == 0`

			`if is_rank_0:`
			`batch_to_compare = batch.clone()`
			`else:`
			`batch_to_compare = batch`
			`# pass to the rank 1 value to rank 0`
			`dist.broadcast(batch_to_compare, src=1)`

			`# compare on rank 0`
			`if is_rank_0:`
			`assert not torch.equal(batch,`
			`batch_to_compare), 'Same number was found across ranks but expected it to be different'`


			`def run_dist(rank, world_size, port):`
			`# init dist env`
			`colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost')`
			`check_dataloader_sharding()`


			`@rerun_if_address_is_in_use()`
			`def test_dp_plugin_dataloader():`
			`spawn(run_dist, 2)`