From 01e9f834f533be6d2bde61fa39e5b2c891dfd7ce Mon Sep 17 00:00:00 2001 From: Frank Lee Date: Fri, 22 Apr 2022 15:24:35 +0800 Subject: [PATCH] [dependency] removed torchvision (#833) * [dependency] removed torchvision * fixed transforms --- colossalai/registry/__init__.py | 9 +++---- requirements/requirements-test.txt | 4 +--- requirements/requirements.txt | 2 -- tests/test_data/test_cifar10_dataset.py | 21 ++++------------ tests/test_data/test_data_parallel_sampler.py | 23 +++++++++--------- .../test_deterministic_dataloader.py | 24 +++++++++---------- 6 files changed, 30 insertions(+), 53 deletions(-) diff --git a/colossalai/registry/__init__.py b/colossalai/registry/__init__.py index 62b0bb08f..b4e48badf 100644 --- a/colossalai/registry/__init__.py +++ b/colossalai/registry/__init__.py @@ -1,22 +1,19 @@ import torch.distributed.optim as dist_optim import torch.nn as nn import torch.optim as optim -import torchvision.models as tv_models -import torchvision.datasets as tv_datasets -from torchvision import transforms from .registry import Registry LAYERS = Registry("layers", third_party_library=[nn]) LOSSES = Registry("losses") -MODELS = Registry("models", third_party_library=[tv_models]) +MODELS = Registry("models") OPTIMIZERS = Registry("optimizers", third_party_library=[optim, dist_optim]) -DATASETS = Registry("datasets", third_party_library=[tv_datasets]) +DATASETS = Registry("datasets") DIST_GROUP_INITIALIZER = Registry("dist_group_initializer") GRADIENT_HANDLER = Registry("gradient_handler") LOSSES = Registry("losses", third_party_library=[nn]) HOOKS = Registry("hooks") -TRANSFORMS = Registry("transforms", third_party_library=[transforms]) +TRANSFORMS = Registry("transforms") DATA_SAMPLERS = Registry("data_samplers") LR_SCHEDULERS = Registry("lr_schedulers") SCHEDULE = Registry("schedules") diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index b04bc837c..82e9c3c66 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -1,5 +1,3 @@ pytest -rpyc -matplotlib -tensorboard +torchvision transformers diff --git a/requirements/requirements.txt b/requirements/requirements.txt index d60d4a81d..02a907f09 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,9 +1,7 @@ torch>=1.8 -torchvision>=0.9 numpy tqdm psutil -tensorboard packaging pre-commit rich diff --git a/tests/test_data/test_cifar10_dataset.py b/tests/test_data/test_cifar10_dataset.py index 569cea2ca..3c82d6647 100644 --- a/tests/test_data/test_cifar10_dataset.py +++ b/tests/test_data/test_cifar10_dataset.py @@ -10,23 +10,10 @@ from torch.utils.data import DataLoader from colossalai.builder import build_dataset, build_transform from colossalai.context import Config +from torchvision.transforms import ToTensor -TRAIN_DATA = dict( - dataset=dict( - type='CIFAR10', - root=Path(os.environ['DATA']), - train=True, - download=True - ), - dataloader=dict(batch_size=4, shuffle=True, num_workers=2), - transform_pipeline=[ - dict(type='ToTensor'), - dict(type='Normalize', - mean=(0.5, 0.5, 0.5), - std=(0.5, 0.5, 0.5) - ) - ] -) +TRAIN_DATA = dict(dataset=dict(type='CIFAR10', root=Path(os.environ['DATA']), train=True, download=True), + dataloader=dict(batch_size=4, shuffle=True, num_workers=2)) @pytest.mark.cpu @@ -37,7 +24,7 @@ def test_cifar10_dataset(): transform_cfg = config.transform_pipeline # build transform - transform_pipeline = [build_transform(cfg) for cfg in transform_cfg] + transform_pipeline = [ToTensor()] transform_pipeline = transforms.Compose(transform_pipeline) dataset_cfg['transform'] = transform_pipeline diff --git a/tests/test_data/test_data_parallel_sampler.py b/tests/test_data/test_data_parallel_sampler.py index 05967f7ce..2fa78a022 100644 --- a/tests/test_data/test_data_parallel_sampler.py +++ b/tests/test_data/test_data_parallel_sampler.py @@ -12,26 +12,25 @@ import torch.multiprocessing as mp from torch.utils.data import DataLoader import colossalai -from colossalai.builder import build_dataset, build_transform +from colossalai.builder import build_dataset from torchvision import transforms from colossalai.context import ParallelMode, Config from colossalai.core import global_context as gpc from colossalai.utils import get_dataloader, free_port from colossalai.testing import rerun_if_address_is_in_use +from torchvision.transforms import ToTensor CONFIG = Config( dict( - train_data=dict(dataset=dict( - type='CIFAR10', - root=Path(os.environ['DATA']), - train=True, - download=True, + train_data=dict( + dataset=dict( + type='CIFAR10', + root=Path(os.environ['DATA']), + train=True, + download=True, + ), + dataloader=dict(batch_size=8,), ), - dataloader=dict(batch_size=8,), - transform_pipeline=[ - dict(type='ToTensor'), - dict(type='Normalize', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) - ]), parallel=dict( pipeline=dict(size=1), tensor=dict(size=1, mode=None), @@ -45,7 +44,7 @@ def run_data_sampler(rank, world_size, port): colossalai.launch(**dist_args) print('finished initialization') - transform_pipeline = [build_transform(cfg) for cfg in gpc.config.train_data.transform_pipeline] + transform_pipeline = [ToTensor()] transform_pipeline = transforms.Compose(transform_pipeline) gpc.config.train_data.dataset['transform'] = transform_pipeline dataset = build_dataset(gpc.config.train_data.dataset) diff --git a/tests/test_data/test_deterministic_dataloader.py b/tests/test_data/test_deterministic_dataloader.py index 6d35267ed..c5064bbb9 100644 --- a/tests/test_data/test_deterministic_dataloader.py +++ b/tests/test_data/test_deterministic_dataloader.py @@ -13,26 +13,24 @@ from torchvision import transforms from torch.utils.data import DataLoader import colossalai -from colossalai.builder import build_dataset, build_transform +from colossalai.builder import build_dataset from colossalai.context import ParallelMode, Config from colossalai.core import global_context as gpc from colossalai.utils import free_port from colossalai.testing import rerun_if_address_is_in_use +from torchvision import transforms CONFIG = Config( dict( - train_data=dict(dataset=dict( - type='CIFAR10', - root=Path(os.environ['DATA']), - train=True, - download=True, + train_data=dict( + dataset=dict( + type='CIFAR10', + root=Path(os.environ['DATA']), + train=True, + download=True, + ), + dataloader=dict(num_workers=2, batch_size=2, shuffle=True), ), - dataloader=dict(num_workers=2, batch_size=2, shuffle=True), - transform_pipeline=[ - dict(type='ToTensor'), - dict(type='RandomCrop', size=32), - dict(type='Normalize', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) - ]), parallel=dict( pipeline=dict(size=1), tensor=dict(size=1, mode=None), @@ -50,7 +48,7 @@ def run_data_sampler(rank, world_size, port): transform_cfg = gpc.config.train_data.transform_pipeline # build transform - transform_pipeline = [build_transform(cfg) for cfg in transform_cfg] + transform_pipeline = [transforms.ToTensor(), transforms.RandomCrop(size=32)] transform_pipeline = transforms.Compose(transform_pipeline) dataset_cfg['transform'] = transform_pipeline