From 943982d29a2a6bd6c74012e6795fcf9af7a63ea5 Mon Sep 17 00:00:00 2001
From: Frank Lee
Date: Fri, 22 Apr 2022 15:39:07 +0800
Subject: [PATCH] [unittest] refactored unit tests for change in dependency (#838)

---
 tests/test_data/test_cifar10_dataset.py       | 21 ++--------
 tests/test_data/test_data_parallel_sampler.py | 40 +++++++------------
 .../test_deterministic_dataloader.py          | 22 +++-------
 3 files changed, 25 insertions(+), 58 deletions(-)

diff --git a/tests/test_data/test_cifar10_dataset.py b/tests/test_data/test_cifar10_dataset.py
index 3c82d6647..4b9ca61d9 100644
--- a/tests/test_data/test_cifar10_dataset.py
+++ b/tests/test_data/test_cifar10_dataset.py
@@ -5,34 +5,21 @@ import os
 from pathlib import Path
 
 import pytest
-from torchvision import transforms
+from torchvision import transforms, datasets
 from torch.utils.data import DataLoader
 
-from colossalai.builder import build_dataset, build_transform
-from colossalai.context import Config
-from torchvision.transforms import ToTensor
-
-TRAIN_DATA = dict(dataset=dict(type='CIFAR10', root=Path(os.environ['DATA']), train=True, download=True),
-                  dataloader=dict(batch_size=4, shuffle=True, num_workers=2))
-
 
 @pytest.mark.cpu
 def test_cifar10_dataset():
-    config = Config(TRAIN_DATA)
-    dataset_cfg = config.dataset
-    dataloader_cfg = config.dataloader
-    transform_cfg = config.transform_pipeline
-
     # build transform
-    transform_pipeline = [ToTensor()]
+    transform_pipeline = [transforms.ToTensor()]
     transform_pipeline = transforms.Compose(transform_pipeline)
-    dataset_cfg['transform'] = transform_pipeline
 
     # build dataset
-    dataset = build_dataset(dataset_cfg)
+    dataset = datasets.CIFAR10(root=Path(os.environ['DATA']), train=True, download=True, transform=transform_pipeline)
 
     # build dataloader
-    dataloader = DataLoader(dataset=dataset, **dataloader_cfg)
+    dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)
     data_iter = iter(dataloader)
     img, label = data_iter.next()
 
diff --git a/tests/test_data/test_data_parallel_sampler.py b/tests/test_data/test_data_parallel_sampler.py
index 2fa78a022..54fa44bdc 100644
--- a/tests/test_data/test_data_parallel_sampler.py
+++ b/tests/test_data/test_data_parallel_sampler.py
@@ -9,34 +9,21 @@ import pytest
 import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
-from torch.utils.data import DataLoader
 
 import colossalai
-from colossalai.builder import build_dataset
-from torchvision import transforms
+from torchvision import transforms, datasets
 from colossalai.context import ParallelMode, Config
 from colossalai.core import global_context as gpc
 from colossalai.utils import get_dataloader, free_port
 from colossalai.testing import rerun_if_address_is_in_use
-from torchvision.transforms import ToTensor
 
-CONFIG = Config(
-    dict(
-        train_data=dict(
-            dataset=dict(
-                type='CIFAR10',
-                root=Path(os.environ['DATA']),
-                train=True,
-                download=True,
-            ),
-            dataloader=dict(batch_size=8,),
-        ),
-        parallel=dict(
-            pipeline=dict(size=1),
-            tensor=dict(size=1, mode=None),
-        ),
-        seed=1024,
-    ))
+CONFIG = Config(dict(
+    parallel=dict(
+        pipeline=dict(size=1),
+        tensor=dict(size=1, mode=None),
+    ),
+    seed=1024,
+))
 
 
 def run_data_sampler(rank, world_size, port):
@@ -44,11 +31,14 @@ def run_data_sampler(rank, world_size, port):
     colossalai.launch(**dist_args)
     print('finished initialization')
 
-    transform_pipeline = [ToTensor()]
+    # build dataset
+    transform_pipeline = [transforms.ToTensor()]
     transform_pipeline = transforms.Compose(transform_pipeline)
-    gpc.config.train_data.dataset['transform'] = transform_pipeline
-    dataset = build_dataset(gpc.config.train_data.dataset)
-    dataloader = get_dataloader(dataset, **gpc.config.train_data.dataloader)
+    dataset = datasets.CIFAR10(root=Path(os.environ['DATA']), train=True, download=True, transform=transform_pipeline)
+
+    # build dataloader
+    dataloader = get_dataloader(dataset, batch_size=8, add_sampler=True)
+
     data_iter = iter(dataloader)
     img, label = data_iter.next()
     img = img[0]
diff --git a/tests/test_data/test_deterministic_dataloader.py b/tests/test_data/test_deterministic_dataloader.py
index c5064bbb9..4d76e7f13 100644
--- a/tests/test_data/test_deterministic_dataloader.py
+++ b/tests/test_data/test_deterministic_dataloader.py
@@ -9,14 +9,12 @@ import pytest
 import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
-from torchvision import transforms
-from torch.utils.data import DataLoader
+from torchvision import transforms, datasets
 
 import colossalai
-from colossalai.builder import build_dataset
 from colossalai.context import ParallelMode, Config
 from colossalai.core import global_context as gpc
-from colossalai.utils import free_port
+from colossalai.utils import get_dataloader, free_port
 from colossalai.testing import rerun_if_address_is_in_use
 from torchvision import transforms
 
@@ -43,20 +41,13 @@ def run_data_sampler(rank, world_size, port):
     dist_args = dict(config=CONFIG, rank=rank, world_size=world_size, backend='gloo', port=port, host='localhost')
     colossalai.launch(**dist_args)
 
-    dataset_cfg = gpc.config.train_data.dataset
-    dataloader_cfg = gpc.config.train_data.dataloader
-    transform_cfg = gpc.config.train_data.transform_pipeline
-
-    # build transform
-    transform_pipeline = [transforms.ToTensor(), transforms.RandomCrop(size=32)]
-    transform_pipeline = transforms.Compose(transform_pipeline)
-    dataset_cfg['transform'] = transform_pipeline
-
     # build dataset
-    dataset = build_dataset(dataset_cfg)
+    transform_pipeline = [transforms.ToTensor(), transforms.RandomCrop(size=32, padding=4)]
+    transform_pipeline = transforms.Compose(transform_pipeline)
+    dataset = datasets.CIFAR10(root=Path(os.environ['DATA']), train=True, download=True, transform=transform_pipeline)
 
     # build dataloader
-    dataloader = DataLoader(dataset=dataset, **dataloader_cfg)
+    dataloader = get_dataloader(dataset, batch_size=8, add_sampler=False)
 
     data_iter = iter(dataloader)
     img, label = data_iter.next()
@@ -76,7 +67,6 @@ def run_data_sampler(rank, world_size, port):
     torch.cuda.empty_cache()
 
 
-@pytest.mark.skip
 @pytest.mark.cpu
 @rerun_if_address_is_in_use()
 def test_data_sampler():
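
For reference, the refactored tests all reduce to the same pattern: build the CIFAR-10 dataset directly with torchvision and hand it to either a plain PyTorch DataLoader or colossalai.utils.get_dataloader. The snippet below is an illustrative sketch of that pattern rather than part of the patch; it assumes, as the tests do, that the DATA environment variable points at a directory where CIFAR-10 can be downloaded.

import os
from pathlib import Path

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Build the transform and dataset directly with torchvision, replacing the
# removed colossalai.builder / config-driven construction.
transform_pipeline = transforms.Compose([transforms.ToTensor()])
dataset = datasets.CIFAR10(root=Path(os.environ['DATA']), train=True, download=True,
                           transform=transform_pipeline)

# The single-process test wraps the dataset in a plain PyTorch DataLoader; the
# distributed tests pass the same dataset to colossalai.utils.get_dataloader and
# toggle add_sampler to control whether a data-parallel sampler is attached.
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)
img, label = next(iter(dataloader))
print(img.shape, label.shape)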