[test] align model name with the file name. (#2045)

pull/2047/head
Jiarui Fang 2022-11-30 15:45:26 +08:00 committed by GitHub
parent 31c644027b
commit 1e885329f4
10 changed files with 225 additions and 219 deletions

View File

@@ -1,2 +1,11 @@
-from . import bert, gpt, inline_op_model, nested_model, no_leaf_module, repeated_computed_layer, resnet, simple_net
+from . import (
+    bert,
+    gpt2,
+    hanging_param_model,
+    inline_op_model,
+    nested_model,
+    repeated_computed_layer,
+    resnet,
+    simple_net,
+)
 from .utils import run_fwd_bwd

View File

@@ -8,9 +8,10 @@ from .registry import non_distributed_component_funcs
 from .utils.dummy_data_generator import DummyDataGenerator
 
-class NoLeafModule(CheckpointModule):
+class HangingParamModule(CheckpointModule):
     """
-    In this no-leaf module, it has subordinate nn.modules and a nn.Parameter.
+    Hanging Parameter: a parameter that does not belong to a leaf Module.
+    It has subordinate nn.modules and a nn.Parameter.
     """
 
     def __init__(self, checkpoint=False) -> None:
@@ -34,11 +35,11 @@ class DummyDataLoader(DummyDataGenerator):
         return data, label
 
-@non_distributed_component_funcs.register(name='no_leaf_module')
+@non_distributed_component_funcs.register(name='hanging_param_model')
 def get_training_components():
 
     def model_builder(checkpoint=False):
-        return NoLeafModule(checkpoint)
+        return HangingParamModule(checkpoint)
 
     trainloader = DummyDataLoader()
     testloader = DummyDataLoader()
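
For readers unfamiliar with the term, a "hanging" parameter here is an nn.Parameter registered directly on a composite (non-leaf) module rather than inside a leaf submodule such as nn.Linear. The sketch below is illustrative only (TinyHangingParamModel is a made-up name, not the HangingParamModule defined in this file), but it shows the shape of the pattern the renamed test component exercises.

import torch
import torch.nn as nn


class TinyHangingParamModel(nn.Module):
    """Illustrative only: a parameter owned directly by a non-leaf module."""

    def __init__(self) -> None:
        super().__init__()
        self.proj = nn.Linear(4, 8)    # subordinate leaf module
        # The "hanging" parameter: registered on this composite module,
        # not on any of its leaf children.
        self.weight = nn.Parameter(torch.randn(8, 8))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.matmul(self.proj(x), self.weight)


model = TinyHangingParamModel()
print(model(torch.randn(2, 4)).shape)    # torch.Size([2, 8])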

View File

@@ -14,7 +14,7 @@ from tests.components_to_test.registry import non_distributed_component_funcs
 def run_tracer(rank, world_size, port, use_grad_check=True):
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
-    test_models = ['repeated_computed_layers', 'resnet18', 'no_leaf_module', 'bert']
+    test_models = ['repeated_computed_layers', 'resnet18', 'hanging_param_model', 'bert']
     # test_models = ['bert']
     for model_name in test_models:
         get_components_func = non_distributed_component_funcs.get_callable(model_name)
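
For context, the tests above fetch model components by the string they were registered under in tests/components_to_test/registry.py, which is why the registered name, the class name, and the file name are being aligned. A minimal sketch of that registration pattern follows; this Registry class is a simplified stand-in under assumed behaviour, not the project's actual implementation.

class Registry:
    """Simplified stand-in for non_distributed_component_funcs (assumption)."""

    def __init__(self) -> None:
        self._funcs = {}

    def register(self, name):
        def wrapper(func):
            self._funcs[name] = func    # store the factory under its string name
            return func
        return wrapper

    def get_callable(self, name):
        return self._funcs[name]        # look the factory back up by the same string


non_distributed_component_funcs = Registry()


@non_distributed_component_funcs.register(name='hanging_param_model')
def get_training_components():
    # The real factory returns (model_builder, trainloader, testloader, optimizer_class, criterion).
    return 'components for hanging_param_model'


# Tests look components up by the registered string, so the literal passed to
# register(...) and the one passed to get_callable(...) must stay in sync.
print(non_distributed_component_funcs.get_callable('hanging_param_model')())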

View File

@@ -50,7 +50,7 @@ def run_model(model, inputs, label, criterion, use_param_hook=False):
 def test_base_param_hook():
-    test_models = ['repeated_computed_layers', 'resnet18', 'no_leaf_module', 'inline_op_model']
+    test_models = ['repeated_computed_layers', 'resnet18', 'hanging_param_model', 'inline_op_model']
     # test_models = ['bert']
     for model_name in test_models:

View File

@@ -41,7 +41,7 @@ def check_param(model: ZeroDDP, torch_model: torch.nn.Module):
 # 'gpt2', 'bert',
-TEST_MODELS = ['no_leaf_module', 'gpt2', 'bert', 'simple_net', 'nested_model', 'repeated_computed_layers']
+TEST_MODELS = ['hanging_param_model', 'gpt2', 'bert', 'simple_net', 'nested_model', 'repeated_computed_layers']
 
 @parameterize('placement_policy', ['cuda', 'cpu', 'auto', 'const'])

View File

@@ -1,25 +1,23 @@
 from functools import partial
-import colossalai
 import pytest
 import torch
 import torch.multiprocessing as mp
+import colossalai
+from colossalai.context import MOE_CONTEXT
+from colossalai.engine.gradient_handler import MoeGradientHandler
 from colossalai.nn import MoeLoss
-from colossalai.testing import parameterize, rerun_if_address_is_in_use
+from colossalai.testing import assert_equal_in_group, parameterize, rerun_if_address_is_in_use
 from colossalai.utils import free_port
 from colossalai.zero.init_ctx import ZeroInitContext
-from colossalai.zero.shard_utils import (BucketTensorShardStrategy, TensorShardStrategy)
+from colossalai.zero.shard_utils import BucketTensorShardStrategy, TensorShardStrategy
 from colossalai.zero.sharded_model import ShardedModelV2
 from colossalai.zero.sharded_model._utils import cast_tensor_to_fp16
 from colossalai.zero.sharded_model.utils import col_model_deepcopy
 from tests.components_to_test.registry import non_distributed_component_funcs
-from colossalai.engine.gradient_handler import MoeGradientHandler
-from colossalai.context import MOE_CONTEXT
-from colossalai.testing import assert_equal_in_group
-from tests.test_zero.common import CONFIG, check_grads_padding, run_fwd_bwd
 from tests.test_moe.test_moe_zero_init import MoeModel
+from tests.test_zero.common import CONFIG, check_grads_padding, run_fwd_bwd
 
 @parameterize("enable_autocast", [False])

@@ -27,7 +25,7 @@ from tests.test_moe.test_moe_zero_init import MoeModel
 def run_model_test(enable_autocast, shard_strategy_class):
     shard_strategy = shard_strategy_class()
-    get_components_func = non_distributed_component_funcs.get_callable('no_leaf_module')
+    get_components_func = non_distributed_component_funcs.get_callable('hanging_param_model')
     _, train_dataloader, _, optimizer_class, _ = get_components_func()
     criterion = MoeLoss(aux_weight=0.01, loss_fn=torch.nn.CrossEntropyLoss)

View File

@@ -1,28 +1,26 @@
 from functools import partial
-import colossalai
 import pytest
 import torch
 import torch.multiprocessing as mp
+import colossalai
 from colossalai.amp import convert_to_apex_amp
+from colossalai.context import MOE_CONTEXT
+from colossalai.engine.gradient_handler import MoeGradientHandler
 from colossalai.nn import MoeLoss
 from colossalai.nn.optimizer import CPUAdam
-from colossalai.testing import parameterize, rerun_if_address_is_in_use
-from colossalai.utils import free_port
+from colossalai.testing import assert_equal_in_group, parameterize, rerun_if_address_is_in_use
+from colossalai.utils import free_port, get_current_device
 from colossalai.zero.init_ctx import ZeroInitContext
-from colossalai.zero.shard_utils import (BucketTensorShardStrategy, TensorShardStrategy)
+from colossalai.zero.shard_utils import BucketTensorShardStrategy, TensorShardStrategy
 from colossalai.zero.sharded_model import ShardedModelV2
 from colossalai.zero.sharded_model.utils import col_model_deepcopy
 from colossalai.zero.sharded_optim import ShardedOptimizerV2
 from colossalai.zero.sharded_optim._utils import has_inf_or_nan
-from colossalai.utils import get_current_device
 from tests.components_to_test.registry import non_distributed_component_funcs
-from colossalai.engine.gradient_handler import MoeGradientHandler
-from colossalai.context import MOE_CONTEXT
-from colossalai.testing import assert_equal_in_group
-from tests.test_zero.common import CONFIG, check_sharded_model_params
 from tests.test_moe.test_moe_zero_init import MoeModel
+from tests.test_zero.common import CONFIG, check_sharded_model_params
 
 def _run_step(model, optimizer, data, label, criterion, grad_handler):

@@ -60,7 +58,7 @@ def _run_test_sharded_optim_v2(cpu_offload,
     if use_cpuadam and cpu_offload is False:
         return
     MOE_CONTEXT.reset_loss()
-    get_components_func = non_distributed_component_funcs.get_callable('no_leaf_module')
+    get_components_func = non_distributed_component_funcs.get_callable('hanging_param_model')
     _, train_dataloader, _, optimizer_class, _ = get_components_func()
     criterion = MoeLoss(aux_weight=0.01, loss_fn=torch.nn.CrossEntropyLoss)

View File

@@ -23,7 +23,7 @@ from tests.components_to_test.registry import non_distributed_component_funcs
 @parameterize("enable_autocast", [True])
 @parameterize("shard_strategy_class", [BucketTensorShardStrategy])
 def run_model_test(enable_autocast, shard_strategy_class):
-    test_models = ['repeated_computed_layers', 'resnet18', 'bert', 'no_leaf_module']
+    test_models = ['repeated_computed_layers', 'resnet18', 'bert', 'hanging_param_model']
     shard_strategy = shard_strategy_class()
     for model_name in test_models:
         get_components_func = non_distributed_component_funcs.get_callable(model_name)

View File

@@ -1,25 +1,25 @@
 from functools import partial
-import colossalai
-from colossalai.utils.cuda import get_current_device
 import pytest
 import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
+from common import CONFIG, check_sharded_model_params
+from torch.nn.parallel import DistributedDataParallel as DDP
+import colossalai
 from colossalai.amp import convert_to_apex_amp
 from colossalai.nn.optimizer import CPUAdam
 from colossalai.testing import parameterize, rerun_if_address_is_in_use
 from colossalai.utils import free_port
+from colossalai.utils.cuda import get_current_device
 from colossalai.zero.init_ctx import ZeroInitContext
-from colossalai.zero.shard_utils import (BucketTensorShardStrategy, TensorShardStrategy)
+from colossalai.zero.shard_utils import BucketTensorShardStrategy, TensorShardStrategy
 from colossalai.zero.sharded_model import ShardedModelV2
 from colossalai.zero.sharded_model.utils import col_model_deepcopy
 from colossalai.zero.sharded_optim import ShardedOptimizerV2
 from colossalai.zero.sharded_optim._utils import has_inf_or_nan
 from tests.components_to_test.registry import non_distributed_component_funcs
-from torch.nn.parallel import DistributedDataParallel as DDP
-from common import CONFIG, check_sharded_model_params
 
 def _run_step(model, optimizer, data, label, criterion, enable_autocast=False):

@@ -45,7 +45,7 @@ def _run_step(model, optimizer, data, label, criterion, enable_autocast=False):
 @parameterize("shard_strategy_class", [TensorShardStrategy, BucketTensorShardStrategy])
 @parameterize("gpu_margin_mem_ratio", [0.0, 0.7])
 def _run_test_sharded_optim_v2(cpu_offload, shard_strategy_class, use_cpuadam, gpu_margin_mem_ratio):
-    test_models = ['repeated_computed_layers', 'resnet18', 'bert', 'no_leaf_module']
+    test_models = ['repeated_computed_layers', 'resnet18', 'bert', 'hanging_param_model']
     shard_strategy = shard_strategy_class()
     if use_cpuadam and cpu_offload is False: