polish code

pull/454/head
ver217 2022-03-18 13:57:20 +08:00
parent e99af94ab8
commit 8cf7ff08cf
4 changed files with 24 additions and 28 deletions

View File

@@ -1,10 +1,8 @@
-import imp
 from functools import partial
 import torch
 import torch.distributed as dist
 from colossalai.logging import get_dist_logger
-from colossalai.nn.optimizer import CPUAdam
 from colossalai.utils import checkpoint
 from colossalai.zero.shard_utils import TensorShardStrategy
 from colossalai.zero.sharded_model import ShardedModelV2

@@ -20,23 +18,22 @@ _ZERO_MODEL_CONFIG = dict(reduce_scatter_bucket_size_mb=25,
                           use_memory_tracer=False,
                           shard_strategy=TensorShardStrategy)

-_ZERO_OPTIMIZER_CONFIG = dict(
-    cpu_offload=False,
-    initial_scale=2**5,
-    min_scale=1,
-    growth_factor=2,
-    backoff_factor=0.5,
-    growth_interval=1000,
-    hysteresis=2,
-    max_scale=2**32,
-    lr=1e-3)
+_ZERO_OPTIMIZER_CONFIG = dict(cpu_offload=False,
+                              initial_scale=2**5,
+                              min_scale=1,
+                              growth_factor=2,
+                              backoff_factor=0.5,
+                              growth_interval=1000,
+                              hysteresis=2,
+                              max_scale=2**32,
+                              lr=1e-3)

 ZERO_PARALLEL_CONFIG = dict(fp16=dict(mode=None,),
                             zero=dict(
                                 model_config=_ZERO_MODEL_CONFIG,
                                 optimizer_config=_ZERO_OPTIMIZER_CONFIG,
                             ),
                             parallel=dict(pipeline=dict(size=1), tensor=dict(size=1, mode=None)))

 CONFIG = dict(fp16=dict(mode=None,),
               zero=dict(level=3,
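Aside: the `_ZERO_OPTIMIZER_CONFIG` change above is pure reflow, not a behavior change. Its keys group into three roles: `cpu_offload` is consumed by the sharded-optimizer wrapper, `lr` is a plain optimizer hyperparameter, and the remaining keys are dynamic-loss-scale knobs. Below is a minimal, self-contained sketch of that split; `split_optimizer_config` is a hypothetical helper for illustration, not ColossalAI's actual code.

import torch

_ZERO_OPTIMIZER_CONFIG = dict(cpu_offload=False, initial_scale=2**5, min_scale=1,
                              growth_factor=2, backoff_factor=0.5, growth_interval=1000,
                              hysteresis=2, max_scale=2**32, lr=1e-3)

def split_optimizer_config(config):
    cfg = dict(config)
    cpu_offload = cfg.pop('cpu_offload')   # consumed by the sharded-optimizer wrapper
    lr = cfg.pop('lr')                     # plain optimizer hyperparameter
    return cpu_offload, lr, cfg            # remaining keys drive dynamic loss scaling

model = torch.nn.Linear(4, 4)
cpu_offload, lr, scale_cfg = split_optimizer_config(_ZERO_OPTIMIZER_CONFIG)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)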

View File

@@ -10,8 +10,7 @@ import torch.multiprocessing as mp
 from colossalai.testing import parameterize
 from colossalai.utils import free_port
 from colossalai.zero.init_ctx import ZeroInitContext
-from colossalai.zero.shard_utils import (BucketTensorShardStrategy,
-                                         TensorShardStrategy)
+from colossalai.zero.shard_utils import (BucketTensorShardStrategy, TensorShardStrategy)
 from colossalai.zero.sharded_model import ShardedModelV2
 from colossalai.zero.sharded_model._zero3_utils import cast_tensor_to_fp16
 from colossalai.zero.sharded_model.utils import col_model_deepcopy
@@ -22,10 +21,10 @@ from common import CONFIG, check_grads_padding, run_fwd_bwd


 @parameterize("enable_autocast", [True])
-@parameterize("shard_strategy", [TensorShardStrategy, BucketTensorShardStrategy])
-def run_model_test(enable_autocast, shard_strategy):
+@parameterize("shard_strategy_class", [TensorShardStrategy, BucketTensorShardStrategy])
+def run_model_test(enable_autocast, shard_strategy_class):
     test_models = ['repeated_computed_layers', 'resnet18', 'bert']
-    shard_strategy = shard_strategy()
+    shard_strategy = shard_strategy_class()
     for model_name in test_models:
         get_components_func = non_distributed_component_funcs.get_callable(model_name)
         model_builder, train_dataloader, _, _, criterion = get_components_func()
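The rename from `shard_strategy` to `shard_strategy_class` makes explicit that the decorator injects a class, which the test then instantiates fresh on each parameterized run (the old code shadowed the class with its own instance). A minimal, runnable sketch of that pattern; the `parameterize` below is a simplified stand-in for `colossalai.testing.parameterize`, and the two strategy classes are empty placeholders:

import functools

def parameterize(arg_name, values):
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(**kwargs):
            for value in values:              # invoke the test once per parameter value
                fn(**{arg_name: value}, **kwargs)
        return wrapper
    return decorator

class TensorShardStrategy: ...
class BucketTensorShardStrategy: ...

@parameterize("shard_strategy_class", [TensorShardStrategy, BucketTensorShardStrategy])
def run_model_test(shard_strategy_class):
    shard_strategy = shard_strategy_class()   # fresh instance per run, no name shadowing
    print(type(shard_strategy).__name__)

run_model_test()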

View File

@@ -9,8 +9,7 @@ from colossalai.nn.optimizer import CPUAdam
 from colossalai.testing import parameterize
 from colossalai.utils import free_port
 from colossalai.zero.init_ctx import ZeroInitContext
-from colossalai.zero.shard_utils import (BucketTensorShardStrategy,
-                                         TensorShardStrategy)
+from colossalai.zero.shard_utils import (BucketTensorShardStrategy, TensorShardStrategy)
 from colossalai.zero.sharded_model import ShardedModelV2
 from colossalai.zero.sharded_model.utils import col_model_deepcopy
 from colossalai.zero.sharded_optim import ShardedOptimizerV2
@@ -41,10 +40,10 @@ def _run_step(model, optimizer, data, label, criterion, enable_autocast=False):


 @parameterize("cpu_offload", [True, False])
 @parameterize("use_cpuadam", [True, False])
-@parameterize("shard_strategy", [TensorShardStrategy, BucketTensorShardStrategy])
-def _run_test_sharded_optim_v2(cpu_offload, shard_strategy, use_cpuadam):
+@parameterize("shard_strategy_class", [TensorShardStrategy, BucketTensorShardStrategy])
+def _run_test_sharded_optim_v2(cpu_offload, shard_strategy_class, use_cpuadam):
     test_models = ['repeated_computed_layers', 'resnet18', 'bert']
-    shard_strategy = shard_strategy()
+    shard_strategy = shard_strategy_class()
     if use_cpuadam and cpu_offload is False:
         return
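The early `return` skips the one invalid pairing in the parameter grid: CPUAdam performs its update on CPU-resident parameters and optimizer state, so it is only meaningful when `cpu_offload=True`. A tiny sketch of the resulting combination filter:

from itertools import product

for cpu_offload, use_cpuadam in product([True, False], repeat=2):
    if use_cpuadam and cpu_offload is False:
        continue    # CPUAdam needs CPU-resident params, i.e. cpu_offload=True
    print(f"would test cpu_offload={cpu_offload}, use_cpuadam={use_cpuadam}")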

View File

@@ -8,20 +8,21 @@ import colossalai
 import pytest
 import torch
 import torch.multiprocessing as mp
-from colossalai.testing import parameterize
 from colossalai.utils import free_port
 from colossalai.zero.init_ctx import ZeroInitContext
 from colossalai.zero.shard_utils import (BucketTensorShardStrategy, TensorShardStrategy)
 from colossalai.zero.sharded_model import ShardedModelV2
 from colossalai.zero.sharded_model.utils import col_model_deepcopy
 from tests.components_to_test.registry import non_distributed_component_funcs
+from colossalai.testing import parameterize
+
 from common import CONFIG


-@parameterize("shard_strategy", [TensorShardStrategy, BucketTensorShardStrategy])
-def run_zero_state_dict(shard_strategy):
+@parameterize("shard_strategy_class", [TensorShardStrategy, BucketTensorShardStrategy])
+def run_zero_state_dict(shard_strategy_class):
     test_models = ['repeated_computed_layers', 'resnet18']
-    shard_strategy = shard_strategy()
+    shard_strategy = shard_strategy_class()
     for model_name in test_models:
         get_components_func = non_distributed_component_funcs.get_callable(model_name)
         model_builder, train_dataloader, test_dataloader, optimizer, criterion = get_components_func()
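Judging by the imports (`ShardedModelV2`, `col_model_deepcopy`), a state-dict test like this typically builds the model under `ZeroInitContext`, keeps an unsharded reference copy, and checks that the sharded model's gathered `state_dict` matches it. A self-contained sketch of just the comparison step; `reference_model` and `sharded_model` are placeholder names for illustration, not the test's real variables:

import copy
import torch

reference_model = torch.nn.Linear(8, 8)           # stands in for model_builder()
sharded_model = copy.deepcopy(reference_model)    # stands in for the ZeRO-wrapped copy

# Every tensor in the (gathered) sharded state dict must equal the reference.
for key, value in sharded_model.state_dict().items():
    assert torch.equal(value, reference_model.state_dict()[key]), key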