# ZeRO training configuration — mirror of https://github.com/hpcaitech/ColossalAI
from colossalai.nn.optimizer import FusedAdam
from colossalai.zero.shard_utils import TensorShardStrategy

# Optional mixed-precision via the engine's AMP integration; left disabled —
# NOTE(review): presumably because ZeRO's sharded optimizer already manages
# fp16 and loss scaling (see `zero.optimizer_config` below) — confirm.
# fp16 = dict(
#     mode=AMP_TYPE.TORCH,
# )

# seed = 2  # uncomment to fix the RNG seed for reproducible runs
# ZeRO (Zero Redundancy Optimizer) settings.
# `model_config` controls how parameters/gradients are sharded and placed;
# `optimizer_config` holds the optimizer-side memory split and the dynamic
# loss-scale schedule (GradScaler-style fields — NOTE(review): semantics
# assumed from the field names; confirm against the ColossalAI ZeRO docs).
zero = dict(
    model_config=dict(
        shard_strategy=TensorShardStrategy(),
        reduce_scatter_bucket_size_mb=25,  # bucket size (MB) for gradient reduce-scatter
        fp32_reduce_scatter=False,         # keep the gradient reduction in fp16
        tensor_placement_policy="cuda",    # keep tensors on GPU
        gradient_predivide_factor=1.0,     # no pre-division before reduction
        reuse_fp16_shard=False,
    ),
    optimizer_config=dict(
        gpu_margin_mem_ratio=0.8,  # fraction of spare GPU memory the optimizer may use
        # Dynamic loss-scale schedule:
        initial_scale=2**5,
        min_scale=1,
        growth_factor=2,
        backoff_factor=0.5,
        growth_interval=1000,
        hysteresis=2,
        max_scale=2**32,
    ),
)
# gradient_accumulation = 4  # uncomment to accumulate gradients over N steps

# Clip the global gradient norm to stabilise training.
clip_grad_norm = 1.0

# Optimizer spec consumed by the ColossalAI launcher: fused Adam kernel.
optimizer = dict(
    type=FusedAdam,
    lr=0.00015,
    weight_decay=1e-2,
)