# ColossalAI example config: examples/language/gpt/gpt2_configs/gpt2_zero3.py

from titans.model.gpt import gpt2_small
from colossalai.nn.optimizer import HybridAdam
from colossalai.zero.shard_utils import TensorShardStrategy
# Training hyperparameters for this GPT-2 + ZeRO-3 example.
# NOTE(review): BATCH_SIZE is presumably per process/GPU — confirm against the launcher.
BATCH_SIZE, NUM_EPOCHS, SEQ_LEN = 2, 60, 1024
# ZeRO configuration consumed by colossalai; shards model tensors via
# TensorShardStrategy. Reformatted — the pasted original had lost its
# continuation-line indentation.
zero = dict(
    model_config=dict(
        # 'auto' presumably lets the runtime choose CPU/GPU tensor placement
        # from available memory — confirm against ColossalAI zero docs.
        tensor_placement_policy='auto',
        shard_strategy=TensorShardStrategy(),
        # reuse_fp16_shard=True shares the fp16 shard storage (memory saving
        # per ColossalAI convention — verify for this version).
        reuse_fp16_shard=True,
    ),
    optimizer_config=dict(),
)
# Optimizer spec picked up by colossalai.initialize from this config module.
# NOTE(review): HybridAdam is presumably the CPU/GPU hybrid Adam used with
# ZeRO offloading — confirm in colossalai.nn.optimizer docs.
optimizer = {
    'type': HybridAdam,
    'lr': 0.00015,
    'weight_decay': 1e-2,
}
# Model spec: build GPT-2 small from titans.
# NOTE(review): checkpoint=True presumably enables activation checkpointing
# (recompute activations in backward to save memory) — confirm in titans.
model = {
    'type': gpt2_small,
    'checkpoint': True,
}