mirror of https://github.com/hpcaitech/ColossalAI
25 lines
557 B
Python
25 lines
557 B
Python
from titans.model.gpt import gpt2_small
|
|
|
|
from colossalai.nn.optimizer import HybridAdam
|
|
from colossalai.zero.shard_utils import TensorShardStrategy
|
|
|
|
# Module-level training constants. How they are consumed is not visible in
# this file -- presumably the training script reads them from the loaded
# config; confirm against the caller.
BATCH_SIZE = 2  # batch size (per-process vs. global is not shown here)
NUM_EPOCHS = 60  # number of training epochs
SEQ_LEN = 1024  # sequence length (presumably in tokens -- TODO confirm)
|
|
|
|
# ZeRO settings, split into a model section and an optimizer section.
# The optimizer section is intentionally left empty (no overrides given here).
zero = dict(
    model_config=dict(
        tensor_placement_policy='auto',
        # A strategy *instance* (not the class) is passed here.
        shard_strategy=TensorShardStrategy(),
        reuse_fp16_shard=True,
    ),
    optimizer_config=dict(),
)
|
|
|
|
# Optimizer specification: `type` holds the optimizer class itself; the
# remaining keys are presumably forwarded as constructor keyword arguments
# by whatever builds the optimizer -- confirm against the caller.
optimizer = dict(
    type=HybridAdam,
    lr=0.00015,
    weight_decay=1e-2,
)
|
|
|
|
# Model specification: `type` holds the model builder imported from titans.
# NOTE(review): `checkpoint=True` presumably enables activation checkpointing
# in the builder -- confirm against the gpt2_small signature.
model = dict(
    type=gpt2_small,
    checkpoint=True,
)
|