# mirror of https://github.com/hpcaitech/ColossalAI
from colossalai.amp import AMP_TYPE

# Model and data dimensions
VOCAB_SIZE = 50304
SEQ_LENGTH = 1024

# Optimization hyperparameters
TOTAL_BATCH_SIZE = 256
LEARNING_RATE = 0.00015
WEIGHT_DECAY = 1e-2

# 2.5D tensor parallelism across 4 devices; with DEPTH = 1 the devices form a 2 x 2 grid
TENSOR_PARALLEL_SIZE = 4
DEPTH = 1
TENSOR_PARALLEL_MODE = '2.5d'

# Training schedule
NUM_EPOCHS = 60
WARMUP_EPOCHS = int(NUM_EPOCHS * 0.36)

# Parallel layout read by ColossalAI: a single pipeline stage (no pipeline parallelism)
# plus 2.5D tensor parallelism
parallel = dict(
    pipeline=1,
    tensor=dict(mode=TENSOR_PARALLEL_MODE, size=TENSOR_PARALLEL_SIZE, depth=DEPTH),
)

# Mixed precision via native PyTorch AMP
fp16 = dict(mode=AMP_TYPE.TORCH)

# Number of gradient accumulation steps; the per-step batch size is derived from it
gradient_accumulation = 1

BATCH_SIZE = TOTAL_BATCH_SIZE // gradient_accumulation

# Clip gradients to a maximum L2 norm of 1.0
clip_grad_norm = 1.0

# Output directory name encoding the key hyperparameters of this run
LOG_PATH = f"./gpt2_{TENSOR_PARALLEL_MODE}_tp{TENSOR_PARALLEL_SIZE}_bs{BATCH_SIZE}_lr{LEARNING_RATE}_accum{gradient_accumulation}_clip_grad{clip_grad_norm}/"
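
# ---------------------------------------------------------------------------
# Usage sketch (commented out, not executed as part of this config): a minimal
# example of how a config file like this is typically consumed through the
# legacy context-based ColossalAI API. The config path and the train() call are
# hypothetical placeholders, and the snippet assumes the 0.x-era
# colossalai.launch_from_torch / global_context interface.
#
#   import colossalai
#   from colossalai.core import global_context as gpc
#
#   colossalai.launch_from_torch(config='./gpt2_2p5d.py')   # hypothetical path to this file
#   print(gpc.config.BATCH_SIZE, gpc.config.LEARNING_RATE)  # values defined above
#   train()  # user-defined training loop that reads hyperparameters from gpc.config
# ---------------------------------------------------------------------------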