mirror of https://github.com/hpcaitech/ColossalAI
33 lines · 653 B · Python
from colossalai.amp import AMP_TYPE

# hyperparameters
# BATCH_SIZE is as per GPU
# global batch size = BATCH_SIZE x data parallel size
BATCH_SIZE = 256
LEARNING_RATE = 3e-3
WEIGHT_DECAY = 0.3
NUM_EPOCHS = 300
WARMUP_EPOCHS = 32

# model config
IMG_SIZE = 224
PATCH_SIZE = 16
HIDDEN_SIZE = 384
DEPTH = 12
NUM_HEADS = 6
MLP_RATIO = 4
NUM_CLASSES = 1000
CHECKPOINT = False
SEQ_LENGTH = (IMG_SIZE // PATCH_SIZE)**2 + 1  # add 1 for cls token

USE_DDP = True
TP_WORLD_SIZE = 2
TP_TYPE = 'row'
parallel = dict(tensor=dict(mode="1d", size=TP_WORLD_SIZE),)

fp16 = dict(mode=AMP_TYPE.NAIVE)
clip_grad_norm = 1.0
gradient_accumulation = 8

LOG_PATH = "./log"
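
For reference, a config file like this is not executed on its own; the training script passes it to the ColossalAI launcher and then reads the values back from the global context. The snippet below is a minimal sketch of that pattern, assuming the legacy launch_from_torch / gpc.config API that this config targets; the path './config.py' and the script itself are illustrative placeholders, not part of this repository file.

# Minimal usage sketch (assumption: legacy ColossalAI launch/global-context API;
# './config.py' is a placeholder for wherever this config file is saved).
import colossalai
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc


def main():
    # Parses the config file and initializes torch.distributed plus the parallel
    # context: 1D tensor parallelism of size TP_WORLD_SIZE, with data parallelism
    # across the remaining ranks.
    colossalai.launch_from_torch(config='./config.py')

    # Every top-level name in the config becomes an attribute of gpc.config.
    if gpc.get_global_rank() == 0:
        dp_size = gpc.get_world_size(ParallelMode.DATA)
        print(f'per-GPU batch size: {gpc.config.BATCH_SIZE}')
        print(f'global batch size : {gpc.config.BATCH_SIZE * dp_size}')
        print(f'sequence length   : {gpc.config.SEQ_LENGTH}')  # (224 // 16)**2 + 1 = 197


if __name__ == '__main__':
    main()

Launched with a standard torch.distributed launcher (for example torchrun --nproc_per_node=4 train.py), the GPUs left over after the size-2 tensor-parallel split form the data-parallel group, which is what the "global batch size = BATCH_SIZE x data parallel size" comment in the config refers to.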