diff --git a/applications/ColossalMoE/infer.sh b/applications/ColossalMoE/infer.sh index 70245e81f..0487fe9c1 100644 --- a/applications/ColossalMoE/infer.sh +++ b/applications/ColossalMoE/infer.sh @@ -1,5 +1,5 @@ NUM_GPU=2 -MODEL="/home/zhaoxuanlei/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-v0.1/snapshots/58301445dc1378584211722b7ebf8743ec4e192b" +MODEL="mistralai/Mixtral-8x7B-v0.1" # ep torchrun --standalone --nproc_per_node $NUM_GPU infer.py \ diff --git a/applications/ColossalMoE/train.sh b/applications/ColossalMoE/train.sh index ea95e9908..bee7f5c8f 100644 --- a/applications/ColossalMoE/train.sh +++ b/applications/ColossalMoE/train.sh @@ -1,5 +1,5 @@ NUM_GPU=8 -MODEL="/home/zhaoxuanlei/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-v0.1/snapshots/58301445dc1378584211722b7ebf8743ec4e192b" +MODEL="mistralai/Mixtral-8x7B-v0.1" SEQ_LENGTH=2048 BATCH_SIZE=1 LR=0.00001