ColossalAI/examples/language/opt/run_demo.sh

# Demo launcher: installs the packages listed in requirements.txt, then starts
# opt_train_demo.py through torchrun using the settings defined below.
# Run it from the directory that contains requirements.txt and
# opt_train_demo.py, since both are referenced by relative path.

# -x: print each command before running it; -e: stop at the first failure
set -xe
pip install -r requirements.txt

# model name or path
MODEL="facebook/opt-350m"

# path for saving model
OUTPUT_PATH="./output_model.bin"

# plugin(training strategy)
# can only be one of
# "torch_ddp"/"torch_ddp_fp16"/"low_level_zero"/"gemini"/"hybrid_parallel"
PLUGIN="hybrid_parallel"

# number of gpus to use
GPUNUM=4

# batch size per gpu
BS=16

# learning rate
LR="5e-5"

# number of epoch
EPOCH=10

# weight decay
WEIGHT_DECAY=0.01

# ratio of warmup steps
WARMUP_RATIO=0.1

# run the script for demo
# Every expansion is double-quoted so that an edited value containing spaces
# or glob characters (e.g. a local model path) is passed as a single argument.
torchrun \
  --standalone \
  --nproc_per_node "${GPUNUM}" \
  opt_train_demo.py \
  --model_name_or_path "${MODEL}" \
  --output_path "${OUTPUT_PATH}" \
  --plugin "${PLUGIN}" \
  --batch_size "${BS}" \
  --num_epoch "${EPOCH}" \
  --learning_rate "${LR}" \
  --weight_decay "${WEIGHT_DECAY}" \
  --warmup_ratio "${WARMUP_RATIO}"
[example] update opt example using booster api (#3918) 2023-06-08 03:27:05 +00:00			`set -xe`
			`pip install -r requirements.txt`

			`# model name or path`
			`MODEL="facebook/opt-350m"`

			`# path for saving model`
			`OUTPUT_PATH="./output_model.bin"`

			`# plugin(training strategy)`
			`# can only be one of "torch_ddp"/"torch_ddp_fp16"/"low_level_zero"/"gemini"/"hybrid_parallel"`
[shardformer] update llama2/opt finetune example and fix llama2 policy (#4645) * [shardformer] update shardformer readme [shardformer] update shardformer readme [shardformer] update shardformer readme * [shardformer] update llama2/opt finetune example and shardformer update to llama2 * [shardformer] update llama2/opt finetune example and shardformer update to llama2 * [shardformer] update llama2/opt finetune example and shardformer update to llama2 * [shardformer] change dataset * [shardformer] change dataset * [shardformer] fix CI * [shardformer] fix * [shardformer] fix * [shardformer] fix * [shardformer] fix * [shardformer] fix [example] update opt example [example] resolve comments fix fix 2023-09-09 14:45:36 +00:00			`PLUGIN="hybrid_parallel"`
[example] update opt example using booster api (#3918) 2023-06-08 03:27:05 +00:00
			`# number of gpus to use`
			`GPUNUM=4`

			`# batch size per gpu`
			`BS=16`

			`# learning rate`
			`LR="5e-5"`

			`# number of epoch`
			`EPOCH=10`

			`# weight decay`
			`WEIGHT_DECAY=0.01`

			`# ratio of warmup steps`
			`WARMUP_RATIO=0.1`

			`# run the script for demo`
			`torchrun \`
			`--standalone \`
			`--nproc_per_node ${GPUNUM} \`
			`opt_train_demo.py \`
			`--model_name_or_path ${MODEL} \`
			`--output_path ${OUTPUT_PATH} \`
			`--plugin ${PLUGIN} \`
			`--batch_size ${BS} \`
			`--num_epoch ${EPOCH} \`
			`--learning_rate ${LR} \`
			`--weight_decay ${WEIGHT_DECAY} \`
			`--warmup_ratio ${WARMUP_RATIO}`