Patch: applications/Chat/examples/train_rm.sh

What this change does:
  * Replaces the bare call `set_n_least_used_CUDA_VISIBLE_DEVICES 1` with an
    inline definition of that function followed by a call with argument 2.
    The function lists per-GPU used memory via
    `nvidia-smi --query-gpu=memory.used --format=csv`, drops the CSV header
    line, numbers the rows from 0, echoes the table to /dev/tty, sorts by the
    memory column, keeps the first n row numbers (default 9999) and exports
    them comma-joined as CUDA_VISIBLE_DEVICES.
  * Replaces the `python train_reward_model.py` run (deberta, naive strategy,
    log_exp loss, --test True) with a 2-process `torchrun --standalone` run
    (bloom, colossalai_zero2 strategy, log_sig loss, Anthropic/hh-rlhf dataset).

NOTE(review): leading whitespace inside the patch lines below was reconstructed
and may not match the target file byte-for-byte; apply with whitespace
tolerance (`git apply --ignore-whitespace` or `patch -l`) and confirm.
NOTE(review): in this copy `--pretrain` and `--save_path` carry no value, and
the last added line ends with a `\` continuation - presumably placeholder
values are meant to be filled in by the user; verify before running.

diff --git a/applications/Chat/examples/train_rm.sh b/applications/Chat/examples/train_rm.sh
index 4f9f55b6b..80abe62d2 100755
--- a/applications/Chat/examples/train_rm.sh
+++ b/applications/Chat/examples/train_rm.sh
@@ -1,8 +1,24 @@
-set_n_least_used_CUDA_VISIBLE_DEVICES 1
+set_n_least_used_CUDA_VISIBLE_DEVICES() {
+    local n=${1:-"9999"}
+    echo "GPU Memory Usage:"
+    local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \
+        | tail -n +2 \
+        | nl -v 0 \
+        | tee /dev/tty \
+        | sort -g -k 2 \
+        | awk '{print $1}' \
+        | head -n $n)
+    export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
+    echo "Now CUDA_VISIBLE_DEVICES is set to:"
+    echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
+}
 
-python train_reward_model.py --pretrain 'microsoft/deberta-v3-large' \
-    --model 'deberta' \
-    --strategy naive \
-    --loss_fn 'log_exp'\
-    --save_path 'rmstatic.pt' \
-    --test True
+set_n_least_used_CUDA_VISIBLE_DEVICES 2
+
+torchrun --standalone --nproc_per_node=2 train_reward_model.py \
+    --pretrain \
+    --model 'bloom' \
+    --strategy colossalai_zero2 \
+    --loss_fn 'log_sig'\
+    --save_path \
+    --dataset 'Anthropic/hh-rlhf'\