diff --git a/applications/Chat/examples/train_rm.sh b/applications/Chat/examples/train_rm.sh
index 4f9f55b6b..80abe62d2 100755
--- a/applications/Chat/examples/train_rm.sh
+++ b/applications/Chat/examples/train_rm.sh
@@ -1,8 +1,24 @@
-set_n_least_used_CUDA_VISIBLE_DEVICES 1
+set_n_least_used_CUDA_VISIBLE_DEVICES() {
+    local n=${1:-"9999"}
+    echo "GPU Memory Usage:"
+    local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \
+        | tail -n +2 \
+        | nl -v 0 \
+        | tee /dev/tty \
+        | sort -g -k 2 \
+        | awk '{print $1}' \
+        | head -n $n)
+    export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
+    echo "Now CUDA_VISIBLE_DEVICES is set to:"
+    echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
+}
 
-python train_reward_model.py --pretrain 'microsoft/deberta-v3-large' \
-                             --model 'deberta' \
-                             --strategy naive \
-                             --loss_fn 'log_exp'\
-                             --save_path 'rmstatic.pt' \
-                             --test True
+set_n_least_used_CUDA_VISIBLE_DEVICES 2
+
+torchrun --standalone --nproc_per_node=2 train_reward_model.py \
+   --pretrain  <your pretrain path> \
+   --model 'bloom' \
+   --strategy colossalai_zero2 \
+   --loss_fn 'log_sig'\
+   --save_path <your model saving path>\
+   --dataset 'Anthropic/hh-rlhf'\