#!/usr/bin/env bash

# Pick the n GPUs with the lowest memory usage and expose only those via CUDA_VISIBLE_DEVICES.
set_n_least_used_CUDA_VISIBLE_DEVICES() {
    local n=${1:-"9999"}
    echo "GPU Memory Usage:"
    local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv |
        tail -n +2 |
        nl -v 0 |
        tee /dev/tty |
        sort -g -k 2 |
        awk '{print $1}' |
        head -n $n)
    export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
    echo "Now CUDA_VISIBLE_DEVICES is set to:"
    echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
}

set_n_least_used_CUDA_VISIBLE_DEVICES 4

set -xu

# Use ${VAR:-} so that an unset variable triggers the friendly message below instead of
# aborting with "unbound variable" under `set -u`.
if [ -z "${SFT_DATASET:-}" ]; then
    echo "Please set \$SFT_DATASET to the path of the SFT dataset."
    exit 1
fi

if [ -z "${PROMPT_PATH:-}" ]; then
    echo "Please set \$PROMPT_PATH to the path of the prompts csv."
    exit 1
fi

if [ -z "${PRETRAIN_DATASET:-}" ]; then
    echo "Please set \$PRETRAIN_DATASET to the path of the alpaca data."
    exit 1
fi

NUM_RETRY=3
BASE_DIR=$(dirname $(dirname $(realpath $BASH_SOURCE)))
EXAMPLES_DIR=$BASE_DIR/examples
MODELS_DIR=$BASE_DIR/examples/models_config
MODELS=('gpt2' 'bloom' 'opt' 'llama')
STRATEGIES=('ddp' 'colossalai_gemini' 'colossalai_zero2')

export OMP_NUM_THREADS=8

# install requirements
pip install -r $EXAMPLES_DIR/requirements.txt

python $EXAMPLES_DIR/download_model.py --model-dir $MODELS_DIR --config-only

# Map a model name to its default HuggingFace pretrain identifier.
get_pretrain() {
    local model=$1
    if [[ $model == "gpt2" ]]; then
        echo "gpt2"
    elif [[ $model == "bloom" ]]; then
        echo "bigscience/bloom-560m"
    elif [[ $model == "opt" ]]; then
        echo "facebook/opt-350m"
    else
        echo "Unknown model $model"
        exit 1
    fi
}

# Echo a random element of the given array.
random_choice() {
    local arr=("$@")
    local len=${#arr[@]}
    local idx=$((RANDOM % len))
    echo ${arr[$idx]}
}

echo "[Test]: testing sft ..."

# FIXME: This is a hack to skip tests that are not working
#  - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
#  - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
    "gpt2-ddp"
    "llama-ddp"
    "llama-colossalai_gemini"
    "llama-colossalai_zero2"
)

GRAD_CKPTS=('' '--grad_checkpoint')
for lora_rank in '0' '4'; do
    for model in ${MODELS[@]}; do
        strategies=($(shuf -e "${STRATEGIES[@]}"))
        for strategy in ${strategies[@]}; do
            if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
                echo "[Test]: Skipped $model-$strategy-$lora_rank"
                continue
            elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
                echo "[Test]: Skipped $model-$strategy"
                continue
            fi
            pretrain=$(get_pretrain $model)
            pretrain_model=""
            if [[ $lora_rank -gt 0 ]]; then
                pretrain_model="--pretrain $pretrain"
            fi
            grad_ckpt=$(random_choice "${GRAD_CKPTS[@]}")
            for i in $(seq $NUM_RETRY); do
                echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
                torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_sft.py \
                    $pretrain_model --tokenizer $MODELS_DIR/$model \
                    --model $model --strategy $strategy --lora_rank $lora_rank $grad_ckpt \
                    --dataset $SFT_DATASET --max_datasets_size 8 \
                    --max_epochs 1 --batch_size 1 --accumulation_steps 1 \
                    --save_path $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank}
                passed=$?
                if [ $passed -eq 0 ]; then
                    break
                fi
            done
            if [ $passed -ne 0 ]; then
                echo "[Test]: Failed $model-$strategy-$lora_rank"
                exit 1
            fi
        done
    done
done

echo "[Test]: testing reward model ..."
# FIXME: This is a hack to skip tests that are not working
#  - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
#  - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
    "gpt2-ddp"
    "llama-ddp"
    "llama-colossalai_gemini"
    "llama-colossalai_zero2"
)

LOSS_FNS=('log_sig' 'log_exp')
DATASETS=('Anthropic/hh-rlhf' 'Dahoas/rm-static')
for lora_rank in '0' '4'; do
    for model in ${MODELS[@]}; do
        strategies=($(shuf -e "${STRATEGIES[@]}"))
        for strategy in ${strategies[@]}; do
            if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
                echo "[Test]: Skipped $model-$strategy-$lora_rank"
                continue
            elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
                echo "[Test]: Skipped $model-$strategy"
                continue
            fi
            pretrain=$(get_pretrain $model)
            pretrain_model=""
            if [[ $lora_rank -gt 0 ]]; then
                pretrain_model="--pretrain $pretrain"
            fi
            loss_fn=$(random_choice "${LOSS_FNS[@]}")
            dataset=$(random_choice "${DATASETS[@]}")
            # Dahoas/rm-static has no subsets; Anthropic/hh-rlhf uses its harmless-base subset.
            subset=$(if [[ $dataset == "Dahoas/rm-static" ]]; then echo "None"; else echo "harmless-base"; fi)
            for i in $(seq $NUM_RETRY); do
                echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
                torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_reward_model.py \
                    $pretrain_model --tokenizer $MODELS_DIR/$model \
                    --model $model --strategy $strategy --lora_rank $lora_rank --loss_fn $loss_fn \
                    --dataset $dataset --subset $subset --test True --batch_size 1 \
                    --save_path $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt
                passed=$?
                if [ $passed -eq 0 ]; then
                    break
                fi
            done
            if [ $passed -ne 0 ]; then
                echo "[Test]: Failed to train reward model $model-$strategy-$lora_rank"
                exit 1
            fi
        done
    done
done

echo "[Test]: testing RLHF ..."

# FIXME: This is a hack to skip tests that are not working
#  - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
#  - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
    "gpt2-ddp"
    "llama-ddp"
    "llama-colossalai_gemini"
    "llama-colossalai_zero2"
)

for model in ${MODELS[@]}; do
    for lora_rank in '0' '4'; do
        strategies=($(shuf -e "${STRATEGIES[@]}"))
        for strategy in ${strategies[@]}; do
            if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
                echo "[Test]: Skipped $model-$strategy-$lora_rank"
                continue
            elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
                echo "[Test]: Skipped $model-$strategy"
                continue
            fi
            rm_pretrain=$(get_pretrain $model)
            rm_pretrain_model=""
            if [[ $lora_rank -gt 0 ]]; then
                rm_pretrain_model="--rm_pretrain $rm_pretrain"
            fi
            for i in $(seq $NUM_RETRY); do
                echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
                torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_prompts.py \
                    --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
                    --strategy $strategy --model $model --tokenizer $MODELS_DIR/$model \
                    --num_episodes 1 --num_collect_steps 1 --num_update_steps 1 \
                    --experience_batch_size 2 --train_batch_size 1 --lora_rank $lora_rank \
                    --pretrain $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank} \
                    $rm_pretrain_model --rm_path $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt \
                    --save_path $EXAMPLES_DIR/rlhf_models/actor_checkpoint_prompts.pt
                passed=$?
                if [ $passed -eq 0 ]; then
                    break
                fi
            done
            if [ $passed -ne 0 ]; then
                echo "[Test]: Failed to train RLHF $model-$strategy-$lora_rank"
                exit 1
            fi
        done
        # Clean up the checkpoints produced for this model/LoRA-rank combination.
        rm -rf $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank}
        rm $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt
    done
done
# Remove the shared actor checkpoint once all combinations have been tested.
rm $EXAMPLES_DIR/rlhf_models/actor_checkpoint_prompts.pt
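
# Example invocation (a sketch only; the dataset paths below are placeholders and the
# script filename is an assumption -- substitute the real locations in your checkout):
#   SFT_DATASET=/path/to/sft_dataset.json \
#   PROMPT_PATH=/path/to/prompts.csv \
#   PRETRAIN_DATASET=/path/to/alpaca_data.json \
#   bash /path/to/this_test_script.sh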