mirror of https://github.com/hpcaitech/ColossalAI
Browse Source
* add normalize function to value_head in bloom rm * add normalization to value_function in gpt_rm * add normalization to value_head of opt_rm * add Anthropic/hh-rlhf dataset * Update __init__.py * Add LogExpLoss in RM training * Update __init__.py * update rm trainer to use acc as target * update example/train_rm * Update train_rm.sh * code style * Update README.md * Update README.md * add rm test to ci * fix tokenier * fix typo * change batchsize to avoid oom in ci * Update test_ci.sh
pull/3159/head
BlueRum
2 years ago
committed by
GitHub
12 changed files with 270 additions and 111 deletions
@ -1,4 +1,4 @@
|
||||
from .reward_dataset import RewardDataset |
||||
from .reward_dataset import RmStaticDataset, HhRlhfDataset |
||||
from .utils import is_rank_0 |
||||
|
||||
__all__ = ['RewardDataset', 'is_rank_0'] |
||||
__all__ = ['RmStaticDataset', 'HhRlhfDataset','is_rank_0'] |
||||
|
@ -1,4 +1,4 @@
|
||||
from .base import Actor, Critic, RewardModel |
||||
from .loss import PairWiseLoss, PolicyLoss, PPOPtxActorLoss, ValueLoss |
||||
from .loss import PolicyLoss, PPOPtxActorLoss, ValueLoss, LogSigLoss, LogExpLoss |
||||
|
||||
__all__ = ['Actor', 'Critic', 'RewardModel', 'PolicyLoss', 'ValueLoss', 'PPOPtxActorLoss', 'PairWiseLoss'] |
||||
__all__ = ['Actor', 'Critic', 'RewardModel', 'PolicyLoss', 'ValueLoss', 'PPOPtxActorLoss', 'LogSigLoss', 'LogExpLoss'] |
||||
|
@ -1,20 +1,8 @@
|
||||
set_n_least_used_CUDA_VISIBLE_DEVICES() { |
||||
local n=${1:-"9999"} |
||||
echo "GPU Memory Usage:" |
||||
local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ |
||||
| tail -n +2 \ |
||||
| nl -v 0 \ |
||||
| tee /dev/tty \ |
||||
| sort -g -k 2 \ |
||||
| awk '{print $1}' \ |
||||
| head -n $n) |
||||
export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') |
||||
echo "Now CUDA_VISIBLE_DEVICES is set to:" |
||||
echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" |
||||
} |
||||
set_n_least_used_CUDA_VISIBLE_DEVICES 1 |
||||
|
||||
set_n_least_used_CUDA_VISIBLE_DEVICES 2 |
||||
|
||||
# torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain 'bigscience/bloomz-560m' --model 'bloom' --strategy colossalai_zero2 |
||||
torchrun --standalone --nproc_per_node=2 train_reward_model.py --model 'gpt2' --strategy colossalai_zero2 |
||||
# torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain "facebook/opt-350m" --model 'opt' --strategy colossalai_zero2 |
||||
python train_reward_model.py --pretrain '/home/lczht/data2/bloom-560m' \ |
||||
--model 'bloom' \ |
||||
--strategy naive \ |
||||
--loss_fn 'log_exp'\ |
||||
--save_path 'rmstatic.pt' \ |
||||
--test True |
||||
|
Loading…
Reference in new issue