From a619a190df71ea3600e8487bc7070330a9574e06 Mon Sep 17 00:00:00 2001
From: ver217
Date: Fri, 17 Feb 2023 12:43:31 +0800
Subject: [PATCH] [chatgpt] update readme about checkpoint (#2792)

* [chatgpt] add save/load checkpoint sample code

* [chatgpt] add save/load checkpoint readme

* [chatgpt] refactor save/load checkpoint readme
---
 applications/ChatGPT/README.md | 87 ++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 5 deletions(-)

diff --git a/applications/ChatGPT/README.md b/applications/ChatGPT/README.md
index b3ea239a9..0516991de 100644
--- a/applications/ChatGPT/README.md
+++ b/applications/ChatGPT/README.md
@@ -34,26 +34,103 @@ Simplest usage:
 ```python
 from chatgpt.trainer import PPOTrainer
 from chatgpt.trainer.strategies import ColossalAIStrategy
+from chatgpt.nn import GPTActor, GPTCritic, RewardModel
+from copy import deepcopy
+from colossalai.nn.optimizer import HybridAdam
 
 strategy = ColossalAIStrategy()
 
 with strategy.model_init_context():
     # init your model here
-    actor = Actor()
-    critic = Critic()
+    # load pretrained gpt2
+    actor = GPTActor(pretrained='gpt2')
+    critic = GPTCritic()
+    initial_model = deepcopy(actor).cuda()
+    reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).cuda()
 
-trainer = PPOTrainer(actor = actor, critic= critic, strategy, ...)
+actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
+critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
+
+# prepare models and optimizers
+(actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare(
+    (actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
+
+# load saved model checkpoint after preparing
+strategy.load_model(actor, 'actor_checkpoint.pt', strict=False)
+# load saved optimizer checkpoint after preparing
+strategy.load_optimizer(actor_optim, 'actor_optim_checkpoint.pt')
+
+trainer = PPOTrainer(strategy,
+                     actor,
+                     critic,
+                     reward_model,
+                     initial_model,
+                     actor_optim,
+                     critic_optim,
+                     ...)
 
 trainer.fit(dataset, ...)
+
+# save model checkpoint after fitting, on rank0 only
+strategy.save_model(actor, 'actor_checkpoint.pt', only_rank0=True)
+# save optimizer checkpoint on all ranks
+strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint.pt', only_rank0=False)
 ```
 
 For more details, see `examples/`.
 
 We also support training reward model with real-world data. See `examples/train_reward_model.py`.
 
+## FAQ
+
+### How to save/load checkpoints
+
+To load a pretrained model, you can simply pass the name of a HuggingFace pretrained model:
+
+```python
+# load OPT-350m pretrained model
+actor = OPTActor(pretrained='facebook/opt-350m')
+```
+
+To save a model checkpoint:
+
+```python
+# save model checkpoint on rank0 only
+strategy.save_model(actor, 'actor_checkpoint.pt', only_rank0=True)
+```
+
+This function must be called after `strategy.prepare()`.
+
+For the DDP strategy, model weights are replicated on all ranks. For the ColossalAI strategy, model weights may be sharded, but an all-gather is applied before returning the state dict, so you can set `only_rank0=True` for both strategies: only rank0 writes the checkpoint, which saves disk space. The checkpoint is saved in float32.
+
+To save an optimizer checkpoint:
+
+```python
+# save optimizer checkpoint on all ranks
+strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint.pt', only_rank0=False)
+```
+
+For the DDP strategy, optimizer states are replicated on all ranks, so you can set `only_rank0=True`. For the ColossalAI strategy, however, optimizer states are sharded over all ranks and no all-gather is applied, so you must set `only_rank0=False`: each rank saves its own checkpoint, and when loading, each rank should load the corresponding part.
+
+Note that different strategies may produce optimizer checkpoints with different shapes.
+
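+A minimal sketch of per-rank optimizer checkpointing under the ColossalAI strategy (assuming `torch.distributed` has been initialized; the rank-suffixed file names are only an illustrative convention, not something the API requires):
+
+```python
+import torch.distributed as dist
+
+# each rank writes its own shard of the optimizer states
+# (the rank-suffixed path is just an illustrative naming scheme)
+rank = dist.get_rank()
+strategy.save_optimizer(actor_optim, f'actor_optim_checkpoint_{rank}.pt', only_rank0=False)
+
+# later, each rank loads back the shard it saved
+strategy.load_optimizer(actor_optim, f'actor_optim_checkpoint_{rank}.pt')
+```
+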
+To load a model checkpoint:
+
+```python
+# load saved model checkpoint after preparing
+strategy.load_model(actor, 'actor_checkpoint.pt', strict=False)
+```
+
+To load an optimizer checkpoint:
+
+```python
+# load saved optimizer checkpoint after preparing
+strategy.load_optimizer(actor_optim, 'actor_optim_checkpoint.pt')
+```
+
 ## Todo
 
-- [x] implement PPO training
+- [x] implement PPO fine-tuning
 - [x] implement training reward model
 - [x] support LoRA
 - [ ] implement PPO-ptx fine-tuning
@@ -65,7 +142,7 @@ Referring to the successful attempts of [BLOOM](https://bigscience.huggingface.c
 
 You may contact us or participate in the following ways:
 1. Posting an [issue](https://github.com/hpcaitech/ColossalAI/issues/new/choose) or submitting a [PR](https://github.com/hpcaitech/ColossalAI/pulls) on GitHub
-2. Join the Colossal-AI community on
+2. Join the Colossal-AI community on [Slack](https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w), and [WeChat](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png "qrcode") to share your ideas.
 3. Check out and fill in the [cooperation proposal](https://www.hpc-ai.tech/partners)