|
|
|
import argparse
|
|
|
|
import os
|
|
|
|
import socket
|
|
|
|
from functools import partial
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
import ray
|
|
|
|
from coati.quant import llama_load_quant, low_resource_init
|
|
|
|
from coati.ray.detached_trainer_ppo import DetachedPPOTrainer
|
|
|
|
from coati.ray.experience_maker_holder import ExperienceMakerHolder
|
|
|
|
from coati.ray.utils import (
|
|
|
|
get_actor_from_args,
|
|
|
|
get_critic_from_args,
|
|
|
|
get_receivers_per_sender,
|
|
|
|
get_reward_model_from_args,
|
|
|
|
get_strategy_from_args,
|
|
|
|
)
|
|
|
|
from torch.utils.data import DataLoader
|
|
|
|
from transformers import AutoConfig, AutoTokenizer
|
|
|
|
from transformers.modeling_utils import no_init_weights
|
|
|
|
|
|
|
|
|
|
|
|
def get_free_port():
    """Ask the OS for a currently unused TCP port and return its number.

    A TCP socket is bound to port 0 so the OS chooses the port; the socket is
    closed again before returning, so the port is only known to have been
    free at the moment of the call.

    Returns:
        int: the port number the OS assigned to the temporary socket.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        # Port 0 means "pick any free port"; "" binds on all interfaces.
        probe.bind(("", 0))
        _host, port = probe.getsockname()
    return port
|
|
|
|
|
|
|
|
|
|
|
|
def get_local_ip():
    """Return the local IPv4 address used for outbound traffic.

    A UDP socket is "connected" to a public address (8.8.8.8:80) purely so
    the OS selects the outgoing interface; the socket's own address is then
    read back.

    If the host has no route to that address (offline or sandboxed machine)
    the connect call raises ``OSError``. Instead of crashing the launcher,
    a ``RuntimeWarning`` is emitted and the loopback address is returned,
    which is only usable when every process runs on this machine.

    Returns:
        str: dotted-quad IPv4 address, or ``"127.0.0.1"`` when no outbound
        route is available.
    """
    import warnings

    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            s.connect(("8.8.8.8", 80))
        except OSError as err:
            warnings.warn(
                f"could not determine outbound IP ({err}); falling back to 127.0.0.1",
                RuntimeWarning,
                stacklevel=2,
            )
            return "127.0.0.1"
        return s.getsockname()[0]
|
|
|
|
|
|
|
|
|
|
|
|
def main(args):
    """Create the experience-maker and trainer Ray actors and run them to completion.

    Steps, in order:
      1. Build one env-info dict per trainer rank and per maker rank; each
         group shares the local IP as master address and gets its own port.
      2. Load the tokenizer from ``args.pretrain`` and reuse EOS as PAD.
      3. Create ``args.num_makers`` ``ExperienceMakerHolder`` actors named
         ``maker{i}`` and ``args.num_trainers`` ``DetachedPPOTrainer`` actors
         named ``trainer{i}``; each is given the names of its peers as
         computed by ``get_receivers_per_sender``.
      4. Start ``workingloop`` on every maker and ``fit`` on every trainer,
         then block on all of the resulting tasks.

    Args:
        args: parsed command-line namespace. Attributes read here:
            num_makers, num_trainers, model, pretrain, critic_pretrain,
            initial_model_quant_ckpt, quant_bits, quant_group_size,
            maker_strategy, trainer_strategy, lora_rank, debug,
            train_batch_size, experience_batch_size, experience_steps,
            update_steps, train_epochs, prompt_path.
    """
    host_ip = str(get_local_ip())

    def rank_env_infos(world_size, port):
        # One dict per rank; all values are strings and local_rank is always "0".
        return [
            {
                "local_rank": "0",
                "rank": str(rank),
                "world_size": str(world_size),
                "master_port": port,
                "master_addr": host_ip,
            }
            for rank in range(world_size)
        ]

    # trainer_env_info
    env_info_trainers = rank_env_infos(args.num_trainers, str(get_free_port()))

    # maker_env_info
    env_info_makers = rank_env_infos(args.num_makers, str(get_free_port()))

    # configure tokenizer
    tokenizer = AutoTokenizer.from_pretrained(args.pretrain)
    tokenizer.pad_token = tokenizer.eos_token

    def model_fn():
        # Returns (actor, critic, reward_model, initial_model) for a maker.
        def frozen_half_on_gpu(module):
            return module.requires_grad_(False).half().cuda()

        actor = frozen_half_on_gpu(get_actor_from_args(args.model, args.pretrain))
        critic = frozen_half_on_gpu(get_critic_from_args(args.model, args.critic_pretrain))
        reward_model = frozen_half_on_gpu(get_reward_model_from_args(args.model, args.critic_pretrain))

        use_quantized_initial = args.initial_model_quant_ckpt is not None and args.model == "llama"
        if use_quantized_initial:
            # quantize initial model
            actor_cfg = AutoConfig.from_pretrained(args.pretrain)
            with low_resource_init(), no_init_weights():
                initial_model = get_actor_from_args(args.model, config=actor_cfg)
            quantized = llama_load_quant(
                initial_model.model, args.initial_model_quant_ckpt, args.quant_bits, args.quant_group_size
            )
            initial_model.model = quantized.cuda().requires_grad_(False)
        else:
            initial_model = frozen_half_on_gpu(get_actor_from_args(args.model, args.pretrain))
        return actor, critic, reward_model, initial_model

    lora_enabled = args.lora_rank != 0

    # generation kwargs:
    generation_kwargs = {
        "max_length": 512,
        "do_sample": True,
        "temperature": 1.0,
        "top_k": 50,
        "pad_token_id": tokenizer.pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }

    # configure Experience Maker
    experience_holder_refs = []
    for maker_idx, maker_env in enumerate(env_info_makers):
        maker_actor_cls = ExperienceMakerHolder.options(name=f"maker{maker_idx}", num_gpus=1, max_concurrency=2)
        target_trainers = [
            f"trainer{x}"
            for x in get_receivers_per_sender(maker_idx, args.num_makers, args.num_trainers, allow_idle_sender=False)
        ]
        experience_holder_refs.append(
            maker_actor_cls.remote(
                detached_trainer_name_list=target_trainers,
                strategy_fn=partial(get_strategy_from_args, args.maker_strategy),
                model_fn=model_fn,
                env_info=maker_env,
                kl_coef=0.1,
                debug=args.debug,
                update_lora_weights=lora_enabled,
                # sync_models_from_trainers=True,
                **generation_kwargs,
                eval_performance=True,
                use_cache=True,
            )
        )

    def trainer_model_fn():
        # Returns the (actor, critic) pair a trainer optimizes, in fp16 on GPU.
        trainable_actor = get_actor_from_args(args.model, args.pretrain, lora_rank=args.lora_rank)
        trainable_critic = get_critic_from_args(args.model, args.critic_pretrain, lora_rank=args.lora_rank)
        return trainable_actor.half().cuda(), trainable_critic.half().cuda()

    # configure Trainer
    trainer_refs = []
    for trainer_idx, trainer_env in enumerate(env_info_trainers):
        trainer_actor_cls = DetachedPPOTrainer.options(name=f"trainer{trainer_idx}", num_gpus=1, max_concurrency=2)
        source_makers = [
            f"maker{x}"
            for x in get_receivers_per_sender(trainer_idx, args.num_trainers, args.num_makers, allow_idle_sender=True)
        ]
        trainer_refs.append(
            trainer_actor_cls.remote(
                experience_maker_holder_name_list=source_makers,
                strategy_fn=partial(get_strategy_from_args, args.trainer_strategy),
                model_fn=trainer_model_fn,
                env_info=trainer_env,
                train_batch_size=args.train_batch_size,
                buffer_limit=16,
                eval_performance=True,
                debug=args.debug,
                update_lora_weights=lora_enabled,
            )
        )

    # Each dataloader batch holds four experience batches' worth of prompts.
    dataset_size = args.experience_batch_size * 4

    def build_dataloader():
        # Passed to each maker's workingloop; builds a shuffled prompt loader.
        def tokenize_fn(texts):
            # Pad/truncate every prompt to 96 tokens and move tensors to GPU.
            encoded = tokenizer(texts, return_tensors="pt", max_length=96, padding="max_length", truncation=True)
            return {name: tensor.cuda() for name, tensor in encoded.items()}

        prompts = pd.read_csv(args.prompt_path)["prompt"]
        return DataLoader(dataset=prompts, batch_size=dataset_size, shuffle=True, collate_fn=tokenize_fn)

    # uncomment this function if sync_models_from_trainers is True
    # ray.get([
    #     trainer_ref.sync_models_to_remote_makers.remote()
    #     for trainer_ref in trainer_refs
    # ])

    wait_tasks = [
        holder_ref.workingloop.remote(build_dataloader, num_steps=args.experience_steps)
        for holder_ref in experience_holder_refs
    ]

    # Total experience samples produced by all makers, divided across the
    # samples consumed per step by all trainers (integer division).
    produced_samples = args.experience_batch_size * args.experience_steps * args.num_makers
    total_steps = produced_samples // (args.num_trainers * args.train_batch_size)

    wait_tasks.extend(
        trainer_ref.fit.remote(total_steps, args.update_steps, args.train_epochs) for trainer_ref in trainer_refs
    )

    # Block until every maker loop and every trainer fit has finished.
    ray.get(wait_tasks)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line interface; argument order is kept so --help output is unchanged.
    parser = argparse.ArgumentParser()
    add_arg = parser.add_argument
    model_choices = ["gpt2", "bloom", "opt", "llama"]

    add_arg("--prompt_path", type=str, default=None)
    add_arg("--num_makers", type=int, default=1)
    add_arg("--num_trainers", type=int, default=1)
    add_arg(
        "--trainer_strategy",
        choices=["ddp", "colossalai_gemini", "colossalai_zero2", "colossalai_gemini_cpu", "colossalai_zero2_cpu"],
        default="ddp",
    )
    add_arg("--maker_strategy", choices=["naive"], default="naive")
    add_arg("--model", default="gpt2", choices=model_choices)
    add_arg("--critic_model", default="gpt2", choices=model_choices)
    add_arg("--pretrain", type=str, default=None)
    add_arg("--critic_pretrain", type=str, default=None)
    add_arg("--experience_steps", type=int, default=4)
    add_arg("--experience_batch_size", type=int, default=8)
    add_arg("--train_epochs", type=int, default=1)
    add_arg("--update_steps", type=int, default=2)
    add_arg("--train_batch_size", type=int, default=8)
    add_arg("--lora_rank", type=int, default=0, help="low-rank adaptation matrices rank")

    add_arg("--initial_model_quant_ckpt", type=str, default=None)
    add_arg("--quant_bits", type=int, default=4)
    add_arg("--quant_group_size", type=int, default=128)
    add_arg("--debug", action="store_true")
    args = parser.parse_args()

    # RAY_NAMESPACE must be set in the environment (a KeyError is raised
    # otherwise). A copy of the driver's environment variables is passed to
    # Ray as the runtime_env "env_vars".
    ray_namespace = os.environ["RAY_NAMESPACE"]
    driver_env = dict(os.environ)
    ray.init(namespace=ray_namespace, runtime_env={"env_vars": driver_env})
    main(args)
|