mirror of https://github.com/hpcaitech/ColossalAI
[chat] typo accimulation_steps -> accumulation_steps (#3662)
parent 816add7e7f
commit 1a60dc07a8
@@ -251,7 +251,7 @@ trainer = SFTTrainer(model=model,
     eval_dataloader=eval_dataloader,
     batch_size=args.batch_size,
     max_epochs=args.max_epochs,
-    accimulation_steps = args.accimulation_steps
+    accumulation_steps = args.accumulation_steps
     )

 trainer.fit()
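Since this is a straight rename rather than an alias, any downstream script that still passes the misspelled keyword will fail when the trainer is constructed. A minimal sketch of the corrected call (keyword names for strategy and optim are assumed from the surrounding example; only accumulation_steps changes):

# Passing the old name now raises a TypeError (unexpected keyword argument 'accimulation_steps').
trainer = SFTTrainer(model=model,
                     strategy=strategy,
                     optim=optim,
                     train_dataloader=train_dataloader,
                     eval_dataloader=eval_dataloader,
                     max_epochs=args.max_epochs,
                     accumulation_steps=args.accumulation_steps)  # corrected keyword
trainer.fit()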
@@ -278,7 +278,7 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 1 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \
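For reference, the number of samples contributing to each optimizer update is the per-process batch size times the accumulation steps times the number of processes. A quick check of the flags above in plain Python (values taken from this command):

batch_size = 1            # --batch_size 1
accumulation_steps = 8    # --accumulation_steps 8
nproc_per_node = 1        # torchrun --nproc_per_node=1
effective_batch = batch_size * accumulation_steps * nproc_per_node
print(effective_batch)    # 8 samples per optimizer step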
@@ -296,7 +296,7 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 1 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \
@@ -313,7 +313,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 1 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \
@@ -41,10 +41,10 @@ class SFTTrainer(Trainer):
                  train_dataloader: DataLoader,
                  eval_dataloader: DataLoader = None,
                  max_epochs: int = 2,
-                 accimulation_steps: int = 8,
+                 accumulation_steps: int = 8,
                  callbacks: List[Callback] = [],
                  ) -> None:
-        if accimulation_steps > 1 and isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3:
+        if accumulation_steps > 1 and isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3:
             raise ValueError("Accumulation steps are not supported in stage 3 of ColossalAI")
         super().__init__(strategy, max_epochs, callbacks=callbacks)
         self.train_dataloader = train_dataloader
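The renamed parameter keeps the existing guard: accumulation_steps > 1 is rejected when the strategy is a ColossalAIStrategy running ZeRO stage 3. A hypothetical caller-side helper (the name and the stage attribute are assumed from the check above) that falls back to no accumulation instead of raising:

def safe_accumulation_steps(strategy, requested: int) -> int:
    # Mirror the constructor's check: only accumulation_steps == 1 is allowed under stage 3.
    if requested > 1 and isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3:
        return 1
    return requested

accumulation_steps = safe_accumulation_steps(strategy, args.accumulation_steps)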
@@ -52,8 +52,8 @@ class SFTTrainer(Trainer):
         self.model = model
         self.optimizer = optim

-        self.accimulation_steps = accimulation_steps
-        num_update_steps_per_epoch = len(train_dataloader) // self.accimulation_steps
+        self.accumulation_steps = accumulation_steps
+        num_update_steps_per_epoch = len(train_dataloader) // self.accumulation_steps
         max_steps = math.ceil(self.max_epochs * num_update_steps_per_epoch)

         self.scheduler = get_scheduler("cosine",
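The rename also flows into the scheduler sizing: updates per epoch are the batch count floor-divided by accumulation_steps, and the schedule is stretched over max_epochs of those updates. A small numeric check (the dataloader length is assumed for illustration):

import math

len_train_dataloader = 1000   # assumed len(train_dataloader)
accumulation_steps = 8
max_epochs = 2

num_update_steps_per_epoch = len_train_dataloader // accumulation_steps   # 125
max_steps = math.ceil(max_epochs * num_update_steps_per_epoch)            # 250
print(num_update_steps_per_epoch, max_steps)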
@@ -67,7 +67,7 @@ class SFTTrainer(Trainer):
             wandb.watch(self.model)
         total_loss = 0
         # epoch_bar = tqdm(range(self.epochs), desc='Epochs', disable=not is_rank_0())
-        step_bar = tqdm(range(len(self.train_dataloader) // self.accimulation_steps * self.max_epochs),
+        step_bar = tqdm(range(len(self.train_dataloader) // self.accumulation_steps * self.max_epochs),
                         desc=f'steps',
                         disable=not is_rank_0())
         for epoch in range(self.max_epochs):
@@ -85,20 +85,20 @@ class SFTTrainer(Trainer):
                 if loss >= 2.5 and is_rank_0():
                     logger.warning(f"batch_id:{batch_id}, abnormal loss: {loss}")

-                loss = loss / self.accimulation_steps
+                loss = loss / self.accumulation_steps

                 self.strategy.backward(loss, self.model, self.optimizer)

                 total_loss += loss.item()

                 # gradient accumulation
-                if (batch_id + 1) % self.accimulation_steps == 0:
+                if (batch_id + 1) % self.accumulation_steps == 0:
                     self.strategy.optimizer_step(self.optimizer)
                     self.optimizer.zero_grad()
                     self.scheduler.step()
                     if is_rank_0() and use_wandb:
                         wandb.log({
-                            "loss": total_loss / self.accimulation_steps,
+                            "loss": total_loss / self.accumulation_steps,
                             "lr": self.scheduler.get_last_lr()[0],
                             "epoch": epoch,
                             "batch_id": batch_id
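The loop above is the standard gradient-accumulation pattern: each micro-batch loss is scaled by 1 / accumulation_steps before backward, and the optimizer step, zero_grad, and scheduler step run only once every accumulation_steps micro-batches. A self-contained plain-PyTorch sketch of the same pattern (generic model and data, not the Coati trainer API):

import torch
from torch import nn

accumulation_steps = 8
model = nn.Linear(16, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
data = [(torch.randn(4, 16), torch.randn(4, 1)) for _ in range(32)]

optimizer.zero_grad()
for batch_id, (x, y) in enumerate(data):
    loss = nn.functional.mse_loss(model(x), y)
    loss = loss / accumulation_steps   # average gradients over the micro-batches
    loss.backward()                    # grads accumulate in .grad until the next zero_grad()
    if (batch_id + 1) % accumulation_steps == 0:
        optimizer.step()               # one real parameter update per accumulation window
        optimizer.zero_grad()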
@@ -62,7 +62,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 4 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \
@@ -154,7 +154,7 @@ def train(args):
                          eval_dataloader=eval_dataloader,
                          batch_size=args.batch_size,
                          max_epochs=args.max_epochs,
-                         accimulation_steps=args.accimulation_steps)
+                         accumulation_steps=args.accumulation_steps)

     trainer.fit(logger=logger, log_interval=args.log_interval)
@@ -183,7 +183,7 @@ if __name__ == '__main__':
     parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
     parser.add_argument('--log_interval', type=int, default=100, help="how many steps to log")
     parser.add_argument('--lr', type=float, default=5e-6)
-    parser.add_argument('--accimulation_steps', type=int, default=8)
+    parser.add_argument('--accumulation_steps', type=int, default=8)
     parser.add_argument('--enable_peft_lora', action='store_true', default=False)
     parser.add_argument("--is_short_text", action='store_true', default=False)
     args = parser.parse_args()
@@ -159,7 +159,7 @@ def train(args):
                          train_dataloader=train_dataloader,
                          eval_dataloader=eval_dataloader,
                          max_epochs=args.max_epochs,
-                         accimulation_steps=args.accimulation_steps)
+                         accumulation_steps=args.accumulation_steps)

     trainer.fit(logger=logger, use_wandb=args.use_wandb)
@@ -189,7 +189,7 @@ if __name__ == '__main__':
     parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
     parser.add_argument('--log_interval', type=int, default=100, help="how many steps to log")
    parser.add_argument('--lr', type=float, default=5e-6)
-    parser.add_argument('--accimulation_steps', type=int, default=8)
+    parser.add_argument('--accumulation_steps', type=int, default=8)
     parser.add_argument('--use_wandb', default=False, action='store_true')
     parser.add_argument('--grad_checkpoint', default=False, action='store_true')
     args = parser.parse_args()
@@ -6,7 +6,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 4 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \