[chat] typo accimulation_steps -> accumulation_steps (#3662)

pull/3679/head
tanitna 2023-04-28 00:42:57 -07:00 committed by GitHub
parent 816add7e7f
commit 1a60dc07a8
6 changed files with 18 additions and 18 deletions

View File

@@ -251,7 +251,7 @@ trainer = SFTTrainer(model=model,
     eval_dataloader=eval_dataloader,
     batch_size=args.batch_size,
     max_epochs=args.max_epochs,
-    accimulation_steps = args.accimulation_steps
+    accumulation_steps = args.accumulation_steps
 )
 trainer.fit()
@@ -278,7 +278,7 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 1 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \
@@ -296,7 +296,7 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 1 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \
@@ -313,7 +313,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 1 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \

View File

@@ -41,10 +41,10 @@ class SFTTrainer(Trainer):
         train_dataloader: DataLoader,
         eval_dataloader: DataLoader = None,
         max_epochs: int = 2,
-        accimulation_steps: int = 8,
+        accumulation_steps: int = 8,
         callbacks: List[Callback] = [],
     ) -> None:
-        if accimulation_steps > 1 and isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3:
+        if accumulation_steps > 1 and isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3:
             raise ValueError("Accumulation steps are not supported in stage 3 of ColossalAI")
         super().__init__(strategy, max_epochs, callbacks=callbacks)
         self.train_dataloader = train_dataloader
@@ -52,8 +52,8 @@ class SFTTrainer(Trainer):
         self.model = model
         self.optimizer = optim
-        self.accimulation_steps = accimulation_steps
-        num_update_steps_per_epoch = len(train_dataloader) // self.accimulation_steps
+        self.accumulation_steps = accumulation_steps
+        num_update_steps_per_epoch = len(train_dataloader) // self.accumulation_steps
         max_steps = math.ceil(self.max_epochs * num_update_steps_per_epoch)
         self.scheduler = get_scheduler("cosine",
@@ -67,7 +67,7 @@ class SFTTrainer(Trainer):
             wandb.watch(self.model)
         total_loss = 0
         # epoch_bar = tqdm(range(self.epochs), desc='Epochs', disable=not is_rank_0())
-        step_bar = tqdm(range(len(self.train_dataloader) // self.accimulation_steps * self.max_epochs),
+        step_bar = tqdm(range(len(self.train_dataloader) // self.accumulation_steps * self.max_epochs),
                         desc=f'steps',
                         disable=not is_rank_0())
         for epoch in range(self.max_epochs):
@@ -85,20 +85,20 @@ class SFTTrainer(Trainer):
                 if loss >= 2.5 and is_rank_0():
                     logger.warning(f"batch_id:{batch_id}, abnormal loss: {loss}")
-                loss = loss / self.accimulation_steps
+                loss = loss / self.accumulation_steps
                 self.strategy.backward(loss, self.model, self.optimizer)
                 total_loss += loss.item()
                 # gradient accumulation
-                if (batch_id + 1) % self.accimulation_steps == 0:
+                if (batch_id + 1) % self.accumulation_steps == 0:
                     self.strategy.optimizer_step(self.optimizer)
                     self.optimizer.zero_grad()
                     self.scheduler.step()
                     if is_rank_0() and use_wandb:
                         wandb.log({
-                            "loss": total_loss / self.accimulation_steps,
+                            "loss": total_loss / self.accumulation_steps,
                             "lr": self.scheduler.get_last_lr()[0],
                             "epoch": epoch,
                             "batch_id": batch_id

View File

@@ -62,7 +62,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 4 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \

View File

@@ -154,7 +154,7 @@ def train(args):
                          eval_dataloader=eval_dataloader,
                          batch_size=args.batch_size,
                          max_epochs=args.max_epochs,
-                         accimulation_steps=args.accimulation_steps)
+                         accumulation_steps=args.accumulation_steps)
     trainer.fit(logger=logger, log_interval=args.log_interval)
@@ -183,7 +183,7 @@ if __name__ == '__main__':
     parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
     parser.add_argument('--log_interval', type=int, default=100, help="how many steps to log")
     parser.add_argument('--lr', type=float, default=5e-6)
-    parser.add_argument('--accimulation_steps', type=int, default=8)
+    parser.add_argument('--accumulation_steps', type=int, default=8)
     parser.add_argument('--enable_peft_lora', action='store_true', default=False)
     parser.add_argument("--is_short_text", action='store_true', default=False)
     args = parser.parse_args()
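Note that argparse derives the attribute name on the parsed namespace from the option string (leading dashes stripped, remaining dashes turned into underscores), which is why the flag and every args.accumulation_steps access have to be renamed together. A small illustration, independent of the training scripts:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--accumulation_steps', type=int, default=8)
    args = parser.parse_args([])          # parse no CLI args, fall back to the default
    print(args.accumulation_steps)        # 8 -- attribute name follows the option string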

View File

@@ -159,7 +159,7 @@ def train(args):
                          train_dataloader=train_dataloader,
                          eval_dataloader=eval_dataloader,
                          max_epochs=args.max_epochs,
-                         accimulation_steps=args.accimulation_steps)
+                         accumulation_steps=args.accumulation_steps)
     trainer.fit(logger=logger, use_wandb=args.use_wandb)
@@ -189,7 +189,7 @@ if __name__ == '__main__':
     parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
     parser.add_argument('--log_interval', type=int, default=100, help="how many steps to log")
     parser.add_argument('--lr', type=float, default=5e-6)
-    parser.add_argument('--accimulation_steps', type=int, default=8)
+    parser.add_argument('--accumulation_steps', type=int, default=8)
     parser.add_argument('--use_wandb', default=False, action='store_true')
     parser.add_argument('--grad_checkpoint', default=False, action='store_true')
     args = parser.parse_args()

View File

@@ -6,7 +6,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
     --batch_size 4 \
-    --accimulation_steps 8 \
+    --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
     --max_epochs 1 \