From 33eef714db460d3db42698a2d969cb6a669dc583 Mon Sep 17 00:00:00 2001 From: digger yu Date: Thu, 8 Jun 2023 16:09:32 +0800 Subject: [PATCH] fix typo examples and docs (#3932) --- .../parallelize_your_training_like_Megatron.md | 6 +++--- .../parallelize_your_training_like_Megatron.md | 6 +++--- examples/images/dreambooth/README.md | 2 +- examples/language/bert/README.md | 2 +- examples/language/gpt/gemini/train_gpt_demo.py | 8 ++++---- examples/language/gpt/titans/model/embed.py | 2 +- examples/language/opt/opt_train_demo.py | 2 +- examples/language/palm/train.py | 6 +++--- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/source/en/advanced_tutorials/parallelize_your_training_like_Megatron.md b/docs/source/en/advanced_tutorials/parallelize_your_training_like_Megatron.md index 22d52fb3c..978ac32fc 100644 --- a/docs/source/en/advanced_tutorials/parallelize_your_training_like_Megatron.md +++ b/docs/source/en/advanced_tutorials/parallelize_your_training_like_Megatron.md @@ -141,16 +141,16 @@ for mn, module in model.named_modules(): if 'mlp.c_fc' in mn: if 'weight' in pn or 'bias' in pn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice # keep the shape of the output from c_fc param.compute_spec.set_output_replicate(False) elif 'mlp.c_proj' in mn: if 'weight' in pn: split_param_row_tp1d(param, pg) # row slice elif 'wte' in mn or 'wpe' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice elif 'c_attn' in mn or 'c_proj' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice ``` The modified model is illustrated below. diff --git a/docs/source/zh-Hans/advanced_tutorials/parallelize_your_training_like_Megatron.md b/docs/source/zh-Hans/advanced_tutorials/parallelize_your_training_like_Megatron.md index c4131e593..b4e0d18a2 100644 --- a/docs/source/zh-Hans/advanced_tutorials/parallelize_your_training_like_Megatron.md +++ b/docs/source/zh-Hans/advanced_tutorials/parallelize_your_training_like_Megatron.md @@ -126,16 +126,16 @@ for mn, module in model.named_modules(): if 'mlp.c_fc' in mn: if 'weight' in pn or 'bias' in pn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice # keep the shape of the output from c_fc param.compute_spec.set_output_replicate(False) elif 'mlp.c_proj' in mn: if 'weight' in pn: split_param_row_tp1d(param, pg) # row slice elif 'wte' in mn or 'wpe' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice elif 'c_attn' in mn or 'c_proj' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice ``` 修改后的模型如下图所示。 diff --git a/examples/images/dreambooth/README.md b/examples/images/dreambooth/README.md index 7c117d841..5b350bc95 100644 --- a/examples/images/dreambooth/README.md +++ b/examples/images/dreambooth/README.md @@ -37,7 +37,7 @@ The `text` include the tag `Teyvat`, `Name`,`Element`, `Weapon`, `Region`, `Mode ## Training -We provide the script `colossalai.sh` to run the training task with colossalai. Meanwhile, we also provided traditional training process of dreambooth, `dreambooth.sh`, for possible comparation. For instance, the script of training process for [stable-diffusion-v1-4] model can be modified into: +We provide the script `colossalai.sh` to run the training task with colossalai. Meanwhile, we also provided traditional training process of dreambooth, `dreambooth.sh`, for possible comparison. For instance, the script of training process for [stable-diffusion-v1-4] model can be modified into: ```bash export MODEL_NAME="CompVis/stable-diffusion-v1-4" diff --git a/examples/language/bert/README.md b/examples/language/bert/README.md index c845a5c50..81c3f03ff 100644 --- a/examples/language/bert/README.md +++ b/examples/language/bert/README.md @@ -1,6 +1,6 @@ ## Overview -This directory includes two parts: Using the Booster API fintune Huggingface Bert and AlBert models and benchmarking Bert and AlBert models with different Booster Plugin. +This directory includes two parts: Using the Booster API finetune Huggingface Bert and AlBert models and benchmarking Bert and AlBert models with different Booster Plugin. ## Finetune ``` diff --git a/examples/language/gpt/gemini/train_gpt_demo.py b/examples/language/gpt/gemini/train_gpt_demo.py index 92751c7e2..4b78624f0 100644 --- a/examples/language/gpt/gemini/train_gpt_demo.py +++ b/examples/language/gpt/gemini/train_gpt_demo.py @@ -162,7 +162,7 @@ def tensor_parallelize(model: torch.nn.Module, pg: ProcessGroup): # shard it w.r.t tp pattern if 'mlp.c_fc' in mn: if 'weight' in pn or 'bias' in pn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice # keep the shape of the output from c_fc param.compute_spec.set_output_replicate(False) else: @@ -173,9 +173,9 @@ def tensor_parallelize(model: torch.nn.Module, pg: ProcessGroup): else: param.set_dist_spec(ReplicaSpec()) elif 'wte' in mn or 'wpe' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice elif 'c_attn' in mn or 'c_proj' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice else: param.set_dist_spec(ReplicaSpec()) param.visited = True @@ -237,7 +237,7 @@ def main(): if args.tp_degree > 1: tensor_parallelize(model, tp_pg) - # asign running configurations + # assign running configurations if args.distplan == "CAI_ZeRO1": zero_stage = 1 elif args.distplan == "CAI_ZeRO2": diff --git a/examples/language/gpt/titans/model/embed.py b/examples/language/gpt/titans/model/embed.py index 6369b9f8c..d825ae92a 100644 --- a/examples/language/gpt/titans/model/embed.py +++ b/examples/language/gpt/titans/model/embed.py @@ -305,7 +305,7 @@ class _VocabParallelCrossEntropy(torch.autograd.Function): @staticmethod def backward(ctx, grad_output): - # Retreive tensors from the forward path. + # Retrieve tensors from the forward path. softmax, target_mask, masked_target_1d = ctx.saved_tensors # All the inputs have softmax as their gradient. diff --git a/examples/language/opt/opt_train_demo.py b/examples/language/opt/opt_train_demo.py index 8a2ad5f55..bb2eb52ce 100644 --- a/examples/language/opt/opt_train_demo.py +++ b/examples/language/opt/opt_train_demo.py @@ -38,7 +38,7 @@ def train_epoch(epoch, model, optimizer, lr_scheduler, dataloader, booster, coor for batch in pbar: - # Foward + # Forward optimizer.zero_grad() batch = move_to_cuda(batch, torch.cuda.current_device()) diff --git a/examples/language/palm/train.py b/examples/language/palm/train.py index 62062e8bd..a0600db1b 100644 --- a/examples/language/palm/train.py +++ b/examples/language/palm/train.py @@ -140,15 +140,15 @@ def tensor_parallelize(model: torch.nn.Module, pg: ProcessGroup): continue param.set_dist_spec(ReplicaSpec()) if 'net.0' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice elif 'to_q' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice elif 'to_kv' in mn: split_param_row_tp1d(param, pg) # row slice elif 'to_out' in mn: split_param_row_tp1d(param, pg) # row slice elif '1.1' in mn: - split_param_col_tp1d(param, pg) # colmn slice + split_param_col_tp1d(param, pg) # column slice elif '1.2' in mn: split_param_row_tp1d(param, pg) # row slice else: