mirror of https://github.com/hpcaitech/ColossalAI
fix typo examples and docs (#3932)
parent 407aa48461
commit 33eef714db
@@ -141,16 +141,16 @@ for mn, module in model.named_modules():
         if 'mlp.c_fc' in mn:
             if 'weight' in pn or 'bias' in pn:
-                split_param_col_tp1d(param, pg)    # colmn slice
+                split_param_col_tp1d(param, pg)    # column slice
                 # keep the shape of the output from c_fc
                 param.compute_spec.set_output_replicate(False)
         elif 'mlp.c_proj' in mn:
             if 'weight' in pn:
                 split_param_row_tp1d(param, pg)    # row slice
         elif 'wte' in mn or 'wpe' in mn:
-            split_param_col_tp1d(param, pg)    # colmn slice
+            split_param_col_tp1d(param, pg)    # column slice
         elif 'c_attn' in mn or 'c_proj' in mn:
-            split_param_col_tp1d(param, pg)    # colmn slice
+            split_param_col_tp1d(param, pg)    # column slice
 ```
 
 The modified model is illustrated below.
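This hunk (and its Chinese twin below) only corrects the comment text; the split helpers it calls are defined elsewhere in the example. For context, a minimal sketch of how `split_param_col_tp1d` and `split_param_row_tp1d` are typically defined, assuming ColossalAI's `ShardSpec`/`ComputeSpec` tensor API — this block is not part of the commit:

```python
# Not part of this commit: plausible definitions of the split helpers used above,
# assuming ColossalAI's ColoParameter / ShardSpec / ComputeSpec API.
from colossalai.tensor import ColoParameter, ComputePattern, ComputeSpec, ProcessGroup, ShardSpec


def split_param_single_dim_tp1d(dim: int, param: ColoParameter, pg: ProcessGroup):
    # Shard one dimension of the parameter across the tensor-parallel ranks of `pg`
    # and mark it for 1D tensor-parallel compute.
    spec = (ShardSpec([dim], [pg.tp_world_size()]), ComputeSpec(ComputePattern.TP1D))
    param.set_tensor_spec(*spec)


def split_param_row_tp1d(param: ColoParameter, pg: ProcessGroup):
    split_param_single_dim_tp1d(0, param, pg)   # row slice: shard dim 0


def split_param_col_tp1d(param: ColoParameter, pg: ProcessGroup):
    split_param_single_dim_tp1d(-1, param, pg)  # column slice: shard the last dim
```

A column slice shards the output features of `c_fc`, and `set_output_replicate(False)` keeps that output sharded so the row-sliced `c_proj` weight can consume it directly, which is what the "keep the shape of the output from c_fc" comment refers to.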
@@ -126,16 +126,16 @@ for mn, module in model.named_modules():
         if 'mlp.c_fc' in mn:
             if 'weight' in pn or 'bias' in pn:
-                split_param_col_tp1d(param, pg)    # colmn slice
+                split_param_col_tp1d(param, pg)    # column slice
                 # keep the shape of the output from c_fc
                 param.compute_spec.set_output_replicate(False)
         elif 'mlp.c_proj' in mn:
             if 'weight' in pn:
                 split_param_row_tp1d(param, pg)    # row slice
         elif 'wte' in mn or 'wpe' in mn:
-            split_param_col_tp1d(param, pg)    # colmn slice
+            split_param_col_tp1d(param, pg)    # column slice
         elif 'c_attn' in mn or 'c_proj' in mn:
-            split_param_col_tp1d(param, pg)    # colmn slice
+            split_param_col_tp1d(param, pg)    # column slice
 ```
 
 The modified model is shown in the figure below.
@@ -37,7 +37,7 @@ The `text` include the tag `Teyvat`, `Name`,`Element`, `Weapon`, `Region`, `Mode
 
 ## Training
 
-We provide the script `colossalai.sh` to run the training task with colossalai. Meanwhile, we also provided traditional training process of dreambooth, `dreambooth.sh`, for possible comparation. For instance, the script of training process for [stable-diffusion-v1-4] model can be modified into:
+We provide the script `colossalai.sh` to run the training task with colossalai. Meanwhile, we also provided traditional training process of dreambooth, `dreambooth.sh`, for possible comparison. For instance, the script of training process for [stable-diffusion-v1-4] model can be modified into:
 
 ```bash
 export MODEL_NAME="CompVis/stable-diffusion-v1-4"
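For readers following the DreamBooth example, a purely illustrative continuation of such a script is sketched below; the paths, prompt, and hyperparameters are placeholders, and the authoritative flags are the ones in the example's own `colossalai.sh` / `dreambooth.sh`:

```bash
# Illustrative only -- not part of this commit. Paths, prompt, and hyperparameters
# are placeholders; check the example's colossalai.sh / dreambooth.sh for the real flags.
export MODEL_NAME="CompVis/stable-diffusion-v1-4"
export INSTANCE_DIR="./Teyvat/data"     # placeholder: folder with instance images
export OUTPUT_DIR="./weight_output"     # placeholder: where to save the finetuned weights

torchrun --nproc_per_node 1 train_dreambooth_colossalai.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --instance_data_dir=$INSTANCE_DIR \
  --output_dir=$OUTPUT_DIR \
  --instance_prompt="a photo of a Teyvat character" \
  --resolution=512 \
  --train_batch_size=1 \
  --learning_rate=5e-6 \
  --max_train_steps=400
```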
@@ -1,6 +1,6 @@
 ## Overview
 
-This directory includes two parts: Using the Booster API fintune Huggingface Bert and AlBert models and benchmarking Bert and AlBert models with different Booster Plugin.
+This directory includes two parts: Using the Booster API finetune Huggingface Bert and AlBert models and benchmarking Bert and AlBert models with different Booster Plugin.
 
 ## Finetune
 ```
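As context for this README (not part of the commit), finetuning a Hugging Face BERT model through the Booster API generally follows the pattern sketched below; the plugin choice and hyperparameters here are illustrative:

```python
# Illustrative sketch, not part of this commit: finetuning a Hugging Face BERT model
# with ColossalAI's Booster API. Plugin choice and hyperparameters are placeholders.
import torch
import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import TorchDDPPlugin
from transformers import BertForSequenceClassification

colossalai.launch_from_torch(config={})  # the config dict may be optional on newer releases

model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

# Wrap the model and optimizer; boost() also accepts criterion, dataloader, lr_scheduler.
booster = Booster(plugin=TorchDDPPlugin())
model, optimizer, _, _, _ = booster.boost(model, optimizer)

# Inside the training loop, gradients go through the booster:
#   outputs = model(**batch)
#   booster.backward(outputs.loss, optimizer)
#   optimizer.step(); optimizer.zero_grad()
```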
@@ -162,7 +162,7 @@ def tensor_parallelize(model: torch.nn.Module, pg: ProcessGroup):
             # shard it w.r.t tp pattern
             if 'mlp.c_fc' in mn:
                 if 'weight' in pn or 'bias' in pn:
-                    split_param_col_tp1d(param, pg)    # colmn slice
+                    split_param_col_tp1d(param, pg)    # column slice
                     # keep the shape of the output from c_fc
                     param.compute_spec.set_output_replicate(False)
                 else:
@@ -173,9 +173,9 @@ def tensor_parallelize(model: torch.nn.Module, pg: ProcessGroup):
                 else:
                     param.set_dist_spec(ReplicaSpec())
             elif 'wte' in mn or 'wpe' in mn:
-                split_param_col_tp1d(param, pg)    # colmn slice
+                split_param_col_tp1d(param, pg)    # column slice
             elif 'c_attn' in mn or 'c_proj' in mn:
-                split_param_col_tp1d(param, pg)    # colmn slice
+                split_param_col_tp1d(param, pg)    # column slice
             else:
                 param.set_dist_spec(ReplicaSpec())
             param.visited = True
@@ -237,7 +237,7 @@ def main():
     if args.tp_degree > 1:
         tensor_parallelize(model, tp_pg)
 
-    # asign running configurations
+    # assign running configurations
     if args.distplan == "CAI_ZeRO1":
         zero_stage = 1
     elif args.distplan == "CAI_ZeRO2":
@@ -305,7 +305,7 @@ class _VocabParallelCrossEntropy(torch.autograd.Function):
     @staticmethod
     def backward(ctx, grad_output):
 
-        # Retreive tensors from the forward path.
+        # Retrieve tensors from the forward path.
         softmax, target_mask, masked_target_1d = ctx.saved_tensors
 
         # All the inputs have softmax as their gradient.
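The "All the inputs have softmax as their gradient" context line refers to the standard Megatron-style derivation, d(loss)/d(logits) = softmax - one_hot(target), applied only on the vocab shard that owns the target. A sketch of how such a backward typically finishes, continuing from the saved tensors above (not necessarily this file's verbatim code):

```python
# Sketch of a Megatron-style vocab-parallel cross-entropy backward,
# continuing inside backward() from the saved tensors above.
grad_input = softmax                        # start from softmax(logits) on this vocab shard
grad_2d = grad_input.view(-1, grad_input.size(-1))
arange_1d = torch.arange(grad_2d.size(0), device=grad_2d.device)
# Subtract 1 at the true-target positions, but only on the rank that owns them
# (target_mask marks targets that fall outside this rank's vocab partition).
grad_2d[arange_1d, masked_target_1d] -= 1.0 - target_mask.view(-1).float()
grad_input.mul_(grad_output.unsqueeze(-1))  # chain rule with the incoming gradient
return grad_input, None, None               # one grad per forward input (assumed logits, target, process group)
```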
@@ -38,7 +38,7 @@ def train_epoch(epoch, model, optimizer, lr_scheduler, dataloader, booster, coor
 
     for batch in pbar:
 
-        # Foward
+        # Forward
         optimizer.zero_grad()
         batch = move_to_cuda(batch, torch.cuda.current_device())
 
@@ -140,15 +140,15 @@ def tensor_parallelize(model: torch.nn.Module, pg: ProcessGroup):
                 continue
             param.set_dist_spec(ReplicaSpec())
             if 'net.0' in mn:
-                split_param_col_tp1d(param, pg)    # colmn slice
+                split_param_col_tp1d(param, pg)    # column slice
             elif 'to_q' in mn:
-                split_param_col_tp1d(param, pg)    # colmn slice
+                split_param_col_tp1d(param, pg)    # column slice
             elif 'to_kv' in mn:
                 split_param_row_tp1d(param, pg)    # row slice
             elif 'to_out' in mn:
                 split_param_row_tp1d(param, pg)    # row slice
             elif '1.1' in mn:
-                split_param_col_tp1d(param, pg)    # colmn slice
+                split_param_col_tp1d(param, pg)    # column slice
             elif '1.2' in mn:
                 split_param_row_tp1d(param, pg)    # row slice
             else: