mirror of https://github.com/hpcaitech/ColossalAI
[hotfix] Fix examples no pad token & auto parallel codegen bug; (#5606)
* fix no pad token bug
* fixed some auto parallel codegen bugs, but these might not run on torch 2.1

Co-authored-by: Edenzzzz <wtan45@wisc.edu>

pull/5619/head
parent a0ad587c24
commit d83c633ca6
@@ -246,7 +246,7 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,
 
 @compatibility(is_backward_compatible=True)
 class ActivationCheckpointCodeGen(CodeGen):
-    def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace) -> PythonCode:
+    def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace, verbose=None) -> PythonCode:
         free_vars: List[str] = []
         body: List[str] = []
         globals_: Dict[str, Any] = {}
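For context on the signature change: recent torch.fx releases pass a `verbose` keyword down to `CodeGen._gen_python_code` when a graph is recompiled, so an override that keeps the old four-argument signature fails with a TypeError. Below is a minimal sketch, not the PR's code, of a version-tolerant override that simply forwards unknown keywords to the stock implementation; `VersionTolerantCodeGen` is an illustrative name, not part of ColossalAI.

import torch
import torch.fx
from torch.fx.graph import CodeGen, PythonCode, _Namespace


class VersionTolerantCodeGen(CodeGen):
    def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace, **kwargs) -> PythonCode:
        # Absorb `verbose` (and any future keyword) instead of pinning one
        # torch version's signature, then defer to the default generator.
        return super()._gen_python_code(nodes, root_module, namespace, **kwargs)


def f(x):
    return torch.relu(x) + 1


gm = torch.fx.symbolic_trace(f)
gm.graph.set_codegen(VersionTolerantCodeGen())
gm.recompile()  # re-runs code generation through the custom CodeGen
print(gm.code)

The PR instead adds `verbose=None` explicitly to each overridden signature, which covers the same call path for the torch versions it targets; the commit message notes it may still not run on torch 2.1.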
@@ -5,7 +5,7 @@ import torch
 import torch.nn as nn
 
 from colossalai.utils import _cast_float
-from colossalai.zero.legacy.gemini.tensor_utils import free_storage
+from colossalai.utils.common import free_storage
 
 from .region_manager import RegionManager
 from .util import GlobalRuntimeInfo
@@ -3,7 +3,8 @@ from typing import Dict, List, Tuple
 import torch
 from torch.fx import Node
 
-from colossalai.zero.legacy.gemini.tensor_utils import alloc_storage, free_storage
+from colossalai.utils.common import free_storage
+from colossalai.zero.gemini.chunk.chunk import alloc_storage
 
 
 class Region:
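The two import hunks above only move the offload storage helpers to their current locations. For readers unfamiliar with them, here is a rough reference sketch of what alloc_storage/free_storage helpers of this kind typically do (an assumption-level sketch, not necessarily ColossalAI's exact implementation): they detach or re-attach a tensor's backing storage while keeping its shape metadata, which is what the offload region manager relies on.

import torch


def free_storage(data: torch.Tensor) -> None:
    # Drop the backing storage but keep shape/stride metadata, so the tensor
    # object can be re-materialized later without being re-created.
    if data.untyped_storage().size() > 0:
        assert data.storage_offset() == 0, "tensor must own its whole storage"
        data.untyped_storage().resize_(0)


def alloc_storage(data: torch.Tensor) -> None:
    # Re-attach storage, sized in bytes to match the tensor's element count.
    if data.untyped_storage().size() == 0:
        data.untyped_storage().resize_(data.numel() * data.element_size())


t = torch.randn(4, 4)
free_storage(t)   # memory released; t still reports shape (4, 4)
alloc_storage(t)  # storage is back; contents are undefined until written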
@@ -372,7 +372,7 @@ if AUTOCHUNK_AVAILABLE:
             if print_progress:
                 get_logger().info("AutoChunk start codegen")
 
-        def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace) -> PythonCode:
+        def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace, verbose=None) -> PythonCode:
             free_vars: List[str] = []
             body: List[str] = []
             globals_: Dict[str, Any] = {}
@@ -625,7 +625,7 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,
 if CODEGEN_AVAILABLE:
 
     class ActivationCheckpointCodeGen(CodeGen):
-        def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace) -> PythonCode:
+        def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace, verbose=None) -> PythonCode:
            free_vars: List[str] = []
            body: List[str] = []
            globals_: Dict[str, Any] = {}
@@ -62,6 +62,8 @@ class GLUEDataBuilder:
         self.text_fields = self.task_text_field_map[task_name]
         self.num_labels = self.glue_task_num_labels[task_name]
         self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)
+        if not getattr(self.tokenizer, "pad_token", None):
+            self.tokenizer.pad_token = self.tokenizer._eos_token
         self.setup()
 
     def setup(self):
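The GLUE example hunk is the "no pad token" fix itself: tokenizers such as GPT-2's ship without a pad token, so batched encoding with padding fails until one is assigned. A minimal standalone sketch of the same fallback follows; "gpt2" is only an example checkpoint, and the public eos_token attribute is used here, whereas the diff assigns the private _eos_token attribute that holds the same special token.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)
if tokenizer.pad_token is None:
    # Reuse the end-of-sequence token for padding so batched encoding works.
    tokenizer.pad_token = tokenizer.eos_token

batch = tokenizer(
    ["short text", "a slightly longer piece of text"],
    padding=True,
    return_tensors="pt",
)
print(batch["input_ids"].shape)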