
[misc] adapt to the torch API upgrade and remove legacy imports (#6093)

* [amp] adapt to torch's new API
* [amp] fix API call
* [amp] fix API call
* [misc] adapt to the torch pytree API upgrade
* [misc] remove legacy imports
* [misc] adapt to the torch AMP API
* [misc] adapt to the torch AMP API
Authored by Hongxin Liu 1 month ago, committed by GitHub
Commit 58d8b8a2dd
7 changed files:

1. colossalai/accelerator/cuda_accelerator.py (2 changed lines)
2. colossalai/kernel/jit/option.py (2 changed lines)
3. colossalai/pipeline/schedule/_utils.py (10 changed lines)
4. colossalai/zero/gemini/memory_tracer/runtime_mem_tracer.py (11 changed lines)
5. colossalai/zero/gemini/placement_policy.py (3 changed lines)
6. docs/source/en/features/mixed_precision_training_with_booster.md (2 changed lines)
7. docs/source/zh-Hans/features/mixed_precision_training_with_booster.md (2 changed lines)

colossalai/accelerator/cuda_accelerator.py (2 changed lines)

@@ -279,4 +279,4 @@ class CudaAccelerator(BaseAccelerator):
         """
         Return autocast function
         """
-        return torch.cuda.amp.autocast(enabled=enabled, dtype=dtype, cache_enabled=cache_enabled)
+        return torch.amp.autocast(device_type="cuda", enabled=enabled, dtype=dtype, cache_enabled=cache_enabled)
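
For reference, a minimal sketch of the old and new autocast spellings (assumes a CUDA device; torch.cuda.amp.autocast is deprecated in recent PyTorch releases in favour of the device-agnostic torch.amp.autocast used above):

    import torch

    model = torch.nn.Linear(16, 16).cuda()
    x = torch.randn(4, 16, device="cuda")

    # Deprecated spelling removed by this commit:
    #     with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):
    # Replacement: the device-agnostic API with an explicit device_type.
    with torch.amp.autocast(device_type="cuda", enabled=True, dtype=torch.float16):
        y = model(x)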

2
colossalai/kernel/jit/option.py

@ -1,7 +1,6 @@
import torch import torch
from colossalai.accelerator import get_accelerator from colossalai.accelerator import get_accelerator
from colossalai.legacy.nn.layer.colossalai_layer import Embedding, Linear
from .bias_dropout_add import bias_dropout_add_fused_train from .bias_dropout_add import bias_dropout_add_fused_train
from .bias_gelu import bias_gelu_impl from .bias_gelu import bias_gelu_impl
@ -45,6 +44,7 @@ def warmup_jit_fusion(
dtype: torch.dtype = torch.float32, dtype: torch.dtype = torch.float32,
): ):
"""Compile JIT functions before the main training steps""" """Compile JIT functions before the main training steps"""
from colossalai.legacy.nn.layer.colossalai_layer import Embedding, Linear
embed = Embedding(vocab_size, hidden_size).to(get_accelerator().get_current_device()) embed = Embedding(vocab_size, hidden_size).to(get_accelerator().get_current_device())
linear_1 = Linear(hidden_size, hidden_size * 4, skip_bias_add=True).to(get_accelerator().get_current_device()) linear_1 = Linear(hidden_size, hidden_size * 4, skip_bias_add=True).to(get_accelerator().get_current_device())
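
The module-level import of the legacy Embedding and Linear layers moves inside warmup_jit_fusion, so importing this module no longer pulls in colossalai.legacy. A generic sketch of the same deferred-import pattern (illustrative only; the standard-library decimal module stands in for the legacy package):

    def make_context(precision: int):
        # The import runs only when the function is called, so merely importing
        # the enclosing module does not load the deferred dependency.
        from decimal import Context

        return Context(prec=precision)

    print(make_context(10))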

colossalai/pipeline/schedule/_utils.py (10 changed lines)

@@ -3,8 +3,9 @@ from typing import Any, List, Optional, Tuple
 
 import torch
 import torch.cuda
+from packaging.version import Version
 from torch.nn import Module
-from torch.utils._pytree import SUPPORTED_NODES, TreeSpec, _register_pytree_node, tree_flatten, tree_map, tree_unflatten
+from torch.utils._pytree import SUPPORTED_NODES, TreeSpec, tree_flatten, tree_map, tree_unflatten
 
 # this register are for torch under version 1.13.1, maybe removed in the future
@@ -16,7 +17,12 @@ def _odict_unflatten(values: List[Any], context: Any) -> "OrderedDict[Any, Any]"
     return OrderedDict((key, value) for key, value in zip(context, values))
 
 
-_register_pytree_node(OrderedDict, _odict_flatten, _odict_unflatten)
+if Version(torch.__version__) <= Version("1.13.1"):
+    try:
+        from torch.utils._pytree import register_pytree_node as _register_pytree_node
+    except ImportError:
+        from torch.utils._pytree import _register_pytree_node
+    _register_pytree_node(OrderedDict, _odict_flatten, _odict_unflatten)
 
 
 def tree_map_hf(fn: Any, pytree: Any):
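
The manual OrderedDict pytree registration is now gated to torch <= 1.13.1, and the try/except falls back to whichever registration helper the installed torch exposes (the private _register_pytree_node was later superseded by the public register_pytree_node). As a quick illustration, assuming torch >= 2.1, which already treats OrderedDict as a supported pytree node, flattening works without any manual registration:

    from collections import OrderedDict

    import torch
    from torch.utils._pytree import tree_flatten, tree_unflatten

    data = OrderedDict(a=torch.ones(2), b=torch.zeros(2))
    leaves, spec = tree_flatten(data)        # leaves: [tensor of ones, tensor of zeros]
    restored = tree_unflatten(leaves, spec)  # round-trips back to an OrderedDict
    print(type(restored), leaves)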

colossalai/zero/gemini/memory_tracer/runtime_mem_tracer.py (11 changed lines)

@@ -1,10 +1,5 @@
 import torch.nn
 
-from colossalai.legacy.zero.gemini.ophooks.runtime_mem_tracer_hook import (
-    GradMemStats,
-    GradMemTracerHook,
-    ParamMemTracerHook,
-)
 from colossalai.tensor.param_op_hook import ColoParamOpHookManager
 from colossalai.utils import _cast_float
@@ -27,6 +22,12 @@ class RuntimeMemTracer:
     def __init__(self, module: torch.nn.Module, dtype: torch.dtype = torch.half):
         super().__init__()
+        from colossalai.legacy.zero.gemini.ophooks.runtime_mem_tracer_hook import (
+            GradMemStats,
+            GradMemTracerHook,
+            ParamMemTracerHook,
+        )
+
         self.module = module
         self.dtype = dtype
         self._gradstat = GradMemStats()

colossalai/zero/gemini/placement_policy.py (3 changed lines)

@@ -8,7 +8,6 @@ import torch
 import torch.distributed as dist
 
 from colossalai.accelerator import get_accelerator
-from colossalai.legacy.utils.memory import colo_device_memory_capacity
 from colossalai.zero.gemini.chunk import Chunk
 
 from .chunk import Chunk, ChunkManager
@@ -172,6 +171,8 @@ class AutoPlacementPolicy(PlacementPolicy):
         Returns:
             int: the volume of memory that is evicted
         """
+        from colossalai.legacy.utils.memory import colo_device_memory_capacity
+
         start = time()
         cuda_capacity = colo_device_memory_capacity(get_accelerator().get_current_device())
         used_cuda_model_data = self.chunk_manager.total_mem["cuda"]
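
The eviction path still relies on the legacy colo_device_memory_capacity helper, now imported lazily inside the method. For intuition only, a sketch of how total device memory can be queried with the public torch API (assumes a CUDA device; the legacy helper's actual implementation may differ, for example by applying a configured memory fraction):

    import torch

    def device_memory_capacity(device: torch.device) -> int:
        # Total memory of the given CUDA device, in bytes.
        return torch.cuda.get_device_properties(device).total_memory

    if torch.cuda.is_available():
        print(device_memory_capacity(torch.device("cuda:0")))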

docs/source/en/features/mixed_precision_training_with_booster.md (2 changed lines)

@@ -16,7 +16,7 @@ Author: [Mingyan Jiang](https://github.com/jiangmingyan)
 
 AMP stands for automatic mixed precision training.
 In Colossal-AI, we have incorporated different implementations of mixed precision training:
 
-1. torch.cuda.amp
+1. torch.amp
 2. apex.amp
 3. naive amp

docs/source/zh-Hans/features/mixed_precision_training_with_booster.md (2 changed lines)

@@ -16,7 +16,7 @@
 
 AMP 代表自动混合精度训练。
 在 Colossal-AI 中, 我们结合了混合精度训练的不同实现:
 
-1. torch.cuda.amp
+1. torch.amp
 2. apex.amp
 3. naive amp
