mirror of https://github.com/hpcaitech/ColossalAI
[misc] fit torch api upgrade and remove legacy import (#6093)
* [amp] fit torch's new api
* [amp] fix api call
* [amp] fix api call
* [misc] fit torch pytree api upgrade
* [misc] remove legacy import
* [misc] fit torch amp api
* [misc] fit torch amp api
parent 5ddad486ca
commit 58d8b8a2dd
@@ -279,4 +279,4 @@ class CudaAccelerator(BaseAccelerator):
         """
         Return autocast function
         """
-        return torch.cuda.amp.autocast(enabled=enabled, dtype=dtype, cache_enabled=cache_enabled)
+        return torch.amp.autocast(device_type="cuda", enabled=enabled, dtype=dtype, cache_enabled=cache_enabled)
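For reference, the replacement keeps the same keyword arguments and only adds device_type="cuda". A minimal sketch of the updated autocast call used directly, outside the accelerator wrapper (the model and tensor below are placeholders; a CUDA build of PyTorch recent enough to expose torch.amp.autocast is assumed):

import torch

model = torch.nn.Linear(16, 16).cuda()
x = torch.randn(4, 16, device="cuda")

# torch.cuda.amp.autocast(...) is deprecated; the device is now passed explicitly.
with torch.amp.autocast(device_type="cuda", enabled=True, dtype=torch.float16, cache_enabled=True):
    y = model(x)  # the matmul runs in float16 under the autocast context

print(y.dtype)  # torch.float16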
@@ -1,7 +1,6 @@
 import torch
 
 from colossalai.accelerator import get_accelerator
-from colossalai.legacy.nn.layer.colossalai_layer import Embedding, Linear
 
 from .bias_dropout_add import bias_dropout_add_fused_train
 from .bias_gelu import bias_gelu_impl
@@ -45,6 +44,7 @@ def warmup_jit_fusion(
     dtype: torch.dtype = torch.float32,
 ):
     """Compile JIT functions before the main training steps"""
+    from colossalai.legacy.nn.layer.colossalai_layer import Embedding, Linear
 
     embed = Embedding(vocab_size, hidden_size).to(get_accelerator().get_current_device())
     linear_1 = Linear(hidden_size, hidden_size * 4, skip_bias_add=True).to(get_accelerator().get_current_device())
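The two hunks above move the colossalai.legacy import from module level into warmup_jit_fusion itself. A small sketch of the same deferred-import pattern (the helper name below is hypothetical; the legacy layers are only resolved when the function runs, so merely importing the module no longer requires colossalai.legacy):

from colossalai.accelerator import get_accelerator


def build_warmup_layers(vocab_size: int, hidden_size: int):
    # Deferred import: colossalai.legacy is touched only at call time.
    from colossalai.legacy.nn.layer.colossalai_layer import Embedding, Linear

    device = get_accelerator().get_current_device()
    embed = Embedding(vocab_size, hidden_size).to(device)
    linear = Linear(hidden_size, hidden_size * 4, skip_bias_add=True).to(device)
    return embed, linear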
@@ -3,8 +3,9 @@ from typing import Any, List, Optional, Tuple
 
 import torch
 import torch.cuda
+from packaging.version import Version
 from torch.nn import Module
-from torch.utils._pytree import SUPPORTED_NODES, TreeSpec, _register_pytree_node, tree_flatten, tree_map, tree_unflatten
+from torch.utils._pytree import SUPPORTED_NODES, TreeSpec, tree_flatten, tree_map, tree_unflatten
 
 
 # this register are for torch under version 1.13.1, maybe removed in the future
@@ -16,6 +17,11 @@ def _odict_unflatten(values: List[Any], context: Any) -> "OrderedDict[Any, Any]":
     return OrderedDict((key, value) for key, value in zip(context, values))
 
 
-_register_pytree_node(OrderedDict, _odict_flatten, _odict_unflatten)
+if Version(torch.__version__) <= Version("1.13.1"):
+    try:
+        from torch.utils._pytree import register_pytree_node as _register_pytree_node
+    except ImportError:
+        from torch.utils._pytree import _register_pytree_node
+    _register_pytree_node(OrderedDict, _odict_flatten, _odict_unflatten)
 
 
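The version gate above registers OrderedDict as a pytree node only on torch <= 1.13.1, preferring the public register_pytree_node name when available and falling back to the private _register_pytree_node. An illustrative sketch of the same fallback used to register a container of my own (the Pair class is made up for the example; it is not part of ColossalAI):

from dataclasses import dataclass

import torch
from torch.utils._pytree import tree_flatten, tree_unflatten

try:  # public name on recent torch releases
    from torch.utils._pytree import register_pytree_node
except ImportError:  # older torch only ships the private helper
    from torch.utils._pytree import _register_pytree_node as register_pytree_node


@dataclass
class Pair:  # hypothetical container, for illustration only
    left: torch.Tensor
    right: torch.Tensor


register_pytree_node(
    Pair,
    lambda p: ([p.left, p.right], None),     # flatten: (children, context)
    lambda children, _ctx: Pair(*children),  # unflatten: rebuild from children
)

leaves, spec = tree_flatten(Pair(torch.zeros(2), torch.ones(2)))
rebuilt = tree_unflatten(leaves, spec)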
@@ -1,10 +1,5 @@
 import torch.nn
 
-from colossalai.legacy.zero.gemini.ophooks.runtime_mem_tracer_hook import (
-    GradMemStats,
-    GradMemTracerHook,
-    ParamMemTracerHook,
-)
 from colossalai.tensor.param_op_hook import ColoParamOpHookManager
 from colossalai.utils import _cast_float
 
@@ -27,6 +22,12 @@ class RuntimeMemTracer:
     def __init__(self, module: torch.nn.Module, dtype: torch.dtype = torch.half):
         super().__init__()
+        from colossalai.legacy.zero.gemini.ophooks.runtime_mem_tracer_hook import (
+            GradMemStats,
+            GradMemTracerHook,
+            ParamMemTracerHook,
+        )
+
         self.module = module
         self.dtype = dtype
         self._gradstat = GradMemStats()
@@ -8,7 +8,6 @@ import torch
 import torch.distributed as dist
 
 from colossalai.accelerator import get_accelerator
-from colossalai.legacy.utils.memory import colo_device_memory_capacity
 from colossalai.zero.gemini.chunk import Chunk
 
 from .chunk import Chunk, ChunkManager
@@ -172,6 +171,8 @@ class AutoPlacementPolicy(PlacementPolicy):
         Returns:
             int: the volume of memory that is evicted
         """
+        from colossalai.legacy.utils.memory import colo_device_memory_capacity
+
         start = time()
         cuda_capacity = colo_device_memory_capacity(get_accelerator().get_current_device())
         used_cuda_model_data = self.chunk_manager.total_mem["cuda"]
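The eviction path above now imports colo_device_memory_capacity lazily, right before it compares used CUDA model data against the device capacity. As a rough, illustrative stand-in for that capacity query (a sketch, not the ColossalAI implementation, which may additionally honor a configured memory fraction):

import torch


def device_memory_capacity(device: torch.device) -> int:
    # Total memory of the CUDA device in bytes, as reported by the driver.
    return torch.cuda.get_device_properties(device).total_memory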
@@ -16,7 +16,7 @@ Author: [Mingyan Jiang](https://github.com/jiangmingyan)
 AMP stands for automatic mixed precision training.
 In Colossal-AI, we have incorporated different implementations of mixed precision training:
 
-1. torch.cuda.amp
+1. torch.amp
 2. apex.amp
 3. naive amp
 
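Since the docs now point at torch.amp rather than torch.cuda.amp, here is a minimal, generic training-step sketch using the non-deprecated entry points (plain PyTorch, not ColossalAI's mixed-precision wrapper; it assumes a release recent enough to provide torch.amp.GradScaler, roughly 2.3+):

import torch

model = torch.nn.Linear(32, 32).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
scaler = torch.amp.GradScaler("cuda")

x = torch.randn(8, 32, device="cuda")
target = torch.randn(8, 32, device="cuda")

with torch.amp.autocast(device_type="cuda", dtype=torch.float16):
    loss = torch.nn.functional.mse_loss(model(x), target)

scaler.scale(loss).backward()  # scale the loss so fp16 gradients do not underflow
scaler.step(optimizer)         # unscales gradients, then applies the update
scaler.update()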
@@ -16,7 +16,7 @@
 AMP stands for automatic mixed precision training.
 In Colossal-AI, we have incorporated different implementations of mixed precision training:
 
-1. torch.cuda.amp
+1. torch.amp
 2. apex.amp
 3. naive amp
 