mirror of https://github.com/hpcaitech/ColossalAI
[builder] unified cpu_optim fused_optim interface (#2190)
parent 9587b080ba
commit 355ffb386e
@@ -3,19 +3,12 @@
 import torch
 import torch.distributed as dist
-
-try:
-    from colossalai._C import fused_optim
-except:
-    print('Colossalai should be built with cuda extension to use the FP16 optimizer')
-    from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-    fused_optim = FusedOptimBuilder().load()
-
 from torch.distributed import ProcessGroup
 from torch.optim import Optimizer
 
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.kernel import fused_optim
 from colossalai.logging import get_dist_logger
 from colossalai.utils import clip_grad_norm_fp32, copy_tensor_parallel_attributes, multi_tensor_applier
@@ -1,3 +1,15 @@
-from .cuda_native import LayerNorm, FusedScaleMaskSoftmax, MultiHeadAttention
+from .cuda_native import FusedScaleMaskSoftmax, LayerNorm, MultiHeadAttention
 
-__all__ = ["LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"]
+try:
+    from colossalai._C import fused_optim
+except:
+    from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
+    fused_optim = FusedOptimBuilder().load()
+
+try:
+    from colossalai._C import cpu_optim
+except ImportError:
+    from colossalai.kernel.op_builder import CPUAdamBuilder
+    cpu_optim = CPUAdamBuilder().load()
+
+__all__ = ["fused_optim", "cpu_optim", "LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"]
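With the kernel package exporting both extensions, downstream callers can rely on a single import path. A minimal, hypothetical usage sketch (not part of this commit; it assumes ColossalAI is installed so the prebuilt colossalai._C extension is found or the op builders can JIT-compile the kernels on first import):

    # Hypothetical consumer code: one unified import instead of per-file try/except fallbacks.
    from colossalai.kernel import cpu_optim, fused_optim

    # cpu_optim exposes the C++ CPU Adam kernel used by HybridAdam; the constructor
    # arguments (lr, beta1, beta2, eps, weight_decay, adamw_mode) mirror the HybridAdam hunk below.
    cpu_adam = cpu_optim.CPUAdamOptimizer(1e-3, 0.9, 0.999, 1e-8, 0.0, True)

    # fused_optim exposes the CUDA multi-tensor kernels used by FusedAdam, FusedLAMB and FusedSGD.
    fused_adam_kernel = fused_optim.multi_tensor_adam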
@@ -65,11 +65,8 @@ class FusedAdam(torch.optim.Optimizer):
         self.adamw_mode = 1 if adamw_mode else 0
         self.set_grad_none = set_grad_none
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
+
             # Skip buffer
             self._dummy_overflow_buf = torch.cuda.IntTensor([0])
             self.multi_tensor_adam = fused_optim.multi_tensor_adam
@@ -76,11 +76,7 @@ class FusedLAMB(torch.optim.Optimizer):
                         max_grad_norm=max_grad_norm)
         super(FusedLAMB, self).__init__(params, defaults)
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
 
             self.multi_tensor_l2norm = fused_optim.multi_tensor_l2norm
             # Skip buffer
@@ -80,11 +80,8 @@ class FusedSGD(Optimizer):
         self.wd_after_momentum = wd_after_momentum
 
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
+
             # Skip buffer
             self._dummy_overflow_buf = torch.tensor([0],
                                                     dtype=torch.int,
@@ -76,13 +76,8 @@ class HybridAdam(NVMeOptimizer):
         default_args = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, bias_correction=bias_correction)
         super(HybridAdam, self).__init__(model_params, default_args, nvme_offload_fraction, nvme_offload_dir)
         self.adamw_mode = adamw_mode
-        try:
-            from colossalai._C import cpu_optim, fused_optim
-        except ImportError:
-            from colossalai.kernel.op_builder import CPUAdamBuilder, FusedOptimBuilder
-            fused_optim = FusedOptimBuilder().load()
-            cpu_optim = CPUAdamBuilder().load()
 
+        from colossalai.kernel import cpu_optim, fused_optim
         self.cpu_adam_op = cpu_optim.CPUAdamOptimizer(lr, betas[0], betas[1], eps, weight_decay, adamw_mode)
 
         self.gpu_adam_op = fused_optim.multi_tensor_adam
@@ -4,28 +4,21 @@ import functools
 import os
 import random
 import socket
+from collections import defaultdict
+from contextlib import contextmanager
 from pathlib import Path
 from typing import Callable, Dict, List, Optional, Union
 
 import torch
+import torch.distributed as dist
 from torch._six import inf
 from torch.nn.parameter import Parameter
 
-try:
-    from colossalai._C import fused_optim
-except:
-    from colossalai.kernel.op_builder import FusedOptimBuilder
-    fused_optim = FusedOptimBuilder().load()
-
-from collections import defaultdict
-from contextlib import contextmanager
-
-import torch.distributed as dist
-
 from colossalai.constants import IS_TENSOR_PARALLEL, NUM_PARTITIONS, TENSOR_PARALLEL_ATTRIBUTES
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.kernel import fused_optim
 from colossalai.tensor import ColoParameter, ProcessGroup
 
 from .multi_tensor_apply import multi_tensor_applier
@@ -14,7 +14,7 @@ class MultiTensorApply(object):
 
     def __init__(self, chunk_size):
         try:
-            import colossalai._C.fused_optim
+            from colossalai.kernel import fused_optim
             MultiTensorApply.available = True
             self.chunk_size = chunk_size
         except ImportError as err:
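Since MultiTensorApply now probes the same unified import, the availability flag and the kernels come from one place. A hedged sketch of the guard the fused optimizers above perform (hypothetical standalone code, not part of this commit):

    # Hypothetical guard mirroring FusedAdam/FusedLAMB/FusedSGD: the flag is set in
    # MultiTensorApply.__init__ by attempting `from colossalai.kernel import fused_optim`.
    from colossalai.utils import multi_tensor_applier

    if multi_tensor_applier.available:
        from colossalai.kernel import fused_optim
        fused_adam = fused_optim.multi_tensor_adam    # CUDA multi-tensor Adam kernel
    else:
        raise RuntimeError('fused optimizer kernels are unavailable; build ColossalAI with CUDA extensions')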
@@ -46,13 +46,8 @@ def torch_adam_update(
 @parameterize('p_dtype', [torch.float, torch.half])
 @parameterize('g_dtype', [torch.float, torch.half])
 def test_adam(adamw, step, p_dtype, g_dtype):
-    try:
-        import colossalai._C.fused_optim
-        fused_adam = colossalai._C.fused_optim.multi_tensor_adam
-    except:
-        from colossalai.kernel.op_builder import FusedOptimBuilder
-        fused_optim = FusedOptimBuilder().load()
-        fused_adam = fused_optim.multi_tensor_adam
+    from colossalai.kernel import fused_optim
+    fused_adam = fused_optim.multi_tensor_adam
 
     dummy_overflow_buf = torch.cuda.IntTensor([0])