[builder] unified cpu_optim fused_optim interface (#2190)

pull/2191/head
Jiarui Fang 2022-12-23 20:57:41 +08:00 committed by GitHub
parent 9587b080ba
commit 355ffb386e
9 changed files with 28 additions and 50 deletions

@@ -3,19 +3,12 @@
 import torch
 import torch.distributed as dist
-try:
-    from colossalai._C import fused_optim
-except:
-    print('Colossalai should be built with cuda extension to use the FP16 optimizer')
-    from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-    fused_optim = FusedOptimBuilder().load()
 from torch.distributed import ProcessGroup
 from torch.optim import Optimizer
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.kernel import fused_optim
 from colossalai.logging import get_dist_logger
 from colossalai.utils import clip_grad_norm_fp32, copy_tensor_parallel_attributes, multi_tensor_applier

@@ -1,3 +1,15 @@
-from .cuda_native import LayerNorm, FusedScaleMaskSoftmax, MultiHeadAttention
+from .cuda_native import FusedScaleMaskSoftmax, LayerNorm, MultiHeadAttention
 
-__all__ = ["LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"]
+try:
+    from colossalai._C import fused_optim
+except:
+    from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
+    fused_optim = FusedOptimBuilder().load()
+
+try:
+    from colossalai._C import cpu_optim
+except ImportError:
+    from colossalai.kernel.op_builder import CPUAdamBuilder
+    cpu_optim = CPUAdamBuilder().load()
+
+__all__ = ["fused_optim", "cpu_optim", "LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"]

@@ -65,11 +65,8 @@ class FusedAdam(torch.optim.Optimizer):
         self.adamw_mode = 1 if adamw_mode else 0
         self.set_grad_none = set_grad_none
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
+
             # Skip buffer
             self._dummy_overflow_buf = torch.cuda.IntTensor([0])
             self.multi_tensor_adam = fused_optim.multi_tensor_adam
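
The optimizer-facing API does not change here; only the kernel lookup moves behind colossalai.kernel. A hedged usage sketch (the colossalai.nn.optimizer import path is assumed from the repository layout, and a CUDA build is required):

import torch
from colossalai.nn.optimizer import FusedAdam  # assumed import path

model = torch.nn.Linear(32, 32).cuda()
optimizer = FusedAdam(model.parameters(), lr=1e-3)

loss = model(torch.randn(8, 32, device="cuda")).sum()
loss.backward()
optimizer.step()  # dispatches to fused_optim.multi_tensor_adam internally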

@@ -76,11 +76,7 @@ class FusedLAMB(torch.optim.Optimizer):
                         max_grad_norm=max_grad_norm)
         super(FusedLAMB, self).__init__(params, defaults)
 
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
             self.multi_tensor_l2norm = fused_optim.multi_tensor_l2norm
             # Skip buffer

@@ -80,11 +80,8 @@ class FusedSGD(Optimizer):
         self.wd_after_momentum = wd_after_momentum
 
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
+
             # Skip buffer
             self._dummy_overflow_buf = torch.tensor([0],
                                                     dtype=torch.int,

@@ -76,13 +76,8 @@ class HybridAdam(NVMeOptimizer):
         default_args = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, bias_correction=bias_correction)
         super(HybridAdam, self).__init__(model_params, default_args, nvme_offload_fraction, nvme_offload_dir)
         self.adamw_mode = adamw_mode
 
-        try:
-            from colossalai._C import cpu_optim, fused_optim
-        except ImportError:
-            from colossalai.kernel.op_builder import CPUAdamBuilder, FusedOptimBuilder
-            fused_optim = FusedOptimBuilder().load()
-            cpu_optim = CPUAdamBuilder().load()
+        from colossalai.kernel import cpu_optim, fused_optim
 
         self.cpu_adam_op = cpu_optim.CPUAdamOptimizer(lr, betas[0], betas[1], eps, weight_decay, adamw_mode)
         self.gpu_adam_op = fused_optim.multi_tensor_adam
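
HybridAdam now takes both kernels from colossalai.kernel. A minimal sketch of what the constructor does with them, with the CPUAdamOptimizer argument order taken from the call in this hunk and illustrative hyperparameter values:

from colossalai.kernel import cpu_optim, fused_optim

lr, betas, eps, weight_decay, adamw_mode = 1e-3, (0.9, 0.999), 1e-8, 0.0, True

# CPU Adam kernel object, mirroring HybridAdam.__init__ above.
cpu_adam_op = cpu_optim.CPUAdamOptimizer(lr, betas[0], betas[1], eps, weight_decay, adamw_mode)
# GPU path: a raw multi-tensor kernel handle, driven via multi_tensor_applier.
gpu_adam_op = fused_optim.multi_tensor_adam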

@@ -4,28 +4,21 @@ import functools
 import os
 import random
 import socket
+from collections import defaultdict
+from contextlib import contextmanager
 from pathlib import Path
 from typing import Callable, Dict, List, Optional, Union
 
 import torch
+import torch.distributed as dist
 from torch._six import inf
 from torch.nn.parameter import Parameter
 
-try:
-    from colossalai._C import fused_optim
-except:
-    from colossalai.kernel.op_builder import FusedOptimBuilder
-    fused_optim = FusedOptimBuilder().load()
-
-from collections import defaultdict
-from contextlib import contextmanager
-
-import torch.distributed as dist
 from colossalai.constants import IS_TENSOR_PARALLEL, NUM_PARTITIONS, TENSOR_PARALLEL_ATTRIBUTES
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.kernel import fused_optim
 from colossalai.tensor import ColoParameter, ProcessGroup
 
 from .multi_tensor_apply import multi_tensor_applier
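
The utilities in this file drive the fused kernels through multi_tensor_applier, which is why the import moves here as well. A hedged sketch of a grouped L2-norm computation with the relocated import; the call convention (op, overflow buffer, list of tensor lists, extra flags) and the tuple return layout are assumed to mirror the apex-style multi-tensor interface:

import torch

from colossalai.kernel import fused_optim
from colossalai.utils import multi_tensor_applier

grads = [torch.randn(1024, device="cuda") for _ in range(4)]
overflow_buf = torch.cuda.IntTensor([0])

# Assumed apex-style call; False disables per-tensor norms.
total_norm, _ = multi_tensor_applier(fused_optim.multi_tensor_l2norm, overflow_buf, [grads], False)
print(total_norm)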

@@ -14,7 +14,7 @@ class MultiTensorApply(object):
 
     def __init__(self, chunk_size):
         try:
-            import colossalai._C.fused_optim
+            from colossalai.kernel import fused_optim
             MultiTensorApply.available = True
             self.chunk_size = chunk_size
         except ImportError as err:
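
The availability flag now keys off the unified import: if colossalai.kernel cannot provide fused_optim, the ImportError is caught here and the multi-tensor code paths stay disabled. A small sketch of how callers observe this, mirroring the optimizer hunks above:

from colossalai.utils import multi_tensor_applier

if multi_tensor_applier.available:
    # Safe to fetch the fused CUDA kernels.
    from colossalai.kernel import fused_optim
    adam_kernel = fused_optim.multi_tensor_adam
else:
    # Fall back to a plain PyTorch implementation.
    adam_kernel = None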

@@ -46,13 +46,8 @@ def torch_adam_update(
 
 @parameterize('p_dtype', [torch.float, torch.half])
 @parameterize('g_dtype', [torch.float, torch.half])
 def test_adam(adamw, step, p_dtype, g_dtype):
-    try:
-        import colossalai._C.fused_optim
-        fused_adam = colossalai._C.fused_optim.multi_tensor_adam
-    except:
-        from colossalai.kernel.op_builder import FusedOptimBuilder
-        fused_optim = FusedOptimBuilder().load()
-        fused_adam = fused_optim.multi_tensor_adam
+    from colossalai.kernel import fused_optim
+    fused_adam = fused_optim.multi_tensor_adam
 
     dummy_overflow_buf = torch.cuda.IntTensor([0])