from typing import Optional

from colossalai.utils import get_current_device
from torch import nn

from ... import init as init
from ..parallel_1d import *
from ..parallel_2d import *
from ..parallel_2p5d import *
from ..parallel_3d import *
from ..utils import get_tensor_parallel_mode
from ..vanilla import *

# Tensor-parallel mode name -> layer-norm implementation used for that mode.
# The 'None' and '1d' modes are intentionally absent: LayerNorm handles them
# with a plain ``torch.nn.LayerNorm`` instead.
_parallel_layernorm = {
    '2d': LayerNorm2D,
    '2.5d': LayerNorm2p5D,
    '3d': LayerNorm3D,
}


class LayerNorm(nn.Module):
    r"""
    Layer normalization wrapper that dispatches on the tensor-parallel mode.

    The mode reported by ``get_tensor_parallel_mode()`` decides which
    implementation is built and stored in ``self.norm``: ``torch.nn.LayerNorm``
    for the ``'None'`` and ``'1d'`` modes, otherwise the class registered for
    that mode in ``_parallel_layernorm``.

    :param normalized_shape: input shape from an expected input of size
        :math:`[* \times \text{normalized_shape}[0] \times \text{normalized_shape}[1]
        \times \ldots \times \text{normalized_shape}[-1]]`.
        A single integer is treated as a singleton list, in which case the
        module normalizes over the last dimension, which is expected to be of
        that specific size.
    :type normalized_shape: int
    :param eps: a value added to the denominator for numerical stability, defaults to 1e-05
    :type eps: float, optional
    :param dtype: The dtype of parameters, defaults to None
    :type dtype: torch.dtype, optional
    """

    def __init__(self, normalized_shape: int, eps=1e-05, dtype=None) -> None:
        super().__init__()
        mode = get_tensor_parallel_mode()
        if mode in ('None', '1d'):
            # These modes use the stock PyTorch layer, created on the current device.
            layer = nn.LayerNorm(
                normalized_shape,
                eps=eps,
                device=get_current_device(),
                dtype=dtype,
            )
        else:
            # A mode missing from the table raises KeyError here.
            layer_cls = _parallel_layernorm[mode]
            layer = layer_cls(normalized_shape, eps=eps, dtype=dtype)
        self.norm = layer

    @property
    def weight(self):
        """The ``weight`` attribute of the wrapped normalization layer."""
        return self.norm.weight

    @property
    def bias(self):
        """The ``bias`` attribute of the wrapped normalization layer."""
        return self.norm.bias

    def forward(self, *args):
        """Forward all positional arguments to the wrapped layer and return its result."""
        return self.norm(*args)