2021-12-29 15:32:10 +00:00
|
|
|
from colossalai.utils import get_current_device
|
|
|
|
from torch import nn
|
|
|
|
|
2022-04-01 08:49:56 +00:00
|
|
|
from ..parallel_1d import LayerNorm1D
|
|
|
|
from ..parallel_2d import LayerNorm2D
|
|
|
|
from ..parallel_2p5d import LayerNorm2p5D
|
|
|
|
from ..parallel_3d import LayerNorm3D
|
2021-12-29 15:32:10 +00:00
|
|
|
from ..utils import get_tensor_parallel_mode
|
2022-04-01 08:49:56 +00:00
|
|
|
from ._utils import ColossalaiModule
|
2021-12-29 15:32:10 +00:00
|
|
|
|
2022-04-01 08:49:56 +00:00
|
|
|
# Dispatch table: tensor-parallel mode string -> layer-norm class used for that mode.
_parallel_layernorm = {
    '1d': LayerNorm1D,
    '2d': LayerNorm2D,
    '2.5d': LayerNorm2p5D,
    '3d': LayerNorm3D,
}
|
2021-12-29 15:32:10 +00:00
|
|
|
|
|
|
|
|
2022-04-01 08:49:56 +00:00
|
|
|
class LayerNorm(ColossalaiModule):
    r"""Layer normalization that follows the active tensor-parallel mode.

    The wrapped layer is picked at construction time from the value returned by
    ``get_tensor_parallel_mode()``: ``None`` selects a plain
    :class:`torch.nn.LayerNorm`, any other value is looked up in the
    module-level ``_parallel_layernorm`` table.

    Args:
        normalized_shape (int): input shape from an expected input of size
            :math:`[* \times \text{normalized_shape}[0] \times \text{normalized_shape}[1] \times \ldots \times \text{normalized_shape}[-1]]`
            If a single integer is used, it is treated as a singleton list, and this
            module will normalize over the last dimension, which is expected to be of
            that specific size.
        eps (float, optional): value added to the denominator for numerical
            stability, defaults to 1e-05.
        dtype (:class:`torch.dtype`, optional): dtype of the parameters, defaults to None.

    Raises:
        KeyError: if the tensor-parallel mode is neither ``None`` nor a key of
            ``_parallel_layernorm``.
    """

    def __init__(self, normalized_shape: int, eps=1e-05, dtype=None) -> None:
        mode = get_tensor_parallel_mode()
        if mode is not None:
            # Parallel variants take the dtype directly in their constructor.
            layer_cls = _parallel_layernorm[mode]
            layer = layer_cls(normalized_shape, eps=eps, dtype=dtype)
        else:
            # No tensor parallelism: build the stock PyTorch layer, cast it to the
            # requested dtype, then move it onto the current device.
            layer = nn.LayerNorm(normalized_shape, eps=eps)
            layer = layer.to(dtype)
            layer = layer.to(get_current_device())
        # The chosen layer is handed to ColossalaiModule, which wraps it.
        super().__init__(layer)
|