ColossalAI/colossalai/amp/naive_amp/grad_scaler/base_grad_scaler.py

#!/usr/bin/env python
# -*- encoding: utf-8 -*-

from abc import ABC, abstractmethod
from typing import Dict

import torch
from torch import Tensor

from colossalai.logging import get_dist_logger

__all__ = ['BaseGradScaler']


class BaseGradScaler(ABC):
    """A base class for the gradient scaler.

    The loss scale is kept as a one-element float32 tensor. Subclasses must
    implement :meth:`update` to define how the scale evolves.

    Args:
        initial_scale (float): the initial loss scale, must be positive
        verbose (bool): whether to log messages
    """

    def __init__(self, initial_scale: float, verbose: bool):
        assert initial_scale > 0, 'initial_scale must be positive'

        # The legacy ``torch.cuda.FloatTensor`` constructor fails on hosts without CUDA.
        # ``torch.tensor`` creates the same float32 tensor on the current CUDA device
        # when one is available and falls back to the CPU otherwise.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self._scale = torch.tensor([initial_scale], dtype=torch.float32, device=device)
        self._verbose = verbose

        # The logger is only obtained when messages will actually be emitted.
        if self._verbose:
            self._logger = get_dist_logger()

    @property
    def scale(self) -> Tensor:
        """Returns the loss scale.

        Returns:
            Tensor: the tensor currently held as the loss scale (not a copy)
        """

        return self._scale

    @property
    def inv_scale(self) -> Tensor:
        """Returns the inverse of the loss scale.

        Returns:
            Tensor: a new float32 tensor holding ``1 / scale``
        """

        # The reciprocal is taken in double precision and then cast back to float32.
        return self._scale.double().reciprocal().float()

    def state_dict(self) -> Dict:
        """Returns the states of the gradient scaler as a dict object.

        Returns:
            Dict: a dict with the single key ``'scale'`` mapped to the loss scale tensor
        """

        state_dict = dict()
        state_dict['scale'] = self.scale
        return state_dict

    def load_state_dict(self, state_dict: Dict) -> None:
        """Load the states of the gradient scaler from a dict object.

        Args:
            state_dict (dict): the states of the gradient scaler, must contain the key ``'scale'``
        """

        # The stored value is used as-is; no copy or device transfer is performed.
        self._scale = state_dict['scale']

    @abstractmethod
    def update(self, overflow: bool) -> None:
        """Update the loss scale.

        Args:
            overflow (bool): whether overflow occurs
        """

        pass

    def log(self, message, *args, **kwargs):
        """Log messages.

        Nothing is logged unless the scaler was created with ``verbose=True``.

        Args:
            message (str): the message to log
            *args: positional arguments for :class:`colossalai.logging.DistributedLogger`
            **kwargs: key-word arguments for :class:`colossalai.logging.DistributedLogger`
        """

        if self._verbose:
            self._logger.info(message, *args, **kwargs)
refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`#!/usr/bin/env python`
			`# -*- encoding: utf-8 -*-`

			`from abc import ABC, abstractmethod`
			`from typing import Dict`

[NFC] polish amp.naive_amp.grad_scaler code style 2022-11-09 05:34:19 +00:00			`import torch`
			`from torch import Tensor`

			`from colossalai.logging import get_dist_logger`

refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`__all__ = ['BaseGradScaler']`


			`class BaseGradScaler(ABC):`
[doc] improved docstring in the amp module (#857) 2022-04-25 05:42:17 +00:00			`"""A base class for the gradient scaler.`

			`Args:`
			`initial_scale (float): the initial loss scale`
			`verbose (bool): whether to log messages`
			`"""`
refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00
[hotfix] fix memory leak in zero (#781) 2022-04-18 05:57:03 +00:00			`def __init__(self, initial_scale: float, verbose: bool):`
refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`assert initial_scale > 0`
			`self._scale = torch.cuda.FloatTensor([initial_scale])`
			`self._verbose = verbose`

			`if self._verbose:`
			`self._logger = get_dist_logger()`

			`@property`
			`def scale(self) -> Tensor:`
[doc] improved docstring in the amp module (#857) 2022-04-25 05:42:17 +00:00			`"""Returns the loss scale.`
			`"""`

refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`return self._scale`

			`@property`
			`def inv_scale(self) -> Tensor:`
[doc] improved docstring in the amp module (#857) 2022-04-25 05:42:17 +00:00			`"""Returns the inverse of the loss scale.`
			`"""`

refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`return self._scale.double().reciprocal().float()`

			`def state_dict(self) -> Dict:`
[doc] improved docstring in the amp module (#857) 2022-04-25 05:42:17 +00:00			`"""Returns the states of the gradient scaler as a dict object.`
			`"""`

refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`state_dict = dict()`
			`state_dict['scale'] = self.scale`
[hotfix] fix memory leak in zero (#781) 2022-04-18 05:57:03 +00:00			`return state_dict`
refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00
			`def load_state_dict(self, state_dict: Dict) -> None:`
[doc] improved docstring in the amp module (#857) 2022-04-25 05:42:17 +00:00			`"""Load the states of the gradient scaler from a dict object.`

			`Args:`
			`state_dict (dict): the states of the gradient scaler`
			`"""`

refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`self._scale = state_dict['scale']`

			`@abstractmethod`
			`def update(self, overflow: bool) -> None:`
[doc] improved docstring in the amp module (#857) 2022-04-25 05:42:17 +00:00			`"""Update the loss scale.`

			`Args:`
			`overflow (bool): whether overflow occurs`
			`"""`

refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`pass`

			`def log(self, message, *args, **kwargs):`
[doc] improved docstring in the amp module (#857) 2022-04-25 05:42:17 +00:00			`"""Log messages.`

			`Args:`
			`message (str): the message to log`
			*args: positional arguments for :class:`colossalai.logging.DistributedLogger`
			**kwargs: key-word arguments for :class:`colossalai.logging.DistributedLogger`
			`"""`

refactored grad scaler (#338) 2022-03-09 03:52:43 +00:00			`if self._verbose:`
			`self._logger.info(message, *args, **kwargs)`