@@ -18,6 +18,17 @@ class TorchAMPOptimizer(ColossalaiOptimizer):

    :param optim: a normal optimizer like Adam or SGD
    :type optim: torch.optim.Optimizer
    :param init_scale: Initial scale factor
    :type init_scale: float, optional, default=2.**16
    :param growth_factor: Factor by which the scale is multiplied during :meth:`update` if no inf/NaN gradients occur for ``growth_interval`` consecutive iterations.
    :type growth_factor: float, optional, default=2.0
    :param backoff_factor: Factor by which the scale is multiplied during :meth:`update` if inf/NaN gradients occur in an iteration.
    :type backoff_factor: float, optional, default=0.5
    :param growth_interval: Number of consecutive iterations without inf/NaN gradients that must occur for the scale to be multiplied by ``growth_factor``.
    :type growth_interval: int, optional, default=2000
    :param enabled: If ``False``, disables gradient scaling. :meth:`step` simply invokes the underlying ``optimizer.step()``, and other methods become no-ops.
    :type enabled: bool, optional, default=True
    """

    def __init__(self, optim: Optimizer, *args, **kwargs):
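The keyword arguments documented above match those of ``torch.cuda.amp.GradScaler``, which drives PyTorch's dynamic loss scaling. Below is a minimal sketch of that scaling loop using the PyTorch scaler directly; treating ``TorchAMPOptimizer`` as a thin wrapper that forwards these arguments to a ``GradScaler`` is an assumption made only for illustration.

    import torch
    import torch.nn as nn

    # Minimal sketch: the documented parameters applied to torch.cuda.amp.GradScaler.
    model = nn.Linear(16, 4).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scaler = torch.cuda.amp.GradScaler(
        init_scale=2.**16,     # starting loss-scale factor
        growth_factor=2.0,     # scale grows by this factor after growth_interval clean steps
        backoff_factor=0.5,    # scale shrinks by this factor when inf/NaN gradients appear
        growth_interval=2000,  # consecutive clean iterations required before growing the scale
        enabled=True,          # False turns every scaler call into a no-op
    )

    data = torch.randn(8, 16, device='cuda')
    target = torch.randn(8, 4, device='cuda')
    with torch.cuda.amp.autocast():
        loss = nn.functional.mse_loss(model(data), target)

    scaler.scale(loss).backward()  # backward pass on the scaled loss
    scaler.step(optimizer)         # unscales gradients; skips the step on inf/NaN
    scaler.update()                # adjusts the scale for the next iteration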
@@ -68,6 +79,7 @@ class TorchAMPLoss(nn.Module):

    :param loss: a loss function object
    :type loss: torch.nn.modules.loss._Loss
    """

    def __init__(self, loss: _Loss):
        super().__init__()
        self.loss = loss
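For context on the second hunk, here is a hedged sketch of how a wrapper with this constructor could evaluate the stored criterion under autocast. The class name and the forward logic are assumptions for illustration, not a claim about what ``TorchAMPLoss`` actually does.

    import torch
    import torch.nn as nn
    from torch.nn.modules.loss import _Loss


    class AMPLossSketch(nn.Module):
        """Hypothetical stand-in for TorchAMPLoss: stores a criterion and runs it under autocast."""

        def __init__(self, loss: _Loss):
            super().__init__()
            self.loss = loss

        def forward(self, *args, **kwargs):
            # Assumed behaviour: evaluate the wrapped loss in mixed precision.
            with torch.cuda.amp.autocast():
                return self.loss(*args, **kwargs)


    # usage: wrap any standard criterion
    criterion = AMPLossSketch(nn.CrossEntropyLoss())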