[API] add docstrings and initialization to apex amp, naive amp (#3783)

* [mixed_precision] add naive amp demo

* [mixed_precision] add naive amp demo

* [api] add docstrings and initialization to apex amp, naive amp

* [api] add docstring to apex amp/ naive amp

* [api] add docstring to apex amp/ naive amp

* [api] add docstring to apex amp/ naive amp

* [api] add docstring to apex amp/ naive amp

* [api] add docstring to apex amp/ naive amp

* [api] add docstring to apex amp/ naive amp

* [api] fix

* [api] fix
jiangmingyan 2023-05-23 15:17:24 +08:00 committed by GitHub
parent 615e2e5fc1
commit e871e342b3
2 changed files with 56 additions and 2 deletions


@@ -1,5 +1,38 @@
from typing import Any, Optional, Union
import torch
from .mixed_precision_base import MixedPrecision
class FP16ApexMixedPrecision(MixedPrecision):
    pass
    """
    Precision for mixed precision training in FP16 using apex AMP.

    Args:
        opt_level (str, optional, default="O1"): Pure or mixed precision optimization level. Accepted values are "O0", "O1", "O2", and "O3", explained in detail in the Apex AMP documentation.
        cast_model_type (torch.dtype, optional, default=None): Casts your model's parameters and buffers to the desired type.
        patch_torch_functions (bool, optional, default=None): Patch all Torch functions and Tensor methods to perform Tensor Core-friendly ops like GEMMs and convolutions in FP16, and any ops that benefit from FP32 precision in FP32.
        keep_batchnorm_fp32 (bool or str, optional, default=None): To enhance precision and enable cudnn batchnorm (which improves performance), it is often beneficial to keep batchnorm weights in FP32 even if the rest of the model is FP16.
        master_weights (bool, optional, default=None): Maintain FP32 master weights to accompany any FP16 model weights. FP32 master weights are stepped by the optimizer to enhance precision and capture small gradients.
        loss_scale (float or str, optional, default=None): If loss_scale is a float value, use this value as the static (fixed) loss scale. If loss_scale is the string "dynamic", adaptively adjust the loss scale over time. Dynamic loss scale adjustments are performed by Amp automatically.
        cast_model_outputs (torch.dtype, optional, default=None): Option to ensure that the outputs of your model(s) are always cast to a particular type regardless of opt_level.
        num_losses (int, optional, default=1): Option to tell Amp in advance how many losses/backward passes you plan to use. When used in conjunction with the loss_id argument to `amp.scale_loss`, this enables Amp to use a different loss scale per loss/backward pass, which can improve stability. If num_losses is left at 1, Amp will still support multiple losses/backward passes, but use a single global loss scale for all of them.
        verbosity (int, default=1): Set to 0 to suppress Amp-related output.
        min_loss_scale (float, default=None): Sets a floor for the loss scale values that can be chosen by dynamic loss scaling. The default value of None means that no floor is imposed. If dynamic loss scaling is not used, min_loss_scale is ignored.
        max_loss_scale (float, default=2.**24): Sets a ceiling for the loss scale values that can be chosen by dynamic loss scaling. If dynamic loss scaling is not used, max_loss_scale is ignored.
    """
    def __init__(self,
                 opt_level: Optional[str] = "O1",
                 cast_model_type: Optional[torch.dtype] = None,
                 patch_torch_functions: Optional[bool] = None,
                 keep_batchnorm_fp32: Optional[Union[bool, str]] = None,
                 master_weights: Optional[bool] = None,
                 loss_scale: Optional[Union[float, str]] = None,
                 cast_model_outputs: Any = None,
                 num_losses: Optional[int] = 1,
                 verbosity: int = 1,
                 min_loss_scale: Optional[float] = None,
                 max_loss_scale: float = 2.**24) -> None:
        pass
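For reference, the keyword arguments documented above mirror those of `apex.amp.initialize`, which is where they are ultimately consumed. Below is a minimal sketch of that path, assuming Apex and a CUDA device are available; the toy model, optimizer, and chosen values are illustrative and not part of this PR:

# Illustrative sketch only: shows how the knobs documented above are consumed
# by apex.amp.initialize. The toy model, optimizer, and chosen values are
# assumptions for demonstration, not the plugin's actual implementation.
import torch
from apex import amp

model = torch.nn.Linear(16, 16).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

model, optimizer = amp.initialize(
    model,
    optimizer,
    opt_level="O1",        # "O0"-"O3", see the Apex AMP documentation
    loss_scale="dynamic",  # or a fixed float for static loss scaling
    verbosity=1,
)

loss = model(torch.randn(4, 16, device="cuda")).sum()
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()  # backward on the scaled loss
optimizer.step()

A `FP16ApexMixedPrecision(opt_level="O2", loss_scale="dynamic")` instance would presumably forward the same values once the implementation is filled in; the values shown here are only examples.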


@@ -2,4 +2,25 @@ from .mixed_precision_base import MixedPrecision
class FP16NaiveMixedPrecision(MixedPrecision):
    pass
    """
    Precision for mixed precision training in FP16 using naive AMP.

    Args:
        log_num_zeros_in_grad (bool): whether to return the number of zeros in the gradients.
        initial_scale (int): initial scale of the gradient scaler.
        growth_factor (int): the growth rate of the loss scale.
        backoff_factor (float): the decrease rate of the loss scale.
        hysteresis (int): delay shift in dynamic loss scaling.
        max_scale (int): maximum loss scale allowed.
        verbose (bool): if set to `True`, print debug info.
    """
    def __init__(self,
                 log_num_zeros_in_grad: bool,
                 initial_scale: int,
                 growth_factor: int,
                 backoff_factor: float,
                 hysteresis: int,
                 max_scale: int,
                 verbose: bool = False) -> None:
        pass
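For context, these constructor arguments correspond to a standard dynamic loss scaling scheme: the scale shrinks by `backoff_factor` after repeated gradient overflows and grows by `growth_factor` otherwise, bounded by `max_scale`. A simplified sketch of that scheme follows; the `SimpleGradScaler` name and all numeric values are illustrative assumptions, not the plugin's implementation:

# Simplified illustration of dynamic loss scaling driven by the parameters above.
# SimpleGradScaler and all concrete values are assumptions for demonstration only.
class SimpleGradScaler:

    def __init__(self, initial_scale=2**16, growth_factor=2, backoff_factor=0.5,
                 hysteresis=2, max_scale=2**32):
        self.scale = float(initial_scale)
        self.growth_factor = growth_factor
        self.backoff_factor = backoff_factor
        self.hysteresis = hysteresis  # how many consecutive overflows are tolerated
        self.max_scale = max_scale
        self._overflow_streak = 0

    def update(self, found_overflow: bool) -> None:
        if found_overflow:
            self._overflow_streak += 1
            # Back off only once the overflow streak reaches the hysteresis window.
            if self._overflow_streak >= self.hysteresis:
                self.scale = max(self.scale * self.backoff_factor, 1.0)
                self._overflow_streak = 0
        else:
            self._overflow_streak = 0
            self.scale = min(self.scale * self.growth_factor, self.max_scale)


scaler = SimpleGradScaler()
scaler.update(found_overflow=True)   # overflow: streak grows; may trigger backoff
scaler.update(found_overflow=False)  # clean step: scale grows toward max_scale

The `FP16NaiveMixedPrecision` arguments map one-to-one onto such a scaler; for example, `FP16NaiveMixedPrecision(log_num_zeros_in_grad=False, initial_scale=2**16, growth_factor=2, backoff_factor=0.5, hysteresis=2, max_scale=2**32)` would be a plausible, purely illustrative configuration.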