# ColossalAI/colossalai/fx/profiler/experimental/profiler_module/normalization.py

from typing import Tuple, Union
import torch
from ..registry import meta_profiler_module


@meta_profiler_module.register(torch.nn.InstanceNorm1d)
@meta_profiler_module.register(torch.nn.InstanceNorm2d)
@meta_profiler_module.register(torch.nn.InstanceNorm3d)
@meta_profiler_module.register(torch.nn.LayerNorm)
@meta_profiler_module.register(torch.nn.GroupNorm)
@meta_profiler_module.register(torch.nn.BatchNorm1d)
@meta_profiler_module.register(torch.nn.BatchNorm2d)
@meta_profiler_module.register(torch.nn.BatchNorm3d)
def torch_nn_normalize(self: Union[torch.nn.InstanceNorm1d, torch.nn.InstanceNorm2d, torch.nn.InstanceNorm3d,
                                   torch.nn.LayerNorm, torch.nn.GroupNorm, torch.nn.BatchNorm1d, torch.nn.BatchNorm2d,
                                   torch.nn.BatchNorm3d], input: torch.Tensor) -> Tuple[int, int]:
    """Estimate the FLOPs and MACs of one forward pass through a normalization layer.

    The estimate is a per-element cost multiplied by ``input.numel()``:

    * 4 FLOPs per element (5 with affine parameters) when the normalization
      statistics are computed from ``input`` itself;
    * 1 FLOP per element (2 with affine parameters) when stored running
      statistics are reused, so only the normalization/affine step is counted.

    Args:
        self: The normalization module being profiled. Only ``weight``,
            ``training`` and (when present) ``running_mean`` are read.
        input: The tensor fed to the module; only its element count is used.

    Returns:
        A ``(flops, macs)`` tuple. ``macs`` is always 0 for these layers.
    """
    # adopted from https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/profiling/flops_profiler/profiler.py#L615
    has_affine = self.weight is not None

    # Statistics are recomputed from the input in training mode, and also in
    # eval mode for modules that keep no running statistics (no `running_mean`
    # attribute, or one that is None). Only a module that reuses stored
    # running statistics takes the cheap path. The original code had these two
    # cost factors swapped relative to the DeepSpeed reference.
    uses_input_stats = self.training or getattr(self, "running_mean", None) is None
    if uses_input_stats:
        flops_per_element = 5 if has_affine else 4
    else:
        flops_per_element = 2 if has_affine else 1

    flops = input.numel() * flops_per_element
    macs = 0
    return flops, macs


# Optionally reuse `torch_nn_normalize` for apex's fused normalization layers.
# apex is an optional dependency, so its absence is silently ignored.
try:
    import apex
    _apex_normalization = apex.normalization
except (ImportError, AttributeError):
    pass
else:
    # Register each class on its own: apex releases differ in which fused
    # layers they ship, and one missing class must not prevent the others
    # from being registered.
    for _apex_norm_name in ("FusedLayerNorm", "FusedRMSNorm", "MixedFusedLayerNorm", "MixedFusedRMSNorm"):
        _apex_norm_cls = getattr(_apex_normalization, _apex_norm_name, None)
        if _apex_norm_cls is not None:
            meta_profiler_module.register(_apex_norm_cls)(torch_nn_normalize)
[fx] add profiler for fx nodes. (#1480) * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] merge development into main (#1) * [fx] activation checkpointing using Chen strategies. * [fx] add test for ckpt_solver_chen * [fx] add vanilla activation checkpoint search with test on resnet and densenet * [fx] add a namespace code for solver_chen. * [fx] fix the false interpretation of algorithm 3 in https://arxiv.org/abs/1604.06174. * [fx] fix lowercase naming conventions. * [fx] simplify test for ckpt. * [fx] add rules to linearize computation graphs for searching. (#2) * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] merge development into main (#1) * [fx] activation checkpointing using Chen strategies. * [fx] add test for ckpt_solver_chen * [fx] add vanilla activation checkpoint search with test on resnet and densenet * [fx] add a namespace code for solver_chen. * [fx] fix the false interpretation of algorithm 3 in https://arxiv.org/abs/1604.06174. * [fx] fix lowercase naming conventions. * [fx] simplify test for ckpt. * [fx] fix test and algorithm bugs in activation checkpointing. * [fx] polish ckpt_test. * [fx] add rules to linearize computation graphs for searching. * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] fix inconsistencies. * [fx] fix MetaInfoProp. * [fx] fix MetaInfoProp. * [fx] consider MetaInfoProp for inplace operands. 
* [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] fix error in tests. * [fx] unfix bug. * [fx] unfix bug. 2022-08-24 08:22:44 +00:00			`from typing import Tuple, Union`
			`import torch`
			`from ..registry import meta_profiler_module`


			`@meta_profiler_module.register(torch.nn.InstanceNorm1d)`
			`@meta_profiler_module.register(torch.nn.InstanceNorm2d)`
			`@meta_profiler_module.register(torch.nn.InstanceNorm3d)`
			`@meta_profiler_module.register(torch.nn.LayerNorm)`
			`@meta_profiler_module.register(torch.nn.GroupNorm)`
			`@meta_profiler_module.register(torch.nn.BatchNorm1d)`
			`@meta_profiler_module.register(torch.nn.BatchNorm2d)`
			`@meta_profiler_module.register(torch.nn.BatchNorm3d)`
			`def torch_nn_normalize(self: Union[torch.nn.LayerNorm, torch.nn.GroupNorm, torch.nn.BatchNorm1d, torch.nn.BatchNorm2d,`
			`torch.nn.BatchNorm3d], input: torch.Tensor) -> Tuple[int, int]:`
			`# adopted from https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/profiling/flops_profiler/profiler.py#L615`
			`has_affine = self.weight is not None`
			`if self.training:`
			`flops = input.numel() * (2 if has_affine else 1)`
			`else:`
			`flops = input.numel() * (5 if has_affine else 4)`
			`macs = 0`
			`return flops, macs`


			`try:`
			`import apex`
			`meta_profiler_module.register(apex.normalization.FusedLayerNorm)(torch_nn_normalize)`
			`meta_profiler_module.register(apex.normalization.FusedRMSNorm)(torch_nn_normalize)`
			`meta_profiler_module.register(apex.normalization.MixedFusedLayerNorm)(torch_nn_normalize)`
			`meta_profiler_module.register(apex.normalization.MixedFusedRMSNorm)(torch_nn_normalize)`
			`except (ImportError, AttributeError):`
			`pass`