Revert "[NFC] polish code format" (#2372)

pull/2375/head^2
binmakeswell, 2 years ago (committed by GitHub)
commit a881d6d000
6 changed files:

  1. colossalai/auto_parallel/tensor_shard/deprecated/constants.py (3 changes)
  2. colossalai/auto_parallel/tensor_shard/deprecated/graph_analysis.py (8 changes)
  3. colossalai/auto_parallel/tensor_shard/deprecated/op_handler/batch_norm_handler.py (16 changes)
  4. colossalai/auto_parallel/tensor_shard/deprecated/op_handler/embedding_handler.py (14 changes)
  5. colossalai/auto_parallel/tensor_shard/deprecated/op_handler/unary_elementwise_handler.py (9 changes)
  6. colossalai/auto_parallel/tensor_shard/utils/factory.py (7 changes)

colossalai/auto_parallel/tensor_shard/deprecated/constants.py (3 changes)

@@ -1,6 +1,5 @@
-import operator
-
 import torch
+import operator
 
 __all__ = [
     'ELEMENTWISE_MODULE_OP', 'ELEMENTWISE_FUNC_OP', 'RESHAPE_FUNC_OP', 'CONV_MODULE_OP', 'CONV_FUNC_OP',

colossalai/auto_parallel/tensor_shard/deprecated/graph_analysis.py (8 changes)

@@ -1,11 +1,9 @@
-from collections import OrderedDict as ODict
 from dataclasses import dataclass
-from typing import Any, List, OrderedDict, Union
-
+from torch.fx.node import Node
 from torch.fx.graph import Graph
 from torch.fx.graph_module import GraphModule
-from torch.fx.node import Node
-
+from collections import OrderedDict as ODict
+from typing import List, OrderedDict, Union, Any
 from colossalai.fx.passes.utils import get_node_module
 
 __all__ = ['LiveVariable', 'LiveVariableVector', 'LiveStage', 'GraphAnalyser']

colossalai/auto_parallel/tensor_shard/deprecated/op_handler/batch_norm_handler.py (16 changes)

@@ -2,9 +2,9 @@ import operator
 from functools import reduce
 
 import torch
 
-from colossalai.auto_parallel.tensor_shard.deprecated._utils import ignore_sharding_exception
-from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
+from colossalai.auto_parallel.tensor_shard.deprecated._utils import \
+    ignore_sharding_exception
+from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import (ShardingStrategy, StrategiesVector)
 
 from .operator_handler import OperatorHandler
@@ -76,19 +76,19 @@ class BatchNormHandler(OperatorHandler):
         Argument:
             sharding_size_forward(int): The forward activation will be divided
                 into sharding_size_forward number partions.
-            sharding_size_backward_activation(int): The backward activation will
+            sharding_size_backward_activation(int): The backward activation will
                 be divided into sharding_size_backward_activation number partions.
             sharding_size_weight(int): The backward weight will be divided
                 into sharding_size_weight number partions.
 
         Return:
-            memory_cost(Tuple[float]): Memory cost per device with this
+            memory_cost(Tuple[float]): Memory cost per device with this
                 specific strategy, the first element of this tuple is forward
                 memory cost, and the second element of this tuple is backward
                 memory cost.
-            memory_cost_forward(float): Memory cost of forward activation per
+            memory_cost_forward(float): Memory cost of forward activation per
                 device with this specific strategy.
-            memory_cost_backward_activation(float): Memory cost of backward activation
+            memory_cost_backward_activation(float): Memory cost of backward activation
                 per device with this specific strategy.
         '''
         # compute the memory cost of this strategy
@@ -458,7 +458,7 @@ class BatchNormHandler(OperatorHandler):
         norm_handler.register_strategy()
         for strategy in norm_handler.strategies_vector:
             print(f'{strategy.name}, computation_cost: {strategy.compute_cost}, memory_cost: {strategy.memory_cost}')
-
+
     Output:
         RS0 = RS0 x S0, computation_cost: 131072, memory_cost: 524288.0
         RS1 = RS1 x S1, computation_cost: 131072, memory_cost: 524288.0
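The docstring hunks in this file (and the identical ones in embedding_handler.py below) touch only whitespace, but the docstring itself spells out the cost model: each strategy reports a per-device memory cost as a (forward, backward) tuple, derived from how many partitions the forward activation, the backward activation and the weight are split into. The sketch below restates that bookkeeping as a standalone helper; the function name, the element size and the exact formula are illustrative assumptions, not the handler's actual implementation.

from typing import Tuple

import torch


def estimate_memory_cost(output_numel: int,
                         weight_numel: int,
                         sharding_size_forward: int,
                         sharding_size_backward_activation: int,
                         sharding_size_weight: int,
                         dtype: torch.dtype = torch.float32) -> Tuple[float, float]:
    """Return (forward_cost, backward_cost) in bytes per device (illustrative sketch)."""
    bytes_per_elem = torch.tensor([], dtype=dtype).element_size()
    # Forward: the activation is split into sharding_size_forward partitions,
    # the weight into sharding_size_weight partitions.
    forward_cost = (output_numel * bytes_per_elem / sharding_size_forward
                    + weight_numel * bytes_per_elem / sharding_size_weight)
    # Backward: the activation gradient is split into
    # sharding_size_backward_activation partitions.
    backward_cost = (output_numel * bytes_per_elem / sharding_size_backward_activation
                     + weight_numel * bytes_per_elem / sharding_size_weight)
    return forward_cost, backward_cost

For scale, 131072 fp32 elements at 4 bytes each come to 524288 bytes, which lines up with the memory_cost printed in the example output above, assuming that figure reports the sharded activation alone.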

colossalai/auto_parallel/tensor_shard/deprecated/op_handler/embedding_handler.py (14 changes)

@@ -5,9 +5,9 @@ from functools import reduce
 from typing import Dict, List
 
 import torch
 
-from colossalai.auto_parallel.tensor_shard.deprecated._utils import ignore_sharding_exception
-from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
+from colossalai.auto_parallel.tensor_shard.deprecated._utils import \
+    ignore_sharding_exception
+from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import (ShardingStrategy, StrategiesVector)
 from colossalai.tensor.shape_consistency import ShapeConsistencyManager
 from colossalai.tensor.sharding_spec import ShardingSpec
@@ -42,19 +42,19 @@ class EmbeddingHandler(OperatorHandler):
         Argument:
             sharding_size_forward(int): The forward activation will be divided
                 into sharding_size_forward number partions.
-            sharding_size_backward_activation(int): The backward activation will
+            sharding_size_backward_activation(int): The backward activation will
                 be divided into sharding_size_backward_activation number partions.
             sharding_size_weight(int): The backward weight will be divided
                 into sharding_size_weight number partions.
 
         Return:
-            memory_cost(Tuple[float]): Memory cost per device with this
+            memory_cost(Tuple[float]): Memory cost per device with this
                 specific strategy, the first element of this tuple is forward
                 memory cost, and the second element of this tuple is backward
                 memory cost.
-            memory_cost_forward(float): Memory cost of forward activation per
+            memory_cost_forward(float): Memory cost of forward activation per
                 device with this specific strategy.
-            memory_cost_backward_activation(float): Memory cost of backward activation
+            memory_cost_backward_activation(float): Memory cost of backward activation
                 per device with this specific strategy.
         '''
         # compute the memory cost of this strategy

colossalai/auto_parallel/tensor_shard/deprecated/op_handler/unary_elementwise_handler.py (9 changes)

@@ -6,10 +6,11 @@ from functools import reduce
 from typing import Dict, List
 
 import torch
 
-from colossalai.auto_parallel.tensor_shard.deprecated._utils import ignore_sharding_exception
-from colossalai.auto_parallel.tensor_shard.deprecated.constants import INFINITY_COST
-from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
+from colossalai.auto_parallel.tensor_shard.deprecated._utils import \
+    ignore_sharding_exception
+from colossalai.auto_parallel.tensor_shard.deprecated.constants import \
+    INFINITY_COST
+from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import (ShardingStrategy, StrategiesVector)
 from colossalai.tensor.shape_consistency import ShapeConsistencyManager
 from colossalai.tensor.sharding_spec import ShardingSpec

colossalai/auto_parallel/tensor_shard/utils/factory.py (7 changes)

@@ -4,11 +4,10 @@ from functools import reduce
 from typing import Dict, List, Optional, Union
 
 import torch
-from torch.fx.node import Node
 
 from colossalai.device.device_mesh import DeviceMesh
 from colossalai.tensor.shape_consistency import ShapeConsistencyManager
 from colossalai.tensor.sharding_spec import ShardingSpec
-
+from torch.fx.node import Node
 from ..constants import INFINITY_COST
@@ -19,7 +18,7 @@ def generate_sharding_spec(input_: Union[Node, torch.Tensor], device_mesh: DeviceMesh,
                            dim_partition_dict: Dict[int, List[int]]) -> ShardingSpec:
     """
     Generate the sharding spec of the tensor based on the given dim_partition_dict.
-
+
     Args:
         input_ (Union[Node, torch.Tensor]): the input can be a Node object or a PyTorch tensor. If a node is used, it will look for its meta data associated with this node.
@@ -60,7 +59,7 @@ def generate_resharding_costs(nodes: List[Node],
         nodes (List[Node]): a list of nodes
         sharding_spec_for_input(ShardingSpec): a list of ShardingSpec for the nodes.
         count_backward (Optional[bool]): whether to include the cost of resharding in the backward pass, default is True. False can be used for inference.
-        dtype (Optional[torch.dtype]): the data type for cost calculation, default is None.
+        dtype (Optional[torch.dtype]): the data type for cost calculation, default is None.
     '''
     # The resharding_cost of weight is counted due to sharing weight cases.
     resharding_costs = {}
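The factory.py hunks above revert only import order and docstring whitespace; the two functions they touch, generate_sharding_spec and generate_resharding_costs, keep the signatures shown. Below is a minimal usage sketch of generate_sharding_spec, assuming four devices arranged as a 2x2 logical mesh; the DeviceMesh constructor arguments, the tensor shape and the printed attribute are illustrative assumptions, not taken from this commit.

import torch

from colossalai.auto_parallel.tensor_shard.utils.factory import generate_sharding_spec
from colossalai.device.device_mesh import DeviceMesh

# Four physical devices arranged as a 2x2 logical mesh (assumed constructor arguments).
physical_mesh_id = torch.arange(4)
device_mesh = DeviceMesh(physical_mesh_id, (2, 2))

# dim_partition_dict maps tensor dimensions to the mesh axes they are sharded over:
# here dim 0 is sharded along mesh axis 0 and dim 1 along mesh axis 1.
tensor = torch.randn(16, 32)
sharding_spec = generate_sharding_spec(tensor, device_mesh, dim_partition_dict={0: [0], 1: [1]})
print(sharding_spec.sharding_sequence)    # expected to read roughly as [S0, S1]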
