From 35427bcab4c6f54494ec1e019facd5318e6f38ca Mon Sep 17 00:00:00 2001 From: Zihao <804673818@qq.com> Date: Thu, 5 Jan 2023 12:18:08 +0800 Subject: [PATCH 1/6] [NFC] polish colossalai/auto_parallel/tensor_shard/deprecated/op_handler/unary_elementwise_handler.py code style (#2326) --- .../deprecated/op_handler/unary_elementwise_handler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/unary_elementwise_handler.py b/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/unary_elementwise_handler.py index c929d2fad..3eb2d911a 100644 --- a/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/unary_elementwise_handler.py +++ b/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/unary_elementwise_handler.py @@ -6,11 +6,10 @@ from functools import reduce from typing import Dict, List import torch -from colossalai.auto_parallel.tensor_shard.deprecated._utils import \ - ignore_sharding_exception -from colossalai.auto_parallel.tensor_shard.deprecated.constants import \ - INFINITY_COST -from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import (ShardingStrategy, StrategiesVector) + +from colossalai.auto_parallel.tensor_shard.deprecated._utils import ignore_sharding_exception +from colossalai.auto_parallel.tensor_shard.deprecated.constants import INFINITY_COST +from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector from colossalai.tensor.shape_consistency import ShapeConsistencyManager from colossalai.tensor.sharding_spec import ShardingSpec From bd12a49e2a1e08f9048e6059ee61a16adc57998a Mon Sep 17 00:00:00 2001 From: Maruyama_Aya <38985202+MaruyamaAya@users.noreply.github.com> Date: Thu, 5 Jan 2023 16:20:54 +0800 Subject: [PATCH 2/6] [NFC] polish code style (#2339) --- colossalai/auto_parallel/tensor_shard/deprecated/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/colossalai/auto_parallel/tensor_shard/deprecated/constants.py b/colossalai/auto_parallel/tensor_shard/deprecated/constants.py index 91c20d343..3d100b745 100644 --- a/colossalai/auto_parallel/tensor_shard/deprecated/constants.py +++ b/colossalai/auto_parallel/tensor_shard/deprecated/constants.py @@ -1,6 +1,7 @@ -import torch import operator +import torch + __all__ = [ 'ELEMENTWISE_MODULE_OP', 'ELEMENTWISE_FUNC_OP', 'RESHAPE_FUNC_OP', 'CONV_MODULE_OP', 'CONV_FUNC_OP', 'LINEAR_MODULE_OP', 'LINEAR_FUNC_OP', 'BATCHNORM_MODULE_OP', 'POOL_MODULE_OP', 'NON_PARAM_FUNC_OP', 'BCAST_FUNC_OP', From 28e2d16794108bdf5828505e073297ec11a21a0b Mon Sep 17 00:00:00 2001 From: yuxuan-lou <83441848+yuxuan-lou@users.noreply.github.com> Date: Thu, 5 Jan 2023 16:53:24 +0800 Subject: [PATCH 3/6] [NFC] polish colossalai/auto_parallel/tensor_shard/deprecated/graph_analysis.py code style (#2340) --- .../tensor_shard/deprecated/graph_analysis.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/colossalai/auto_parallel/tensor_shard/deprecated/graph_analysis.py b/colossalai/auto_parallel/tensor_shard/deprecated/graph_analysis.py index 831e7eadd..9f7a6a5ec 100644 --- a/colossalai/auto_parallel/tensor_shard/deprecated/graph_analysis.py +++ b/colossalai/auto_parallel/tensor_shard/deprecated/graph_analysis.py @@ -1,9 +1,11 @@ +from collections import OrderedDict as ODict from dataclasses import dataclass -from torch.fx.node import Node +from typing import Any, List, OrderedDict, Union + from torch.fx.graph import Graph from torch.fx.graph_module import GraphModule -from collections import OrderedDict as ODict -from typing import List, OrderedDict, Union, Any +from torch.fx.node import Node + from colossalai.fx.passes.utils import get_node_module __all__ = ['LiveVariable', 'LiveVariableVector', 'LiveStage', 'GraphAnalyser'] From e11a005c026f104d0bb5061de2e2b8531f5cbc67 Mon Sep 17 00:00:00 2001 From: LuGY <74758262+Gy-Lu@users.noreply.github.com> Date: Thu, 5 Jan 2023 21:17:42 +0800 Subject: [PATCH 4/6] [NFC] polish colossalai/auto_parallel/tensor_shard/utils/factory.py code style (#2349) --- colossalai/auto_parallel/tensor_shard/utils/factory.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/colossalai/auto_parallel/tensor_shard/utils/factory.py b/colossalai/auto_parallel/tensor_shard/utils/factory.py index fd3ba3d41..563375bc2 100644 --- a/colossalai/auto_parallel/tensor_shard/utils/factory.py +++ b/colossalai/auto_parallel/tensor_shard/utils/factory.py @@ -4,10 +4,11 @@ from functools import reduce from typing import Dict, List, Optional, Union import torch +from torch.fx.node import Node + from colossalai.device.device_mesh import DeviceMesh from colossalai.tensor.shape_consistency import ShapeConsistencyManager from colossalai.tensor.sharding_spec import ShardingSpec -from torch.fx.node import Node from ..constants import INFINITY_COST @@ -18,7 +19,7 @@ def generate_sharding_spec(input_: Union[Node, torch.Tensor], device_mesh: Devic dim_partition_dict: Dict[int, List[int]]) -> ShardingSpec: """ Generate the sharding spec of the tensor based on the given dim_partition_dict. - + Args: input_ (Union[Node, torch.Tensor]): the input can be a Node object or a PyTorch tensor. If a node is used, it will look for its meta data associated with this node. @@ -59,7 +60,7 @@ def generate_resharding_costs(nodes: List[Node], nodes (List[Node]): a list of nodes sharding_spec_for_input(ShardingSpec): a list of ShardingSpec for the nodes. count_backward (Optional[bool]): whether to include the cost of resharding in the backward pass, default is True. False can be used for inference. - dtype (Optional[torch.dtype]): the data type for cost calculation, default is None. + dtype (Optional[torch.dtype]): the data type for cost calculation, default is None. ''' # The resharding_cost of weight is counted due to sharing weight cases. resharding_costs = {} From ac0d30fe2ec7e161dc18c1f6b2c9d447d3f7f6ae Mon Sep 17 00:00:00 2001 From: ExtremeViscent Date: Fri, 6 Jan 2023 13:41:38 +0800 Subject: [PATCH 5/6] [NFC] polish batch_norm_handler.py code style (#2359) --- .../deprecated/op_handler/batch_norm_handler.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/batch_norm_handler.py b/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/batch_norm_handler.py index 519436270..868600b39 100644 --- a/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/batch_norm_handler.py +++ b/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/batch_norm_handler.py @@ -2,9 +2,9 @@ import operator from functools import reduce import torch -from colossalai.auto_parallel.tensor_shard.deprecated._utils import \ - ignore_sharding_exception -from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import (ShardingStrategy, StrategiesVector) + +from colossalai.auto_parallel.tensor_shard.deprecated._utils import ignore_sharding_exception +from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector from .operator_handler import OperatorHandler @@ -76,19 +76,19 @@ class BatchNormHandler(OperatorHandler): Argument: sharding_size_forward(int): The forward activation will be divided into sharding_size_forward number partions. - sharding_size_backward_activation(int): The backward activation will + sharding_size_backward_activation(int): The backward activation will be divided into sharding_size_backward_activation number partions. sharding_size_weight(int): The backward weight will be divided into sharding_size_weight number partions. Return: - memory_cost(Tuple[float]): Memory cost per device with this + memory_cost(Tuple[float]): Memory cost per device with this specific strategy, the first element of this tuple is forward memory cost, and the second element of this tuple is backward memory cost. - memory_cost_forward(float): Memory cost of forward activation per + memory_cost_forward(float): Memory cost of forward activation per device with this specific strategy. - memory_cost_backward_activation(float): Memory cost of backward activation + memory_cost_backward_activation(float): Memory cost of backward activation per device with this specific strategy. ''' # compute the memory cost of this strategy @@ -458,7 +458,7 @@ class BatchNormHandler(OperatorHandler): norm_handler.register_strategy() for strategy in norm_handler.strategies_vector: print(f'{strategy.name}, computation_cost: {strategy.compute_cost}, memory_cost: {strategy.memory_cost}') - + Output: RS0 = RS0 x S0, computation_cost: 131072, memory_cost: 524288.0 RS1 = RS1 x S1, computation_cost: 131072, memory_cost: 524288.0 From d42aecdda1f81f3777bc4c5788df39ef2ec2e04f Mon Sep 17 00:00:00 2001 From: Shawn-Kong Date: Thu, 5 Jan 2023 23:47:10 -0800 Subject: [PATCH 6/6] [NFC] polish colossalai/auto_parallel/tensor_shard/deprecated/op_handler/embedding_handler.py code style (#2368) --- .../deprecated/op_handler/embedding_handler.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/embedding_handler.py b/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/embedding_handler.py index d01a487ad..d3f51d489 100644 --- a/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/embedding_handler.py +++ b/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/embedding_handler.py @@ -5,9 +5,9 @@ from functools import reduce from typing import Dict, List import torch -from colossalai.auto_parallel.tensor_shard.deprecated._utils import \ - ignore_sharding_exception -from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import (ShardingStrategy, StrategiesVector) + +from colossalai.auto_parallel.tensor_shard.deprecated._utils import ignore_sharding_exception +from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector from colossalai.tensor.shape_consistency import ShapeConsistencyManager from colossalai.tensor.sharding_spec import ShardingSpec @@ -42,19 +42,19 @@ class EmbeddingHandler(OperatorHandler): Argument: sharding_size_forward(int): The forward activation will be divided into sharding_size_forward number partions. - sharding_size_backward_activation(int): The backward activation will + sharding_size_backward_activation(int): The backward activation will be divided into sharding_size_backward_activation number partions. sharding_size_weight(int): The backward weight will be divided into sharding_size_weight number partions. Return: - memory_cost(Tuple[float]): Memory cost per device with this + memory_cost(Tuple[float]): Memory cost per device with this specific strategy, the first element of this tuple is forward memory cost, and the second element of this tuple is backward memory cost. - memory_cost_forward(float): Memory cost of forward activation per + memory_cost_forward(float): Memory cost of forward activation per device with this specific strategy. - memory_cost_backward_activation(float): Memory cost of backward activation + memory_cost_backward_activation(float): Memory cost of backward activation per device with this specific strategy. ''' # compute the memory cost of this strategy