ColossalAI/colossalai/auto_parallel/solver/sharding_strategy.py

from dataclasses import dataclass
from colossalai.tensor.sharding_spec import ShardingSpec
from typing import Dict, List
from torch.fx.node import Node

__all__ = ['ShardingStrategy', 'StrategiesVector']


@dataclass
class ShardingStrategy:
    '''
    ShardingStrategy is a structure describing one sharding strategy of a node: the sharding
    specs of its inputs and output, plus the cost information used by the solver.

    Argument:
        name(str): express the sharding strategy as a string, such as 'S0S1 = S0R x RS1'.
        output_sharding_spec(ShardingSpec): ShardingSpec of the output node.
        compute_cost(float): Computation cost to complete this strategy.(default to 0)
        communication_cost(float): Communication cost to complete this strategy.(default to 0)
        memory_cost(float): Memory cost of the output node using this strategy.(default to 0)
        resharding_costs(Dict[int, List[float]]): resharding_costs[i][j] means the cost of transforming the i-th argument
                                                  in the output node argument list, using the j-th strategy in its
                                                  strategies_vector, to the sharding spec wanted in this
                                                  strategy.(default to None)
        input_shardings(List[ShardingSpec]): The ShardingSpecs of the input nodes.(default to None)
    '''

    name: str
    output_sharding_spec: ShardingSpec
    compute_cost: float = 0.
    communication_cost: float = 0.
    memory_cost: float = 0.
    # None means "not provided"; a None default (rather than {} / []) avoids a shared mutable default.
    resharding_costs: Dict[int, List[float]] = None
    # One ShardingSpec per input node, as documented above (was annotated as a single ShardingSpec).
    input_shardings: List[ShardingSpec] = None


class StrategiesVector(list):
    '''
    Each node in fx graph will have a corresponding StrategiesVector, to store all the possible
    strategies of the node. The vector itself is a plain list that starts out empty.

    Argument:
        node (Node): node for which the list of sharding strategies are generated.

    Attributes set on construction:
        node: the node passed in.
        predecessor_nodes: list of the nodes that are inputs to ``node``.
        successor_nodes: list of the nodes that use ``node``.
    '''

    def __init__(self, node: Node):
        super().__init__()
        self.node = node
        # Fetch its input nodes through the public ``all_input_nodes`` property instead of
        # reaching into the private ``_input_nodes`` mapping; list() keeps this an own copy.
        self.predecessor_nodes = list(node.all_input_nodes)
        # ``users`` is keyed by the nodes that consume this node; iterating yields those keys.
        self.successor_nodes = list(node.users)

    def check_merge(self):
        '''
        Placeholder: no merge check is implemented yet, so this does nothing and returns None.
        '''
        pass
[autoparallel] standardize the code structure (#1469) 2022-08-19 07:51:54 +00:00			`from dataclasses import dataclass`
			`from colossalai.tensor.sharding_spec import ShardingSpec`
			`from typing import Dict, List`
[autoparallel] integrate auto parallel with torch fx (#1479) 2022-08-23 06:23:08 +00:00			`from torch.fx.node import Node`

			`__all__ = ['ShardingStrategy', 'StrategiesVector']`
[autoparallel] standardize the code structure (#1469) 2022-08-19 07:51:54 +00:00

			`@dataclass`
[autoparallel] Add conv handler to generate strategies and costs info for conv (#1467) 2022-08-19 06:57:23 +00:00			`class ShardingStrategy:`
			`'''`
			`ShardingStrategy is a structure containing sharding strategies of inputs and output of this node`
			`and costs information using in solver.`

			`Argument:`
			`name(str): express the sharding strategies in string, such as 'S0S1 = S0R x RS1'.`
			`output_sharding_spec(ShardingSpec): ShardingSpec of the output node.`
			`compute_cost(float): Computation cost to complete this strategy.(default to 0)`
			`communication_cost(float): Communication cost to complete this strategy.(default to 0)`
			`memory_cost(float): Memory cost of the output node using this strategy.(default to 0)`
			`resharding_costs(Dict[int, List[float]]): resharding_cost[i][j] means the cost of i-th argument in the output node argument list`
			`with j-th strategy in its strategies_vector transforms to sharding spec wanted in this`
			`strategy.(default to None)`
			`input_shardings(List(ShardingSpec)): The ShardingSpecs of the input nodes.`
			`'''`

[autoparallel] standardize the code structure (#1469) 2022-08-19 07:51:54 +00:00			`name: str`
			`output_sharding_spec: ShardingSpec`
			`compute_cost: float = 0.`
			`communication_cost: float = 0.`
			`memory_cost: float = 0.`
			`resharding_costs: Dict[int, List[float]] = None`
			`input_shardings: ShardingSpec = None`
[autoparallel] Add conv handler to generate strategies and costs info for conv (#1467) 2022-08-19 06:57:23 +00:00

[autoparallel] integrate auto parallel with torch fx (#1479) 2022-08-23 06:23:08 +00:00			`class StrategiesVector(list):`
[autoparallel] Add conv handler to generate strategies and costs info for conv (#1467) 2022-08-19 06:57:23 +00:00			`'''`
			`Each node in fx graph will have a corresponding StrategiesVector, to store all the possible`
			`strategies of the node.`

			`Argument:`
[autoparallel] integrate auto parallel with torch fx (#1479) 2022-08-23 06:23:08 +00:00			`node (Node): node for which the list of sharding strategies are generated.`
[autoparallel] Add conv handler to generate strategies and costs info for conv (#1467) 2022-08-19 06:57:23 +00:00			`'''`

[autoparallel] integrate auto parallel with torch fx (#1479) 2022-08-23 06:23:08 +00:00			`def __init__(self, node: Node):`
			`super().__init__()`
[autoparallel] Add conv handler to generate strategies and costs info for conv (#1467) 2022-08-19 06:57:23 +00:00			`self.node = node`
[autoparallel] integrate auto parallel with torch fx (#1479) 2022-08-23 06:23:08 +00:00			`# fetch its input and output nodes`
			`self.predecessor_nodes = list(node._input_nodes.keys())`
[autoparallel] add cost graph class (#1481) * [autoparallel] add cost graph class * polish code 2022-08-25 09:19:59 +00:00			`self.successor_nodes = list(node.users.keys())`
[autoparallel] Add conv handler to generate strategies and costs info for conv (#1467) 2022-08-19 06:57:23 +00:00
			`def check_merge(self):`
			`pass`